diff -Nru mesa-17.2.4/aclocal.m4 mesa-17.3.3/aclocal.m4
--- mesa-17.2.4/aclocal.m4 2017-10-30 14:49:56.000000000 +0000
+++ mesa-17.3.3/aclocal.m4 2018-01-18 21:30:37.000000000 +0000
@@ -1524,6 +1524,7 @@
AC_SUBST([am__untar])
]) # _AM_PROG_TAR
+m4_include([m4/ax_check_compile_flag.m4])
m4_include([m4/ax_check_gnu_make.m4])
m4_include([m4/ax_check_python_mako_module.m4])
m4_include([m4/ax_gcc_builtin.m4])
diff -Nru mesa-17.2.4/bin/git_sha1_gen.py mesa-17.3.3/bin/git_sha1_gen.py
--- mesa-17.2.4/bin/git_sha1_gen.py 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/bin/git_sha1_gen.py 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,51 @@
+#!/usr/bin/env python
+
+"""
+Generate the contents of the git_sha1.h file.
+The output of this script goes to stdout.
+
+The SHA1 is read from the MESA_GIT_SHA1_OVERRIDE environment variable when
+that is set and from `git rev-parse HEAD` otherwise, then cut down to its
+first ten characters.  Nothing is written when the resulting SHA1 is empty.
+"""
+
+
+import os
+import os.path
+import subprocess
+import sys
+
+
+def get_git_sha1():
+    """Try to get the git SHA1 with git rev-parse.
+
+    Returns the decoded output of `git rev-parse HEAD`, or an empty string
+    when the command cannot be run or fails.
+    """
+    git_dir = os.path.join(os.path.dirname(sys.argv[0]), '..', '.git')
+    try:
+        # Open devnull in a with-block so the handle is closed again.
+        with open(os.devnull, 'w') as devnull:
+            git_sha1 = subprocess.check_output([
+                'git',
+                '--git-dir=' + git_dir,
+                'rev-parse',
+                'HEAD',
+            ], stderr=devnull).decode("ascii")
+    except Exception:
+        # don't print anything if it fails
+        # (Exception, not a bare except, so Ctrl-C still stops the script)
+        git_sha1 = ''
+    return git_sha1
+
+
+# Only spawn git when no override was supplied through the environment.
+git_sha1 = os.environ.get('MESA_GIT_SHA1_OVERRIDE')
+if git_sha1 is None:
+    git_sha1 = get_git_sha1()
+git_sha1 = git_sha1[:10]
+if git_sha1:
+    git_sha1_h_in_path = os.path.join(os.path.dirname(sys.argv[0]),
+                                      '..', 'src', 'git_sha1.h.in')
+    with open(git_sha1_h_in_path , 'r') as git_sha1_h_in:
+        sys.stdout.write(git_sha1_h_in.read().replace('@VCS_TAG@', git_sha1))
diff -Nru mesa-17.2.4/bin/git_sha1_gen.sh mesa-17.3.3/bin/git_sha1_gen.sh
--- mesa-17.2.4/bin/git_sha1_gen.sh 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/bin/git_sha1_gen.sh 1970-01-01 00:00:00.000000000 +0000
@@ -1,12 +0,0 @@
-#!/bin/sh
-
-# run git from the sources directory
-cd "$(dirname "$0")"
-
-# don't print anything if git fails
-if ! git_sha1=$(git --git-dir=../.git rev-parse --short=10 HEAD 2>/dev/null)
-then
- exit
-fi
-
-printf '#define MESA_GIT_SHA1 "git-%s"\n' "$git_sha1"
diff -Nru mesa-17.2.4/bin/install_megadrivers.py mesa-17.3.3/bin/install_megadrivers.py
--- mesa-17.2.4/bin/install_megadrivers.py 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/bin/install_megadrivers.py 2018-02-01 16:17:32.000000000 +0000
@@ -0,0 +1,66 @@
+#!/usr/bin/env python
+# encoding=utf-8
+# Copyright © 2017 Intel Corporation
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+"""Script to install megadriver symlinks for meson."""
+
+from __future__ import print_function
+import argparse
+import os
+import shutil
+
+
+def main():
+    """Install the megadriver under every requested driver name.
+
+    Positional arguments: the built megadriver file, the library directory
+    (joined onto MESON_INSTALL_DESTDIR_PREFIX) and one or more driver names.
+    The megadriver is copied into the library directory, hard-linked once
+    per driver name, and the temporary copy is removed again.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument('megadriver')
+    parser.add_argument('libdir')
+    parser.add_argument('drivers', nargs='+')
+    args = parser.parse_args()
+
+    # Index the environment directly: a missing variable then fails with a
+    # KeyError naming it, rather than os.path.join() choking on None.
+    to = os.path.join(os.environ['MESON_INSTALL_DESTDIR_PREFIX'], args.libdir)
+    master = os.path.join(to, os.path.basename(args.megadriver))
+
+    if not os.path.exists(to):
+        os.makedirs(to)
+    shutil.copy(args.megadriver, master)
+
+    for each in args.drivers:
+        driver = os.path.join(to, each)
+        # lexists() also catches dangling symlinks, which would otherwise
+        # make os.link() fail because the name is already taken.
+        if os.path.lexists(driver):
+            os.unlink(driver)
+        print('installing {} to {}'.format(args.megadriver, driver))
+        os.link(master, driver)
+    os.unlink(master)
+
+
+if __name__ == '__main__':
+    main()
diff -Nru mesa-17.2.4/build-support/conftest.dyn mesa-17.3.3/build-support/conftest.dyn
--- mesa-17.2.4/build-support/conftest.dyn 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/build-support/conftest.dyn 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,3 @@
+{
+ radeon_drm_winsys_create;
+};
diff -Nru mesa-17.2.4/build-support/conftest.map mesa-17.3.3/build-support/conftest.map
--- mesa-17.2.4/build-support/conftest.map 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/build-support/conftest.map 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,6 @@
+VERSION_1 {
+ global:
+ main;
+ local:
+ *;
+};
diff -Nru mesa-17.2.4/configure mesa-17.3.3/configure
--- mesa-17.2.4/configure 2017-10-30 14:49:57.000000000 +0000
+++ mesa-17.3.3/configure 2018-01-18 21:30:38.000000000 +0000
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for Mesa 17.2.4.
+# Generated by GNU Autoconf 2.69 for Mesa 17.3.3.
#
# Report bugs to <https://bugs.freedesktop.org/enter_bug.cgi?product=Mesa>.
#
@@ -591,8 +591,8 @@
# Identity of this package.
PACKAGE_NAME='Mesa'
PACKAGE_TARNAME='mesa'
-PACKAGE_VERSION='17.2.4'
-PACKAGE_STRING='Mesa 17.2.4'
+PACKAGE_VERSION='17.3.3'
+PACKAGE_STRING='Mesa 17.3.3'
PACKAGE_BUGREPORT='https://bugs.freedesktop.org/enter_bug.cgi?product=Mesa'
PACKAGE_URL=''
@@ -674,6 +674,8 @@
HAVE_OSMESA_TRUE
HAVE_LIBDRM_FALSE
HAVE_LIBDRM_TRUE
+USE_VC5_SIMULATOR_FALSE
+USE_VC5_SIMULATOR_TRUE
USE_VC4_SIMULATOR_FALSE
USE_VC4_SIMULATOR_TRUE
HAVE_GALLIUM_LLVM_FALSE
@@ -686,6 +688,8 @@
NEED_RADEON_DRM_WINSYS_TRUE
HAVE_INTEL_DRIVERS_FALSE
HAVE_INTEL_DRIVERS_TRUE
+HAVE_BROADCOM_DRIVERS_FALSE
+HAVE_BROADCOM_DRIVERS_TRUE
HAVE_AMD_DRIVERS_FALSE
HAVE_AMD_DRIVERS_TRUE
HAVE_INTEL_VULKAN_FALSE
@@ -709,6 +713,8 @@
HAVE_GALLIUM_STATIC_TARGETS_TRUE
HAVE_GALLIUM_VIRGL_FALSE
HAVE_GALLIUM_VIRGL_TRUE
+HAVE_GALLIUM_VC5_FALSE
+HAVE_GALLIUM_VC5_TRUE
HAVE_GALLIUM_VC4_FALSE
HAVE_GALLIUM_VC4_TRUE
HAVE_GALLIUM_SWRAST_FALSE
@@ -751,6 +757,8 @@
HAVE_SWR_AVX_TRUE
SWR_INVALID_LLVM_VERSION_FALSE
SWR_INVALID_LLVM_VERSION_TRUE
+VC5_SIMULATOR_LIBS
+VC5_SIMULATOR_CFLAGS
SIMPENROSE_LIBS
SIMPENROSE_CFLAGS
SWR_SKX_CXXFLAGS
@@ -766,7 +774,7 @@
NOUVEAU_CFLAGS
D3D_DRIVER_INSTALL_DIR
VA_LIB_INSTALL_DIR
-OMX_LIB_INSTALL_DIR
+OMX_BELLAGIO_LIB_INSTALL_DIR
VDPAU_LIB_INSTALL_DIR
HAVE_GALLIUM_TESTS_FALSE
HAVE_GALLIUM_TESTS_TRUE
@@ -790,10 +798,10 @@
HAVE_ST_VA_TRUE
VA_LIBS
VA_CFLAGS
-HAVE_ST_OMX_FALSE
-HAVE_ST_OMX_TRUE
-OMX_LIBS
-OMX_CFLAGS
+HAVE_ST_OMX_BELLAGIO_FALSE
+HAVE_ST_OMX_BELLAGIO_TRUE
+OMX_BELLAGIO_LIBS
+OMX_BELLAGIO_CFLAGS
HAVE_ST_VDPAU_FALSE
HAVE_ST_VDPAU_TRUE
VDPAU_LIBS
@@ -854,8 +862,10 @@
ANDROID_CFLAGS
XCB_DRI2_LIBS
XCB_DRI2_CFLAGS
-WAYLAND_LIBS
-WAYLAND_CFLAGS
+WAYLAND_SERVER_LIBS
+WAYLAND_SERVER_CFLAGS
+WAYLAND_CLIENT_LIBS
+WAYLAND_CLIENT_CFLAGS
WAYLAND_PROTOCOLS_DATADIR
WAYLAND_SCANNER
WAYLAND_SCANNER_LIBS
@@ -975,6 +985,7 @@
SSE41_CFLAGS
SSE41_SUPPORTED_FALSE
SSE41_SUPPORTED_TRUE
+WNO_OVERRIDE_INIT
VISIBILITY_CXXFLAGS
VISIBILITY_CFLAGS
MSVC2013_COMPAT_CXXFLAGS
@@ -1173,6 +1184,7 @@
enable_xvmc
enable_vdpau
enable_omx
+enable_omx_bellagio
enable_va
enable_opencl
enable_opencl_icd
@@ -1199,7 +1211,7 @@
with_clang_libdir
with_xvmc_libdir
with_vdpau_libdir
-with_omx_libdir
+with_omx_bellagio_libdir
with_va_libdir
with_d3d_libdir
with_swr_archs
@@ -1252,8 +1264,10 @@
DRIGL_LIBS
WAYLAND_SCANNER_CFLAGS
WAYLAND_SCANNER_LIBS
-WAYLAND_CFLAGS
-WAYLAND_LIBS
+WAYLAND_CLIENT_CFLAGS
+WAYLAND_CLIENT_LIBS
+WAYLAND_SERVER_CFLAGS
+WAYLAND_SERVER_LIBS
XCB_DRI2_CFLAGS
XCB_DRI2_LIBS
ANDROID_CFLAGS
@@ -1276,8 +1290,8 @@
XVMC_LIBS
VDPAU_CFLAGS
VDPAU_LIBS
-OMX_CFLAGS
-OMX_LIBS
+OMX_BELLAGIO_CFLAGS
+OMX_BELLAGIO_LIBS
VA_CFLAGS
VA_LIBS
NOUVEAU_CFLAGS
@@ -1288,6 +1302,8 @@
ETNAVIV_LIBS
SIMPENROSE_CFLAGS
SIMPENROSE_LIBS
+VC5_SIMULATOR_CFLAGS
+VC5_SIMULATOR_LIBS
VALGRIND_CFLAGS
VALGRIND_LIBS'
@@ -1840,7 +1856,7 @@
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures Mesa 17.2.4 to adapt to many kinds of systems.
+\`configure' configures Mesa 17.3.3 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1912,7 +1928,7 @@
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of Mesa 17.2.4:";;
+ short | recursive ) echo "Configuration of Mesa 17.3.3:";;
esac
cat <<\_ACEOF
@@ -1971,7 +1987,9 @@
--enable-nine enable build of the nine Direct3D9 API [default=no]
--enable-xvmc enable xvmc library [default=auto]
--enable-vdpau enable vdpau library [default=auto]
- --enable-omx enable OpenMAX library [default=disabled]
+ --enable-omx DEPRECATED: Use --enable-omx-bellagio instead
+ [default=auto]
+ --enable-omx-bellagio enable OpenMAX Bellagio library [default=disabled]
--enable-va enable va library [default=auto]
--enable-opencl enable OpenCL library [default=disabled]
--enable-opencl-icd Build an OpenCL ICD library to be loaded by an ICD
@@ -2007,7 +2025,7 @@
locations
--with-gallium-drivers[=DIRS...]
comma delimited Gallium drivers list, e.g.
- "i915,nouveau,r300,r600,radeonsi,freedreno,pl111,svga,swrast,swr,vc4,virgl,etnaviv,imx"
+ "i915,nouveau,r300,r600,radeonsi,freedreno,pl111,svga,swrast,swr,vc4,vc5,virgl,etnaviv,imx"
[default=r300,r600,svga,swrast]
--with-gl-lib-name[=NAME]
specify GL library name [default=GL]
@@ -2042,7 +2060,8 @@
--with-xvmc-libdir=DIR directory for the XVMC libraries [default=${libdir}]
--with-vdpau-libdir=DIR directory for the VDPAU libraries
[default=${libdir}/vdpau]
- --with-omx-libdir=DIR directory for the OMX libraries
+ --with-omx-bellagio-libdir=DIR
+ directory for the OMX_BELLAGIO libraries
--with-va-libdir=DIR directory for the VA libraries [${libdir}/dri]
--with-d3d-libdir=DIR directory for the D3D modules [${libdir}/d3d]
--with-swr-archs[=DIRS...]
@@ -2121,10 +2140,14 @@
C compiler flags for WAYLAND_SCANNER, overriding pkg-config
WAYLAND_SCANNER_LIBS
linker flags for WAYLAND_SCANNER, overriding pkg-config
- WAYLAND_CFLAGS
- C compiler flags for WAYLAND, overriding pkg-config
- WAYLAND_LIBS
- linker flags for WAYLAND, overriding pkg-config
+ WAYLAND_CLIENT_CFLAGS
+ C compiler flags for WAYLAND_CLIENT, overriding pkg-config
+ WAYLAND_CLIENT_LIBS
+ linker flags for WAYLAND_CLIENT, overriding pkg-config
+ WAYLAND_SERVER_CFLAGS
+ C compiler flags for WAYLAND_SERVER, overriding pkg-config
+ WAYLAND_SERVER_LIBS
+ linker flags for WAYLAND_SERVER, overriding pkg-config
XCB_DRI2_CFLAGS
C compiler flags for XCB_DRI2, overriding pkg-config
XCB_DRI2_LIBS
@@ -2159,8 +2182,10 @@
VDPAU_CFLAGS
C compiler flags for VDPAU, overriding pkg-config
VDPAU_LIBS linker flags for VDPAU, overriding pkg-config
- OMX_CFLAGS C compiler flags for OMX, overriding pkg-config
- OMX_LIBS linker flags for OMX, overriding pkg-config
+ OMX_BELLAGIO_CFLAGS
+ C compiler flags for OMX_BELLAGIO, overriding pkg-config
+ OMX_BELLAGIO_LIBS
+ linker flags for OMX_BELLAGIO, overriding pkg-config
VA_CFLAGS C compiler flags for VA, overriding pkg-config
VA_LIBS linker flags for VA, overriding pkg-config
NOUVEAU_CFLAGS
@@ -2179,6 +2204,10 @@
C compiler flags for SIMPENROSE, overriding pkg-config
SIMPENROSE_LIBS
linker flags for SIMPENROSE, overriding pkg-config
+ VC5_SIMULATOR_CFLAGS
+ C compiler flags for VC5_SIMULATOR, overriding pkg-config
+ VC5_SIMULATOR_LIBS
+ linker flags for VC5_SIMULATOR, overriding pkg-config
VALGRIND_CFLAGS
C compiler flags for VALGRIND, overriding pkg-config
VALGRIND_LIBS
@@ -2252,7 +2281,7 @@
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-Mesa configure 17.2.4
+Mesa configure 17.3.3
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2971,7 +3000,7 @@
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by Mesa $as_me 17.2.4, which was
+It was created by Mesa $as_me 17.3.3, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@@ -3946,7 +3975,7 @@
# Define the identity of the package.
PACKAGE='mesa'
- VERSION='17.2.4'
+ VERSION='17.3.3'
cat >>confdefs.h <<_ACEOF
@@ -5430,7 +5459,7 @@
# in the first entry.
LIBDRM_REQUIRED=2.4.75
LIBDRM_RADEON_REQUIRED=2.4.71
-LIBDRM_AMDGPU_REQUIRED=2.4.82
+LIBDRM_AMDGPU_REQUIRED=2.4.85
LIBDRM_INTEL_REQUIRED=2.4.75
LIBDRM_NVVIEUX_REQUIRED=2.4.66
LIBDRM_NOUVEAU_REQUIRED=2.4.66
@@ -5455,7 +5484,7 @@
ZLIB_REQUIRED=1.2.3
LLVM_REQUIRED_GALLIUM=3.3.0
-LLVM_REQUIRED_OPENCL=3.6.0
+LLVM_REQUIRED_OPENCL=3.9.0
LLVM_REQUIRED_R600=3.9.0
LLVM_REQUIRED_RADEONSI=3.9.0
LLVM_REQUIRED_RADV=3.9.0
@@ -20155,25 +20184,15 @@
fi
-if test "x$GCC" = xyes; then
- CFLAGS="$CFLAGS -Wall"
-
- if test "x$USE_GNU99" = xyes; then
- CFLAGS="$CFLAGS -std=gnu99"
- else
- CFLAGS="$CFLAGS -std=c99"
- fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -Wall" >&5
+$as_echo_n "checking whether C compiler accepts -Wall... " >&6; }
+if ${ax_cv_check_cflags___Wall+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
- # Enable -Werror=implicit-function-declaration and
- # -Werror=missing-prototypes, if available, or otherwise, just
- # -Wmissing-prototypes. This is particularly useful to avoid
- # generating a loadable driver module that has undefined symbols.
- save_CFLAGS="$CFLAGS"
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC supports -Werror=missing-prototypes" >&5
-$as_echo_n "checking whether $CC supports -Werror=missing-prototypes... " >&6; }
- CFLAGS="$CFLAGS -Werror=implicit-function-declaration"
- CFLAGS="$CFLAGS -Werror=missing-prototypes"
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ ax_check_save_flags=$CFLAGS
+ CFLAGS="$CFLAGS -Wall"
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
int
@@ -20184,24 +20203,31 @@
return 0;
}
_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
-$as_echo "yes" >&6; }
+if ac_fn_c_try_compile "$LINENO"; then :
+ ax_cv_check_cflags___Wall=yes
else
- CFLAGS="$save_CFLAGS -Wmissing-prototypes";
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
+ ax_cv_check_cflags___Wall=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ CFLAGS=$ax_check_save_flags
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___Wall" >&5
+$as_echo "$ax_cv_check_cflags___Wall" >&6; }
+if test x"$ax_cv_check_cflags___Wall" = xyes; then :
+ CFLAGS="$CFLAGS -Wall"
+else
+ :
fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
- # Enable -fvisibility=hidden if using a gcc that supports it
- save_CFLAGS="$CFLAGS"
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC supports -fvisibility=hidden" >&5
-$as_echo_n "checking whether $CC supports -fvisibility=hidden... " >&6; }
- VISIBILITY_CFLAGS="-fvisibility=hidden"
- CFLAGS="$CFLAGS $VISIBILITY_CFLAGS"
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -Werror=implicit-function-declaration" >&5
+$as_echo_n "checking whether C compiler accepts -Werror=implicit-function-declaration... " >&6; }
+if ${ax_cv_check_cflags___Werror_implicit_function_declaration+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+
+ ax_check_save_flags=$CFLAGS
+ CFLAGS="$CFLAGS -Werror=implicit-function-declaration"
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
int
@@ -20212,37 +20238,31 @@
return 0;
}
_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
-$as_echo "yes" >&6; }
+if ac_fn_c_try_compile "$LINENO"; then :
+ ax_cv_check_cflags___Werror_implicit_function_declaration=yes
else
- VISIBILITY_CFLAGS=""; { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
+ ax_cv_check_cflags___Werror_implicit_function_declaration=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ CFLAGS=$ax_check_save_flags
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___Werror_implicit_function_declaration" >&5
+$as_echo "$ax_cv_check_cflags___Werror_implicit_function_declaration" >&6; }
+if test x"$ax_cv_check_cflags___Werror_implicit_function_declaration" = xyes; then :
+ CFLAGS="$CFLAGS -Werror=implicit-function-declaration"
+else
+ :
fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
-
- # Restore CFLAGS; VISIBILITY_CFLAGS are added to it where needed.
- CFLAGS=$save_CFLAGS
- # We don't want floating-point math functions to set errno or trap
- CFLAGS="$CFLAGS -fno-math-errno -fno-trapping-math"
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -Werror=missing-prototypes" >&5
+$as_echo_n "checking whether C compiler accepts -Werror=missing-prototypes... " >&6; }
+if ${ax_cv_check_cflags___Werror_missing_prototypes+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
- # Flags to help ensure that certain portions of the code -- and only those
- # portions -- can be built with MSVC:
- # - src/util, src/gallium/auxiliary, rc/gallium/drivers/llvmpipe, and
- # - non-Linux/Posix OpenGL portions needs to build on MSVC 2013 (which
- # supports most of C99)
- # - the rest has no compiler compiler restrictions
- MSVC2013_COMPAT_CFLAGS="-Werror=pointer-arith"
- MSVC2013_COMPAT_CXXFLAGS="-Werror=pointer-arith"
-
- # Enable -Werror=vla if compiler supports it
- save_CFLAGS="$CFLAGS"
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC supports -Werror=vla" >&5
-$as_echo_n "checking whether $CC supports -Werror=vla... " >&6; }
- CFLAGS="$CFLAGS -Werror=vla"
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ ax_check_save_flags=$CFLAGS
+ CFLAGS="$CFLAGS -Werror=missing-prototypes"
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
int
@@ -20253,35 +20273,31 @@
return 0;
}
_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
- MSVC2013_COMPAT_CFLAGS="$MSVC2013_COMPAT_CFLAGS -Werror=vla";
- MSVC2013_COMPAT_CXXFLAGS="$MSVC2013_COMPAT_CXXFLAGS -Werror=vla";
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
-$as_echo "yes" >&6; }
+if ac_fn_c_try_compile "$LINENO"; then :
+ ax_cv_check_cflags___Werror_missing_prototypes=yes
else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
+ ax_cv_check_cflags___Werror_missing_prototypes=no
fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
- CFLAGS="$save_CFLAGS"
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ CFLAGS=$ax_check_save_flags
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___Werror_missing_prototypes" >&5
+$as_echo "$ax_cv_check_cflags___Werror_missing_prototypes" >&6; }
+if test x"$ax_cv_check_cflags___Werror_missing_prototypes" = xyes; then :
+ CFLAGS="$CFLAGS -Werror=missing-prototypes"
+else
+ :
fi
-if test "x$GXX" = xyes; then
- CXXFLAGS="$CXXFLAGS -Wall"
- # Enable -fvisibility=hidden if using a gcc that supports it
- save_CXXFLAGS="$CXXFLAGS"
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CXX supports -fvisibility=hidden" >&5
-$as_echo_n "checking whether $CXX supports -fvisibility=hidden... " >&6; }
- VISIBILITY_CXXFLAGS="-fvisibility=hidden"
- CXXFLAGS="$CXXFLAGS $VISIBILITY_CXXFLAGS"
- ac_ext=cpp
-ac_cpp='$CXXCPP $CPPFLAGS'
-ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
-ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
-ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -Wmissing-prototypes" >&5
+$as_echo_n "checking whether C compiler accepts -Wmissing-prototypes... " >&6; }
+if ${ax_cv_check_cflags___Wmissing_prototypes+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ ax_check_save_flags=$CFLAGS
+ CFLAGS="$CFLAGS -Wmissing-prototypes"
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
int
@@ -20292,162 +20308,653 @@
return 0;
}
_ACEOF
-if ac_fn_cxx_try_link "$LINENO"; then :
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
-$as_echo "yes" >&6; }
+if ac_fn_c_try_compile "$LINENO"; then :
+ ax_cv_check_cflags___Wmissing_prototypes=yes
else
- VISIBILITY_CXXFLAGS="" ; { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
+ ax_cv_check_cflags___Wmissing_prototypes=no
fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
- ac_ext=c
-ac_cpp='$CPP $CPPFLAGS'
-ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
-ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
-ac_compiler_gnu=$ac_cv_c_compiler_gnu
-
-
- # Restore CXXFLAGS; VISIBILITY_CXXFLAGS are added to it where needed.
- CXXFLAGS=$save_CXXFLAGS
-
- # We don't want floating-point math functions to set errno or trap
- CXXFLAGS="$CXXFLAGS -fno-math-errno -fno-trapping-math"
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ CFLAGS=$ax_check_save_flags
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___Wmissing_prototypes" >&5
+$as_echo "$ax_cv_check_cflags___Wmissing_prototypes" >&6; }
+if test x"$ax_cv_check_cflags___Wmissing_prototypes" = xyes; then :
+ CFLAGS="$CFLAGS -Wmissing-prototypes"
+else
+ :
fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -fno-math-errno" >&5
+$as_echo_n "checking whether C compiler accepts -fno-math-errno... " >&6; }
+if ${ax_cv_check_cflags___fno_math_errno+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ ax_check_save_flags=$CFLAGS
+ CFLAGS="$CFLAGS -fno-math-errno"
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+int
+main ()
+{
-case "$host_os" in
-cygwin*)
- VISIBILITY_CFLAGS=""
- VISIBILITY_CXXFLAGS=""
- ;;
-esac
-
-
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ ax_cv_check_cflags___fno_math_errno=yes
+else
+ ax_cv_check_cflags___fno_math_errno=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ CFLAGS=$ax_check_save_flags
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___fno_math_errno" >&5
+$as_echo "$ax_cv_check_cflags___fno_math_errno" >&6; }
+if test x"$ax_cv_check_cflags___fno_math_errno" = xyes; then :
+ CFLAGS="$CFLAGS -fno-math-errno"
+else
+ :
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -fno-trapping-math" >&5
+$as_echo_n "checking whether C compiler accepts -fno-trapping-math... " >&6; }
+if ${ax_cv_check_cflags___fno_trapping_math+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
-SSE41_CFLAGS="-msse4.1"
-case "$target_cpu" in
-i?86)
- SSE41_CFLAGS="$SSE41_CFLAGS -mstackrealign"
- ;;
-esac
-save_CFLAGS="$CFLAGS"
-CFLAGS="$SSE41_CFLAGS $CFLAGS"
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ ax_check_save_flags=$CFLAGS
+ CFLAGS="$CFLAGS -fno-trapping-math"
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
-#include <smmintrin.h>
-int param;
-int main () {
- __m128i a = _mm_set1_epi32 (param), b = _mm_set1_epi32 (param + 1), c;
- c = _mm_max_epu32(a, b);
- return _mm_cvtsi128_si32(c);
+int
+main ()
+{
+
+ ;
+ return 0;
}
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
- SSE41_SUPPORTED=1
+ ax_cv_check_cflags___fno_trapping_math=yes
+else
+ ax_cv_check_cflags___fno_trapping_math=no
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-CFLAGS="$save_CFLAGS"
-if test "x$SSE41_SUPPORTED" = x1; then
- DEFINES="$DEFINES -DUSE_SSE41"
+ CFLAGS=$ax_check_save_flags
fi
- if test x$SSE41_SUPPORTED = x1; then
- SSE41_SUPPORTED_TRUE=
- SSE41_SUPPORTED_FALSE='#'
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___fno_trapping_math" >&5
+$as_echo "$ax_cv_check_cflags___fno_trapping_math" >&6; }
+if test x"$ax_cv_check_cflags___fno_trapping_math" = xyes; then :
+ CFLAGS="$CFLAGS -fno-trapping-math"
else
- SSE41_SUPPORTED_TRUE='#'
- SSE41_SUPPORTED_FALSE=
+ :
fi
-SSE41_CFLAGS=$SSE41_CFLAGS
-
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -fvisibility=hidden" >&5
+$as_echo_n "checking whether C compiler accepts -fvisibility=hidden... " >&6; }
+if ${ax_cv_check_cflags___fvisibility_hidden+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ ax_check_save_flags=$CFLAGS
+ CFLAGS="$CFLAGS -fvisibility=hidden"
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
-int main() {
- int n;
- return __atomic_load_n(&n, __ATOMIC_ACQUIRE);
+int
+main ()
+{
+
+ ;
+ return 0;
}
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
- GCC_ATOMIC_BUILTINS_SUPPORTED=1
+ ax_cv_check_cflags___fvisibility_hidden=yes
+else
+ ax_cv_check_cflags___fvisibility_hidden=no
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-if test "x$GCC_ATOMIC_BUILTINS_SUPPORTED" = x1; then
- DEFINES="$DEFINES -DUSE_GCC_ATOMIC_BUILTINS"
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether -latomic is needed" >&5
-$as_echo_n "checking whether -latomic is needed... " >&6; }
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ CFLAGS=$ax_check_save_flags
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___fvisibility_hidden" >&5
+$as_echo "$ax_cv_check_cflags___fvisibility_hidden" >&6; }
+if test x"$ax_cv_check_cflags___fvisibility_hidden" = xyes; then :
+ VISIBILITY_CFLAGS="-fvisibility=hidden"
+else
+ :
+fi
+
+
+ac_ext=cpp
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C++ compiler accepts -Wall" >&5
+$as_echo_n "checking whether C++ compiler accepts -Wall... " >&6; }
+if ${ax_cv_check_cxxflags___Wall+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+
+ ax_check_save_flags=$CXXFLAGS
+ CXXFLAGS="$CXXFLAGS -Wall"
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
- #include <stdint.h>
- uint64_t v;
- int main() {
- return (int)__atomic_load_n(&v, __ATOMIC_ACQUIRE);
- }
+int
+main ()
+{
+
+ ;
+ return 0;
+}
_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
- GCC_ATOMIC_BUILTINS_NEED_LIBATOMIC=no
+if ac_fn_cxx_try_compile "$LINENO"; then :
+ ax_cv_check_cxxflags___Wall=yes
else
- GCC_ATOMIC_BUILTINS_NEED_LIBATOMIC=yes
+ ax_cv_check_cxxflags___Wall=no
fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $GCC_ATOMIC_BUILTINS_NEED_LIBATOMIC" >&5
-$as_echo "$GCC_ATOMIC_BUILTINS_NEED_LIBATOMIC" >&6; }
- if test "x$GCC_ATOMIC_BUILTINS_NEED_LIBATOMIC" = xyes; then
- LIBATOMIC_LIBS="-latomic"
- fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ CXXFLAGS=$ax_check_save_flags
fi
- if test x$GCC_ATOMIC_BUILTINS_SUPPORTED = x1; then
- GCC_ATOMIC_BUILTINS_SUPPORTED_TRUE=
- GCC_ATOMIC_BUILTINS_SUPPORTED_FALSE='#'
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cxxflags___Wall" >&5
+$as_echo "$ax_cv_check_cxxflags___Wall" >&6; }
+if test x"$ax_cv_check_cxxflags___Wall" = xyes; then :
+ CXXFLAGS="$CXXFLAGS -Wall"
else
- GCC_ATOMIC_BUILTINS_SUPPORTED_TRUE='#'
- GCC_ATOMIC_BUILTINS_SUPPORTED_FALSE=
+ :
fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C++ compiler accepts -fno-math-errno" >&5
+$as_echo_n "checking whether C++ compiler accepts -fno-math-errno... " >&6; }
+if ${ax_cv_check_cxxflags___fno_math_errno+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether __sync_add_and_fetch_8 is supported" >&5
-$as_echo_n "checking whether __sync_add_and_fetch_8 is supported... " >&6; }
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ ax_check_save_flags=$CXXFLAGS
+ CXXFLAGS="$CXXFLAGS -fno-math-errno"
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
-#include <stdint.h>
-uint64_t v;
-int main() {
- return __sync_add_and_fetch(&v, (uint64_t)1);
+int
+main ()
+{
+
+ ;
+ return 0;
}
_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
- GCC_64BIT_ATOMICS_SUPPORTED=yes
+if ac_fn_cxx_try_compile "$LINENO"; then :
+ ax_cv_check_cxxflags___fno_math_errno=yes
else
- GCC_64BIT_ATOMICS_SUPPORTED=no
+ ax_cv_check_cxxflags___fno_math_errno=no
fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
-if test "x$GCC_64BIT_ATOMICS_SUPPORTED" != xyes; then
- DEFINES="$DEFINES -DMISSING_64BIT_ATOMICS"
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ CXXFLAGS=$ax_check_save_flags
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cxxflags___fno_math_errno" >&5
+$as_echo "$ax_cv_check_cxxflags___fno_math_errno" >&6; }
+if test x"$ax_cv_check_cxxflags___fno_math_errno" = xyes; then :
+ CXXFLAGS="$CXXFLAGS -fno-math-errno"
+else
+ :
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $GCC_64BIT_ATOMICS_SUPPORTED" >&5
-$as_echo "$GCC_64BIT_ATOMICS_SUPPORTED" >&6; }
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether byte ordering is bigendian" >&5
-$as_echo_n "checking whether byte ordering is bigendian... " >&6; }
-if ${ac_cv_c_bigendian+:} false; then :
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C++ compiler accepts -fno-trapping-math" >&5
+$as_echo_n "checking whether C++ compiler accepts -fno-trapping-math... " >&6; }
+if ${ax_cv_check_cxxflags___fno_trapping_math+:} false; then :
$as_echo_n "(cached) " >&6
else
- ac_cv_c_bigendian=unknown
- # See if we're dealing with a universal compiler.
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+
+ ax_check_save_flags=$CXXFLAGS
+ CXXFLAGS="$CXXFLAGS -fno-trapping-math"
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
-#ifndef __APPLE_CC__
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_compile "$LINENO"; then :
+ ax_cv_check_cxxflags___fno_trapping_math=yes
+else
+ ax_cv_check_cxxflags___fno_trapping_math=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ CXXFLAGS=$ax_check_save_flags
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cxxflags___fno_trapping_math" >&5
+$as_echo "$ax_cv_check_cxxflags___fno_trapping_math" >&6; }
+if test x"$ax_cv_check_cxxflags___fno_trapping_math" = xyes; then :
+ CXXFLAGS="$CXXFLAGS -fno-trapping-math"
+else
+ :
+fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C++ compiler accepts -fvisibility=hidden" >&5
+$as_echo_n "checking whether C++ compiler accepts -fvisibility=hidden... " >&6; }
+if ${ax_cv_check_cxxflags___fvisibility_hidden+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+
+ ax_check_save_flags=$CXXFLAGS
+ CXXFLAGS="$CXXFLAGS -fvisibility=hidden"
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_compile "$LINENO"; then :
+ ax_cv_check_cxxflags___fvisibility_hidden=yes
+else
+ ax_cv_check_cxxflags___fvisibility_hidden=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ CXXFLAGS=$ax_check_save_flags
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cxxflags___fvisibility_hidden" >&5
+$as_echo "$ax_cv_check_cxxflags___fvisibility_hidden" >&6; }
+if test x"$ax_cv_check_cxxflags___fvisibility_hidden" = xyes; then :
+ VISIBILITY_CXXFLAGS="-fvisibility=hidden"
+else
+ :
+fi
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+# Flags to help ensure that certain portions of the code -- and only those
+# portions -- can be built with MSVC:
+# - src/util, src/gallium/auxiliary, src/gallium/drivers/llvmpipe, and
+# - non-Linux/Posix OpenGL portions need to build on MSVC 2013 (which
+# supports most of C99)
+# - the rest has no compiler restrictions
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -Werror=pointer-arith" >&5
+$as_echo_n "checking whether C compiler accepts -Werror=pointer-arith... " >&6; }
+if ${ax_cv_check_cflags___Werror_pointer_arith+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+
+ ax_check_save_flags=$CFLAGS
+ CFLAGS="$CFLAGS -Werror=pointer-arith"
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ ax_cv_check_cflags___Werror_pointer_arith=yes
+else
+ ax_cv_check_cflags___Werror_pointer_arith=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ CFLAGS=$ax_check_save_flags
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___Werror_pointer_arith" >&5
+$as_echo "$ax_cv_check_cflags___Werror_pointer_arith" >&6; }
+if test x"$ax_cv_check_cflags___Werror_pointer_arith" = xyes; then :
+ MSVC2013_COMPAT_CFLAGS="-Werror=pointer-arith"
+else
+ :
+fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -Werror=vla" >&5
+$as_echo_n "checking whether C compiler accepts -Werror=vla... " >&6; }
+if ${ax_cv_check_cflags___Werror_vla+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+
+ ax_check_save_flags=$CFLAGS
+ CFLAGS="$CFLAGS -Werror=vla"
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ ax_cv_check_cflags___Werror_vla=yes
+else
+ ax_cv_check_cflags___Werror_vla=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ CFLAGS=$ax_check_save_flags
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___Werror_vla" >&5
+$as_echo "$ax_cv_check_cflags___Werror_vla" >&6; }
+if test x"$ax_cv_check_cflags___Werror_vla" = xyes; then : # C compiler accepts -Werror=vla
+ MSVC2013_COMPAT_CFLAGS="$MSVC2013_COMPAT_CFLAGS -Werror=vla" # append, so a -Werror=pointer-arith set by the preceding check is kept
+else
+ :
+fi
+
+ac_ext=cpp
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C++ compiler accepts -Werror=pointer-arith" >&5
+$as_echo_n "checking whether C++ compiler accepts -Werror=pointer-arith... " >&6; }
+if ${ax_cv_check_cxxflags___Werror_pointer_arith+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+
+ ax_check_save_flags=$CXXFLAGS
+ CXXFLAGS="$CXXFLAGS -Werror=pointer-arith"
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_compile "$LINENO"; then :
+ ax_cv_check_cxxflags___Werror_pointer_arith=yes
+else
+ ax_cv_check_cxxflags___Werror_pointer_arith=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ CXXFLAGS=$ax_check_save_flags
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cxxflags___Werror_pointer_arith" >&5
+$as_echo "$ax_cv_check_cxxflags___Werror_pointer_arith" >&6; }
+if test x"$ax_cv_check_cxxflags___Werror_pointer_arith" = xyes; then :
+ MSVC2013_COMPAT_CXXFLAGS="-Werror=pointer-arith"
+else
+ :
+fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C++ compiler accepts -Werror=vla" >&5
+$as_echo_n "checking whether C++ compiler accepts -Werror=vla... " >&6; }
+if ${ax_cv_check_cxxflags___Werror_vla+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+
+ ax_check_save_flags=$CXXFLAGS
+ CXXFLAGS="$CXXFLAGS -Werror=vla"
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_compile "$LINENO"; then :
+ ax_cv_check_cxxflags___Werror_vla=yes
+else
+ ax_cv_check_cxxflags___Werror_vla=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ CXXFLAGS=$ax_check_save_flags
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cxxflags___Werror_vla" >&5
+$as_echo "$ax_cv_check_cxxflags___Werror_vla" >&6; }
+if test x"$ax_cv_check_cxxflags___Werror_vla" = xyes; then : # C++ compiler accepts -Werror=vla
+ MSVC2013_COMPAT_CXXFLAGS="$MSVC2013_COMPAT_CXXFLAGS -Werror=vla" # append, so a -Werror=pointer-arith set by the preceding check is kept
+else
+ :
+fi
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+
+
+
+if test "x$GCC" = xyes; then
+ if test "x$USE_GNU99" = xyes; then
+ CFLAGS="$CFLAGS -std=gnu99"
+ else
+ CFLAGS="$CFLAGS -std=c99"
+ fi
+fi
+
+case "$host_os" in
+cygwin*)
+ VISIBILITY_CFLAGS=""
+ VISIBILITY_CXXFLAGS=""
+ ;;
+esac
+
+
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -Wno-override-init" >&5
+$as_echo_n "checking whether C compiler accepts -Wno-override-init... " >&6; }
+if ${ax_cv_check_cflags___Wno_override_init+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+
+ ax_check_save_flags=$CFLAGS
+ CFLAGS="$CFLAGS -Wno-override-init"
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ ax_cv_check_cflags___Wno_override_init=yes
+else
+ ax_cv_check_cflags___Wno_override_init=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ CFLAGS=$ax_check_save_flags
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___Wno_override_init" >&5
+$as_echo "$ax_cv_check_cflags___Wno_override_init" >&6; }
+if test x"$ax_cv_check_cflags___Wno_override_init" = xyes; then :
+ WNO_OVERRIDE_INIT="-Wno-override-init"
+else
+ :
+fi
+ # gcc
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -Wno-initializer-overrides" >&5
+$as_echo_n "checking whether C compiler accepts -Wno-initializer-overrides... " >&6; }
+if ${ax_cv_check_cflags___Wno_initializer_overrides+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+
+ ax_check_save_flags=$CFLAGS
+ CFLAGS="$CFLAGS -Wno-initializer-overrides"
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ ax_cv_check_cflags___Wno_initializer_overrides=yes
+else
+ ax_cv_check_cflags___Wno_initializer_overrides=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ CFLAGS=$ax_check_save_flags
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___Wno_initializer_overrides" >&5
+$as_echo "$ax_cv_check_cflags___Wno_initializer_overrides" >&6; }
+if test x"$ax_cv_check_cflags___Wno_initializer_overrides" = xyes; then :
+ WNO_OVERRIDE_INIT="-Wno-initializer-overrides"
+else
+ :
+fi
+ # clang
+
+
+SSE41_CFLAGS="-msse4.1"
+case "$target_cpu" in
+i?86)
+ SSE41_CFLAGS="$SSE41_CFLAGS -mstackrealign"
+ ;;
+esac
+save_CFLAGS="$CFLAGS"
+CFLAGS="$SSE41_CFLAGS $CFLAGS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+#include <smmintrin.h> /* SSE4.1 intrinsics header; declares _mm_max_epu32 used below */
+int param;
+int main () {
+ __m128i a = _mm_set1_epi32 (param), b = _mm_set1_epi32 (param + 1), c;
+ c = _mm_max_epu32(a, b);
+ return _mm_cvtsi128_si32(c);
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ SSE41_SUPPORTED=1
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+CFLAGS="$save_CFLAGS"
+if test "x$SSE41_SUPPORTED" = x1; then
+ DEFINES="$DEFINES -DUSE_SSE41"
+fi
+ if test x$SSE41_SUPPORTED = x1; then
+ SSE41_SUPPORTED_TRUE=
+ SSE41_SUPPORTED_FALSE='#'
+else
+ SSE41_SUPPORTED_TRUE='#'
+ SSE41_SUPPORTED_FALSE=
+fi
+
+SSE41_CFLAGS=$SSE41_CFLAGS
+
+
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int main() {
+ int n;
+ return __atomic_load_n(&n, __ATOMIC_ACQUIRE);
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ GCC_ATOMIC_BUILTINS_SUPPORTED=1
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+if test "x$GCC_ATOMIC_BUILTINS_SUPPORTED" = x1; then
+ DEFINES="$DEFINES -DUSE_GCC_ATOMIC_BUILTINS"
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether -latomic is needed" >&5
+$as_echo_n "checking whether -latomic is needed... " >&6; }
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+ #include <stdint.h> /* provides uint64_t for the 64-bit atomic load probe */
+ uint64_t v;
+ int main() {
+ return (int)__atomic_load_n(&v, __ATOMIC_ACQUIRE);
+ }
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ GCC_ATOMIC_BUILTINS_NEED_LIBATOMIC=no
+else
+ GCC_ATOMIC_BUILTINS_NEED_LIBATOMIC=yes
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $GCC_ATOMIC_BUILTINS_NEED_LIBATOMIC" >&5
+$as_echo "$GCC_ATOMIC_BUILTINS_NEED_LIBATOMIC" >&6; }
+ if test "x$GCC_ATOMIC_BUILTINS_NEED_LIBATOMIC" = xyes; then
+ LIBATOMIC_LIBS="-latomic"
+ fi
+fi
+ if test x$GCC_ATOMIC_BUILTINS_SUPPORTED = x1; then
+ GCC_ATOMIC_BUILTINS_SUPPORTED_TRUE=
+ GCC_ATOMIC_BUILTINS_SUPPORTED_FALSE='#'
+else
+ GCC_ATOMIC_BUILTINS_SUPPORTED_TRUE='#'
+ GCC_ATOMIC_BUILTINS_SUPPORTED_FALSE=
+fi
+
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether __sync_add_and_fetch_8 is supported" >&5
+$as_echo_n "checking whether __sync_add_and_fetch_8 is supported... " >&6; }
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+#include <stdint.h> /* provides uint64_t for the 64-bit __sync probe */
+uint64_t v;
+int main() {
+ return __sync_add_and_fetch(&v, (uint64_t)1);
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ GCC_64BIT_ATOMICS_SUPPORTED=yes
+else
+ GCC_64BIT_ATOMICS_SUPPORTED=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+if test "x$GCC_64BIT_ATOMICS_SUPPORTED" != xyes; then
+ DEFINES="$DEFINES -DMISSING_64BIT_ATOMICS"
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $GCC_64BIT_ATOMICS_SUPPORTED" >&5
+$as_echo "$GCC_64BIT_ATOMICS_SUPPORTED" >&6; }
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether byte ordering is bigendian" >&5
+$as_echo_n "checking whether byte ordering is bigendian... " >&6; }
+if ${ac_cv_c_bigendian+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ ac_cv_c_bigendian=unknown
+ # See if we're dealing with a universal compiler.
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#ifndef __APPLE_CC__
not a universal capable compiler
#endif
typedef int dummy;
@@ -20889,16 +21396,7 @@
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if the linker supports version-scripts" >&5
$as_echo_n "checking if the linker supports version-scripts... " >&6; }
save_LDFLAGS=$LDFLAGS
-LDFLAGS="$LDFLAGS -Wl,--version-script=conftest.map"
-cat > conftest.map <conftest.$ac_ext
/* end confdefs.h. */
int main() { return 0;}
@@ -20925,12 +21423,7 @@
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if the linker supports --dynamic-list" >&5
$as_echo_n "checking if the linker supports --dynamic-list... " >&6; }
save_LDFLAGS=$LDFLAGS
-LDFLAGS="$LDFLAGS -Wl,--dynamic-list=conftest.dyn"
-cat > conftest.dyn <conftest.$ac_ext
/* end confdefs.h. */
int main() { return 0;}
@@ -21189,6 +21682,11 @@
DEFINES="$DEFINES -DHAVE_MKOSTEMP"
fi
+ac_fn_c_check_func "$LINENO" "memfd_create" "ac_cv_func_memfd_create"
+if test "x$ac_cv_func_memfd_create" = xyes; then :
+ DEFINES="$DEFINES -DHAVE_MEMFD_CREATE"
+fi
+
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether strtod has locale support" >&5
$as_echo_n "checking whether strtod has locale support... " >&6; }
@@ -21224,7 +21722,7 @@
ac_fn_c_check_func "$LINENO" "dlopen" "ac_cv_func_dlopen"
if test "x$ac_cv_func_dlopen" = xyes; then :
- DEFINES="$DEFINES -DHAVE_DLOPEN"
+
else
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for dlopen in -ldl" >&5
$as_echo_n "checking for dlopen in -ldl... " >&6; }
@@ -21263,7 +21761,7 @@
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dl_dlopen" >&5
$as_echo "$ac_cv_lib_dl_dlopen" >&6; }
if test "x$ac_cv_lib_dl_dlopen" = xyes; then :
- DEFINES="$DEFINES -DHAVE_DLOPEN"; DLOPEN_LIBS="-ldl"
+ DLOPEN_LIBS="-ldl"
fi
fi
@@ -22502,6 +23000,15 @@
fi
}
+llvm_add_optional_component() { # append component $1 to LLVM_COMPONENTS only when llvm-config lists it
+ new_llvm_component=$1
+ driver_name=$2 # second argument is recorded but not read inside this function
+
+ if $LLVM_CONFIG --components | grep -iqw "$new_llvm_component" ; then # quoted so the name reaches grep as one unglobbed word
+ LLVM_COMPONENTS="${LLVM_COMPONENTS} ${new_llvm_component}"
+ fi
+}
+
llvm_add_default_components() {
driver_name=$1
@@ -22512,9 +23019,7 @@
llvm_add_component "mcjit" $driver_name
# Optional default components
- if $LLVM_CONFIG --components | grep -iqw inteljitevents ; then
- LLVM_COMPONENTS="$LLVM_COMPONENTS inteljitevents"
- fi
+ llvm_add_optional_component "inteljitevents" $driver_name
}
llvm_add_target() {
@@ -22939,9 +23444,14 @@
# Check whether --enable-omx was given.
if test "${enable_omx+set}" = set; then :
- enableval=$enable_omx; enable_omx="$enableval"
+ enableval=$enable_omx; as_fn_error $? "--enable-omx is deprecated. Use --enable-omx-bellagio instead." "$LINENO" 5
+fi
+
+# Check whether --enable-omx-bellagio was given.
+if test "${enable_omx_bellagio+set}" = set; then :
+ enableval=$enable_omx_bellagio; enable_omx_bellagio="$enableval"
else
- enable_omx=no
+ enable_omx_bellagio=no
fi
# Check whether --enable-va was given.
@@ -23002,7 +23512,7 @@
"x$enable_xa" = xno -a \
"x$enable_xvmc" = xno -a \
"x$enable_vdpau" = xno -a \
- "x$enable_omx" = xno -a \
+ "x$enable_omx_bellagio" = xno -a \
"x$enable_va" = xno -a \
"x$enable_opencl" = xno; then
as_fn_error $? "at least one API should be enabled" "$LINENO" 5
@@ -23162,15 +23672,10 @@
if test "x$enable_libglvnd" = xyes ; then
- case "x$enable_glx" in
- xno)
- as_fn_error $? "cannot build libglvnd without GLX" "$LINENO" 5
- ;;
+ case "x$enable_glx" in
xxlib | xgallium-xlib )
as_fn_error $? "cannot build libgvnd when Xlib-GLX or Gallium-Xlib-GLX is enabled" "$LINENO" 5
;;
- xdri)
- ;;
esac
@@ -23269,6 +23774,10 @@
DEFINES="${DEFINES} -DUSE_LIBGLVND=1"
DEFAULT_GL_LIB_NAME=GLX_mesa
+
+ if test "x$enable_glx" = xno -a "x$enable_egl" = xno; then
+ as_fn_error $? "cannot build libglvnd without GLX or EGL" "$LINENO" 5
+ fi
fi
@@ -24282,19 +24791,110 @@
pkg_failed=no
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for WAYLAND" >&5
-$as_echo_n "checking for WAYLAND... " >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for WAYLAND_CLIENT" >&5
+$as_echo_n "checking for WAYLAND_CLIENT... " >&6; }
+
+if test -n "$WAYLAND_CLIENT_CFLAGS"; then
+ pkg_cv_WAYLAND_CLIENT_CFLAGS="$WAYLAND_CLIENT_CFLAGS"
+ elif test -n "$PKG_CONFIG"; then
+ if test -n "$PKG_CONFIG" && \
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"wayland-client >= \$WAYLAND_REQUIRED\""; } >&5
+ ($PKG_CONFIG --exists --print-errors "wayland-client >= $WAYLAND_REQUIRED") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; then
+ pkg_cv_WAYLAND_CLIENT_CFLAGS=`$PKG_CONFIG --cflags "wayland-client >= $WAYLAND_REQUIRED" 2>/dev/null`
+ test "x$?" != "x0" && pkg_failed=yes
+else
+ pkg_failed=yes
+fi
+ else
+ pkg_failed=untried
+fi
+if test -n "$WAYLAND_CLIENT_LIBS"; then
+ pkg_cv_WAYLAND_CLIENT_LIBS="$WAYLAND_CLIENT_LIBS"
+ elif test -n "$PKG_CONFIG"; then
+ if test -n "$PKG_CONFIG" && \
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"wayland-client >= \$WAYLAND_REQUIRED\""; } >&5
+ ($PKG_CONFIG --exists --print-errors "wayland-client >= $WAYLAND_REQUIRED") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; then
+ pkg_cv_WAYLAND_CLIENT_LIBS=`$PKG_CONFIG --libs "wayland-client >= $WAYLAND_REQUIRED" 2>/dev/null`
+ test "x$?" != "x0" && pkg_failed=yes
+else
+ pkg_failed=yes
+fi
+ else
+ pkg_failed=untried
+fi
+
+
+
+if test $pkg_failed = yes; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then
+ _pkg_short_errors_supported=yes
+else
+ _pkg_short_errors_supported=no
+fi
+ if test $_pkg_short_errors_supported = yes; then
+ WAYLAND_CLIENT_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "wayland-client >= $WAYLAND_REQUIRED" 2>&1`
+ else
+ WAYLAND_CLIENT_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "wayland-client >= $WAYLAND_REQUIRED" 2>&1`
+ fi
+ # Put the nasty error message in config.log where it belongs
+ echo "$WAYLAND_CLIENT_PKG_ERRORS" >&5
+
+ as_fn_error $? "Package requirements (wayland-client >= $WAYLAND_REQUIRED) were not met:
+
+$WAYLAND_CLIENT_PKG_ERRORS
+
+Consider adjusting the PKG_CONFIG_PATH environment variable if you
+installed software in a non-standard prefix.
+
+Alternatively, you may set the environment variables WAYLAND_CLIENT_CFLAGS
+and WAYLAND_CLIENT_LIBS to avoid the need to call pkg-config.
+See the pkg-config man page for more details." "$LINENO" 5
+elif test $pkg_failed = untried; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} # log the failing directory, then abort with the explanation below
+as_fn_error $? "The pkg-config script could not be found or is too old. Make sure it
+is in your PATH or set the PKG_CONFIG environment variable to the full
+path to pkg-config.
+
+Alternatively, you may set the environment variables WAYLAND_CLIENT_CFLAGS
+and WAYLAND_CLIENT_LIBS to avoid the need to call pkg-config.
+See the pkg-config man page for more details.
+
+To get pkg-config, see <http://pkg-config.freedesktop.org/>.
+See \`config.log' for more details" "$LINENO" 5; }
+else
+ WAYLAND_CLIENT_CFLAGS=$pkg_cv_WAYLAND_CLIENT_CFLAGS
+ WAYLAND_CLIENT_LIBS=$pkg_cv_WAYLAND_CLIENT_LIBS
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+
+fi
+
+pkg_failed=no
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for WAYLAND_SERVER" >&5
+$as_echo_n "checking for WAYLAND_SERVER... " >&6; }
-if test -n "$WAYLAND_CFLAGS"; then
- pkg_cv_WAYLAND_CFLAGS="$WAYLAND_CFLAGS"
+if test -n "$WAYLAND_SERVER_CFLAGS"; then
+ pkg_cv_WAYLAND_SERVER_CFLAGS="$WAYLAND_SERVER_CFLAGS"
elif test -n "$PKG_CONFIG"; then
if test -n "$PKG_CONFIG" && \
- { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"wayland-client >= \$WAYLAND_REQUIRED wayland-server >= \$WAYLAND_REQUIRED\""; } >&5
- ($PKG_CONFIG --exists --print-errors "wayland-client >= $WAYLAND_REQUIRED wayland-server >= $WAYLAND_REQUIRED") 2>&5
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"wayland-server >= \$WAYLAND_REQUIRED\""; } >&5
+ ($PKG_CONFIG --exists --print-errors "wayland-server >= $WAYLAND_REQUIRED") 2>&5
ac_status=$?
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; }; then
- pkg_cv_WAYLAND_CFLAGS=`$PKG_CONFIG --cflags "wayland-client >= $WAYLAND_REQUIRED wayland-server >= $WAYLAND_REQUIRED" 2>/dev/null`
+ pkg_cv_WAYLAND_SERVER_CFLAGS=`$PKG_CONFIG --cflags "wayland-server >= $WAYLAND_REQUIRED" 2>/dev/null`
test "x$?" != "x0" && pkg_failed=yes
else
pkg_failed=yes
@@ -24302,16 +24902,16 @@
else
pkg_failed=untried
fi
-if test -n "$WAYLAND_LIBS"; then
- pkg_cv_WAYLAND_LIBS="$WAYLAND_LIBS"
+if test -n "$WAYLAND_SERVER_LIBS"; then
+ pkg_cv_WAYLAND_SERVER_LIBS="$WAYLAND_SERVER_LIBS"
elif test -n "$PKG_CONFIG"; then
if test -n "$PKG_CONFIG" && \
- { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"wayland-client >= \$WAYLAND_REQUIRED wayland-server >= \$WAYLAND_REQUIRED\""; } >&5
- ($PKG_CONFIG --exists --print-errors "wayland-client >= $WAYLAND_REQUIRED wayland-server >= $WAYLAND_REQUIRED") 2>&5
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"wayland-server >= \$WAYLAND_REQUIRED\""; } >&5
+ ($PKG_CONFIG --exists --print-errors "wayland-server >= $WAYLAND_REQUIRED") 2>&5
ac_status=$?
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; }; then
- pkg_cv_WAYLAND_LIBS=`$PKG_CONFIG --libs "wayland-client >= $WAYLAND_REQUIRED wayland-server >= $WAYLAND_REQUIRED" 2>/dev/null`
+ pkg_cv_WAYLAND_SERVER_LIBS=`$PKG_CONFIG --libs "wayland-server >= $WAYLAND_REQUIRED" 2>/dev/null`
test "x$?" != "x0" && pkg_failed=yes
else
pkg_failed=yes
@@ -24332,22 +24932,22 @@
_pkg_short_errors_supported=no
fi
if test $_pkg_short_errors_supported = yes; then
- WAYLAND_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "wayland-client >= $WAYLAND_REQUIRED wayland-server >= $WAYLAND_REQUIRED" 2>&1`
+ WAYLAND_SERVER_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "wayland-server >= $WAYLAND_REQUIRED" 2>&1`
else
- WAYLAND_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "wayland-client >= $WAYLAND_REQUIRED wayland-server >= $WAYLAND_REQUIRED" 2>&1`
+ WAYLAND_SERVER_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "wayland-server >= $WAYLAND_REQUIRED" 2>&1`
fi
# Put the nasty error message in config.log where it belongs
- echo "$WAYLAND_PKG_ERRORS" >&5
+ echo "$WAYLAND_SERVER_PKG_ERRORS" >&5
- as_fn_error $? "Package requirements (wayland-client >= $WAYLAND_REQUIRED wayland-server >= $WAYLAND_REQUIRED) were not met:
+ as_fn_error $? "Package requirements (wayland-server >= $WAYLAND_REQUIRED) were not met:
-$WAYLAND_PKG_ERRORS
+$WAYLAND_SERVER_PKG_ERRORS
Consider adjusting the PKG_CONFIG_PATH environment variable if you
installed software in a non-standard prefix.
-Alternatively, you may set the environment variables WAYLAND_CFLAGS
-and WAYLAND_LIBS to avoid the need to call pkg-config.
+Alternatively, you may set the environment variables WAYLAND_SERVER_CFLAGS
+and WAYLAND_SERVER_LIBS to avoid the need to call pkg-config.
See the pkg-config man page for more details." "$LINENO" 5
elif test $pkg_failed = untried; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
@@ -24358,15 +24958,15 @@
is in your PATH or set the PKG_CONFIG environment variable to the full
path to pkg-config.
-Alternatively, you may set the environment variables WAYLAND_CFLAGS
-and WAYLAND_LIBS to avoid the need to call pkg-config.
+Alternatively, you may set the environment variables WAYLAND_SERVER_CFLAGS
+and WAYLAND_SERVER_LIBS to avoid the need to call pkg-config.
See the pkg-config man page for more details.
To get pkg-config, see .
See \`config.log' for more details" "$LINENO" 5; }
else
- WAYLAND_CFLAGS=$pkg_cv_WAYLAND_CFLAGS
- WAYLAND_LIBS=$pkg_cv_WAYLAND_LIBS
+ WAYLAND_SERVER_CFLAGS=$pkg_cv_WAYLAND_SERVER_CFLAGS
+ WAYLAND_SERVER_LIBS=$pkg_cv_WAYLAND_SERVER_LIBS
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
@@ -24378,7 +24978,7 @@
if test "x$have_wayland_protocols" = xno; then
as_fn_error $? "wayland-protocols >= $WAYLAND_PROTOCOLS_REQUIRED is needed to compile the wayland platform" "$LINENO" 5
fi
- DEFINES="$DEFINES -DHAVE_WAYLAND_PLATFORM"
+ DEFINES="$DEFINES -DHAVE_WAYLAND_PLATFORM -DWL_HIDE_DEPRECATED"
;;
x11)
@@ -24775,38 +25375,161 @@
with_dri_drivers=''
fi
-if test "x$enable_dri" = xyes; then
- # Platform specific settings and drivers to build
- case "$host_os" in
- linux*)
- case "$host_cpu" in
- powerpc* | sparc*)
- # Build only the drivers for cards that exist on PowerPC/sparc
- if test "x$with_dri_drivers" = "xyes"; then
- with_dri_drivers="r200 radeon swrast"
- fi
- ;;
- esac
- ;;
- cygwin*)
- if test "x$with_dri_drivers" = "xyes"; then
- with_dri_drivers="swrast"
+# Check for expat
+
+pkg_failed=no
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for EXPAT" >&5
+$as_echo_n "checking for EXPAT... " >&6; }
+
+if test -n "$EXPAT_CFLAGS"; then
+ pkg_cv_EXPAT_CFLAGS="$EXPAT_CFLAGS"
+ elif test -n "$PKG_CONFIG"; then
+ if test -n "$PKG_CONFIG" && \
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"expat\""; } >&5
+ ($PKG_CONFIG --exists --print-errors "expat") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; then
+ pkg_cv_EXPAT_CFLAGS=`$PKG_CONFIG --cflags "expat" 2>/dev/null`
+ test "x$?" != "x0" && pkg_failed=yes
+else
+ pkg_failed=yes
+fi
+ else
+ pkg_failed=untried
+fi
+if test -n "$EXPAT_LIBS"; then
+ pkg_cv_EXPAT_LIBS="$EXPAT_LIBS"
+ elif test -n "$PKG_CONFIG"; then
+ if test -n "$PKG_CONFIG" && \
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"expat\""; } >&5
+ ($PKG_CONFIG --exists --print-errors "expat") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; then
+ pkg_cv_EXPAT_LIBS=`$PKG_CONFIG --libs "expat" 2>/dev/null`
+ test "x$?" != "x0" && pkg_failed=yes
+else
+ pkg_failed=yes
+fi
+ else
+ pkg_failed=untried
+fi
+
+
+
+if test $pkg_failed = yes; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then
+ _pkg_short_errors_supported=yes
+else
+ _pkg_short_errors_supported=no
+fi
+ if test $_pkg_short_errors_supported = yes; then
+ EXPAT_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "expat" 2>&1`
+ else
+ EXPAT_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "expat" 2>&1`
fi
- ;;
- darwin*)
- DEFINES="$DEFINES -DBUILDING_MESA"
- if test "x$with_dri_drivers" = "xyes"; then
- with_dri_drivers="swrast"
+ # Put the nasty error message in config.log where it belongs
+ echo "$EXPAT_PKG_ERRORS" >&5
+
+
+pkg_failed=no
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for EXPAT" >&5
+$as_echo_n "checking for EXPAT... " >&6; }
+
+if test -n "$EXPAT_CFLAGS"; then
+ pkg_cv_EXPAT_CFLAGS="$EXPAT_CFLAGS"
+ elif test -n "$PKG_CONFIG"; then
+ if test -n "$PKG_CONFIG" && \
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"expat21\""; } >&5
+ ($PKG_CONFIG --exists --print-errors "expat21") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; then
+ pkg_cv_EXPAT_CFLAGS=`$PKG_CONFIG --cflags "expat21" 2>/dev/null`
+ test "x$?" != "x0" && pkg_failed=yes
+else
+ pkg_failed=yes
+fi
+ else
+ pkg_failed=untried
+fi
+if test -n "$EXPAT_LIBS"; then
+ pkg_cv_EXPAT_LIBS="$EXPAT_LIBS"
+ elif test -n "$PKG_CONFIG"; then
+ if test -n "$PKG_CONFIG" && \
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"expat21\""; } >&5
+ ($PKG_CONFIG --exists --print-errors "expat21") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; then
+ pkg_cv_EXPAT_LIBS=`$PKG_CONFIG --libs "expat21" 2>/dev/null`
+ test "x$?" != "x0" && pkg_failed=yes
+else
+ pkg_failed=yes
+fi
+ else
+ pkg_failed=untried
+fi
+
+
+
+if test $pkg_failed = yes; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then
+ _pkg_short_errors_supported=yes
+else
+ _pkg_short_errors_supported=no
+fi
+ if test $_pkg_short_errors_supported = yes; then
+ EXPAT_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "expat21" 2>&1`
+ else
+ EXPAT_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "expat21" 2>&1`
fi
- ;;
- esac
+ # Put the nasty error message in config.log where it belongs
+ echo "$EXPAT_PKG_ERRORS" >&5
- # default drivers
- if test "x$with_dri_drivers" = "xyes"; then
- with_dri_drivers="i915 i965 nouveau r200 radeon swrast"
- fi
+ as_fn_error $? "Package requirements (expat21) were not met:
+
+$EXPAT_PKG_ERRORS
+
+Consider adjusting the PKG_CONFIG_PATH environment variable if you
+installed software in a non-standard prefix.
+
+Alternatively, you may set the environment variables EXPAT_CFLAGS
+and EXPAT_LIBS to avoid the need to call pkg-config.
+See the pkg-config man page for more details." "$LINENO" 5
+elif test $pkg_failed = untried; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} # log the failing directory, then abort with the explanation below
+as_fn_error $? "The pkg-config script could not be found or is too old. Make sure it
+is in your PATH or set the PKG_CONFIG environment variable to the full
+path to pkg-config.
+
+Alternatively, you may set the environment variables EXPAT_CFLAGS
+and EXPAT_LIBS to avoid the need to call pkg-config.
+See the pkg-config man page for more details.
+
+To get pkg-config, see <http://pkg-config.freedesktop.org/>.
+See \`config.log' for more details" "$LINENO" 5; }
+else
+ EXPAT_CFLAGS=$pkg_cv_EXPAT_CFLAGS
+ EXPAT_LIBS=$pkg_cv_EXPAT_LIBS
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+
+fi
- # Check for expat
+elif test $pkg_failed = untried; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
pkg_failed=no
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for EXPAT" >&5
@@ -24816,12 +25539,12 @@
pkg_cv_EXPAT_CFLAGS="$EXPAT_CFLAGS"
elif test -n "$PKG_CONFIG"; then
if test -n "$PKG_CONFIG" && \
- { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"expat\""; } >&5
- ($PKG_CONFIG --exists --print-errors "expat") 2>&5
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"expat21\""; } >&5
+ ($PKG_CONFIG --exists --print-errors "expat21") 2>&5
ac_status=$?
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; }; then
- pkg_cv_EXPAT_CFLAGS=`$PKG_CONFIG --cflags "expat" 2>/dev/null`
+ pkg_cv_EXPAT_CFLAGS=`$PKG_CONFIG --cflags "expat21" 2>/dev/null`
test "x$?" != "x0" && pkg_failed=yes
else
pkg_failed=yes
@@ -24833,12 +25556,12 @@
pkg_cv_EXPAT_LIBS="$EXPAT_LIBS"
elif test -n "$PKG_CONFIG"; then
if test -n "$PKG_CONFIG" && \
- { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"expat\""; } >&5
- ($PKG_CONFIG --exists --print-errors "expat") 2>&5
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"expat21\""; } >&5
+ ($PKG_CONFIG --exists --print-errors "expat21") 2>&5
ac_status=$?
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; }; then
- pkg_cv_EXPAT_LIBS=`$PKG_CONFIG --libs "expat" 2>/dev/null`
+ pkg_cv_EXPAT_LIBS=`$PKG_CONFIG --libs "expat21" 2>/dev/null`
test "x$?" != "x0" && pkg_failed=yes
else
pkg_failed=yes
@@ -24859,130 +25582,46 @@
_pkg_short_errors_supported=no
fi
if test $_pkg_short_errors_supported = yes; then
- EXPAT_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "expat" 2>&1`
+ EXPAT_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "expat21" 2>&1`
else
- EXPAT_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "expat" 2>&1`
+ EXPAT_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "expat21" 2>&1`
fi
# Put the nasty error message in config.log where it belongs
echo "$EXPAT_PKG_ERRORS" >&5
- # expat version 2.0 and earlier do not provide expat.pc
- ac_fn_c_check_header_mongrel "$LINENO" "expat.h" "ac_cv_header_expat_h" "$ac_includes_default"
-if test "x$ac_cv_header_expat_h" = xyes; then :
-
-else
- as_fn_error $? "Expat headers required for DRI not found" "$LINENO" 5
-fi
-
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for XML_ParserCreate in -lexpat" >&5
-$as_echo_n "checking for XML_ParserCreate in -lexpat... " >&6; }
-if ${ac_cv_lib_expat_XML_ParserCreate+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- ac_check_lib_save_LIBS=$LIBS
-LIBS="-lexpat $LIBS"
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-
-/* Override any GCC internal prototype to avoid an error.
- Use char because int might match the return type of a GCC
- builtin and then its argument prototype would still apply. */
-#ifdef __cplusplus
-extern "C"
-#endif
-char XML_ParserCreate ();
-int
-main ()
-{
-return XML_ParserCreate ();
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
- ac_cv_lib_expat_XML_ParserCreate=yes
-else
- ac_cv_lib_expat_XML_ParserCreate=no
-fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
-LIBS=$ac_check_lib_save_LIBS
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_expat_XML_ParserCreate" >&5
-$as_echo "$ac_cv_lib_expat_XML_ParserCreate" >&6; }
-if test "x$ac_cv_lib_expat_XML_ParserCreate" = xyes; then :
- cat >>confdefs.h <<_ACEOF
-#define HAVE_LIBEXPAT 1
-_ACEOF
+ as_fn_error $? "Package requirements (expat21) were not met:
- LIBS="-lexpat $LIBS"
+$EXPAT_PKG_ERRORS
-else
- as_fn_error $? "Expat library required for DRI not found" "$LINENO" 5
-fi
+Consider adjusting the PKG_CONFIG_PATH environment variable if you
+installed software in a non-standard prefix.
- EXPAT_LIBS="-lexpat"
+Alternatively, you may set the environment variables EXPAT_CFLAGS
+and EXPAT_LIBS to avoid the need to call pkg-config.
+See the pkg-config man page for more details." "$LINENO" 5
elif test $pkg_failed = untried; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
$as_echo "no" >&6; }
- # expat version 2.0 and earlier do not provide expat.pc
- ac_fn_c_check_header_mongrel "$LINENO" "expat.h" "ac_cv_header_expat_h" "$ac_includes_default"
-if test "x$ac_cv_header_expat_h" = xyes; then :
-
-else
- as_fn_error $? "Expat headers required for DRI not found" "$LINENO" 5
-fi
-
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "The pkg-config script could not be found or is too old. Make sure it
+is in your PATH or set the PKG_CONFIG environment variable to the full
+path to pkg-config.
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for XML_ParserCreate in -lexpat" >&5
-$as_echo_n "checking for XML_ParserCreate in -lexpat... " >&6; }
-if ${ac_cv_lib_expat_XML_ParserCreate+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- ac_check_lib_save_LIBS=$LIBS
-LIBS="-lexpat $LIBS"
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
+Alternatively, you may set the environment variables EXPAT_CFLAGS
+and EXPAT_LIBS to avoid the need to call pkg-config.
+See the pkg-config man page for more details.
-/* Override any GCC internal prototype to avoid an error.
- Use char because int might match the return type of a GCC
- builtin and then its argument prototype would still apply. */
-#ifdef __cplusplus
-extern "C"
-#endif
-char XML_ParserCreate ();
-int
-main ()
-{
-return XML_ParserCreate ();
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
- ac_cv_lib_expat_XML_ParserCreate=yes
+To get pkg-config, see .
+See \`config.log' for more details" "$LINENO" 5; }
else
- ac_cv_lib_expat_XML_ParserCreate=no
-fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
-LIBS=$ac_check_lib_save_LIBS
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_expat_XML_ParserCreate" >&5
-$as_echo "$ac_cv_lib_expat_XML_ParserCreate" >&6; }
-if test "x$ac_cv_lib_expat_XML_ParserCreate" = xyes; then :
- cat >>confdefs.h <<_ACEOF
-#define HAVE_LIBEXPAT 1
-_ACEOF
-
- LIBS="-lexpat $LIBS"
+ EXPAT_CFLAGS=$pkg_cv_EXPAT_CFLAGS
+ EXPAT_LIBS=$pkg_cv_EXPAT_LIBS
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
-else
- as_fn_error $? "Expat library required for DRI not found" "$LINENO" 5
fi
- EXPAT_LIBS="-lexpat"
else
EXPAT_CFLAGS=$pkg_cv_EXPAT_CFLAGS
EXPAT_LIBS=$pkg_cv_EXPAT_LIBS
@@ -24991,6 +25630,37 @@
fi
+if test "x$enable_dri" = xyes; then
+ # Platform specific settings and drivers to build
+ case "$host_os" in
+ linux*)
+ case "$host_cpu" in
+ powerpc* | sparc*)
+ # Build only the drivers for cards that exist on PowerPC/sparc
+ if test "x$with_dri_drivers" = "xyes"; then
+ with_dri_drivers="r200 radeon swrast"
+ fi
+ ;;
+ esac
+ ;;
+ cygwin*)
+ if test "x$with_dri_drivers" = "xyes"; then
+ with_dri_drivers="swrast"
+ fi
+ ;;
+ darwin*)
+ DEFINES="$DEFINES -DBUILDING_MESA"
+ if test "x$with_dri_drivers" = "xyes"; then
+ with_dri_drivers="swrast"
+ fi
+ ;;
+ esac
+
+ # default drivers
+ if test "x$with_dri_drivers" = "xyes"; then
+ with_dri_drivers="i915 i965 nouveau r200 radeon swrast"
+ fi
+
# put all the necessary libs together
DRI_LIB_DEPS="$DRI_LIB_DEPS $SELINUX_LIBS $LIBDRM_LIBS $EXPAT_LIBS -lm $PTHREAD_LIBS $DLOPEN_LIBS"
fi
@@ -25772,16 +26442,16 @@
fi
fi
- if test "x$enable_omx" = xauto -a "x$have_omx_platform" = xyes; then
+ if test "x$enable_omx_bellagio" = xauto -a "x$have_omx_platform" = xyes; then
if test -n "$PKG_CONFIG" && \
{ { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libomxil-bellagio >= \$LIBOMXIL_BELLAGIO_REQUIRED\""; } >&5
($PKG_CONFIG --exists --print-errors "libomxil-bellagio >= $LIBOMXIL_BELLAGIO_REQUIRED") 2>&5
ac_status=$?
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; }; then
- enable_omx=yes
+ enable_omx_bellagio=yes
else
- enable_omx=no
+ enable_omx_bellagio=no
fi
fi
@@ -25802,7 +26472,7 @@
if test "x$enable_dri" = xyes -o \
"x$enable_xvmc" = xyes -o \
"x$enable_vdpau" = xyes -o \
- "x$enable_omx" = xyes -o \
+ "x$enable_omx_bellagio" = xyes -o \
"x$enable_va" = xyes; then
need_gallium_vl=yes
fi
@@ -25817,8 +26487,9 @@
if test "x$enable_xvmc" = xyes -o \
"x$enable_vdpau" = xyes -o \
- "x$enable_omx" = xyes -o \
+ "x$enable_omx_bellagio" = xyes -o \
"x$enable_va" = xyes; then
+ if echo $platforms | grep -q "x11"; then
pkg_failed=no
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for VL" >&5
@@ -25910,6 +26581,7 @@
$as_echo "yes" >&6; }
fi
+ fi
need_gallium_vl_winsys=yes
fi
if test "x$need_gallium_vl_winsys" = xyes; then
@@ -26134,17 +26806,17 @@
fi
-if test "x$enable_omx" = xyes; then
+if test "x$enable_omx_bellagio" = xyes; then
if test "x$have_omx_platform" != xyes; then
as_fn_error $? "OMX requires at least one of the x11 or drm platforms" "$LINENO" 5
fi
pkg_failed=no
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for OMX" >&5
-$as_echo_n "checking for OMX... " >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for OMX_BELLAGIO" >&5
+$as_echo_n "checking for OMX_BELLAGIO... " >&6; }
-if test -n "$OMX_CFLAGS"; then
- pkg_cv_OMX_CFLAGS="$OMX_CFLAGS"
+if test -n "$OMX_BELLAGIO_CFLAGS"; then
+ pkg_cv_OMX_BELLAGIO_CFLAGS="$OMX_BELLAGIO_CFLAGS"
elif test -n "$PKG_CONFIG"; then
if test -n "$PKG_CONFIG" && \
{ { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libomxil-bellagio >= \$LIBOMXIL_BELLAGIO_REQUIRED\""; } >&5
@@ -26152,7 +26824,7 @@
ac_status=$?
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; }; then
- pkg_cv_OMX_CFLAGS=`$PKG_CONFIG --cflags "libomxil-bellagio >= $LIBOMXIL_BELLAGIO_REQUIRED" 2>/dev/null`
+ pkg_cv_OMX_BELLAGIO_CFLAGS=`$PKG_CONFIG --cflags "libomxil-bellagio >= $LIBOMXIL_BELLAGIO_REQUIRED" 2>/dev/null`
test "x$?" != "x0" && pkg_failed=yes
else
pkg_failed=yes
@@ -26160,8 +26832,8 @@
else
pkg_failed=untried
fi
-if test -n "$OMX_LIBS"; then
- pkg_cv_OMX_LIBS="$OMX_LIBS"
+if test -n "$OMX_BELLAGIO_LIBS"; then
+ pkg_cv_OMX_BELLAGIO_LIBS="$OMX_BELLAGIO_LIBS"
elif test -n "$PKG_CONFIG"; then
if test -n "$PKG_CONFIG" && \
{ { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libomxil-bellagio >= \$LIBOMXIL_BELLAGIO_REQUIRED\""; } >&5
@@ -26169,7 +26841,7 @@
ac_status=$?
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; }; then
- pkg_cv_OMX_LIBS=`$PKG_CONFIG --libs "libomxil-bellagio >= $LIBOMXIL_BELLAGIO_REQUIRED" 2>/dev/null`
+ pkg_cv_OMX_BELLAGIO_LIBS=`$PKG_CONFIG --libs "libomxil-bellagio >= $LIBOMXIL_BELLAGIO_REQUIRED" 2>/dev/null`
test "x$?" != "x0" && pkg_failed=yes
else
pkg_failed=yes
@@ -26190,22 +26862,22 @@
_pkg_short_errors_supported=no
fi
if test $_pkg_short_errors_supported = yes; then
- OMX_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "libomxil-bellagio >= $LIBOMXIL_BELLAGIO_REQUIRED" 2>&1`
+ OMX_BELLAGIO_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "libomxil-bellagio >= $LIBOMXIL_BELLAGIO_REQUIRED" 2>&1`
else
- OMX_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "libomxil-bellagio >= $LIBOMXIL_BELLAGIO_REQUIRED" 2>&1`
+ OMX_BELLAGIO_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "libomxil-bellagio >= $LIBOMXIL_BELLAGIO_REQUIRED" 2>&1`
fi
# Put the nasty error message in config.log where it belongs
- echo "$OMX_PKG_ERRORS" >&5
+ echo "$OMX_BELLAGIO_PKG_ERRORS" >&5
as_fn_error $? "Package requirements (libomxil-bellagio >= $LIBOMXIL_BELLAGIO_REQUIRED) were not met:
-$OMX_PKG_ERRORS
+$OMX_BELLAGIO_PKG_ERRORS
Consider adjusting the PKG_CONFIG_PATH environment variable if you
installed software in a non-standard prefix.
-Alternatively, you may set the environment variables OMX_CFLAGS
-and OMX_LIBS to avoid the need to call pkg-config.
+Alternatively, you may set the environment variables OMX_BELLAGIO_CFLAGS
+and OMX_BELLAGIO_LIBS to avoid the need to call pkg-config.
See the pkg-config man page for more details." "$LINENO" 5
elif test $pkg_failed = untried; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
@@ -26216,27 +26888,27 @@
is in your PATH or set the PKG_CONFIG environment variable to the full
path to pkg-config.
-Alternatively, you may set the environment variables OMX_CFLAGS
-and OMX_LIBS to avoid the need to call pkg-config.
+Alternatively, you may set the environment variables OMX_BELLAGIO_CFLAGS
+and OMX_BELLAGIO_LIBS to avoid the need to call pkg-config.
See the pkg-config man page for more details.
To get pkg-config, see .
See \`config.log' for more details" "$LINENO" 5; }
else
- OMX_CFLAGS=$pkg_cv_OMX_CFLAGS
- OMX_LIBS=$pkg_cv_OMX_LIBS
+ OMX_BELLAGIO_CFLAGS=$pkg_cv_OMX_BELLAGIO_CFLAGS
+ OMX_BELLAGIO_LIBS=$pkg_cv_OMX_BELLAGIO_LIBS
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
fi
- gallium_st="$gallium_st omx"
+ gallium_st="$gallium_st omx_bellagio"
fi
- if test "x$enable_omx" = xyes; then
- HAVE_ST_OMX_TRUE=
- HAVE_ST_OMX_FALSE='#'
+ if test "x$enable_omx_bellagio" = xyes; then
+ HAVE_ST_OMX_BELLAGIO_TRUE=
+ HAVE_ST_OMX_BELLAGIO_FALSE='#'
else
- HAVE_ST_OMX_TRUE='#'
- HAVE_ST_OMX_FALSE=
+ HAVE_ST_OMX_BELLAGIO_TRUE='#'
+ HAVE_ST_OMX_BELLAGIO_FALSE=
fi
@@ -26452,13 +27124,16 @@
llvm_add_default_components "opencl"
llvm_add_component "all-targets" "opencl"
+ llvm_add_component "coverage" "opencl"
llvm_add_component "linker" "opencl"
llvm_add_component "instrumentation" "opencl"
llvm_add_component "ipo" "opencl"
llvm_add_component "irreader" "opencl"
+ llvm_add_component "lto" "opencl"
llvm_add_component "option" "opencl"
llvm_add_component "objcarcopts" "opencl"
llvm_add_component "profiledata" "opencl"
+ llvm_add_optional_component "coroutines" "opencl"
if test -z "$CLANG_LIBDIR"; then
CLANG_LIBDIR=${LLVM_LIBDIR}
@@ -26546,12 +27221,12 @@
-# Check whether --with-omx-libdir was given.
-if test "${with_omx_libdir+set}" = set; then :
- withval=$with_omx_libdir; OMX_LIB_INSTALL_DIR="$withval"
+# Check whether --with-omx-bellagio-libdir was given.
+if test "${with_omx_bellagio_libdir+set}" = set; then :
+ withval=$with_omx_bellagio_libdir; OMX_BELLAGIO_LIB_INSTALL_DIR="$withval"
else
- OMX_LIB_INSTALL_DIR=`$PKG_CONFIG --exists libomxil-bellagio && \
- $PKG_CONFIG --define-variable=libdir=\$libdir --variable=pluginsdir libomxil-bellagio`
+ OMX_BELLAGIO_LIB_INSTALL_DIR=`$PKG_CONFIG --exists libomxil-bellagio && \
+ $PKG_CONFIG --define-variable=libdir=\$libdir --variable=pluginsdir libomxil-bellagio`
fi
@@ -27463,7 +28138,7 @@
swr_require_cxx_feature_flags "AVX" "defined(__AVX__)" \
- ",-mavx,-march=core-avx" \
+ ",-target-cpu=sandybridge,-mavx,-march=core-avx,-tp=sandybridge" \
SWR_AVX_CXXFLAGS
@@ -27475,21 +28150,21 @@
;;
xavx2)
swr_require_cxx_feature_flags "AVX2" "defined(__AVX2__)" \
- ",-mavx2 -mfma -mbmi2 -mf16c,-march=core-avx2" \
+ ",-target-cpu=haswell,-mavx2 -mfma -mbmi2 -mf16c,-march=core-avx2,-tp=haswell" \
SWR_AVX2_CXXFLAGS
HAVE_SWR_AVX2=yes
;;
xknl)
swr_require_cxx_feature_flags "KNL" "defined(__AVX512F__) && defined(__AVX512ER__)" \
- ",-march=knl,-xMIC-AVX512" \
+ ",-target-cpu=mic-knl,-march=knl,-xMIC-AVX512" \
SWR_KNL_CXXFLAGS
HAVE_SWR_KNL=yes
;;
xskx)
swr_require_cxx_feature_flags "SKX" "defined(__AVX512F__) && defined(__AVX512BW__)" \
- ",-march=skylake-avx512,-xCORE-AVX512" \
+ ",-target-cpu=x86-skylake,-march=skylake-avx512,-xCORE-AVX512" \
SWR_SKX_CXXFLAGS
HAVE_SWR_SKX=yes
@@ -27586,6 +28261,82 @@
DEFINES="$DEFINES -DUSE_VC4_SIMULATOR"
fi
;;
+ xvc5)
+ HAVE_GALLIUM_VC5=yes
+
+
+pkg_failed=no
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for VC5_SIMULATOR" >&5
+$as_echo_n "checking for VC5_SIMULATOR... " >&6; }
+
+if test -n "$VC5_SIMULATOR_CFLAGS"; then
+ pkg_cv_VC5_SIMULATOR_CFLAGS="$VC5_SIMULATOR_CFLAGS"
+ elif test -n "$PKG_CONFIG"; then
+ if test -n "$PKG_CONFIG" && \
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"v3dv3\""; } >&5
+ ($PKG_CONFIG --exists --print-errors "v3dv3") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; then
+ pkg_cv_VC5_SIMULATOR_CFLAGS=`$PKG_CONFIG --cflags "v3dv3" 2>/dev/null`
+ test "x$?" != "x0" && pkg_failed=yes
+else
+ pkg_failed=yes
+fi
+ else
+ pkg_failed=untried
+fi
+if test -n "$VC5_SIMULATOR_LIBS"; then
+ pkg_cv_VC5_SIMULATOR_LIBS="$VC5_SIMULATOR_LIBS"
+ elif test -n "$PKG_CONFIG"; then
+ if test -n "$PKG_CONFIG" && \
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"v3dv3\""; } >&5
+ ($PKG_CONFIG --exists --print-errors "v3dv3") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; then
+ pkg_cv_VC5_SIMULATOR_LIBS=`$PKG_CONFIG --libs "v3dv3" 2>/dev/null`
+ test "x$?" != "x0" && pkg_failed=yes
+else
+ pkg_failed=yes
+fi
+ else
+ pkg_failed=untried
+fi
+
+
+
+if test $pkg_failed = yes; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then
+ _pkg_short_errors_supported=yes
+else
+ _pkg_short_errors_supported=no
+fi
+ if test $_pkg_short_errors_supported = yes; then
+ VC5_SIMULATOR_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "v3dv3" 2>&1`
+ else
+ VC5_SIMULATOR_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "v3dv3" 2>&1`
+ fi
+ # Put the nasty error message in config.log where it belongs
+ echo "$VC5_SIMULATOR_PKG_ERRORS" >&5
+
+ as_fn_error $? "vc5 requires the simulator" "$LINENO" 5
+elif test $pkg_failed = untried; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+ as_fn_error $? "vc5 requires the simulator" "$LINENO" 5
+else
+ VC5_SIMULATOR_CFLAGS=$pkg_cv_VC5_SIMULATOR_CFLAGS
+ VC5_SIMULATOR_LIBS=$pkg_cv_VC5_SIMULATOR_LIBS
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+ USE_VC5_SIMULATOR=yes;
+ DEFINES="$DEFINES -DUSE_VC5_SIMULATOR"
+fi
+ ;;
xpl111)
HAVE_GALLIUM_PL111=yes
;;
@@ -27661,6 +28412,31 @@
as_fn_error $? "Building with pl111 requires vc4" "$LINENO" 5
fi
+
+detect_old_buggy_llvm() {
+ LLVM_SO_NAME=LLVM-`$LLVM_CONFIG --version`
+ if test -f "$LLVM_LIBDIR/lib$LLVM_SO_NAME.$IMP_LIB_EXT"; then :
+ llvm_have_one_so=yes
+fi
+
+ if test "x$llvm_have_one_so" = xyes; then
+ LLVM_LIBS="-l$LLVM_SO_NAME"
+ else
+ if test ! -f "$LLVM_LIBDIR/libLLVMTarget.$IMP_LIB_EXT"; then :
+ as_fn_error $? "Could not find llvm shared libraries:
+ Please make sure you have built llvm with the --enable-shared option
+ and that your llvm libraries are installed in $LLVM_LIBDIR
+ If you have installed your llvm libraries to a different directory you
+ can use the --with-llvm-prefix= configure flag to specify this directory.
+ NOTE: Mesa is attempting to use llvm shared libraries by default.
+ If you do not want to build with llvm shared libraries and instead want to
+ use llvm static libraries then add --disable-llvm-shared-libs to your configure
+ invocation and rebuild." "$LINENO" 5
+fi
+
+ fi
+}
+
if test "x$enable_llvm" = xyes; then
DEFINES="${DEFINES} -DHAVE_LLVM=0x0$LLVM_VERSION_INT -DMESA_LLVM_VERSION_PATCH=$LLVM_VERSION_PATCH"
@@ -27669,38 +28445,23 @@
LLVM_CXXFLAGS=`strip_unwanted_llvm_flags "$LLVM_CONFIG --cxxflags"`
- if ! $LLVM_CONFIG --libs ${LLVM_COMPONENTS} >/dev/null; then
- as_fn_error $? "Calling ${LLVM_CONFIG} failed" "$LINENO" 5
- fi
- LLVM_LIBS="`$LLVM_CONFIG --libs ${LLVM_COMPONENTS}`"
-
- if test "x$enable_llvm_shared_libs" = xyes; then
- LLVM_SO_NAME=LLVM-`$LLVM_CONFIG --version`
- if test -f "$LLVM_LIBDIR/lib$LLVM_SO_NAME.$IMP_LIB_EXT"; then :
- llvm_have_one_so=yes
-fi
-
- if test "x$llvm_have_one_so" = xyes; then
- LLVM_LIBS="-l$LLVM_SO_NAME"
+ if test $LLVM_VERSION_MAJOR -ge 4 -o $LLVM_VERSION_MAJOR -eq 3 -a $LLVM_VERSION_MINOR -ge 9; then
+ if test "x$enable_llvm_shared_libs" = xyes; then
+ LLVM_LIBS="`$LLVM_CONFIG --link-shared --libs ${LLVM_COMPONENTS}`"
else
- if test ! -f "$LLVM_LIBDIR/libLLVMTarget.$IMP_LIB_EXT"; then :
- as_fn_error $? "Could not find llvm shared libraries:
- Please make sure you have built llvm with the --enable-shared option
- and that your llvm libraries are installed in $LLVM_LIBDIR
- If you have installed your llvm libraries to a different directory you
- can use the --with-llvm-prefix= configure flag to specify this directory.
- NOTE: Mesa is attempting to use llvm shared libraries by default.
- If you do not want to build with llvm shared libraries and instead want to
- use llvm static libraries then add --disable-llvm-shared-libs to your configure
- invocation and rebuild." "$LINENO" 5
-fi
-
- fi
+ LLVM_LIBS="`$LLVM_CONFIG --link-static --libs ${LLVM_COMPONENTS}`"
+ LLVM_LIBS="$LLVM_LIBS `$LLVM_CONFIG --link-static --system-libs`"
+ fi
else
- { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Building mesa with statically linked LLVM may cause compilation issues" >&5
+ LLVM_LIBS="`$LLVM_CONFIG --libs ${LLVM_COMPONENTS}`"
+ if test "x$enable_llvm_shared_libs" = xyes; then
+ detect_old_buggy_llvm
+ else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Building mesa with statically linked LLVM may cause compilation issues" >&5
$as_echo "$as_me: WARNING: Building mesa with statically linked LLVM may cause compilation issues" >&2;}
- if test $LLVM_VERSION_MAJOR -ge 4 -o $LLVM_VERSION_MAJOR -eq 3 -a $LLVM_VERSION_MINOR -ge 5; then
- LLVM_LIBS="$LLVM_LIBS `$LLVM_CONFIG --system-libs`"
+ if test $LLVM_VERSION_MAJOR -ge 4 -o $LLVM_VERSION_MAJOR -eq 3 -a $LLVM_VERSION_MINOR -ge 5; then
+ LLVM_LIBS="$LLVM_LIBS `$LLVM_CONFIG --system-libs`"
+ fi
fi
fi
fi
@@ -27753,8 +28514,7 @@
HAVE_GALLIUM_RADEONSI_FALSE=
fi
- if test "x$HAVE_GALLIUM_R600" = xyes -o \
- "x$HAVE_GALLIUM_RADEONSI" = xyes; then
+ if test "x$HAVE_GALLIUM_RADEONSI" = xyes; then
HAVE_GALLIUM_RADEON_COMMON_TRUE=
HAVE_GALLIUM_RADEON_COMMON_FALSE='#'
else
@@ -27836,6 +28596,14 @@
HAVE_GALLIUM_VC4_FALSE=
fi
+ if test "x$HAVE_GALLIUM_VC5" = xyes; then
+ HAVE_GALLIUM_VC5_TRUE=
+ HAVE_GALLIUM_VC5_FALSE='#'
+else
+ HAVE_GALLIUM_VC5_TRUE='#'
+ HAVE_GALLIUM_VC5_FALSE=
+fi
+
if test "x$HAVE_GALLIUM_VIRGL" = xyes; then
HAVE_GALLIUM_VIRGL_TRUE=
HAVE_GALLIUM_VIRGL_FALSE='#'
@@ -27939,6 +28707,16 @@
fi
+ if test "x$HAVE_GALLIUM_VC4" = xyes -o \
+ "x$HAVE_GALLIUM_VC5" = xyes; then
+ HAVE_BROADCOM_DRIVERS_TRUE=
+ HAVE_BROADCOM_DRIVERS_FALSE='#'
+else
+ HAVE_BROADCOM_DRIVERS_TRUE='#'
+ HAVE_BROADCOM_DRIVERS_FALSE=
+fi
+
+
if test "x$HAVE_INTEL_VULKAN" = xyes -o \
"x$HAVE_I965_DRI" = xyes; then
HAVE_INTEL_DRIVERS_TRUE=
@@ -27991,6 +28769,14 @@
USE_VC4_SIMULATOR_FALSE=
fi
+ if test x$USE_VC5_SIMULATOR = xyes; then
+ USE_VC5_SIMULATOR_TRUE=
+ USE_VC5_SIMULATOR_FALSE='#'
+else
+ USE_VC5_SIMULATOR_TRUE='#'
+ USE_VC5_SIMULATOR_FALSE=
+fi
+
if test "x$have_libdrm" = xyes; then
HAVE_LIBDRM_TRUE=
@@ -28235,7 +29021,7 @@
CFLAGS="$CFLAGS $USER_CFLAGS"
CXXFLAGS="$CXXFLAGS $USER_CXXFLAGS"
-ac_config_files="$ac_config_files Makefile src/Makefile src/amd/Makefile src/amd/vulkan/Makefile src/broadcom/Makefile src/compiler/Makefile src/egl/Makefile src/egl/main/egl.pc src/egl/wayland/wayland-drm/Makefile src/egl/wayland/wayland-egl/Makefile src/egl/wayland/wayland-egl/wayland-egl.pc src/gallium/Makefile src/gallium/auxiliary/Makefile src/gallium/auxiliary/pipe-loader/Makefile src/gallium/drivers/freedreno/Makefile src/gallium/drivers/ddebug/Makefile src/gallium/drivers/i915/Makefile src/gallium/drivers/llvmpipe/Makefile src/gallium/drivers/noop/Makefile src/gallium/drivers/nouveau/Makefile src/gallium/drivers/pl111/Makefile src/gallium/drivers/r300/Makefile src/gallium/drivers/r600/Makefile src/gallium/drivers/radeon/Makefile src/gallium/drivers/radeonsi/Makefile src/gallium/drivers/rbug/Makefile src/gallium/drivers/softpipe/Makefile src/gallium/drivers/svga/Makefile src/gallium/drivers/swr/Makefile src/gallium/drivers/trace/Makefile src/gallium/drivers/etnaviv/Makefile src/gallium/drivers/imx/Makefile src/gallium/drivers/vc4/Makefile src/gallium/drivers/virgl/Makefile src/gallium/state_trackers/clover/Makefile src/gallium/state_trackers/dri/Makefile src/gallium/state_trackers/glx/xlib/Makefile src/gallium/state_trackers/nine/Makefile src/gallium/state_trackers/omx/Makefile src/gallium/state_trackers/osmesa/Makefile src/gallium/state_trackers/va/Makefile src/gallium/state_trackers/vdpau/Makefile src/gallium/state_trackers/xa/Makefile src/gallium/state_trackers/xvmc/Makefile src/gallium/targets/d3dadapter9/Makefile src/gallium/targets/d3dadapter9/d3d.pc src/gallium/targets/dri/Makefile src/gallium/targets/libgl-xlib/Makefile src/gallium/targets/omx/Makefile src/gallium/targets/opencl/Makefile src/gallium/targets/opencl/mesa.icd src/gallium/targets/osmesa/Makefile src/gallium/targets/osmesa/osmesa.pc src/gallium/targets/pipe-loader/Makefile src/gallium/targets/va/Makefile src/gallium/targets/vdpau/Makefile src/gallium/targets/xa/Makefile 
src/gallium/targets/xa/xatracker.pc src/gallium/targets/xvmc/Makefile src/gallium/tests/trivial/Makefile src/gallium/tests/unit/Makefile src/gallium/winsys/etnaviv/drm/Makefile src/gallium/winsys/imx/drm/Makefile src/gallium/winsys/freedreno/drm/Makefile src/gallium/winsys/i915/drm/Makefile src/gallium/winsys/nouveau/drm/Makefile src/gallium/winsys/pl111/drm/Makefile src/gallium/winsys/radeon/drm/Makefile src/gallium/winsys/amdgpu/drm/Makefile src/gallium/winsys/svga/drm/Makefile src/gallium/winsys/sw/dri/Makefile src/gallium/winsys/sw/kms-dri/Makefile src/gallium/winsys/sw/null/Makefile src/gallium/winsys/sw/wrapper/Makefile src/gallium/winsys/sw/xlib/Makefile src/gallium/winsys/vc4/drm/Makefile src/gallium/winsys/virgl/drm/Makefile src/gallium/winsys/virgl/vtest/Makefile src/gbm/Makefile src/gbm/main/gbm.pc src/glx/Makefile src/glx/apple/Makefile src/glx/tests/Makefile src/glx/windows/Makefile src/glx/windows/windowsdriproto.pc src/gtest/Makefile src/intel/Makefile src/loader/Makefile src/mapi/Makefile src/mapi/es1api/glesv1_cm.pc src/mapi/es2api/glesv2.pc src/mapi/glapi/gen/Makefile src/mesa/Makefile src/mesa/gl.pc src/mesa/drivers/dri/dri.pc src/mesa/drivers/dri/common/Makefile src/mesa/drivers/dri/common/xmlpool/Makefile src/mesa/drivers/dri/i915/Makefile src/mesa/drivers/dri/i965/Makefile src/mesa/drivers/dri/Makefile src/mesa/drivers/dri/nouveau/Makefile src/mesa/drivers/dri/r200/Makefile src/mesa/drivers/dri/radeon/Makefile src/mesa/drivers/dri/swrast/Makefile src/mesa/drivers/osmesa/Makefile src/mesa/drivers/osmesa/osmesa.pc src/mesa/drivers/x11/Makefile src/mesa/main/tests/Makefile src/util/Makefile src/util/tests/hash_table/Makefile src/vulkan/Makefile"
+ac_config_files="$ac_config_files Makefile src/Makefile src/amd/Makefile src/amd/vulkan/Makefile src/broadcom/Makefile src/compiler/Makefile src/egl/Makefile src/egl/main/egl.pc src/egl/wayland/wayland-drm/Makefile src/egl/wayland/wayland-egl/Makefile src/egl/wayland/wayland-egl/wayland-egl.pc src/gallium/Makefile src/gallium/auxiliary/Makefile src/gallium/auxiliary/pipe-loader/Makefile src/gallium/drivers/freedreno/Makefile src/gallium/drivers/ddebug/Makefile src/gallium/drivers/i915/Makefile src/gallium/drivers/llvmpipe/Makefile src/gallium/drivers/noop/Makefile src/gallium/drivers/nouveau/Makefile src/gallium/drivers/pl111/Makefile src/gallium/drivers/r300/Makefile src/gallium/drivers/r600/Makefile src/gallium/drivers/radeon/Makefile src/gallium/drivers/radeonsi/Makefile src/gallium/drivers/rbug/Makefile src/gallium/drivers/softpipe/Makefile src/gallium/drivers/svga/Makefile src/gallium/drivers/swr/Makefile src/gallium/drivers/trace/Makefile src/gallium/drivers/etnaviv/Makefile src/gallium/drivers/imx/Makefile src/gallium/drivers/vc4/Makefile src/gallium/drivers/vc5/Makefile src/gallium/drivers/virgl/Makefile src/gallium/state_trackers/clover/Makefile src/gallium/state_trackers/dri/Makefile src/gallium/state_trackers/glx/xlib/Makefile src/gallium/state_trackers/nine/Makefile src/gallium/state_trackers/omx_bellagio/Makefile src/gallium/state_trackers/osmesa/Makefile src/gallium/state_trackers/va/Makefile src/gallium/state_trackers/vdpau/Makefile src/gallium/state_trackers/xa/Makefile src/gallium/state_trackers/xvmc/Makefile src/gallium/targets/d3dadapter9/Makefile src/gallium/targets/d3dadapter9/d3d.pc src/gallium/targets/dri/Makefile src/gallium/targets/libgl-xlib/Makefile src/gallium/targets/omx-bellagio/Makefile src/gallium/targets/opencl/Makefile src/gallium/targets/opencl/mesa.icd src/gallium/targets/osmesa/Makefile src/gallium/targets/osmesa/osmesa.pc src/gallium/targets/pipe-loader/Makefile src/gallium/targets/va/Makefile 
src/gallium/targets/vdpau/Makefile src/gallium/targets/xa/Makefile src/gallium/targets/xa/xatracker.pc src/gallium/targets/xvmc/Makefile src/gallium/tests/trivial/Makefile src/gallium/tests/unit/Makefile src/gallium/winsys/etnaviv/drm/Makefile src/gallium/winsys/imx/drm/Makefile src/gallium/winsys/freedreno/drm/Makefile src/gallium/winsys/i915/drm/Makefile src/gallium/winsys/nouveau/drm/Makefile src/gallium/winsys/pl111/drm/Makefile src/gallium/winsys/radeon/drm/Makefile src/gallium/winsys/amdgpu/drm/Makefile src/gallium/winsys/svga/drm/Makefile src/gallium/winsys/sw/dri/Makefile src/gallium/winsys/sw/kms-dri/Makefile src/gallium/winsys/sw/null/Makefile src/gallium/winsys/sw/wrapper/Makefile src/gallium/winsys/sw/xlib/Makefile src/gallium/winsys/vc4/drm/Makefile src/gallium/winsys/vc5/drm/Makefile src/gallium/winsys/virgl/drm/Makefile src/gallium/winsys/virgl/vtest/Makefile src/gbm/Makefile src/gbm/main/gbm.pc src/glx/Makefile src/glx/apple/Makefile src/glx/tests/Makefile src/glx/windows/Makefile src/glx/windows/windowsdriproto.pc src/gtest/Makefile src/intel/Makefile src/loader/Makefile src/mapi/Makefile src/mapi/es1api/glesv1_cm.pc src/mapi/es2api/glesv2.pc src/mapi/glapi/gen/Makefile src/mesa/Makefile src/mesa/gl.pc src/mesa/drivers/dri/dri.pc src/mesa/drivers/dri/common/Makefile src/mesa/drivers/dri/i915/Makefile src/mesa/drivers/dri/i965/Makefile src/mesa/drivers/dri/Makefile src/mesa/drivers/dri/nouveau/Makefile src/mesa/drivers/dri/r200/Makefile src/mesa/drivers/dri/radeon/Makefile src/mesa/drivers/dri/swrast/Makefile src/mesa/drivers/osmesa/Makefile src/mesa/drivers/osmesa/osmesa.pc src/mesa/drivers/x11/Makefile src/mesa/main/tests/Makefile src/mesa/state_tracker/tests/Makefile src/util/Makefile src/util/tests/hash_table/Makefile src/util/tests/string_buffer/Makefile src/util/xmlpool/Makefile src/vulkan/Makefile"
cat >confcache <<\_ACEOF
@@ -28599,8 +29385,8 @@
as_fn_error $? "conditional \"HAVE_ST_VDPAU\" was never defined.
Usually this means the macro was only invoked conditionally." "$LINENO" 5
fi
-if test -z "${HAVE_ST_OMX_TRUE}" && test -z "${HAVE_ST_OMX_FALSE}"; then
- as_fn_error $? "conditional \"HAVE_ST_OMX\" was never defined.
+if test -z "${HAVE_ST_OMX_BELLAGIO_TRUE}" && test -z "${HAVE_ST_OMX_BELLAGIO_FALSE}"; then
+ as_fn_error $? "conditional \"HAVE_ST_OMX_BELLAGIO\" was never defined.
Usually this means the macro was only invoked conditionally." "$LINENO" 5
fi
if test -z "${HAVE_ST_VA_TRUE}" && test -z "${HAVE_ST_VA_FALSE}"; then
@@ -28711,6 +29497,10 @@
as_fn_error $? "conditional \"HAVE_GALLIUM_VC4\" was never defined.
Usually this means the macro was only invoked conditionally." "$LINENO" 5
fi
+if test -z "${HAVE_GALLIUM_VC5_TRUE}" && test -z "${HAVE_GALLIUM_VC5_FALSE}"; then
+ as_fn_error $? "conditional \"HAVE_GALLIUM_VC5\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
if test -z "${HAVE_GALLIUM_VIRGL_TRUE}" && test -z "${HAVE_GALLIUM_VIRGL_FALSE}"; then
as_fn_error $? "conditional \"HAVE_GALLIUM_VIRGL\" was never defined.
Usually this means the macro was only invoked conditionally." "$LINENO" 5
@@ -28755,6 +29545,10 @@
as_fn_error $? "conditional \"HAVE_AMD_DRIVERS\" was never defined.
Usually this means the macro was only invoked conditionally." "$LINENO" 5
fi
+if test -z "${HAVE_BROADCOM_DRIVERS_TRUE}" && test -z "${HAVE_BROADCOM_DRIVERS_FALSE}"; then
+ as_fn_error $? "conditional \"HAVE_BROADCOM_DRIVERS\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
if test -z "${HAVE_INTEL_DRIVERS_TRUE}" && test -z "${HAVE_INTEL_DRIVERS_FALSE}"; then
as_fn_error $? "conditional \"HAVE_INTEL_DRIVERS\" was never defined.
Usually this means the macro was only invoked conditionally." "$LINENO" 5
@@ -28779,6 +29573,10 @@
as_fn_error $? "conditional \"USE_VC4_SIMULATOR\" was never defined.
Usually this means the macro was only invoked conditionally." "$LINENO" 5
fi
+if test -z "${USE_VC5_SIMULATOR_TRUE}" && test -z "${USE_VC5_SIMULATOR_FALSE}"; then
+ as_fn_error $? "conditional \"USE_VC5_SIMULATOR\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
if test -z "${HAVE_LIBDRM_TRUE}" && test -z "${HAVE_LIBDRM_FALSE}"; then
as_fn_error $? "conditional \"HAVE_LIBDRM\" was never defined.
Usually this means the macro was only invoked conditionally." "$LINENO" 5
@@ -29220,7 +30018,7 @@
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by Mesa $as_me 17.2.4, which was
+This file was extended by Mesa $as_me 17.3.3, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -29277,7 +30075,7 @@
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
-Mesa config.status 17.2.4
+Mesa config.status 17.3.3
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"
@@ -29811,12 +30609,13 @@
"src/gallium/drivers/etnaviv/Makefile") CONFIG_FILES="$CONFIG_FILES src/gallium/drivers/etnaviv/Makefile" ;;
"src/gallium/drivers/imx/Makefile") CONFIG_FILES="$CONFIG_FILES src/gallium/drivers/imx/Makefile" ;;
"src/gallium/drivers/vc4/Makefile") CONFIG_FILES="$CONFIG_FILES src/gallium/drivers/vc4/Makefile" ;;
+ "src/gallium/drivers/vc5/Makefile") CONFIG_FILES="$CONFIG_FILES src/gallium/drivers/vc5/Makefile" ;;
"src/gallium/drivers/virgl/Makefile") CONFIG_FILES="$CONFIG_FILES src/gallium/drivers/virgl/Makefile" ;;
"src/gallium/state_trackers/clover/Makefile") CONFIG_FILES="$CONFIG_FILES src/gallium/state_trackers/clover/Makefile" ;;
"src/gallium/state_trackers/dri/Makefile") CONFIG_FILES="$CONFIG_FILES src/gallium/state_trackers/dri/Makefile" ;;
"src/gallium/state_trackers/glx/xlib/Makefile") CONFIG_FILES="$CONFIG_FILES src/gallium/state_trackers/glx/xlib/Makefile" ;;
"src/gallium/state_trackers/nine/Makefile") CONFIG_FILES="$CONFIG_FILES src/gallium/state_trackers/nine/Makefile" ;;
- "src/gallium/state_trackers/omx/Makefile") CONFIG_FILES="$CONFIG_FILES src/gallium/state_trackers/omx/Makefile" ;;
+ "src/gallium/state_trackers/omx_bellagio/Makefile") CONFIG_FILES="$CONFIG_FILES src/gallium/state_trackers/omx_bellagio/Makefile" ;;
"src/gallium/state_trackers/osmesa/Makefile") CONFIG_FILES="$CONFIG_FILES src/gallium/state_trackers/osmesa/Makefile" ;;
"src/gallium/state_trackers/va/Makefile") CONFIG_FILES="$CONFIG_FILES src/gallium/state_trackers/va/Makefile" ;;
"src/gallium/state_trackers/vdpau/Makefile") CONFIG_FILES="$CONFIG_FILES src/gallium/state_trackers/vdpau/Makefile" ;;
@@ -29826,7 +30625,7 @@
"src/gallium/targets/d3dadapter9/d3d.pc") CONFIG_FILES="$CONFIG_FILES src/gallium/targets/d3dadapter9/d3d.pc" ;;
"src/gallium/targets/dri/Makefile") CONFIG_FILES="$CONFIG_FILES src/gallium/targets/dri/Makefile" ;;
"src/gallium/targets/libgl-xlib/Makefile") CONFIG_FILES="$CONFIG_FILES src/gallium/targets/libgl-xlib/Makefile" ;;
- "src/gallium/targets/omx/Makefile") CONFIG_FILES="$CONFIG_FILES src/gallium/targets/omx/Makefile" ;;
+ "src/gallium/targets/omx-bellagio/Makefile") CONFIG_FILES="$CONFIG_FILES src/gallium/targets/omx-bellagio/Makefile" ;;
"src/gallium/targets/opencl/Makefile") CONFIG_FILES="$CONFIG_FILES src/gallium/targets/opencl/Makefile" ;;
"src/gallium/targets/opencl/mesa.icd") CONFIG_FILES="$CONFIG_FILES src/gallium/targets/opencl/mesa.icd" ;;
"src/gallium/targets/osmesa/Makefile") CONFIG_FILES="$CONFIG_FILES src/gallium/targets/osmesa/Makefile" ;;
@@ -29854,6 +30653,7 @@
"src/gallium/winsys/sw/wrapper/Makefile") CONFIG_FILES="$CONFIG_FILES src/gallium/winsys/sw/wrapper/Makefile" ;;
"src/gallium/winsys/sw/xlib/Makefile") CONFIG_FILES="$CONFIG_FILES src/gallium/winsys/sw/xlib/Makefile" ;;
"src/gallium/winsys/vc4/drm/Makefile") CONFIG_FILES="$CONFIG_FILES src/gallium/winsys/vc4/drm/Makefile" ;;
+ "src/gallium/winsys/vc5/drm/Makefile") CONFIG_FILES="$CONFIG_FILES src/gallium/winsys/vc5/drm/Makefile" ;;
"src/gallium/winsys/virgl/drm/Makefile") CONFIG_FILES="$CONFIG_FILES src/gallium/winsys/virgl/drm/Makefile" ;;
"src/gallium/winsys/virgl/vtest/Makefile") CONFIG_FILES="$CONFIG_FILES src/gallium/winsys/virgl/vtest/Makefile" ;;
"src/gbm/Makefile") CONFIG_FILES="$CONFIG_FILES src/gbm/Makefile" ;;
@@ -29874,7 +30674,6 @@
"src/mesa/gl.pc") CONFIG_FILES="$CONFIG_FILES src/mesa/gl.pc" ;;
"src/mesa/drivers/dri/dri.pc") CONFIG_FILES="$CONFIG_FILES src/mesa/drivers/dri/dri.pc" ;;
"src/mesa/drivers/dri/common/Makefile") CONFIG_FILES="$CONFIG_FILES src/mesa/drivers/dri/common/Makefile" ;;
- "src/mesa/drivers/dri/common/xmlpool/Makefile") CONFIG_FILES="$CONFIG_FILES src/mesa/drivers/dri/common/xmlpool/Makefile" ;;
"src/mesa/drivers/dri/i915/Makefile") CONFIG_FILES="$CONFIG_FILES src/mesa/drivers/dri/i915/Makefile" ;;
"src/mesa/drivers/dri/i965/Makefile") CONFIG_FILES="$CONFIG_FILES src/mesa/drivers/dri/i965/Makefile" ;;
"src/mesa/drivers/dri/Makefile") CONFIG_FILES="$CONFIG_FILES src/mesa/drivers/dri/Makefile" ;;
@@ -29886,8 +30685,11 @@
"src/mesa/drivers/osmesa/osmesa.pc") CONFIG_FILES="$CONFIG_FILES src/mesa/drivers/osmesa/osmesa.pc" ;;
"src/mesa/drivers/x11/Makefile") CONFIG_FILES="$CONFIG_FILES src/mesa/drivers/x11/Makefile" ;;
"src/mesa/main/tests/Makefile") CONFIG_FILES="$CONFIG_FILES src/mesa/main/tests/Makefile" ;;
+ "src/mesa/state_tracker/tests/Makefile") CONFIG_FILES="$CONFIG_FILES src/mesa/state_tracker/tests/Makefile" ;;
"src/util/Makefile") CONFIG_FILES="$CONFIG_FILES src/util/Makefile" ;;
"src/util/tests/hash_table/Makefile") CONFIG_FILES="$CONFIG_FILES src/util/tests/hash_table/Makefile" ;;
+ "src/util/tests/string_buffer/Makefile") CONFIG_FILES="$CONFIG_FILES src/util/tests/string_buffer/Makefile" ;;
+ "src/util/xmlpool/Makefile") CONFIG_FILES="$CONFIG_FILES src/util/xmlpool/Makefile" ;;
"src/vulkan/Makefile") CONFIG_FILES="$CONFIG_FILES src/vulkan/Makefile" ;;
*) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;;
@@ -31163,6 +31965,8 @@
# source file
$SED -i -e 's/brw_blorp.cpp/brw_blorp.c/' src/mesa/drivers/dri/i965/.deps/brw_blorp.Plo
+rm -f src/compiler/spirv/spirv_info.lo
+echo "# dummy" > src/compiler/spirv/.deps/spirv_info.Plo
echo ""
echo " prefix: $prefix"
diff -Nru mesa-17.2.4/configure.ac mesa-17.3.3/configure.ac
--- mesa-17.2.4/configure.ac 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/configure.ac 2018-01-18 21:30:28.000000000 +0000
@@ -74,7 +74,7 @@
# in the first entry.
LIBDRM_REQUIRED=2.4.75
LIBDRM_RADEON_REQUIRED=2.4.71
-LIBDRM_AMDGPU_REQUIRED=2.4.82
+LIBDRM_AMDGPU_REQUIRED=2.4.85
LIBDRM_INTEL_REQUIRED=2.4.75
LIBDRM_NVVIEUX_REQUIRED=2.4.66
LIBDRM_NOUVEAU_REQUIRED=2.4.66
@@ -101,7 +101,7 @@
dnl LLVM versions
LLVM_REQUIRED_GALLIUM=3.3.0
-LLVM_REQUIRED_OPENCL=3.6.0
+LLVM_REQUIRED_OPENCL=3.9.0
LLVM_REQUIRED_R600=3.9.0
LLVM_REQUIRED_RADEONSI=3.9.0
LLVM_REQUIRED_RADV=3.9.0
@@ -282,86 +282,51 @@
AM_CONDITIONAL(HAVE_ANDROID, test "x$android" = xyes)
-dnl Add flags for gcc and g++
-if test "x$GCC" = xyes; then
- CFLAGS="$CFLAGS -Wall"
+dnl
+dnl Check compiler flags
+dnl
+AX_CHECK_COMPILE_FLAG([-Wall], [CFLAGS="$CFLAGS -Wall"])
+AX_CHECK_COMPILE_FLAG([-Werror=implicit-function-declaration], [CFLAGS="$CFLAGS -Werror=implicit-function-declaration"])
+AX_CHECK_COMPILE_FLAG([-Werror=missing-prototypes], [CFLAGS="$CFLAGS -Werror=missing-prototypes"])
+AX_CHECK_COMPILE_FLAG([-Wmissing-prototypes], [CFLAGS="$CFLAGS -Wmissing-prototypes"])
+AX_CHECK_COMPILE_FLAG([-fno-math-errno], [CFLAGS="$CFLAGS -fno-math-errno"])
+AX_CHECK_COMPILE_FLAG([-fno-trapping-math], [CFLAGS="$CFLAGS -fno-trapping-math"])
+AX_CHECK_COMPILE_FLAG([-fvisibility=hidden], [VISIBILITY_CFLAGS="-fvisibility=hidden"])
+
+dnl
+dnl Check C++ compiler flags
+dnl
+AC_LANG_PUSH([C++])
+AX_CHECK_COMPILE_FLAG([-Wall], [CXXFLAGS="$CXXFLAGS -Wall"])
+AX_CHECK_COMPILE_FLAG([-fno-math-errno], [CXXFLAGS="$CXXFLAGS -fno-math-errno"])
+AX_CHECK_COMPILE_FLAG([-fno-trapping-math], [CXXFLAGS="$CXXFLAGS -fno-trapping-math"])
+AX_CHECK_COMPILE_FLAG([-fvisibility=hidden], [VISIBILITY_CXXFLAGS="-fvisibility=hidden"])
+AC_LANG_POP([C++])
+
+# Flags to help ensure that certain portions of the code -- and only those
+# portions -- can be built with MSVC:
+# - src/util, src/gallium/auxiliary, src/gallium/drivers/llvmpipe, and
+# - non-Linux/Posix OpenGL portions needs to build on MSVC 2013 (which
+# supports most of C99)
+# - the rest has no compiler restrictions
+dnl Each language gets -Werror=pointer-arith and, when supported, -Werror=vla.
+dnl The second check appends to the variable instead of assigning it, so an
+dnl accepted -Werror=vla does not discard an accepted -Werror=pointer-arith.
+AX_CHECK_COMPILE_FLAG([-Werror=pointer-arith], [MSVC2013_COMPAT_CFLAGS="-Werror=pointer-arith"])
+AX_CHECK_COMPILE_FLAG([-Werror=vla], [MSVC2013_COMPAT_CFLAGS="$MSVC2013_COMPAT_CFLAGS -Werror=vla"])
+AC_LANG_PUSH([C++])
+AX_CHECK_COMPILE_FLAG([-Werror=pointer-arith], [MSVC2013_COMPAT_CXXFLAGS="-Werror=pointer-arith"])
+AX_CHECK_COMPILE_FLAG([-Werror=vla], [MSVC2013_COMPAT_CXXFLAGS="$MSVC2013_COMPAT_CXXFLAGS -Werror=vla"])
+AC_LANG_POP([C++])
+
+AC_SUBST([MSVC2013_COMPAT_CFLAGS])
+AC_SUBST([MSVC2013_COMPAT_CXXFLAGS])
+if test "x$GCC" = xyes; then
if test "x$USE_GNU99" = xyes; then
CFLAGS="$CFLAGS -std=gnu99"
else
CFLAGS="$CFLAGS -std=c99"
fi
-
- # Enable -Werror=implicit-function-declaration and
- # -Werror=missing-prototypes, if available, or otherwise, just
- # -Wmissing-prototypes. This is particularly useful to avoid
- # generating a loadable driver module that has undefined symbols.
- save_CFLAGS="$CFLAGS"
- AC_MSG_CHECKING([whether $CC supports -Werror=missing-prototypes])
- CFLAGS="$CFLAGS -Werror=implicit-function-declaration"
- CFLAGS="$CFLAGS -Werror=missing-prototypes"
- AC_LINK_IFELSE([AC_LANG_PROGRAM()],
- AC_MSG_RESULT([yes]),
- [CFLAGS="$save_CFLAGS -Wmissing-prototypes";
- AC_MSG_RESULT([no])])
-
- # Enable -fvisibility=hidden if using a gcc that supports it
- save_CFLAGS="$CFLAGS"
- AC_MSG_CHECKING([whether $CC supports -fvisibility=hidden])
- VISIBILITY_CFLAGS="-fvisibility=hidden"
- CFLAGS="$CFLAGS $VISIBILITY_CFLAGS"
- AC_LINK_IFELSE([AC_LANG_PROGRAM()], AC_MSG_RESULT([yes]),
- [VISIBILITY_CFLAGS=""; AC_MSG_RESULT([no])])
-
- # Restore CFLAGS; VISIBILITY_CFLAGS are added to it where needed.
- CFLAGS=$save_CFLAGS
-
- # We don't want floating-point math functions to set errno or trap
- CFLAGS="$CFLAGS -fno-math-errno -fno-trapping-math"
-
- # Flags to help ensure that certain portions of the code -- and only those
- # portions -- can be built with MSVC:
- # - src/util, src/gallium/auxiliary, rc/gallium/drivers/llvmpipe, and
- # - non-Linux/Posix OpenGL portions needs to build on MSVC 2013 (which
- # supports most of C99)
- # - the rest has no compiler compiler restrictions
- MSVC2013_COMPAT_CFLAGS="-Werror=pointer-arith"
- MSVC2013_COMPAT_CXXFLAGS="-Werror=pointer-arith"
-
- # Enable -Werror=vla if compiler supports it
- save_CFLAGS="$CFLAGS"
- AC_MSG_CHECKING([whether $CC supports -Werror=vla])
- CFLAGS="$CFLAGS -Werror=vla"
- AC_LINK_IFELSE([AC_LANG_PROGRAM()],
- [MSVC2013_COMPAT_CFLAGS="$MSVC2013_COMPAT_CFLAGS -Werror=vla";
- MSVC2013_COMPAT_CXXFLAGS="$MSVC2013_COMPAT_CXXFLAGS -Werror=vla";
- AC_MSG_RESULT([yes])],
- AC_MSG_RESULT([no]))
- CFLAGS="$save_CFLAGS"
-fi
-if test "x$GXX" = xyes; then
- CXXFLAGS="$CXXFLAGS -Wall"
-
- # Enable -fvisibility=hidden if using a gcc that supports it
- save_CXXFLAGS="$CXXFLAGS"
- AC_MSG_CHECKING([whether $CXX supports -fvisibility=hidden])
- VISIBILITY_CXXFLAGS="-fvisibility=hidden"
- CXXFLAGS="$CXXFLAGS $VISIBILITY_CXXFLAGS"
- AC_LANG_PUSH([C++])
- AC_LINK_IFELSE([AC_LANG_PROGRAM()], AC_MSG_RESULT([yes]),
- [VISIBILITY_CXXFLAGS="" ; AC_MSG_RESULT([no])])
- AC_LANG_POP([C++])
-
- # Restore CXXFLAGS; VISIBILITY_CXXFLAGS are added to it where needed.
- CXXFLAGS=$save_CXXFLAGS
-
- # We don't want floating-point math functions to set errno or trap
- CXXFLAGS="$CXXFLAGS -fno-math-errno -fno-trapping-math"
fi
-AC_SUBST([MSVC2013_COMPAT_CFLAGS])
-AC_SUBST([MSVC2013_COMPAT_CXXFLAGS])
-
dnl even if the compiler appears to support it, using visibility attributes isn't
dnl going to do anything useful currently on cygwin apart from emit lots of warnings
case "$host_os" in
@@ -374,6 +339,10 @@
AC_SUBST([VISIBILITY_CFLAGS])
AC_SUBST([VISIBILITY_CXXFLAGS])
+AX_CHECK_COMPILE_FLAG([-Wno-override-init], [WNO_OVERRIDE_INIT="-Wno-override-init"]) # gcc
+AX_CHECK_COMPILE_FLAG([-Wno-initializer-overrides], [WNO_OVERRIDE_INIT="-Wno-initializer-overrides"]) # clang
+AC_SUBST([WNO_OVERRIDE_INIT])
+
dnl
dnl Optional flags, check for compiler support
dnl
@@ -637,16 +606,7 @@
dnl
AC_MSG_CHECKING([if the linker supports version-scripts])
save_LDFLAGS=$LDFLAGS
-LDFLAGS="$LDFLAGS -Wl,--version-script=conftest.map"
-cat > conftest.map < conftest.dyn <@])],
- [enable_omx="$enableval"],
- [enable_omx=no])
+ [DEPRECATED: Use --enable-omx-bellagio instead @<:@default=auto@:>@])],
+ [AC_MSG_ERROR([--enable-omx is deprecated. Use --enable-omx-bellagio instead.])],
+ [])
+AC_ARG_ENABLE([omx-bellagio],
+ [AS_HELP_STRING([--enable-omx-bellagio],
+ [enable OpenMAX Bellagio library @<:@default=disabled@:>@])],
+ [enable_omx_bellagio="$enableval"],
+ [enable_omx_bellagio=no])
AC_ARG_ENABLE([va],
[AS_HELP_STRING([--enable-va],
[enable va library @<:@default=auto@:>@])],
@@ -1306,7 +1274,7 @@
AC_ARG_WITH([gallium-drivers],
[AS_HELP_STRING([--with-gallium-drivers@<:@=DIRS...@:>@],
[comma delimited Gallium drivers list, e.g.
- "i915,nouveau,r300,r600,radeonsi,freedreno,pl111,svga,swrast,swr,vc4,virgl,etnaviv,imx"
+ "i915,nouveau,r300,r600,radeonsi,freedreno,pl111,svga,swrast,swr,vc4,vc5,virgl,etnaviv,imx"
@<:@default=r300,r600,svga,swrast@:>@])],
[with_gallium_drivers="$withval"],
[with_gallium_drivers="$GALLIUM_DRIVERS_DEFAULT"])
@@ -1325,7 +1293,7 @@
"x$enable_xa" = xno -a \
"x$enable_xvmc" = xno -a \
"x$enable_vdpau" = xno -a \
- "x$enable_omx" = xno -a \
+ "x$enable_omx_bellagio" = xno -a \
"x$enable_va" = xno -a \
"x$enable_opencl" = xno; then
AC_MSG_ERROR([at least one API should be enabled])
@@ -1416,18 +1384,10 @@
AM_CONDITIONAL(USE_LIBGLVND, test "x$enable_libglvnd" = xyes)
if test "x$enable_libglvnd" = xyes ; then
- dnl XXX: update once we can handle more than libGL/glx.
- dnl Namely: we should error out if neither of the glvnd enabled libraries
- dnl are built
case "x$enable_glx" in
- xno)
- AC_MSG_ERROR([cannot build libglvnd without GLX])
- ;;
xxlib | xgallium-xlib )
AC_MSG_ERROR([cannot build libgvnd when Xlib-GLX or Gallium-Xlib-GLX is enabled])
;;
- xdri)
- ;;
esac
PKG_CHECK_MODULES([GLVND], libglvnd >= 0.2.0)
@@ -1436,6 +1396,10 @@
DEFINES="${DEFINES} -DUSE_LIBGLVND=1"
DEFAULT_GL_LIB_NAME=GLX_mesa
+
+ if test "x$enable_glx" = xno -a "x$enable_egl" = xno; then
+ AC_MSG_ERROR([cannot build libglvnd without GLX or EGL])
+ fi
fi
AC_ARG_WITH([gl-lib-name],
@@ -1749,7 +1713,8 @@
case "$plat" in
wayland)
- PKG_CHECK_MODULES([WAYLAND], [wayland-client >= $WAYLAND_REQUIRED wayland-server >= $WAYLAND_REQUIRED])
+ PKG_CHECK_MODULES([WAYLAND_CLIENT], [wayland-client >= $WAYLAND_REQUIRED])
+ PKG_CHECK_MODULES([WAYLAND_SERVER], [wayland-server >= $WAYLAND_REQUIRED])
if test "x$WAYLAND_SCANNER" = "x:"; then
AC_MSG_ERROR([wayland-scanner is needed to compile the wayland platform])
@@ -1757,7 +1722,7 @@
if test "x$have_wayland_protocols" = xno; then
AC_MSG_ERROR([wayland-protocols >= $WAYLAND_PROTOCOLS_REQUIRED is needed to compile the wayland platform])
fi
- DEFINES="$DEFINES -DHAVE_WAYLAND_PLATFORM"
+ DEFINES="$DEFINES -DHAVE_WAYLAND_PLATFORM -DWL_HIDE_DEPRECATED"
;;
x11)
@@ -1847,6 +1812,11 @@
with_dri_drivers=''
fi
+# Check for expat
+PKG_CHECK_MODULES([EXPAT], [expat],,
+ [PKG_CHECK_MODULES([EXPAT], [expat21])]
+)
+
dnl If $with_dri_drivers is yes, drivers will be added through
dnl platform checks. Set DEFINES and LIB_DEPS
if test "x$enable_dri" = xyes; then
@@ -1880,15 +1850,6 @@
with_dri_drivers="i915 i965 nouveau r200 radeon swrast"
fi
- # Check for expat
- PKG_CHECK_MODULES([EXPAT], [expat], [],
- # expat version 2.0 and earlier do not provide expat.pc
- [AC_CHECK_HEADER([expat.h],[],
- [AC_MSG_ERROR([Expat headers required for DRI not found])])
- AC_CHECK_LIB([expat],[XML_ParserCreate],[],
- [AC_MSG_ERROR([Expat library required for DRI not found])])
- EXPAT_LIBS="-lexpat"])
-
# put all the necessary libs together
DRI_LIB_DEPS="$DRI_LIB_DEPS $SELINUX_LIBS $LIBDRM_LIBS $EXPAT_LIBS -lm $PTHREAD_LIBS $DLOPEN_LIBS"
fi
@@ -2174,8 +2135,8 @@
PKG_CHECK_EXISTS([vdpau >= $VDPAU_REQUIRED], [enable_vdpau=yes], [enable_vdpau=no])
fi
- if test "x$enable_omx" = xauto -a "x$have_omx_platform" = xyes; then
- PKG_CHECK_EXISTS([libomxil-bellagio >= $LIBOMXIL_BELLAGIO_REQUIRED], [enable_omx=yes], [enable_omx=no])
+ if test "x$enable_omx_bellagio" = xauto -a "x$have_omx_platform" = xyes; then
+ PKG_CHECK_EXISTS([libomxil-bellagio >= $LIBOMXIL_BELLAGIO_REQUIRED], [enable_omx_bellagio=yes], [enable_omx_bellagio=no])
fi
if test "x$enable_va" = xauto -a "x$have_va_platform" = xyes; then
@@ -2186,7 +2147,7 @@
if test "x$enable_dri" = xyes -o \
"x$enable_xvmc" = xyes -o \
"x$enable_vdpau" = xyes -o \
- "x$enable_omx" = xyes -o \
+ "x$enable_omx_bellagio" = xyes -o \
"x$enable_va" = xyes; then
need_gallium_vl=yes
fi
@@ -2194,9 +2155,11 @@
if test "x$enable_xvmc" = xyes -o \
"x$enable_vdpau" = xyes -o \
- "x$enable_omx" = xyes -o \
+ "x$enable_omx_bellagio" = xyes -o \
"x$enable_va" = xyes; then
- PKG_CHECK_MODULES([VL], [x11-xcb xcb xcb-dri2 >= $XCBDRI2_REQUIRED])
+ if echo $platforms | grep -q "x11"; then
+ PKG_CHECK_MODULES([VL], [x11-xcb xcb xcb-dri2 >= $XCBDRI2_REQUIRED])
+ fi
need_gallium_vl_winsys=yes
fi
AM_CONDITIONAL(NEED_GALLIUM_VL_WINSYS, test "x$need_gallium_vl_winsys" = xyes)
@@ -2220,14 +2183,14 @@
fi
AM_CONDITIONAL(HAVE_ST_VDPAU, test "x$enable_vdpau" = xyes)
-if test "x$enable_omx" = xyes; then
+if test "x$enable_omx_bellagio" = xyes; then
if test "x$have_omx_platform" != xyes; then
AC_MSG_ERROR([OMX requires at least one of the x11 or drm platforms])
fi
- PKG_CHECK_MODULES([OMX], [libomxil-bellagio >= $LIBOMXIL_BELLAGIO_REQUIRED])
- gallium_st="$gallium_st omx"
+ PKG_CHECK_MODULES([OMX_BELLAGIO], [libomxil-bellagio >= $LIBOMXIL_BELLAGIO_REQUIRED])
+ gallium_st="$gallium_st omx_bellagio"
fi
-AM_CONDITIONAL(HAVE_ST_OMX, test "x$enable_omx" = xyes)
+AM_CONDITIONAL(HAVE_ST_OMX_BELLAGIO, test "x$enable_omx_bellagio" = xyes)
if test "x$enable_va" = xyes; then
if test "x$have_va_platform" != xyes; then
@@ -2323,13 +2286,16 @@
llvm_add_default_components "opencl"
llvm_add_component "all-targets" "opencl"
+ llvm_add_component "coverage" "opencl"
llvm_add_component "linker" "opencl"
llvm_add_component "instrumentation" "opencl"
llvm_add_component "ipo" "opencl"
llvm_add_component "irreader" "opencl"
+ llvm_add_component "lto" "opencl"
llvm_add_component "option" "opencl"
llvm_add_component "objcarcopts" "opencl"
llvm_add_component "profiledata" "opencl"
+ llvm_add_optional_component "coroutines" "opencl"
dnl Check for Clang internal headers
if test -z "$CLANG_LIBDIR"; then
@@ -2389,15 +2355,15 @@
[VDPAU_LIB_INSTALL_DIR='${libdir}/vdpau'])
AC_SUBST([VDPAU_LIB_INSTALL_DIR])
-dnl Directory for OMX libs
+dnl Directory for OMX_BELLAGIO libs
-AC_ARG_WITH([omx-libdir],
- [AS_HELP_STRING([--with-omx-libdir=DIR],
- [directory for the OMX libraries])],
- [OMX_LIB_INSTALL_DIR="$withval"],
- [OMX_LIB_INSTALL_DIR=`$PKG_CONFIG --exists libomxil-bellagio && \
- $PKG_CONFIG --define-variable=libdir=\$libdir --variable=pluginsdir libomxil-bellagio`])
-AC_SUBST([OMX_LIB_INSTALL_DIR])
+AC_ARG_WITH([omx-bellagio-libdir],
+ [AS_HELP_STRING([--with-omx-bellagio-libdir=DIR],
+ [directory for the OMX_BELLAGIO libraries])],
+ [OMX_BELLAGIO_LIB_INSTALL_DIR="$withval"],
+ [OMX_BELLAGIO_LIB_INSTALL_DIR=`$PKG_CONFIG --exists libomxil-bellagio && \
+ $PKG_CONFIG --define-variable=libdir=\$libdir --variable=pluginsdir libomxil-bellagio`])
+AC_SUBST([OMX_BELLAGIO_LIB_INSTALL_DIR])
dnl Directory for VA libs
@@ -2567,7 +2533,7 @@
AC_SUBST([SWR_CXX11_CXXFLAGS])
swr_require_cxx_feature_flags "AVX" "defined(__AVX__)" \
- ",-mavx,-march=core-avx" \
+ ",-target-cpu=sandybridge,-mavx,-march=core-avx,-tp=sandybridge" \
SWR_AVX_CXXFLAGS
AC_SUBST([SWR_AVX_CXXFLAGS])
@@ -2579,21 +2545,21 @@
;;
xavx2)
swr_require_cxx_feature_flags "AVX2" "defined(__AVX2__)" \
- ",-mavx2 -mfma -mbmi2 -mf16c,-march=core-avx2" \
+ ",-target-cpu=haswell,-mavx2 -mfma -mbmi2 -mf16c,-march=core-avx2,-tp=haswell" \
SWR_AVX2_CXXFLAGS
AC_SUBST([SWR_AVX2_CXXFLAGS])
HAVE_SWR_AVX2=yes
;;
xknl)
swr_require_cxx_feature_flags "KNL" "defined(__AVX512F__) && defined(__AVX512ER__)" \
- ",-march=knl,-xMIC-AVX512" \
+ ",-target-cpu=mic-knl,-march=knl,-xMIC-AVX512" \
SWR_KNL_CXXFLAGS
AC_SUBST([SWR_KNL_CXXFLAGS])
HAVE_SWR_KNL=yes
;;
xskx)
swr_require_cxx_feature_flags "SKX" "defined(__AVX512F__) && defined(__AVX512BW__)" \
- ",-march=skylake-avx512,-xCORE-AVX512" \
+ ",-target-cpu=x86-skylake,-march=skylake-avx512,-xCORE-AVX512" \
SWR_SKX_CXXFLAGS
AC_SUBST([SWR_SKX_CXXFLAGS])
HAVE_SWR_SKX=yes
@@ -2622,6 +2588,14 @@
DEFINES="$DEFINES -DUSE_VC4_SIMULATOR"],
[USE_VC4_SIMULATOR=no])
;;
+ xvc5)
+ HAVE_GALLIUM_VC5=yes
+
+ PKG_CHECK_MODULES([VC5_SIMULATOR], [v3dv3],
+ [USE_VC5_SIMULATOR=yes;
+ DEFINES="$DEFINES -DUSE_VC5_SIMULATOR"],
+ [AC_MSG_ERROR([vc5 requires the simulator])])
+ ;;
xpl111)
HAVE_GALLIUM_PL111=yes
;;
@@ -2663,6 +2637,39 @@
AC_MSG_ERROR([Building with pl111 requires vc4])
fi
+
+detect_old_buggy_llvm() {
+ dnl llvm-config may not give the right answer when llvm is built as a
+ dnl single shared library, so we must work the library name out for
+ dnl ourselves.
+ dnl (See https://llvm.org/bugs/show_bug.cgi?id=6823)
+ dnl We can't use $LLVM_VERSION because it has 'svn' stripped out.
+ LLVM_SO_NAME=LLVM-`$LLVM_CONFIG --version`
+ AS_IF([test -f "$LLVM_LIBDIR/lib$LLVM_SO_NAME.$IMP_LIB_EXT"], [llvm_have_one_so=yes])
+
+ if test "x$llvm_have_one_so" = xyes; then
+ dnl LLVM was built using auto*, so there is only one shared object.
+ LLVM_LIBS="-l$LLVM_SO_NAME"
+ else
+ dnl If LLVM was built with CMake, there will be one shared object per
+ dnl component.
+ AS_IF([test ! -f "$LLVM_LIBDIR/libLLVMTarget.$IMP_LIB_EXT"],
+ [AC_MSG_ERROR([Could not find llvm shared libraries:
+ Please make sure you have built llvm with the --enable-shared option
+ and that your llvm libraries are installed in $LLVM_LIBDIR
+ If you have installed your llvm libraries to a different directory you
+ can use the --with-llvm-prefix= configure flag to specify this directory.
+ NOTE: Mesa is attempting to use llvm shared libraries by default.
+ If you do not want to build with llvm shared libraries and instead want to
+ use llvm static libraries then add --disable-llvm-shared-libs to your configure
+ invocation and rebuild.])])
+
+ dnl We don't need to update LLVM_LIBS in this case because the LLVM
+ dnl install uses a shared object for each component and we have
+ dnl already added all of these objects to LLVM_LIBS.
+ fi
+}
+
dnl
dnl Set defines and buildtime variables only when using LLVM.
dnl
@@ -2680,47 +2687,28 @@
dnl this was causing the same libraries to be appear multiple times
dnl in LLVM_LIBS.
- if ! $LLVM_CONFIG --libs ${LLVM_COMPONENTS} >/dev/null; then
- AC_MSG_ERROR([Calling ${LLVM_CONFIG} failed])
- fi
- LLVM_LIBS="`$LLVM_CONFIG --libs ${LLVM_COMPONENTS}`"
-
- dnl llvm-config may not give the right answer when llvm is a built as a
- dnl single shared library, so we must work the library name out for
- dnl ourselves.
- dnl (See https://llvm.org/bugs/show_bug.cgi?id=6823)
- if test "x$enable_llvm_shared_libs" = xyes; then
- dnl We can't use $LLVM_VERSION because it has 'svn' stripped out,
- LLVM_SO_NAME=LLVM-`$LLVM_CONFIG --version`
- AS_IF([test -f "$LLVM_LIBDIR/lib$LLVM_SO_NAME.$IMP_LIB_EXT"], [llvm_have_one_so=yes])
-
- if test "x$llvm_have_one_so" = xyes; then
- dnl LLVM was built using auto*, so there is only one shared object.
- LLVM_LIBS="-l$LLVM_SO_NAME"
+ if test $LLVM_VERSION_MAJOR -ge 4 -o $LLVM_VERSION_MAJOR -eq 3 -a $LLVM_VERSION_MINOR -ge 9; then
+ if test "x$enable_llvm_shared_libs" = xyes; then
+ LLVM_LIBS="`$LLVM_CONFIG --link-shared --libs ${LLVM_COMPONENTS}`"
else
- dnl If LLVM was built with CMake, there will be one shared object per
- dnl component.
- AS_IF([test ! -f "$LLVM_LIBDIR/libLLVMTarget.$IMP_LIB_EXT"],
- [AC_MSG_ERROR([Could not find llvm shared libraries:
- Please make sure you have built llvm with the --enable-shared option
- and that your llvm libraries are installed in $LLVM_LIBDIR
- If you have installed your llvm libraries to a different directory you
- can use the --with-llvm-prefix= configure flag to specify this directory.
- NOTE: Mesa is attempting to use llvm shared libraries by default.
- If you do not want to build with llvm shared libraries and instead want to
- use llvm static libraries then add --disable-llvm-shared-libs to your configure
- invocation and rebuild.])])
-
- dnl We don't need to update LLVM_LIBS in this case because the LLVM
- dnl install uses a shared object for each component and we have
- dnl already added all of these objects to LLVM_LIBS.
+ dnl Invoking llvm-config with both --libs and --system-libs produces
+ dnl two separate lines, one for each set of libraries.
+ dnl Call the program twice, effectively folding them into a single line.
+ LLVM_LIBS="`$LLVM_CONFIG --link-static --libs ${LLVM_COMPONENTS}`"
+ dnl We need to link to llvm system libs when using static libs
+ LLVM_LIBS="$LLVM_LIBS `$LLVM_CONFIG --link-static --system-libs`"
fi
else
- AC_MSG_WARN([Building mesa with statically linked LLVM may cause compilation issues])
- dnl We need to link to llvm system libs when using static libs
- dnl However, only llvm 3.5+ provides --system-libs
- if test $LLVM_VERSION_MAJOR -ge 4 -o $LLVM_VERSION_MAJOR -eq 3 -a $LLVM_VERSION_MINOR -ge 5; then
- LLVM_LIBS="$LLVM_LIBS `$LLVM_CONFIG --system-libs`"
+ LLVM_LIBS="`$LLVM_CONFIG --libs ${LLVM_COMPONENTS}`"
+ if test "x$enable_llvm_shared_libs" = xyes; then
+ detect_old_buggy_llvm
+ else
+ AC_MSG_WARN([Building mesa with statically linked LLVM may cause compilation issues])
+ dnl We need to link to llvm system libs when using static libs
+ dnl However, only llvm 3.5+ provides --system-libs
+ if test $LLVM_VERSION_MAJOR -ge 4 -o $LLVM_VERSION_MAJOR -eq 3 -a $LLVM_VERSION_MINOR -ge 5; then
+ LLVM_LIBS="$LLVM_LIBS `$LLVM_CONFIG --system-libs`"
+ fi
fi
fi
fi
@@ -2731,8 +2719,7 @@
AM_CONDITIONAL(HAVE_GALLIUM_R300, test "x$HAVE_GALLIUM_R300" = xyes)
AM_CONDITIONAL(HAVE_GALLIUM_R600, test "x$HAVE_GALLIUM_R600" = xyes)
AM_CONDITIONAL(HAVE_GALLIUM_RADEONSI, test "x$HAVE_GALLIUM_RADEONSI" = xyes)
-AM_CONDITIONAL(HAVE_GALLIUM_RADEON_COMMON, test "x$HAVE_GALLIUM_R600" = xyes -o \
- "x$HAVE_GALLIUM_RADEONSI" = xyes)
+AM_CONDITIONAL(HAVE_GALLIUM_RADEON_COMMON, test "x$HAVE_GALLIUM_RADEONSI" = xyes)
AM_CONDITIONAL(HAVE_GALLIUM_NOUVEAU, test "x$HAVE_GALLIUM_NOUVEAU" = xyes)
AM_CONDITIONAL(HAVE_GALLIUM_FREEDRENO, test "x$HAVE_GALLIUM_FREEDRENO" = xyes)
AM_CONDITIONAL(HAVE_GALLIUM_ETNAVIV, test "x$HAVE_GALLIUM_ETNAVIV" = xyes)
@@ -2744,6 +2731,7 @@
"x$HAVE_GALLIUM_LLVMPIPE" = xyes -o \
"x$HAVE_GALLIUM_SWR" = xyes)
AM_CONDITIONAL(HAVE_GALLIUM_VC4, test "x$HAVE_GALLIUM_VC4" = xyes)
+AM_CONDITIONAL(HAVE_GALLIUM_VC5, test "x$HAVE_GALLIUM_VC5" = xyes)
AM_CONDITIONAL(HAVE_GALLIUM_VIRGL, test "x$HAVE_GALLIUM_VIRGL" = xyes)
AM_CONDITIONAL(HAVE_GALLIUM_STATIC_TARGETS, test "x$enable_shared_pipe_drivers" = xno)
@@ -2770,6 +2758,9 @@
AM_CONDITIONAL(HAVE_AMD_DRIVERS, test "x$HAVE_GALLIUM_RADEONSI" = xyes -o \
"x$HAVE_RADEON_VULKAN" = xyes)
+AM_CONDITIONAL(HAVE_BROADCOM_DRIVERS, test "x$HAVE_GALLIUM_VC4" = xyes -o \
+ "x$HAVE_GALLIUM_VC5" = xyes)
+
AM_CONDITIONAL(HAVE_INTEL_DRIVERS, test "x$HAVE_INTEL_VULKAN" = xyes -o \
"x$HAVE_I965_DRI" = xyes)
@@ -2780,6 +2771,7 @@
AM_CONDITIONAL(HAVE_GALLIUM_COMPUTE, test x$enable_opencl = xyes)
AM_CONDITIONAL(HAVE_GALLIUM_LLVM, test "x$enable_llvm" = xyes)
AM_CONDITIONAL(USE_VC4_SIMULATOR, test x$USE_VC4_SIMULATOR = xyes)
+AM_CONDITIONAL(USE_VC5_SIMULATOR, test x$USE_VC5_SIMULATOR = xyes)
AM_CONDITIONAL(HAVE_LIBDRM, test "x$have_libdrm" = xyes)
AM_CONDITIONAL(HAVE_OSMESA, test "x$enable_osmesa" = xyes)
@@ -2893,12 +2885,13 @@
src/gallium/drivers/etnaviv/Makefile
src/gallium/drivers/imx/Makefile
src/gallium/drivers/vc4/Makefile
+ src/gallium/drivers/vc5/Makefile
src/gallium/drivers/virgl/Makefile
src/gallium/state_trackers/clover/Makefile
src/gallium/state_trackers/dri/Makefile
src/gallium/state_trackers/glx/xlib/Makefile
src/gallium/state_trackers/nine/Makefile
- src/gallium/state_trackers/omx/Makefile
+ src/gallium/state_trackers/omx_bellagio/Makefile
src/gallium/state_trackers/osmesa/Makefile
src/gallium/state_trackers/va/Makefile
src/gallium/state_trackers/vdpau/Makefile
@@ -2908,7 +2901,7 @@
src/gallium/targets/d3dadapter9/d3d.pc
src/gallium/targets/dri/Makefile
src/gallium/targets/libgl-xlib/Makefile
- src/gallium/targets/omx/Makefile
+ src/gallium/targets/omx-bellagio/Makefile
src/gallium/targets/opencl/Makefile
src/gallium/targets/opencl/mesa.icd
src/gallium/targets/osmesa/Makefile
@@ -2936,6 +2929,7 @@
src/gallium/winsys/sw/wrapper/Makefile
src/gallium/winsys/sw/xlib/Makefile
src/gallium/winsys/vc4/drm/Makefile
+ src/gallium/winsys/vc5/drm/Makefile
src/gallium/winsys/virgl/drm/Makefile
src/gallium/winsys/virgl/vtest/Makefile
src/gbm/Makefile
@@ -2956,7 +2950,6 @@
src/mesa/gl.pc
src/mesa/drivers/dri/dri.pc
src/mesa/drivers/dri/common/Makefile
- src/mesa/drivers/dri/common/xmlpool/Makefile
src/mesa/drivers/dri/i915/Makefile
src/mesa/drivers/dri/i965/Makefile
src/mesa/drivers/dri/Makefile
@@ -2968,8 +2961,11 @@
src/mesa/drivers/osmesa/osmesa.pc
src/mesa/drivers/x11/Makefile
src/mesa/main/tests/Makefile
+ src/mesa/state_tracker/tests/Makefile
src/util/Makefile
src/util/tests/hash_table/Makefile
+ src/util/tests/string_buffer/Makefile
+ src/util/xmlpool/Makefile
src/vulkan/Makefile])
AC_OUTPUT
@@ -2978,6 +2974,8 @@
# source file
$SED -i -e 's/brw_blorp.cpp/brw_blorp.c/' src/mesa/drivers/dri/i965/.deps/brw_blorp.Plo
+rm -f src/compiler/spirv/spirv_info.lo
+echo "# dummy" > src/compiler/spirv/.deps/spirv_info.Plo
dnl
dnl Output some configuration info for the user
diff -Nru mesa-17.2.4/debian/changelog mesa-17.3.3/debian/changelog
--- mesa-17.2.4/debian/changelog 2018-02-01 16:17:31.000000000 +0000
+++ mesa-17.3.3/debian/changelog 2018-02-01 16:17:32.000000000 +0000
@@ -1,3 +1,12 @@
+mesa (17.3.3-0ubuntu1) bionic; urgency=medium
+
+ * New upstream release.
+ * patches: Drop upstreamed patch, refresh egl-platform-mir.patch.
+ * rules: Fix disabling omx.
+ * libgbm1.symbols: Updated.
+
+ -- Timo Aaltonen Fri, 26 Jan 2018 14:45:02 +0200
+
mesa (17.2.4-0ubuntu2) bionic; urgency=medium
* Import changes from 17.2.2-0ubuntu2
diff -Nru mesa-17.2.4/debian/libgbm1.symbols mesa-17.3.3/debian/libgbm1.symbols
--- mesa-17.2.4/debian/libgbm1.symbols 2018-02-01 16:17:31.000000000 +0000
+++ mesa-17.3.3/debian/libgbm1.symbols 2018-02-01 16:17:32.000000000 +0000
@@ -3,6 +3,7 @@
gbm_bo_create@Base 7.11~1
gbm_bo_create_with_modifiers@Base 17.1.0~rc2
gbm_bo_destroy@Base 7.11~1
+ gbm_bo_get_bpp@Base 17.3.0~rc1
gbm_bo_get_device@Base 8.1~0
gbm_bo_get_fd@Base 10.2~0
gbm_bo_get_format@Base 8.1~0
@@ -25,6 +26,7 @@
gbm_device_destroy@Base 7.11~1
gbm_device_get_backend_name@Base 7.11~1
gbm_device_get_fd@Base 7.11~1
+ gbm_device_get_format_modifier_plane_count@Base 17.3.0~rc1
gbm_device_is_format_supported@Base 8.1~0
gbm_surface_create@Base 8.1~0
gbm_surface_create_with_modifiers@Base 17.1.0~rc2
diff -Nru mesa-17.2.4/debian/patches/egl-platform-mir.patch mesa-17.3.3/debian/patches/egl-platform-mir.patch
--- mesa-17.2.4/debian/patches/egl-platform-mir.patch 2018-02-01 16:17:31.000000000 +0000
+++ mesa-17.3.3/debian/patches/egl-platform-mir.patch 2018-02-01 16:17:32.000000000 +0000
@@ -1,8 +1,8 @@
Index: mesa/configure.ac
===================================================================
---- mesa.orig/configure.ac 2017-08-10 16:37:31.970030645 +1000
-+++ mesa/configure.ac 2017-08-10 17:18:19.000000000 +1000
-@@ -1723,7 +1723,9 @@
+--- mesa.orig/configure.ac 2018-01-12 09:56:39.850593962 +1100
++++ mesa/configure.ac 2018-01-12 10:34:15.000000000 +1100
+@@ -1744,7 +1744,9 @@
PKG_CHECK_MODULES([ANDROID], [cutils hardware sync])
DEFINES="$DEFINES -DHAVE_ANDROID_PLATFORM"
;;
@@ -13,7 +13,7 @@
*)
AC_MSG_ERROR([platform '$plat' does not exist])
;;
-@@ -1754,6 +1756,7 @@
+@@ -1775,6 +1777,7 @@
AM_CONDITIONAL(HAVE_PLATFORM_DRM, echo "$platforms" | grep -q 'drm')
AM_CONDITIONAL(HAVE_PLATFORM_SURFACELESS, echo "$platforms" | grep -q 'surfaceless')
AM_CONDITIONAL(HAVE_PLATFORM_ANDROID, echo "$platforms" | grep -q 'android')
@@ -23,9 +23,9 @@
dnl More DRI setup
Index: mesa/include/EGL/eglplatform.h
===================================================================
---- mesa.orig/include/EGL/eglplatform.h 2017-08-10 16:37:31.970030645 +1000
-+++ mesa/include/EGL/eglplatform.h 2017-08-10 17:18:19.000000000 +1000
-@@ -105,6 +105,13 @@
+--- mesa.orig/include/EGL/eglplatform.h 2018-01-12 09:56:39.850593962 +1100
++++ mesa/include/EGL/eglplatform.h 2018-01-12 10:34:15.000000000 +1100
+@@ -104,6 +104,13 @@
typedef struct egl_native_pixmap_t* EGLNativePixmapType;
typedef void* EGLNativeDisplayType;
@@ -41,9 +41,9 @@
#if defined(MESA_EGL_NO_X11_HEADERS)
Index: mesa/include/GL/internal/dri_interface.h
===================================================================
---- mesa.orig/include/GL/internal/dri_interface.h 2017-08-10 16:37:31.970030645 +1000
-+++ mesa/include/GL/internal/dri_interface.h 2017-08-10 16:37:31.966030819 +1000
-@@ -964,10 +964,12 @@
+--- mesa.orig/include/GL/internal/dri_interface.h 2018-01-12 09:56:39.850593962 +1100
++++ mesa/include/GL/internal/dri_interface.h 2018-01-12 09:56:39.842594054 +1100
+@@ -981,10 +981,12 @@
unsigned int pitch;
unsigned int cpp;
unsigned int flags;
@@ -52,16 +52,16 @@
};
#define __DRI_DRI2_LOADER "DRI_DRI2Loader"
--#define __DRI_DRI2_LOADER_VERSION 3
-+#define __DRI_DRI2_LOADER_VERSION 4
- struct __DRIdri2LoaderExtensionRec {
- __DRIextension base;
+-#define __DRI_DRI2_LOADER_VERSION 4
++#define __DRI_DRI2_LOADER_VERSION 5
+ enum dri_loader_cap {
+ /* Whether the loader handles RGBA channel ordering correctly. If not,
Index: mesa/src/egl/drivers/dri2/egl_dri2.c
===================================================================
---- mesa.orig/src/egl/drivers/dri2/egl_dri2.c 2017-08-10 16:37:31.970030645 +1000
-+++ mesa/src/egl/drivers/dri2/egl_dri2.c 2017-08-10 17:18:19.000000000 +1000
-@@ -858,6 +858,11 @@
+--- mesa.orig/src/egl/drivers/dri2/egl_dri2.c 2018-01-12 09:56:39.850593962 +1100
++++ mesa/src/egl/drivers/dri2/egl_dri2.c 2018-01-12 10:34:15.000000000 +1100
+@@ -929,6 +929,11 @@
ret = dri2_initialize_wayland(drv, disp);
break;
#endif
@@ -73,7 +73,7 @@
#ifdef HAVE_ANDROID_PLATFORM
case _EGL_PLATFORM_ANDROID:
ret = dri2_initialize_android(drv, disp);
-@@ -936,6 +941,13 @@
+@@ -1006,6 +1011,13 @@
}
break;
#endif
@@ -87,7 +87,7 @@
#ifdef HAVE_WAYLAND_PLATFORM
case _EGL_PLATFORM_WAYLAND:
if (dri2_dpy->wl_drm)
-@@ -963,7 +975,8 @@
+@@ -1036,7 +1048,8 @@
* the ones from the gbm device. As such the gbm itself is responsible
* for the cleanup.
*/
@@ -99,8 +99,8 @@
free(dri2_dpy->driver_configs);
Index: mesa/src/egl/drivers/dri2/egl_dri2.h
===================================================================
---- mesa.orig/src/egl/drivers/dri2/egl_dri2.h 2017-08-10 16:37:31.970030645 +1000
-+++ mesa/src/egl/drivers/dri2/egl_dri2.h 2017-08-10 17:18:19.000000000 +1000
+--- mesa.orig/src/egl/drivers/dri2/egl_dri2.h 2018-01-12 09:56:39.850593962 +1100
++++ mesa/src/egl/drivers/dri2/egl_dri2.h 2018-01-12 10:34:15.000000000 +1100
@@ -65,6 +65,10 @@
#endif /* HAVE_ANDROID_PLATFORM */
@@ -112,7 +112,7 @@
#include "eglconfig.h"
#include "eglcontext.h"
#include "egldisplay.h"
-@@ -234,6 +238,9 @@
+@@ -227,6 +231,9 @@
bool is_render_node;
bool is_different_gpu;
@@ -122,16 +122,16 @@
};
struct dri2_egl_context
-@@ -283,7 +290,7 @@
- struct gbm_dri_surface *gbm_surf;
- #endif
+@@ -279,7 +286,7 @@
+ /* EGL-owned buffers */
+ __DRIbuffer *local_buffers[__DRI_BUFFER_COUNT];
-#if defined(HAVE_WAYLAND_PLATFORM) || defined(HAVE_DRM_PLATFORM)
+#if defined(HAVE_WAYLAND_PLATFORM) || defined(HAVE_DRM_PLATFORM) || defined(HAVE_MIR_PLATFORM)
- __DRIbuffer *dri_buffers[__DRI_BUFFER_COUNT];
struct {
#ifdef HAVE_WAYLAND_PLATFORM
-@@ -295,9 +302,13 @@
+ struct wl_buffer *wl_buffer;
+@@ -290,9 +297,13 @@
void *data;
int data_size;
#endif
@@ -146,10 +146,10 @@
bool locked;
int age;
} color_buffers[4], *back, *current;
-@@ -326,6 +337,10 @@
- __DRIimage *front;
- unsigned int visual;
+@@ -320,6 +331,10 @@
#endif
+ int out_fence_fd;
+ EGLBoolean enable_out_fence;
+
+#ifdef HAVE_MIR_PLATFORM
+ MirMesaEGLNativeSurface *mir_surf;
@@ -157,7 +157,7 @@
};
struct dri2_egl_config
-@@ -417,6 +432,9 @@
+@@ -414,6 +429,9 @@
EGLBoolean
dri2_initialize_surfaceless(_EGLDriver *drv, _EGLDisplay *disp);
@@ -170,7 +170,7 @@
Index: mesa/src/egl/drivers/dri2/platform_mir.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
-+++ mesa/src/egl/drivers/dri2/platform_mir.c 2017-08-10 17:20:55.133969688 +1000
++++ mesa/src/egl/drivers/dri2/platform_mir.c 2018-01-12 10:32:15.670526441 +1100
@@ -0,0 +1,633 @@
+/*
+ * Copyright © 2012 Canonical, Inc
@@ -227,29 +227,29 @@
+ assert(attachments[i] < __DRI_BUFFER_COUNT);
+ assert((i/2) < ARRAY_SIZE(dri2_surf->buffers));
+
-+ if (dri2_surf->dri_buffers[attachments[i]] == NULL) {
++ if (dri2_surf->local_buffers[attachments[i]] == NULL) {
+ /* Our frame callback must keep these buffers valid */
+ assert(attachments[i] != __DRI_BUFFER_FRONT_LEFT);
+ assert(attachments[i] != __DRI_BUFFER_BACK_LEFT);
+
-+ dri2_surf->dri_buffers[attachments[i]] =
++ dri2_surf->local_buffers[attachments[i]] =
+ dri2_dpy->dri2->allocateBuffer(dri2_dpy->dri_screen,
+ attachments[i], attachments[i+1],
+ dri2_surf->base.Width, dri2_surf->base.Height);
+
-+ if (!dri2_surf->dri_buffers[attachments[i]]) {
++ if (!dri2_surf->local_buffers[attachments[i]]) {
+ _eglError(EGL_BAD_ALLOC, "failed to allocate auxiliary buffer");
+ return NULL;
+ }
+ }
+
+ memcpy(&dri2_surf->buffers[i/2],
-+ dri2_surf->dri_buffers[attachments[i]],
++ dri2_surf->local_buffers[attachments[i]],
+ sizeof(__DRIbuffer));
+ }
+
+ assert(dri2_surf->base.Type == EGL_PIXMAP_BIT ||
-+ dri2_surf->dri_buffers[__DRI_BUFFER_BACK_LEFT]);
++ dri2_surf->local_buffers[__DRI_BUFFER_BACK_LEFT]);
+
+ *out_count = i/2;
+ if (i == 0)
@@ -479,9 +479,9 @@
+
+ dri2_surf->back = &dri2_surf->color_buffers[buf_slot];
+ dri2_surf->back->buffer_age = buffer_package.age;
-+ dri2_surf->dri_buffers[__DRI_BUFFER_BACK_LEFT]->name = 0;
-+ dri2_surf->dri_buffers[__DRI_BUFFER_BACK_LEFT]->fd = buffer_package.fd[0];
-+ dri2_surf->dri_buffers[__DRI_BUFFER_BACK_LEFT]->pitch = buffer_package.stride;
++ dri2_surf->local_buffers[__DRI_BUFFER_BACK_LEFT]->name = 0;
++ dri2_surf->local_buffers[__DRI_BUFFER_BACK_LEFT]->fd = buffer_package.fd[0];
++ dri2_surf->local_buffers[__DRI_BUFFER_BACK_LEFT]->pitch = buffer_package.stride;
+ return EGL_TRUE;
+}
+
@@ -517,15 +517,15 @@
+ dri2_surf->base.Width = win_params.width;
+ dri2_surf->base.Height = win_params.height;
+
-+ dri2_surf->dri_buffers[__DRI_BUFFER_FRONT_LEFT] =
-+ calloc(sizeof(*dri2_surf->dri_buffers[0]), 1);
-+ dri2_surf->dri_buffers[__DRI_BUFFER_BACK_LEFT] =
-+ calloc(sizeof(*dri2_surf->dri_buffers[0]), 1);
++ dri2_surf->local_buffers[__DRI_BUFFER_FRONT_LEFT] =
++ calloc(sizeof(*dri2_surf->local_buffers[0]), 1);
++ dri2_surf->local_buffers[__DRI_BUFFER_BACK_LEFT] =
++ calloc(sizeof(*dri2_surf->local_buffers[0]), 1);
+
-+ dri2_surf->dri_buffers[__DRI_BUFFER_BACK_LEFT]->attachment =
++ dri2_surf->local_buffers[__DRI_BUFFER_BACK_LEFT]->attachment =
+ __DRI_BUFFER_BACK_LEFT;
+ /* We only do ARGB 8888 for the moment */
-+ dri2_surf->dri_buffers[__DRI_BUFFER_BACK_LEFT]->cpp = 4;
++ dri2_surf->local_buffers[__DRI_BUFFER_BACK_LEFT]->cpp = 4;
+
+ clear_cached_buffers(dri2_surf);
+
@@ -596,14 +596,14 @@
+ (*dri2_dpy->core->destroyDrawable)(dri2_surf->dri_drawable);
+
+ for (i = 0; i < __DRI_BUFFER_COUNT; ++i) {
-+ if (dri2_surf->dri_buffers[i]) {
++ if (dri2_surf->local_buffers[i]) {
+ if ((i == __DRI_BUFFER_FRONT_LEFT) ||
+ (i == __DRI_BUFFER_BACK_LEFT)) {
-+ free(dri2_surf->dri_buffers[i]);
++ free(dri2_surf->local_buffers[i]);
+ }
+ else {
+ dri2_dpy->dri2->releaseBuffer(dri2_dpy->dri_screen,
-+ dri2_surf->dri_buffers[i]);
++ dri2_surf->local_buffers[i]);
+ }
+ }
+ }
@@ -807,8 +807,8 @@
+}
Index: mesa/src/egl/main/egldisplay.c
===================================================================
---- mesa.orig/src/egl/main/egldisplay.c 2017-08-10 16:37:31.970030645 +1000
-+++ mesa/src/egl/main/egldisplay.c 2017-08-10 17:18:19.000000000 +1000
+--- mesa.orig/src/egl/main/egldisplay.c 2018-01-12 09:56:39.850593962 +1100
++++ mesa/src/egl/main/egldisplay.c 2018-01-12 10:34:15.000000000 +1100
@@ -55,7 +55,10 @@
#ifdef HAVE_DRM_PLATFORM
#include
@@ -907,8 +907,8 @@
if (detected_platform == _EGL_INVALID_PLATFORM) {
Index: mesa/src/egl/main/egldisplay.h
===================================================================
---- mesa.orig/src/egl/main/egldisplay.h 2017-08-10 16:37:31.970030645 +1000
-+++ mesa/src/egl/main/egldisplay.h 2017-08-10 17:18:19.000000000 +1000
+--- mesa.orig/src/egl/main/egldisplay.h 2018-01-12 09:56:39.850593962 +1100
++++ mesa/src/egl/main/egldisplay.h 2018-01-12 10:34:15.000000000 +1100
@@ -50,6 +50,7 @@
_EGL_PLATFORM_ANDROID,
_EGL_PLATFORM_HAIKU,
@@ -919,9 +919,9 @@
_EGL_INVALID_PLATFORM = -1
Index: mesa/src/gallium/state_trackers/dri/dri2.c
===================================================================
---- mesa.orig/src/gallium/state_trackers/dri/dri2.c 2017-08-10 16:37:31.970030645 +1000
-+++ mesa/src/gallium/state_trackers/dri/dri2.c 2017-08-10 16:37:31.970030645 +1000
-@@ -707,13 +707,21 @@
+--- mesa.orig/src/gallium/state_trackers/dri/dri2.c 2018-01-12 09:56:39.850593962 +1100
++++ mesa/src/gallium/state_trackers/dri/dri2.c 2018-01-12 09:56:39.846594007 +1100
+@@ -719,13 +719,21 @@
templ.height0 = dri_drawable->h;
templ.format = format;
templ.bind = bind;
@@ -949,9 +949,9 @@
&templ, &whandle,
Index: mesa/src/gbm/backends/dri/gbm_dri.c
===================================================================
---- mesa.orig/src/gbm/backends/dri/gbm_dri.c 2017-08-10 16:37:31.970030645 +1000
-+++ mesa/src/gbm/backends/dri/gbm_dri.c 2017-08-10 16:37:31.970030645 +1000
-@@ -214,7 +214,7 @@
+--- mesa.orig/src/gbm/backends/dri/gbm_dri.c 2018-01-12 09:56:39.850593962 +1100
++++ mesa/src/gbm/backends/dri/gbm_dri.c 2018-01-12 09:56:39.846594007 +1100
+@@ -215,7 +215,7 @@
};
static const __DRIdri2LoaderExtension dri2_loader_extension = {
@@ -962,9 +962,9 @@
.flushFrontBuffer = dri_flush_front_buffer,
Index: mesa/src/egl/Makefile.am
===================================================================
---- mesa.orig/src/egl/Makefile.am 2017-08-10 16:37:31.970030645 +1000
-+++ mesa/src/egl/Makefile.am 2017-08-10 17:18:19.000000000 +1000
-@@ -104,6 +104,12 @@
+--- mesa.orig/src/egl/Makefile.am 2018-01-12 09:56:39.850593962 +1100
++++ mesa/src/egl/Makefile.am 2018-01-12 10:34:15.000000000 +1100
+@@ -108,6 +108,12 @@
dri2_backend_FILES += drivers/dri2/platform_android.c
endif
diff -Nru mesa-17.2.4/debian/patches/egl-platform-rs.patch mesa-17.3.3/debian/patches/egl-platform-rs.patch
--- mesa-17.2.4/debian/patches/egl-platform-rs.patch 2018-02-01 16:17:31.000000000 +0000
+++ mesa-17.3.3/debian/patches/egl-platform-rs.patch 2018-02-01 16:17:32.000000000 +0000
@@ -1,7 +1,7 @@
Index: mesa/configure.ac
===================================================================
---- mesa.orig/configure.ac 2017-08-10 16:08:00.456139559 +1000
-+++ mesa/configure.ac 2017-08-10 16:08:00.452139468 +1000
+--- mesa.orig/configure.ac 2018-01-12 10:34:19.529583858 +1100
++++ mesa/configure.ac 2018-01-12 10:34:19.525583889 +1100
@@ -98,6 +98,7 @@
PYTHON_MAKO_REQUIRED=0.8.0
LIBSENSORS_REQUIRED=4.0.0
@@ -10,7 +10,7 @@
dnl LLVM versions
LLVM_REQUIRED_GALLIUM=3.3.0
-@@ -1726,6 +1727,9 @@
+@@ -1747,6 +1748,9 @@
mir)
PKG_CHECK_MODULES([MIR], [mirclient mir-client-platform-mesa])
;;
@@ -20,7 +20,7 @@
*)
AC_MSG_ERROR([platform '$plat' does not exist])
;;
-@@ -1757,6 +1761,7 @@
+@@ -1778,6 +1782,7 @@
AM_CONDITIONAL(HAVE_PLATFORM_SURFACELESS, echo "$platforms" | grep -q 'surfaceless')
AM_CONDITIONAL(HAVE_PLATFORM_ANDROID, echo "$platforms" | grep -q 'android')
AM_CONDITIONAL(HAVE_PLATFORM_MIR, echo "$platforms" | grep -q 'mir')
@@ -30,9 +30,9 @@
dnl More DRI setup
Index: mesa/include/EGL/eglplatform.h
===================================================================
---- mesa.orig/include/EGL/eglplatform.h 2017-08-10 16:08:00.456139559 +1000
-+++ mesa/include/EGL/eglplatform.h 2017-08-10 16:08:00.452139468 +1000
-@@ -112,6 +112,13 @@
+--- mesa.orig/include/EGL/eglplatform.h 2018-01-12 10:34:19.529583858 +1100
++++ mesa/include/EGL/eglplatform.h 2018-01-12 10:34:19.525583889 +1100
+@@ -111,6 +111,13 @@
typedef void *EGLNativePixmapType;
typedef MirEGLNativeWindowType EGLNativeWindowType;
@@ -48,9 +48,9 @@
#if defined(MESA_EGL_NO_X11_HEADERS)
Index: mesa/src/egl/Makefile.am
===================================================================
---- mesa.orig/src/egl/Makefile.am 2017-08-10 16:08:00.456139559 +1000
-+++ mesa/src/egl/Makefile.am 2017-08-10 16:08:00.452139468 +1000
-@@ -110,6 +110,13 @@
+--- mesa.orig/src/egl/Makefile.am 2018-01-12 10:34:19.529583858 +1100
++++ mesa/src/egl/Makefile.am 2018-01-12 10:34:19.525583889 +1100
+@@ -114,6 +114,13 @@
dri2_backend_FILES += drivers/dri2/platform_mir.c
endif
@@ -66,9 +66,9 @@
-I$(top_builddir)/src/egl/drivers/dri2 \
Index: mesa/src/egl/drivers/dri2/egl_dri2.c
===================================================================
---- mesa.orig/src/egl/drivers/dri2/egl_dri2.c 2017-08-10 16:08:00.456139559 +1000
-+++ mesa/src/egl/drivers/dri2/egl_dri2.c 2017-08-10 16:08:00.452139468 +1000
-@@ -868,6 +868,11 @@
+--- mesa.orig/src/egl/drivers/dri2/egl_dri2.c 2018-01-12 10:34:19.529583858 +1100
++++ mesa/src/egl/drivers/dri2/egl_dri2.c 2018-01-12 10:34:19.525583889 +1100
+@@ -939,6 +939,11 @@
ret = dri2_initialize_android(drv, disp);
break;
#endif
@@ -80,7 +80,7 @@
default:
_eglLog(_EGL_WARNING, "No EGL platform enabled.");
return EGL_FALSE;
-@@ -941,6 +946,13 @@
+@@ -1011,6 +1016,13 @@
}
break;
#endif
@@ -94,7 +94,7 @@
#ifdef HAVE_MIR_PLATFORM
case _EGL_PLATFORM_MIR:
if (dri2_dpy->own_device) {
-@@ -976,7 +988,9 @@
+@@ -1049,7 +1061,9 @@
* for the cleanup.
*/
if (disp->Platform != _EGL_PLATFORM_DRM &&
@@ -107,8 +107,8 @@
free(dri2_dpy->driver_configs);
Index: mesa/src/egl/drivers/dri2/egl_dri2.h
===================================================================
---- mesa.orig/src/egl/drivers/dri2/egl_dri2.h 2017-08-10 16:08:00.456139559 +1000
-+++ mesa/src/egl/drivers/dri2/egl_dri2.h 2017-08-10 16:08:00.452139468 +1000
+--- mesa.orig/src/egl/drivers/dri2/egl_dri2.h 2018-01-12 10:34:19.529583858 +1100
++++ mesa/src/egl/drivers/dri2/egl_dri2.h 2018-01-12 10:34:19.525583889 +1100
@@ -69,6 +69,10 @@
#include
#endif
@@ -120,7 +120,7 @@
#include "eglconfig.h"
#include "eglcontext.h"
#include "egldisplay.h"
-@@ -241,6 +245,11 @@
+@@ -234,6 +238,11 @@
#ifdef HAVE_MIR_PLATFORM
MirMesaEGLNativeDisplay *mir_disp;
#endif
@@ -132,7 +132,7 @@
};
struct dri2_egl_context
-@@ -302,10 +311,10 @@
+@@ -297,10 +306,10 @@
void *data;
int data_size;
#endif
@@ -145,7 +145,7 @@
int fd;
int buffer_age;
#endif
-@@ -333,6 +342,10 @@
+@@ -325,6 +334,10 @@
} color_buffers[3], *back;
#endif
@@ -156,7 +156,7 @@
#if defined(HAVE_SURFACELESS_PLATFORM)
__DRIimage *front;
unsigned int visual;
-@@ -435,6 +448,9 @@
+@@ -432,6 +445,9 @@
EGLBoolean
dri2_initialize_mir(_EGLDriver *drv, _EGLDisplay *disp);
@@ -169,7 +169,7 @@
Index: mesa/src/egl/drivers/dri2/platform_rs.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
-+++ mesa/src/egl/drivers/dri2/platform_rs.c 2017-08-10 16:37:18.982594770 +1000
++++ mesa/src/egl/drivers/dri2/platform_rs.c 2018-01-12 10:34:39.477433388 +1100
@@ -0,0 +1,964 @@
+/*
+ * Copyright © 2016 Canonical, Inc
@@ -440,7 +440,7 @@
+ if (sc->state[i] == buffer_state_acquired)
+ {
+ int buffer_fd = sc->gbm_buffer_ext->fd(sc->buffers[i]);
-+ if (buffer_fd == dri2_surf->dri_buffers[__DRI_BUFFER_BACK_LEFT]->fd)
++ if (buffer_fd == dri2_surf->local_buffers[__DRI_BUFFER_BACK_LEFT]->fd)
+ {
+// _eglLog(_EGL_DEBUG, "..submitting buffer %p", sc->buffers[i]);
+ sc->state[i] = buffer_state_submitted;
@@ -545,9 +545,9 @@
+
+ dri2_surf->back = &dri2_surf->color_buffers[buf_slot];
+ dri2_surf->back->buffer_age = buffer_age;
-+ dri2_surf->dri_buffers[__DRI_BUFFER_BACK_LEFT]->name = 0;
-+ dri2_surf->dri_buffers[__DRI_BUFFER_BACK_LEFT]->fd = buffer_fd;
-+ dri2_surf->dri_buffers[__DRI_BUFFER_BACK_LEFT]->pitch = buffer_stride;
++ dri2_surf->local_buffers[__DRI_BUFFER_BACK_LEFT]->name = 0;
++ dri2_surf->local_buffers[__DRI_BUFFER_BACK_LEFT]->fd = buffer_fd;
++ dri2_surf->local_buffers[__DRI_BUFFER_BACK_LEFT]->pitch = buffer_stride;
+
+ return EGL_TRUE;
+}
@@ -646,15 +646,15 @@
+ dri2_surf->base.Width = width;
+ dri2_surf->base.Height = height;
+
-+ dri2_surf->dri_buffers[__DRI_BUFFER_FRONT_LEFT] =
-+ calloc(sizeof(*dri2_surf->dri_buffers[0]), 1);
-+ dri2_surf->dri_buffers[__DRI_BUFFER_BACK_LEFT] =
-+ calloc(sizeof(*dri2_surf->dri_buffers[0]), 1);
++ dri2_surf->local_buffers[__DRI_BUFFER_FRONT_LEFT] =
++ calloc(sizeof(*dri2_surf->local_buffers[0]), 1);
++ dri2_surf->local_buffers[__DRI_BUFFER_BACK_LEFT] =
++ calloc(sizeof(*dri2_surf->local_buffers[0]), 1);
+
-+ dri2_surf->dri_buffers[__DRI_BUFFER_BACK_LEFT]->attachment =
++ dri2_surf->local_buffers[__DRI_BUFFER_BACK_LEFT]->attachment =
+ __DRI_BUFFER_BACK_LEFT;
+
-+ dri2_surf->dri_buffers[__DRI_BUFFER_BACK_LEFT]->cpp = get_format_bpp(sc->format);
++ dri2_surf->local_buffers[__DRI_BUFFER_BACK_LEFT]->cpp = get_format_bpp(sc->format);
+
+ clear_cached_buffers(dri2_surf);
+
@@ -724,14 +724,14 @@
+ (*dri2_dpy->core->destroyDrawable)(dri2_surf->dri_drawable);
+
+ for (i = 0; i < __DRI_BUFFER_COUNT; ++i) {
-+ if (dri2_surf->dri_buffers[i]) {
++ if (dri2_surf->local_buffers[i]) {
+ if ((i == __DRI_BUFFER_FRONT_LEFT) ||
+ (i == __DRI_BUFFER_BACK_LEFT)) {
-+ free(dri2_surf->dri_buffers[i]);
++ free(dri2_surf->local_buffers[i]);
+ }
+ else {
+ dri2_dpy->dri2->releaseBuffer(dri2_dpy->dri_screen,
-+ dri2_surf->dri_buffers[i]);
++ dri2_surf->local_buffers[i]);
+ }
+ }
+ }
@@ -861,29 +861,29 @@
+ assert(attachments[i] < __DRI_BUFFER_COUNT);
+ assert((i/2) < ARRAY_SIZE(dri2_surf->buffers));
+
-+ if (dri2_surf->dri_buffers[attachments[i]] == NULL) {
++ if (dri2_surf->local_buffers[attachments[i]] == NULL) {
+ /* Our frame callback must keep these buffers valid */
+ assert(attachments[i] != __DRI_BUFFER_FRONT_LEFT);
+ assert(attachments[i] != __DRI_BUFFER_BACK_LEFT);
+
-+ dri2_surf->dri_buffers[attachments[i]] =
++ dri2_surf->local_buffers[attachments[i]] =
+ dri2_dpy->dri2->allocateBuffer(dri2_dpy->dri_screen,
+ attachments[i], attachments[i+1],
+ dri2_surf->base.Width, dri2_surf->base.Height);
+
-+ if (!dri2_surf->dri_buffers[attachments[i]]) {
++ if (!dri2_surf->local_buffers[attachments[i]]) {
+ _eglError(EGL_BAD_ALLOC, "Failed to allocate auxiliary buffer");
+ return NULL;
+ }
+ }
+
+ memcpy(&dri2_surf->buffers[(i/2)],
-+ dri2_surf->dri_buffers[attachments[i]],
++ dri2_surf->local_buffers[attachments[i]],
+ sizeof(__DRIbuffer));
+ }
+
+ assert(dri2_surf->base.Type == EGL_PIXMAP_BIT ||
-+ dri2_surf->dri_buffers[__DRI_BUFFER_BACK_LEFT]);
++ dri2_surf->local_buffers[__DRI_BUFFER_BACK_LEFT]);
+
+ *out_count = i/2;
+ if (i == 0)
@@ -1137,8 +1137,8 @@
+}
Index: mesa/src/egl/main/egldisplay.c
===================================================================
---- mesa.orig/src/egl/main/egldisplay.c 2017-08-10 16:08:00.456139559 +1000
-+++ mesa/src/egl/main/egldisplay.c 2017-08-10 16:37:05.000000000 +1000
+--- mesa.orig/src/egl/main/egldisplay.c 2018-01-12 10:34:19.529583858 +1100
++++ mesa/src/egl/main/egldisplay.c 2018-01-12 10:34:19.525583889 +1100
@@ -59,6 +59,9 @@
#include
#include
@@ -1173,8 +1173,8 @@
return _EGL_PLATFORM_MIR;
Index: mesa/src/egl/main/egldisplay.h
===================================================================
---- mesa.orig/src/egl/main/egldisplay.h 2017-08-10 16:08:00.456139559 +1000
-+++ mesa/src/egl/main/egldisplay.h 2017-08-10 16:37:05.000000000 +1000
+--- mesa.orig/src/egl/main/egldisplay.h 2018-01-12 10:34:19.529583858 +1100
++++ mesa/src/egl/main/egldisplay.h 2018-01-12 10:34:19.525583889 +1100
@@ -51,6 +51,7 @@
_EGL_PLATFORM_HAIKU,
_EGL_PLATFORM_SURFACELESS,
diff -Nru mesa-17.2.4/debian/patches/i965-disable-l3-cache-alloc-for-ext-buffers.diff mesa-17.3.3/debian/patches/i965-disable-l3-cache-alloc-for-ext-buffers.diff
--- mesa-17.2.4/debian/patches/i965-disable-l3-cache-alloc-for-ext-buffers.diff 2018-02-01 16:17:31.000000000 +0000
+++ mesa-17.3.3/debian/patches/i965-disable-l3-cache-alloc-for-ext-buffers.diff 1970-01-01 00:00:00.000000000 +0000
@@ -1,167 +0,0 @@
-Subject: [PATCH] i965: Disable L3 cache allocation for external buffers
-From: Chris Wilson
-Date: 24.10.2017 19:06
-
-Through the use of mocs, we can define the cache usage for any surface
-used by the GPU. In particular, we can request that L3 cache be
-allocated for either a read/write miss so that subsequent reads can be
-fetched from cache rather than memory. A consequence of this is that if
-we allocate a L3/LLC cacheline for a read and the object is changed in
-main memory (e.g. a PCIe write bypassing the CPU) then the next read
-will be serviced from the stale cache and not from the new data in
-memory. This is an issue for external PRIME buffers where we may miss
-the updates entirely if the image is small enough to fit within our
-cache.
-
-Currently, we have a single bit to mark all external buffers so use that
-to tell us when it is unsafe to use a cache override in mocs and
-fallback to the PTE value instead (which should be set to the correct
-cache level to be coherent amongst all active parties: PRIME, scanout and
-render). This may be refined in future to limit the override to buffers
-outside the control of mesa; as buffers being shared between mesa
-clients should be able to coordinate themselves without resolves.
-
-Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101691
-Cc: Kenneth Graunke
-Cc: Jason Ekstrand
-Cc: Lyude Paul
-Cc: Timo Aalton
-Cc: Ben Widawsky
-Cc: Daniel Vetter
----
- src/intel/blorp/blorp.c | 1 +
- src/intel/blorp/blorp.h | 1 +
- src/intel/blorp/blorp_genX_exec.h | 2 +-
- src/intel/blorp/blorp_priv.h | 1 +
- src/mesa/drivers/dri/i965/brw_blorp.c | 1 +
- src/mesa/drivers/dri/i965/brw_state.h | 3 ++-
- src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 16 +++++++++++-----
- 7 files changed, 18 insertions(+), 7 deletions(-)
-
---- a/src/intel/blorp/blorp.c
-+++ b/src/intel/blorp/blorp.c
-@@ -71,6 +71,7 @@ brw_blorp_surface_info_init(struct blorp
- surf->surf->logical_level0_px.array_len));
-
- info->enabled = true;
-+ info->external = surf->external;
-
- if (format == ISL_FORMAT_UNSUPPORTED)
- format = surf->surf->format;
---- a/src/intel/blorp/blorp.h
-+++ b/src/intel/blorp/blorp.h
-@@ -107,6 +107,7 @@ struct blorp_surf
- enum isl_aux_usage aux_usage;
-
- union isl_color_value clear_color;
-+ bool external;
- };
-
- void
---- a/src/intel/blorp/blorp_genX_exec.h
-+++ b/src/intel/blorp/blorp_genX_exec.h
-@@ -1328,7 +1328,7 @@ blorp_emit_surface_states(struct blorp_b
- blorp_emit_surface_state(batch, ¶ms->src,
- surface_maps[BLORP_TEXTURE_BT_INDEX],
- surface_offsets[BLORP_TEXTURE_BT_INDEX],
-- NULL, false);
-+ NULL, params->src.external);
- }
- }
-
---- a/src/intel/blorp/blorp_priv.h
-+++ b/src/intel/blorp/blorp_priv.h
-@@ -47,6 +47,7 @@ enum {
- struct brw_blorp_surface_info
- {
- bool enabled;
-+ bool external;
-
- struct isl_surf surf;
- struct blorp_address addr;
---- a/src/mesa/drivers/dri/i965/brw_blorp.c
-+++ b/src/mesa/drivers/dri/i965/brw_blorp.c
-@@ -156,6 +156,7 @@ blorp_surf_for_miptree(struct brw_contex
- I915_GEM_DOMAIN_SAMPLER,
- .write_domain = is_render_target ? I915_GEM_DOMAIN_RENDER : 0,
- };
-+ surf->external = mt->bo->external;
-
- surf->aux_usage = aux_usage;
-
---- a/src/mesa/drivers/dri/i965/brw_state.h
-+++ b/src/mesa/drivers/dri/i965/brw_state.h
-@@ -360,6 +360,7 @@ void gen10_init_atoms(struct brw_context
- * may still respect that.
- */
- #define GEN7_MOCS_L3 1
-+#define GEN7_MOCS_PTE 0
-
- /* Ivybridge only: cache in LLC.
- * Specifying zero here means to use the PTE values set by the kernel;
-@@ -385,7 +386,7 @@ void gen10_init_atoms(struct brw_context
- */
- #define BDW_MOCS_WB 0x78
- #define BDW_MOCS_WT 0x58
--#define BDW_MOCS_PTE 0x18
-+#define BDW_MOCS_PTE 0x08
-
- /* Skylake: MOCS is now an index into an array of 62 different caching
- * configurations programmed by the kernel.
---- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
-+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
-@@ -68,12 +68,17 @@ uint32_t tex_mocs[] = {
- };
-
- uint32_t rb_mocs[] = {
-- [7] = GEN7_MOCS_L3,
-+ [7] = GEN7_MOCS_PTE,
- [8] = BDW_MOCS_PTE,
- [9] = SKL_MOCS_PTE,
- [10] = CNL_MOCS_PTE,
- };
-
-+static inline uint32_t get_tex_mocs(struct brw_bo *bo, unsigned int gen)
-+{
-+ return (bo && bo->external ? rb_mocs : tex_mocs)[gen];
-+}
-+
- static void
- get_isl_surf(struct brw_context *brw, struct intel_mipmap_tree *mt,
- GLenum target, struct isl_view *view,
-@@ -589,7 +594,7 @@ brw_update_texture_surface(struct gl_con
- aux_usage = ISL_AUX_USAGE_NONE;
-
- brw_emit_surface_state(brw, mt, mt->target, view, aux_usage,
-- tex_mocs[brw->gen],
-+ get_tex_mocs(mt->bo, brw->gen),
- surf_offset, surf_index,
- I915_GEM_DOMAIN_SAMPLER, 0);
- }
-@@ -615,7 +620,7 @@ brw_emit_buffer_surface_state(struct brw
- .size = buffer_size,
- .format = surface_format,
- .stride = pitch,
-- .mocs = tex_mocs[brw->gen]);
-+ .mocs = get_tex_mocs(bo, brw->gen));
-
- if (bo) {
- brw_emit_reloc(&brw->batch, *out_offset + brw->isl_dev.ss.addr_offset,
-@@ -1164,7 +1169,7 @@ update_renderbuffer_read_surfaces(struct
- aux_usage = ISL_AUX_USAGE_NONE;
-
- brw_emit_surface_state(brw, irb->mt, target, view, aux_usage,
-- tex_mocs[brw->gen],
-+ get_tex_mocs(irb->mt->bo, brw->gen),
- surf_offset, surf_index,
- I915_GEM_DOMAIN_SAMPLER, 0);
-
-@@ -1657,7 +1662,7 @@ update_image_surface(struct brw_context
- view.base_array_layer,
- view.array_len));
- brw_emit_surface_state(brw, mt, mt->target, view,
-- ISL_AUX_USAGE_NONE, tex_mocs[brw->gen],
-+ ISL_AUX_USAGE_NONE, get_tex_mocs(mt->bo, brw->gen),
- surf_offset, surf_index,
- I915_GEM_DOMAIN_SAMPLER,
- access == GL_READ_ONLY ? 0 :
diff -Nru mesa-17.2.4/debian/patches/series mesa-17.3.3/debian/patches/series
--- mesa-17.2.4/debian/patches/series 2018-02-01 16:17:31.000000000 +0000
+++ mesa-17.3.3/debian/patches/series 2018-02-01 16:17:32.000000000 +0000
@@ -5,4 +5,3 @@
egl-platform-mir.patch
egl-platform-rs.patch
khr_platform_mir.patch
-i965-disable-l3-cache-alloc-for-ext-buffers.diff
diff -Nru mesa-17.2.4/debian/rules mesa-17.3.3/debian/rules
--- mesa-17.2.4/debian/rules 2018-02-01 16:17:31.000000000 +0000
+++ mesa-17.3.3/debian/rules 2018-02-01 16:17:32.000000000 +0000
@@ -133,7 +133,7 @@
--enable-shared-glapi \
--enable-texture-float \
--disable-xvmc \
- --disable-omx \
+ --disable-omx-bellagio \
$(confflags_DIRECT_RENDERING) \
$(confflags_GBM) \
$(confflags_DRI3) \
diff -Nru mesa-17.2.4/docs/egl.html mesa-17.3.3/docs/egl.html
--- mesa-17.2.4/docs/egl.html 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/docs/egl.html 2018-01-18 21:30:28.000000000 +0000
@@ -130,16 +130,6 @@
runtime
-EGL_DRIVER
-
-
-This variable specifies a full path to or the name of an EGL driver. It
-forces the specified EGL driver to be loaded. It comes in handy when one wants
-to test a specific driver. This variable is ignored for setuid/setgid
-binaries.
-
-
-
EGL_PLATFORM
diff -Nru mesa-17.2.4/docs/envvars.html mesa-17.3.3/docs/envvars.html
--- mesa-17.2.4/docs/envvars.html 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/docs/envvars.html 2018-01-18 21:30:28.000000000 +0000
@@ -29,12 +29,12 @@
LIBGL_DEBUG - If defined debug information will be printed to stderr.
If set to 'verbose' additional information will be printed.
LIBGL_DRIVERS_PATH - colon-separated list of paths to search for DRI drivers
- LIBGL_ALWAYS_INDIRECT - forces an indirect rendering context/connection.
- LIBGL_ALWAYS_SOFTWARE - if set, always use software rendering
- LIBGL_NO_DRAWARRAYS - if set do not use DrawArrays GLX protocol (for debugging)
+ LIBGL_ALWAYS_INDIRECT - if set to `true`, forces an indirect rendering context/connection.
+ LIBGL_ALWAYS_SOFTWARE - if set to `true`, always use software rendering
+ LIBGL_NO_DRAWARRAYS - if set to `true`, do not use DrawArrays GLX protocol (for debugging)
LIBGL_SHOW_FPS - print framerate to stdout based on the number of glXSwapBuffers
calls per second.
- LIBGL_DRI3_DISABLE - disable DRI3 if set (the value does not matter)
+ LIBGL_DRI3_DISABLE - disable DRI3 if set to `true`.
@@ -46,7 +46,7 @@
MESA_NO_MMX - if set, disables Intel MMX optimizations
MESA_NO_3DNOW - if set, disables AMD 3DNow! optimizations
MESA_NO_SSE - if set, disables Intel SSE optimizations
- MESA_NO_ERROR - if set error checking is disabled as per KHR_no_error.
+ MESA_NO_ERROR - if set to 1, error checking is disabled as per KHR_no_error.
This will result in undefined behaviour for invalid use of the api, but
can reduce CPU use for apps that are known to be error free.
MESA_DEBUG - if set, error messages are printed to stderr. For example,
@@ -117,7 +117,7 @@
glGetString(GL_SHADING_LANGUAGE_VERSION). Valid values are integers, such as
"130". Mesa will not really implement all the features of the given language version
if it's higher than what's normally reported. (for developers only)
- MESA_GLSL_CACHE_DISABLE - if set, disables the GLSL shader cache
+ MESA_GLSL_CACHE_DISABLE - if set to `true`, disables the GLSL shader cache
MESA_GLSL_CACHE_MAX_SIZE - if set, determines the maximum size of
the on-disk cache of compiled GLSL programs. Should be set to a number
optionally followed by 'K', 'M', or 'G' to specify a size in
@@ -133,6 +133,8 @@
home directory.
MESA_GLSL - shading language compiler options
MESA_NO_MINMAX_CACHE - when set, the minmax index cache is globally disabled.
+ MESA_SHADER_CAPTURE_PATH - see Capturing Shaders
+MESA_SHADER_DUMP_PATH and MESA_SHADER_READ_PATH - see Experimenting with Shader Replacements
@@ -190,11 +192,13 @@
perfmon - emit messages about AMD_performance_monitor
pix - emit messages about pixel operations
prim - emit messages about drawing primitives
+ reemit - mark all state dirty on each draw call
sf - emit messages about the strips & fans unit (for old gens, includes the SF program)
shader_time - record how much GPU time is spent in each shader
spill_fs - force spilling of all registers in the scalar backend (useful to debug spilling code)
spill_vec4 - force spilling of all registers in the vec4 backend (useful to debug spilling code)
state - emit messages about state flag tracking
+ submit - emit batchbuffer usage statistics
sync - after sending each batch, emit a message and wait for that batch to finish rendering
tcs - dump shader assembly for tessellation control shaders
tes - dump shader assembly for tessellation evaluation shaders
@@ -240,7 +244,7 @@
Use kill -10 to toggle the hud as desired.
GALLIUM_HUD_DUMP_DIR - specifies a directory for writing the displayed
hud values into files.
- GALLIUM_DRIVER - useful in combination with LIBGL_ALWAYS_SOFTWARE=1 for
+ GALLIUM_DRIVER - useful in combination with LIBGL_ALWAYS_SOFTWARE=true for
choosing one of the software renderers "softpipe", "llvmpipe" or "swr".
GALLIUM_LOG_FILE - specifies a file for logging all errors, warnings, etc.
rather than stderr.
diff -Nru mesa-17.2.4/docs/features.txt mesa-17.3.3/docs/features.txt
--- mesa-17.2.4/docs/features.txt 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/docs/features.txt 2018-01-18 21:30:28.000000000 +0000
@@ -131,7 +131,7 @@
GL_ARB_texture_buffer_object_rgb32 DONE (i965/gen6+, llvmpipe, softpipe, swr)
GL_ARB_texture_cube_map_array DONE (i965/gen6+, nv50, llvmpipe, softpipe)
GL_ARB_texture_gather DONE (i965/gen6+, nv50, llvmpipe, softpipe, swr)
- GL_ARB_texture_query_lod DONE (i965, nv50, softpipe)
+ GL_ARB_texture_query_lod DONE (i965, nv50, llvmpipe, softpipe)
GL_ARB_transform_feedback2 DONE (i965/gen6+, nv50, llvmpipe, softpipe, swr)
GL_ARB_transform_feedback3 DONE (i965/gen7+, llvmpipe, softpipe, swr)
@@ -221,6 +221,22 @@
GL_KHR_robustness DONE (i965)
GL_EXT_shader_integer_mix DONE (all drivers that support GLSL)
+GL 4.6, GLSL 4.60
+
+ GL_ARB_gl_spirv in progress (Nicolai Hähnle, Ian Romanick)
+ GL_ARB_indirect_parameters DONE (i965/gen7+, nvc0, radeonsi)
+ GL_ARB_pipeline_statistics_query DONE (i965, nvc0, radeonsi, llvmpipe, softpipe, swr)
+ GL_ARB_polygon_offset_clamp DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, swr)
+ GL_ARB_shader_atomic_counter_ops DONE (i965/gen7+, nvc0, radeonsi, softpipe)
+ GL_ARB_shader_draw_parameters DONE (i965, nvc0, radeonsi)
+ GL_ARB_shader_group_vote DONE (i965, nvc0, radeonsi)
+ GL_ARB_spirv_extensions in progress (Nicolai Hähnle, Ian Romanick)
+ GL_ARB_texture_filter_anisotropic DONE (i965, nv50, nvc0, r600, radeonsi, softpipe (*), llvmpipe (*))
+ GL_ARB_transform_feedback_overflow_query DONE (i965/gen6+, radeonsi, llvmpipe, softpipe)
+ GL_KHR_no_error started (Timothy Arceri)
+
+(*) softpipe and llvmpipe advertise 16x anisotropy but simply ignore the setting
+
These are the extensions cherry-picked to make GLES 3.1
GLES3.1, GLSL ES 3.1 -- all DONE: i965/hsw+, nvc0, radeonsi
@@ -282,20 +298,14 @@
GL_ARB_compute_variable_group_size DONE (nvc0, radeonsi)
GL_ARB_ES3_2_compatibility DONE (i965/gen8+)
GL_ARB_fragment_shader_interlock not started
- GL_ARB_gl_spirv not started
GL_ARB_gpu_shader_int64 DONE (i965/gen8+, nvc0, radeonsi, softpipe, llvmpipe)
- GL_ARB_indirect_parameters DONE (nvc0, radeonsi)
GL_ARB_parallel_shader_compile not started, but Chia-I Wu did some related work in 2014
- GL_ARB_pipeline_statistics_query DONE (i965, nvc0, radeonsi, softpipe, swr)
GL_ARB_post_depth_coverage DONE (i965)
GL_ARB_robustness_isolation not started
GL_ARB_sample_locations not started
GL_ARB_seamless_cubemap_per_texture DONE (i965, nvc0, radeonsi, r600, softpipe, swr)
- GL_ARB_shader_atomic_counter_ops DONE (i965/gen7+, nvc0, radeonsi, softpipe)
GL_ARB_shader_ballot DONE (i965/gen8+, nvc0, radeonsi)
GL_ARB_shader_clock DONE (i965/gen7+, nv50, nvc0, radeonsi)
- GL_ARB_shader_draw_parameters DONE (i965, nvc0, radeonsi)
- GL_ARB_shader_group_vote DONE (i965, nvc0, radeonsi)
GL_ARB_shader_stencil_export DONE (i965/gen9+, radeonsi, softpipe, llvmpipe, swr)
GL_ARB_shader_viewport_layer_array DONE (i965/gen6+, nvc0, radeonsi)
GL_ARB_sparse_buffer DONE (radeonsi/CIK+)
@@ -303,15 +313,19 @@
GL_ARB_sparse_texture2 not started
GL_ARB_sparse_texture_clamp not started
GL_ARB_texture_filter_minmax not started
- GL_ARB_transform_feedback_overflow_query DONE (i965/gen6+)
+ GL_EXT_memory_object DONE (radeonsi)
+ GL_EXT_memory_object_fd DONE (radeonsi)
+ GL_EXT_memory_object_win32 not started
+ GL_EXT_semaphore not started
+ GL_EXT_semaphore_fd not started
+ GL_EXT_semaphore_win32 not started
GL_KHR_blend_equation_advanced_coherent DONE (i965/gen9+)
- GL_KHR_no_error started (Timothy Arceri)
GL_KHR_texture_compression_astc_hdr DONE (i965/bxt)
GL_KHR_texture_compression_astc_sliced_3d DONE (i965/gen9+)
GL_OES_depth_texture_cube_map DONE (all drivers that support GLSL 1.30+)
GL_OES_EGL_image DONE (all drivers)
GL_OES_EGL_image_external_essl3 not started
- GL_OES_required_internalformat not started - GLES2 extension based on OpenGL ES 3.0 feature
+ GL_OES_required_internalformat DONE (all drivers)
GL_OES_surfaceless_context DONE (all drivers)
GL_OES_texture_compression_astc DONE (core only)
GL_OES_texture_float DONE (i965, r300, r600, radeonsi, nv30, nv50, nvc0, softpipe, llvmpipe)
@@ -333,6 +347,47 @@
GL_ARB_shadow_ambient Superseded by GL_ARB_fragment_program
GL_ARB_vertex_blend Superseded by GL_ARB_vertex_program
+Vulkan 1.0 -- all DONE: anv, radv
+
+Khronos extensions that are not part of any Vulkan version:
+ VK_KHR_16bit_storage in progress (Alejandro)
+ VK_KHR_android_surface not started
+ VK_KHR_dedicated_allocation DONE (anv, radv)
+ VK_KHR_descriptor_update_template DONE (anv, radv)
+ VK_KHR_display not started
+ VK_KHR_display_swapchain not started
+ VK_KHR_external_fence not started
+ VK_KHR_external_fence_capabilities not started
+ VK_KHR_external_fence_fd not started
+ VK_KHR_external_fence_win32 not started
+ VK_KHR_external_memory DONE (anv, radv)
+ VK_KHR_external_memory_capabilities DONE (anv, radv)
+ VK_KHR_external_memory_fd DONE (anv, radv)
+ VK_KHR_external_memory_win32 not started
+ VK_KHR_external_semaphore DONE (radv)
+ VK_KHR_external_semaphore_capabilities DONE (radv)
+ VK_KHR_external_semaphore_fd DONE (radv)
+ VK_KHR_external_semaphore_win32 not started
+ VK_KHR_get_memory_requirements2 DONE (anv, radv)
+ VK_KHR_get_physical_device_properties2 DONE (anv, radv)
+ VK_KHR_get_surface_capabilities2 DONE (anv)
+ VK_KHR_incremental_present DONE (anv, radv)
+ VK_KHR_maintenance1 DONE (anv, radv)
+ VK_KHR_mir_surface not started
+ VK_KHR_push_descriptor DONE (anv, radv)
+ VK_KHR_sampler_mirror_clamp_to_edge DONE (anv, radv)
+ VK_KHR_shader_draw_parameters DONE (anv, radv)
+ VK_KHR_shared_presentable_image not started
+ VK_KHR_storage_buffer_storage_class DONE (anv, radv)
+ VK_KHR_surface DONE (anv, radv)
+ VK_KHR_swapchain DONE (anv, radv)
+ VK_KHR_variable_pointers DONE (anv, radv)
+ VK_KHR_wayland_surface DONE (anv, radv)
+ VK_KHR_win32_keyed_mutex not started
+ VK_KHR_win32_surface not started
+ VK_KHR_xcb_surface DONE (anv, radv)
+ VK_KHR_xlib_surface DONE (anv, radv)
+
A graphical representation of this information can be found at
https://mesamatrix.net/
diff -Nru mesa-17.2.4/docs/helpwanted.html mesa-17.3.3/docs/helpwanted.html
--- mesa-17.2.4/docs/helpwanted.html 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/docs/helpwanted.html 2018-01-18 21:30:28.000000000 +0000
@@ -35,17 +35,8 @@
Enable gcc -Wstrict-aliasing=2 -fstrict-aliasing and track down aliasing
issues in the code.
-Windows driver building, testing and maintenance.
-Fixing MSVC builds.
-
Contribute more tests to
Piglit .
-
-Automatic testing.
-
-It would be great if someone would set up an automated system for grabbing
-the latest Mesa code and run tests (such as piglit) then report issues to
-the mailing list.
@@ -58,24 +49,16 @@
-Driver specific To-Do lists:
+Legacy Driver specific To-Do lists:
-
- LLVMpipe - Software driver using LLVM for runtime code generation.
-
- radeonsi - Driver for AMD Southern Island.
r600g - Driver for ATI/AMD R600 - Northern Island.
r300g - Driver for ATI R300 - R500.
-
- i915g - Driver for Intel i915/i945.
diff -Nru mesa-17.2.4/docs/index.html mesa-17.3.3/docs/index.html
--- mesa-17.2.4/docs/index.html 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/docs/index.html 2018-01-18 21:30:28.000000000 +0000
@@ -16,6 +16,61 @@
News
+October 19, 2017
+
+Mesa 17.2.3 is released.
+This is a bug-fix release.
+
+
+October 2, 2017
+
+Mesa 17.2.2 is released.
+This is a bug-fix release.
+
+
+September 25, 2017
+
+Mesa 17.1.10 is released.
+This is a bug-fix release.
+
+
+September 17, 2017
+
+Mesa 17.2.1 is released.
+This is a bug-fix release.
+
+
+September 8, 2017
+
+Mesa 17.1.9 is released.
+This is a bug-fix release.
+
+
+September 4, 2017
+
+Mesa 17.2.0 is released. This is a
+new development release. See the release notes for more information
+about the release.
+
+
+August 28, 2017
+
+Mesa 17.1.8 is released.
+This is a bug-fix release.
+
+
+August 21, 2017
+
+Mesa 17.1.7 is released.
+This is a bug-fix release.
+
+
+August 7, 2017
+
+Mesa 17.1.6 is released.
+This is a bug-fix release.
+
+
July 14, 2017
Mesa 17.1.5 is released.
diff -Nru mesa-17.2.4/docs/libGL.txt mesa-17.3.3/docs/libGL.txt
--- mesa-17.2.4/docs/libGL.txt 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/docs/libGL.txt 2018-01-18 21:30:28.000000000 +0000
@@ -59,7 +59,7 @@
Indirect Rendering
You can force indirect rendering mode by setting the LIBGL_ALWAYS_INDIRECT
-environment variable. Hardware acceleration will not be used.
+environment variable to `true`. Hardware acceleration will not be used.
diff -Nru mesa-17.2.4/docs/llvmpipe.html mesa-17.3.3/docs/llvmpipe.html
--- mesa-17.2.4/docs/llvmpipe.html 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/docs/llvmpipe.html 2018-01-18 21:30:28.000000000 +0000
@@ -20,7 +20,7 @@
The Gallium llvmpipe driver is a software rasterizer that uses LLVM to
do runtime code generation.
Shaders, point/line/triangle rasterization and vertex processing are
-implemented with LLVM IR which is translated to x86 or x86-64 machine
+implemented with LLVM IR which is translated to x86, x86-64, or ppc64le machine
code.
Also, the driver is multithreaded to take advantage of multiple CPU cores
(up to 8 at this time).
@@ -32,18 +32,24 @@
- An x86 or amd64 processor; 64-bit mode recommended.
+ For x86 or amd64 processors, 64-bit mode is recommended.
Support for SSE2 is strongly encouraged. Support for SSE3 and SSE4.1 will
yield the most efficient code. The fewer features the CPU has the more
- likely is that you run into underperforming, buggy, or incomplete code.
+ likely it is that you will run into underperforming, buggy, or incomplete code.
+
+
+ For ppc64le processors, use of the Altivec feature (the Vector
+ Facility) is recommended if supported; use of the VSX feature (the
+ Vector-Scalar Facility) is recommended if supported AND Mesa is
+ built with LLVM version 4.0 or later.
See /proc/cpuinfo to know what your CPU supports.
- LLVM: version 3.4 recommended; 3.3 or later required.
+ Unless otherwise stated, LLVM version 3.4 is recommended; 3.3 or later is required.
For Linux, on a recent Debian based distribution do:
@@ -51,6 +57,12 @@
aptitude install llvm-dev
+ If you want development snapshot builds of LLVM for Debian and derived
+ distributions like Ubuntu, you can use the APT repository at apt.llvm.org, which is maintained by Debian's LLVM maintainer.
+
+
For a RPM-based distribution do:
@@ -228,8 +240,8 @@
-Some of this tests can output results and benchmarks to a tab-separated-file
-for posterior analysis, e.g.:
+Some of these tests can output results and benchmarks to a tab-separated file
+for later analysis, e.g.:
build/linux-x86_64-debug/gallium/drivers/llvmpipe/lp_test_blend -o blend.tsv
@@ -240,8 +252,8 @@
- When looking to this code by the first time start in lp_state_fs.c, and
- then skim through the lp_bld_* functions called in there, and the comments
+ When looking at this code for the first time, start in lp_state_fs.c, and
+ then skim through the lp_bld_* functions called there, and the comments
at the top of the lp_bld_*.c functions.
diff -Nru mesa-17.2.4/docs/release-calendar.html mesa-17.3.3/docs/release-calendar.html
--- mesa-17.2.4/docs/release-calendar.html 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/docs/release-calendar.html 2018-01-18 21:30:28.000000000 +0000
@@ -39,54 +39,73 @@
Notes
-17.1
-2017-07-28
-17.1.6
-Emil Velikov
+17.2
+2017-10-27
+17.2.4
+Andres Gomez
-2017-08-11
-17.1.7
-Juan A. Suarez Romero
+2017-11-10
+17.2.5
+Andres Gomez
-2017-08-25
-17.1.8
+2017-11-24
+17.2.6
Andres Gomez
-Final planned release for the 17.1 series
+
+
+
+2017-12-08
+17.2.7
+Emil Velikov
+Final planned release for the 17.2 series
+
+
+17.3
+2017-10-20
+17.3.0-rc1
+Emil Velikov
+
-17.2
-2017-07-21
-17.2.0-rc1
+2017-10-27
+17.3.0-rc2
Emil Velikov
-2017-07-28
-17.2.0-rc2
+2017-11-03
+17.3.0-rc3
Emil Velikov
-2017-08-04
-17.2.0-rc3
+2017-11-10
+17.3.0-rc4
Emil Velikov
+May be promoted to 17.3.0 final
+
+
+2017-11-24
+17.3.1
+Andres Gomez
-2017-08-11
-17.2.0-rc4
+2017-12-08
+17.3.2
Emil Velikov
-May be promoted to 17.2.0 final
+
-2017-08-25
-17.2.1
+2017-12-22
+17.3.3
Emil Velikov
+
diff -Nru mesa-17.2.4/docs/releasing.html mesa-17.3.3/docs/releasing.html
--- mesa-17.2.4/docs/releasing.html 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/docs/releasing.html 2018-01-18 21:30:28.000000000 +0000
@@ -428,6 +428,7 @@
+ # Set MAKEFLAGS if you haven't already
git clean -fXd; git clean -nxd
read # quick cross check any outstanding files
export __version=`cat VERSION`
@@ -436,9 +437,12 @@
chmod 755 -fR $__build_root; rm -rf $__build_root
mkdir -p $__build_root && cd $__build_root
- # For the distcheck, you may want to specify which LLVM to use:
+ # For the native builds - such as distcheck, scons, sanity test, you
+ # may want to specify which LLVM to use:
# export LLVM_CONFIG=/usr/lib/llvm-3.9/bin/llvm-config
- $__mesa_root/autogen.sh && make -j2 distcheck
+
+ # Do a full distcheck
+ $__mesa_root/autogen.sh && make distcheck
# Build check the tarballs (scons, linux)
tar -xaf mesa-$__version.tar.xz && cd mesa-$__version
@@ -446,27 +450,27 @@
cd .. && rm -rf mesa-$__version
# Build check the tarballs (scons, windows/mingw)
- # You may need to unset LLVM if you set it before:
- # unset LLVM_CONFIG
+ # Temporarily drop LLVM_CONFIG, unless you have a Windows/mingw one.
+ # save_LLVM_CONFIG=`echo $LLVM_CONFIG`; unset LLVM_CONFIG
tar -xaf mesa-$__version.tar.xz && cd mesa-$__version
scons platform=windows toolchain=crossmingw
cd .. && rm -rf mesa-$__version
# Test the automake binaries
tar -xaf mesa-$__version.tar.xz && cd mesa-$__version
- # You may want to specify which LLVM to use:
+ # Restore LLVM_CONFIG, if applicable:
+ # export LLVM_CONFIG=`echo $save_LLVM_CONFIG`; unset save_LLVM_CONFIG
./configure \
--with-dri-drivers=i965,swrast \
--with-gallium-drivers=swrast \
--with-vulkan-drivers=intel \
--enable-llvm-shared-libs \
--enable-llvm \
- --with-llvm-prefix=/usr/lib/llvm-3.9 \
--enable-glx-tls \
--enable-gbm \
--enable-egl \
--with-platforms=x11,drm,wayland,surfaceless
- make -j2 && DESTDIR=`pwd`/test make -j6 install
+ make && DESTDIR=`pwd`/test make install
__glxinfo_cmd='glxinfo 2>&1 | egrep -o "Mesa.*|Gallium.*|.*dri\.so"'
__glxgears_cmd='glxgears 2>&1 | grep -v "configuration file"'
__es2info_cmd='es2_info 2>&1 | egrep "GL_VERSION|GL_RENDERER|.*dri\.so"'
@@ -479,12 +483,12 @@
eval $__glxgears_cmd
eval $__es2info_cmd
eval $__es2gears_cmd
- export LIBGL_ALWAYS_SOFTWARE=1
+ export LIBGL_ALWAYS_SOFTWARE=true
eval $__glxinfo_cmd
eval $__glxgears_cmd
eval $__es2info_cmd
eval $__es2gears_cmd
- export LIBGL_ALWAYS_SOFTWARE=1
+ export LIBGL_ALWAYS_SOFTWARE=true
export GALLIUM_DRIVER=softpipe
eval $__glxinfo_cmd
eval $__glxgears_cmd
diff -Nru mesa-17.2.4/docs/relnotes/17.1.10.html mesa-17.3.3/docs/relnotes/17.1.10.html
--- mesa-17.2.4/docs/relnotes/17.1.10.html 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/docs/relnotes/17.1.10.html 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,155 @@
+
+
+
+
+ Mesa Release Notes
+
+
+
+
+
+
+
+
+
+
Mesa 17.1.10 Release Notes / September 25, 2017
+
+
+Mesa 17.1.10 is a bug fix release which fixes bugs found since the 17.1.9 release.
+
+
+Mesa 17.1.10 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5. OpenGL
+4.5 is only available if requested at context creation
+because compatibility contexts are not supported.
+
+
+
+
SHA256 checksums
+
+a48ce6b643a728b2b0f926151930525b3670fbff1fb688527fd9051eab9f30a4 mesa-17.1.10.tar.gz
+cbc0d681cc4df47d8deb5a36f45b420978128522fd665b2cd4c7096316f11bdb mesa-17.1.10.tar.xz
+
+
+
+
New features
+
None
+
+
+
Bug fixes
+
+
+Bug 102844 - memory leak with glDeleteProgram for shader program type GL_COMPUTE_SHADER
+
+
+
+
+
Changes
+
+
Alexandre Demers (1):
+
+ osmesa: link with libunwind if enabled (v2)
+
+
+
Andres Gomez (12):
+
+ docs: add sha256 checksums for 17.1.9
+ cherry-ignore: add "st/mesa: skip draw calls with pipe_draw_info::count == 0"
+ cherry-ignore: add "radv: use amdgpu_bo_va_op_raw."
+ cherry-ignore: add "radv: use simpler indirect packet 3 if possible."
+ cherry-ignore: add "radeonsi: don't always apply the PrimID instancing bug workaround on SI"
+ cherry-ignore: add "intel/eu/validate: Look up types on demand in execution_type()"
+ cherry-ignore: add "radv: gfx9 fixes"
+ cherry-ignore: add "radv/gfx9: set mip0-depth correctly for 2d arrays/3d images"
+ cherry-ignore: add "radv/gfx9: fix image resource handling."
+ cherry-ignore: add "docs/egl: remove reference to EGL_DRIVERS_PATH"
+ cherry-ignore: add "radv: Disable multilayer & multilevel DCC."
+ cherry-ignore: add "radv: Don't allocate CMASK for linear images."
+
+
+
Dave Airlie (2):
+
+ radv/ac: bump params array for image atomic comp swap
+ st/glsl->tgsi: fix u64 to bool comparisons.
+
+
+
Emil Velikov (2):
+
+ egl/x11/dri3: adding missing __DRI_BACKGROUND_CALLABLE extension
+ automake: enable libunwind in `make distcheck'
+
+
+
Eric Anholt (3):
+
+ broadcom/vc4: Fix use-after-free for flushing when writing to a texture.
+ broadcom/vc4: Fix use-after-free trying to mix a quad and tile clear.
+ broadcom/vc4: Fix use-after-free when deleting a program.
+
+
+
George Kyriazis (1):
+
+ swr: invalidate attachment on transition change
+
+
+
Gert Wollny (2):
+
+ travis: force llvm-3.3 for "make Gallium ST Other"
+ travis: Add libunwind-dev to gallium/make builds
+
+
+
Jason Ekstrand (1):
+
+ i965/blorp: Set r8stencil_needs_update when writing stencil
+
+
+
Juan A. Suarez Romero (9):
+
+ cherry-ignore: add "ac/surface: match Z and stencil tile config"
+ cherry-ignore: add "radv/nir: call opt_remove_phis after trivial continues."
+ cherry-ignore: add "amd/common: add workaround for cube map array layer clamping"
+ cherry-ignore: add "radeonsi: workaround for gather4 on integer cube maps"
+ cherry-ignore: add "Scons: Add LLVM 5.0 support"
+ cherry-ignore: add "ac/surface: handle S8 on gfx9"
+ cherry-ignore: add "radv: Check for GFX9 for 1D arrays in image_size intrinsic."
+ cherry-ignore: add "glsl/linker: fix output variable overlap check"
+ Update version to 17.1.10
+
+
+
Józef Kucia (1):
+
+ anv: Fix descriptors copying
+
+
+
Matt Turner (2):
+
+ util: Link libmesautil into u_atomic_test
+ util/u_atomic: Add implementation of __sync_val_compare_and_swap_8
+
+
+
Nicolai Hähnle (1):
+
+ radeonsi: apply a mask to gl_SampleMaskIn in the PS prolog
+
+
+
Nicolai Hähnle (4):
+
+ st/glsl_to_tgsi: only the first (inner-most) array reference can be a 2D index
+ amd/common: round cube array slice in ac_prepare_cube_coords
+ radeonsi: set MIP_POINT_PRECLAMP to 0
+ radeonsi: fix array textures layer coordinate
+
+
+
Tapani Pälli (1):
+
+ mesa: free current ComputeProgram state in _mesa_free_context_data
+
+
+
+
+
+
+
diff -Nru mesa-17.2.4/docs/relnotes/17.1.6.html mesa-17.3.3/docs/relnotes/17.1.6.html
--- mesa-17.2.4/docs/relnotes/17.1.6.html 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/docs/relnotes/17.1.6.html 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,225 @@
+
+
+
+
+ Mesa Release Notes
+
+
+
+
+
+
+
+
+
+
Mesa 17.1.6 Release Notes / August 7, 2017
+
+
+Mesa 17.1.6 is a bug fix release which fixes bugs found since the 17.1.5 release.
+
+
+Mesa 17.1.6 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5. OpenGL
+4.5 is only available if requested at context creation
+because compatibility contexts are not supported.
+
+
+
+
SHA256 checksums
+
+971831bc1e748b3e8367eee6b9eb509bad2970e3c2f8520ad25f5caa12ca5491 mesa-17.1.6.tar.gz
+0686deadde1f126b20aa67e47e8c50502043eee4ecdf60d5009ffda3cebfee50 mesa-17.1.6.tar.xz
+
+
+
+
New features
+
None
+
+
+
Bug fixes
+
+
+
+Bug 97957 - Awful screen tearing in a separate X server with DRI3
+
+Bug 101683 - Some games hang while loading when compositing is shut off or absent
+
+Bug 101867 - Launch options window renders black in Feral Games in current Mesa trunk
+
+
+
+
+
Changes
+
+
Andres Gomez (1):
+
+ docs: add sha256 checksums for 17.1.5
+
+
+
Bas Nieuwenhuizen (1):
+
+ radv: Don't underflow non-visible VRAM size.
+
+
+
Brian Paul (1):
+
+ svga: fix texture swizzle writemasking
+
+
+
Chad Versace (1):
+
+ anv/image: Fix VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT
+
+
+
Chris Wilson (1):
+
+ i965: Resolve framebuffers before signaling the fence
+
+
+
Connor Abbott (1):
+
+ nir: fix algebraic optimizations
+
+
+
Daniel Stone (1):
+
+ st/dri: Check get-handle return value in queryImage
+
+
+
Dave Airlie (5):
+
+ radv: fix non-0 based layer clears.
+ radv: fix buffer views on SI/CIK.
+ radv/ac: realign SI workaround with radeonsi.
+ radv/ac: port SI TC L1 write corruption fix.
+ radv: for stencil only set Z tile mode index to same value
+
+
+
Emil Velikov (23):
+
+ cherry-ignore: add "anv: Round u_vector element sizes to a power of two"
+ anv: advertise v6 of the wayland surface extension
+ radv: advertise v6 of the wayland surface extension
+ swrast: add dri2ConfigQueryExtension to the correct extension list
+ cherry-ignore: add "anv: Transition MCS buffers from the undefined layout"
+ swr: don't forget to link AVX/AVX2 against pthreads
+ cherry-ignore: add "i965: Fix offset addition in get_isl_surf"
+ cherry-ignore: add "i965: Fix = vs == in MCS aux usage assert."
+ cherry-ignore: add a couple of radeon commits
+ cherry-ignore: add "swr/rast: non-regex knob fallback code for gcc < 4.9"
+ cherry-ignore: add "swr: fix transform feedback logic"
+ cherry-ignore: add a couple of radeonsi/gfx9 commits
+ cherry-ignore: ignore reverted st/mesa commit
+ cherry-ignore: add bindless textures fix
+ cherry-ignore: add "st/glsl_to_tgsi: fix getting the image type for array of structs"
+ cherry-ignore: add yet another bindless textures fix
+ bin/cherry-ignore: add radeonsi "fix of a fix"
+ travis: lower SWR requirement to GCC 4.8, aka std=c++11
+ i965: use strtol to convert the integer deviceID override
+ swr: remove unneeded fallback strcasecmp define
+ cherry-ignore: add a bunch more commits to the list
+ fixup! cherry-ignore: add a bunch more commits to the list
+ Update version to 17.1.6
+
+
+
Eric Anholt (1):
+
+ broadcom/vc4: Prefer blit via rendering to the software fallback.
+
+
+
Eric Engestrom (1):
+
+ configure: only install khrplatform.h if needed
+
+
+
Iago Toral Quiroga (2):
+
+ anv/cmd_buffer: fix off by one error in assertion
+ anv: only expose up to 28 vertex attributes
+
+
+
Ilia Mirkin (1):
+
+ nv50/ir: fix threads calculation for non-compute shaders
+
+
+
Jason Ekstrand (5):
+
+ anv/cmd_buffer: Properly handle render passes with 0 attachments
+ anv: Stop leaking the no_aux sampler surface state
+ anv/image: Add INPUT_ATTACHMENT to the list of required usages
+ nir/vars_to_ssa: Handle missing struct members in foreach_deref_node
+ spirv: Fix SpvImageFormatR16ui
+
+
+
Juan A. Suarez Romero (2):
+
+ anv/pipeline: use unsigned long long constant to check enable vertex inputs
+ anv/pipeline: do not use BITFIELD64_BIT()
+
+
+
Kenneth Graunke (1):
+
+ nir: Use nir_src_copy instead of direct assignments.
+
+
+
Lionel Landwerlin (1):
+
+ i965: perf: flush batchbuffers at the beginning of queries
+
+
+
Lucas Stach (1):
+
+ etnaviv: fix memory leak when BO allocation fails
+
+
+
Marek Olšák (2):
+
+ st/mesa: always unconditionally revalidate main framebuffer after SwapBuffers
+ gallium/radeon: make S_FIXED function signed and move it to shared code
+
+
+
Mark Thompson (1):
+
+ st/va: Fix scaling list ordering for H.265
+
+
+
Nicolai Hähnle (4):
+
+ radeonsi/gfx9: fix crash building monolithic merged ES-GS shader
+ radeonsi: fix detection of DRAW_INDIRECT_MULTI on SI
+ radeonsi/gfx9: reduce max threads per block to 1024 on gfx9+
+ gallium/radeon: fix ARB_query_buffer_object conversion to boolean
+
+
+
Thomas Hellstrom (2):
+
+ loader/dri3: Use dri3_find_back in loader_dri3_swap_buffers_msc
+ dri3: Wait for all pending swapbuffers to be scheduled before touching the front
+
+
+
Tim Rowley (3):
+
+ gallium/util: fix nondeterministic avx512 detection
+ swr/rast: quit using linux-specific gettid()
+ swr/rast: fix scons gen_knobs.h dependency
+
+
+
Timothy Arceri (1):
+
+ nir: fix nir_opt_copy_prop_vars() for arrays of arrays
+
+
+
Wladimir J. van der Laan (1):
+
+ etnaviv: Clear lbl_usage array correctly
+
+
+
+
+
+
diff -Nru mesa-17.2.4/docs/relnotes/17.1.7.html mesa-17.3.3/docs/relnotes/17.1.7.html
--- mesa-17.2.4/docs/relnotes/17.1.7.html 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/docs/relnotes/17.1.7.html 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,148 @@
+
+
+
+
+ Mesa Release Notes
+
+
+
+
+
+
+
+
+
+
Mesa 17.1.7 Release Notes / August 21, 2017
+
+
+Mesa 17.1.7 is a bug fix release which fixes bugs found since the 17.1.6 release.
+
+
+Mesa 17.1.7 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5. OpenGL
+4.5 is only available if requested at context creation
+because compatibility contexts are not supported.
+
+
+
+
SHA256 checksums
+
+7ca484fe3194e8185d9a20261845bfd284cc40d0f3fda690d317f85ac7b91af5 mesa-17.1.7.tar.gz
+69f472a874b1122404fa0bd13e2d6bf87eb3b9ad9c21d2f39872a96d83d9e5f5 mesa-17.1.7.tar.xz
+
+
+
+
New features
+
None
+
+
+
Bug fixes
+
+
+
+Bug 101334 - AMD SI cards: Some vulkan apps freeze the system
+
+Bug 101766 - Assertion `!"invalid type"' failed when constant expression involves literal of different type
+
+Bug 102024 - FORMAT_FEATURE_SAMPLED_IMAGE_BIT not supported for D16_UNORM and D32_SFLOAT
+
+Bug 102148 - Crash when running qopenglwidget example on mesa llvmpipe win32
+
+Bug 102241 - gallium/wgl: SwapBuffers freezing regularly with swap interval enabled
+
+
+
+
+
Changes
+
+
Andres Gomez (8):
+
+ cherry-ignore: add "swr: use the correct variable for no undefined symbols"
+ cherry-ignore: add "radeon/ac: use ds_swizzle for derivs on si/cik."
+ cherry-ignore: add "configure: remove trailing "-a" in swr architecture teststable: 17.2 nomination only."
+ cherry-ignore: added 17.2 nominations.
+ cherry-ignore: add "radv: Handle VK_ATTACHMENT_UNUSED in color attachments."
+ cherry-ignore: add "virgl: drop precise modifier."
+ cherry-ignore: add "radv: handle 10-bit format clamping workaround."
+ Update version to 17.1.7
+
+
+
Chris Wilson (1):
+
+ i965/blit: Remember to include miptree buffer offset in relocs
+
+
+
Connor Abbott (1):
+
+ ac/nir: fix lsb emission
+
+
+
Dave Airlie (5):
+
+ intel/vec4/gs: reset nr_pull_param if DUAL_INSTANCED compile failed.
+ radv: avoid GPU hangs if someone does a resolve with non-multisample src (v2)
+ radv: fix f16->f32 denorm handling for SI/CIK. (v2)
+ radv: fix MSAA on SI gpus.
+ radv: force cs/ps/l2 flush at end of command stream. (v2)
+
+
+
Emil Velikov (3):
+
+ docs: add sha256 checksums for 17.1.6
+ egl/x11: don't leak xfixes_query in the error path
+ egl: avoid eglCreatePlatform*Surface{EXT,} crash with invalid dpy
+
+
+
Eric Anholt (1):
+
+ util: Fix build on old glibc.
+
+
+
Frank Richter (3):
+
+ st/mesa: fix a null pointer access
+ st/wgl: check for negative delta in wait_swap_interval()
+ gallium/os: fix os_time_get_nano() to roll over less
+
+
+
Ilia Mirkin (3):
+
+ glsl/ast: update rhs in addition to the var's constant_value
+ nv50/ir: fix srcMask computation for TG4 and TXF
+ nv50/ir: fix TXQ srcMask
+
+
+
Jason Ekstrand (1):
+
+ anv/formats: Allow sampling on depth-only formats on gen7
+
+
+
Karol Herbst (1):
+
+ nv50/ir: fix ConstantFolding with saturation
+
+
+
Kenneth Graunke (1):
+
+ i965: Delete pitch alignment assertion in get_blit_intratile_offset_el.
+
+
+
Marek Olšák (2):
+
+ ac: fail shader compilation if libelf is replaced by an incompatible version
+ radeonsi: disable CE by default
+
+
+
Tim Rowley (1):
+
+ swr/rast: Fix invalid casting for calls to Interlocked* functions
+
+
+
+
+
+
diff -Nru mesa-17.2.4/docs/relnotes/17.1.8.html mesa-17.3.3/docs/relnotes/17.1.8.html
--- mesa-17.2.4/docs/relnotes/17.1.8.html 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/docs/relnotes/17.1.8.html 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,115 @@
+
+
+
+
+ Mesa Release Notes
+
+
+
+
+
+
+
+
+
+
Mesa 17.1.8 Release Notes / August 28, 2017
+
+
+Mesa 17.1.8 is a bug fix release which fixes bugs found since the 17.1.7 release.
+
+
+Mesa 17.1.8 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5. OpenGL
+4.5 is only available if requested at context creation
+because compatibility contexts are not supported.
+
+
+
+
SHA256 checksums
+
+faa59a677e88fd5224cdfebcdb6ca9ad3e3c64bd562baa8d5c3c1faeef1066b6 mesa-17.1.8.tar.gz
+75ed2eaeae26ddd536150f294386468ae2e1a7717948c41cd14b7875be5269db mesa-17.1.8.tar.xz
+
+
+
+
New features
+
None
+
+
+
Bug fixes
+
+
+
+Bug 101910 - [BYT] ES31-CTS.functional.copy_image.non_compressed.viewclass_96_bits.rgb32f_rgb32f
+
+Bug 102308 - segfault in glCompressedTextureSubImage3D
+
+
+
+
+
Changes
+
+
Andres Gomez (6):
+
+ docs: add sha256 checksums for 17.1.7
+ cherry-ignore: cherry-ignore: added 17.2 nominations.
+ cherry-ignore: add "i965/tex: Don't pass samples to miptree_create_for_teximage"
+ cherry-ignore: add "i965: Make a BRW_NEW_FAST_CLEAR_COLOR dirty bit."
+ cherry-ignore: add "egl/drm: Fix misused x and y offsets in swrast_*_image*"
+ Update version to 17.1.8
+
+
+
Christoph Haag (1):
+
+ mesa: only copy requested compressed teximage cubemap faces
+
+
+
Dave Airlie (1):
+
+ radv: don't crash if we have no framebuffer
+
+
+
Ilia Mirkin (2):
+
+ glsl: add a few missing int64 constant propagation cases
+ nv50/ir: properly set sType for TXF ops to U32
+
+
+
Jason Ekstrand (1):
+
+ i965: Stop looking at NewDriverState when emitting 3DSTATE_URB
+
+
+
Kai Chen (1):
+
+ egl/wayland: Use roundtrips when awaiting buffer release
+
+
+
Lionel Landwerlin (1):
+
+ i965: perf: minimize the chances to spread queries across batchbuffers
+
+
+
Marek Olšák (1):
+
+ radeonsi/gfx9: add a temporary workaround for a tessellation driver bug
+
+
+
Tim Rowley (1):
+
+ swr/rast: switch gen_knobs.cpp license
+
+
+
Topi Pohjolainen (1):
+
+ intel/blorp: Adjust intra-tile x when faking rgb with red-only
+
+
+
+
+
+
diff -Nru mesa-17.2.4/docs/relnotes/17.1.9.html mesa-17.3.3/docs/relnotes/17.1.9.html
--- mesa-17.2.4/docs/relnotes/17.1.9.html 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/docs/relnotes/17.1.9.html 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,144 @@
+
+
+
+
+ Mesa Release Notes
+
+
+
+
+
+
+
+
+
+
Mesa 17.1.9 Release Notes / September 8, 2017
+
+
+Mesa 17.1.9 is a bug fix release which fixes bugs found since the 17.1.8 release.
+
+
+Mesa 17.1.9 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5. OpenGL
+4.5 is only available if requested at context creation
+because compatibility contexts are not supported.
+
+
+
+
SHA256 checksums
+
+4325401b07b5f44759da781bc8d7c0a4a7244e09a702d16c037090986e07ee22 mesa-17.1.9.tar.gz
+5f51ad94341696097d5df7b838183534478216858ac0fc8de183671a36ffea1a mesa-17.1.9.tar.xz
+
+
+
+
New features
+
None
+
+
+
Bug fixes
+
+
+Bug 100613 - Regression in Mesa 17 on s390x (zSystems)
+
+Bug 102454 - glibc 2.26 doesn't provide anymore xlocale.h
+
+Bug 102467 - src/mesa/state_tracker/st_cb_readpixels.c:178]: (warning) Redundant assignment
+
+
+
+
+
Changes
+
+
Andres Gomez (8):
+
+ docs: add sha256 checksums for 17.1.8
+ cherry-ignore: added 17.2 nominations.
+ cherry-ignore: add "nir: Fix system_value_from_intrinsic for subgroups"
+ cherry-ignore: add "i965: Fix crash in fallback GTT mapping."
+ cherry-ignore: add "radeonsi/gfx9: always flush DB metadata on framebuffer changes"
+ cherry-ignore: add "radv: Fix vkCopyImage with both depth and stencil aspects."
+ cherry-ignore: add "radeonsi/gfx9: proper workaround for LS/HS VGPR initialization bug"
+ Update version to 17.1.9
+
+
+
Bas Nieuwenhuizen (3):
+
+ radv: Fix off by one in MAX_VBS assert.
+ radv: Fix sparse BO mapping merging.
+ radv: Actually set the cmd_buffer usage_flags.
+
+
+
Ben Crocker (1):
+
+ llvmpipe: lp_build_gather_elem_vec BE fix for 3x16 load
+
+
+
Charmaine Lee (1):
+
+ vbo: fix offset in minmax cache key
+
+
+
Christian Gmeiner (1):
+
+ etnaviv: use correct param for etna_compatible_rs_format(..)
+
+
+
Emil Velikov (3):
+
+ egl: don't NULL deref the .get_capabilities function pointer
+ egl/wayland: plug leaks in dri2_wl_create_window_surface() error path
+ egl/wayland: polish object teardown in dri2_wl_destroy_surface
+
+
+
Eric Engestrom (1):
+
+ util: improve compiler guard
+
+
+
Grazvydas Ignotas (2):
+
+ radv: clear dynamic_shader_stages on create
+ radv: don't assert on empty hash table
+
+
+
Ilia Mirkin (2):
+
+ glsl: fix counting of vertex shader output slots used by explicit vars
+ st/mesa: fix handling of vertex array double inputs
+
+
+
Jason Ekstrand (2):
+
+ anv/formats: Nicely handle unknown VkFormat enums
+ spirv: Add support for the HelperInvocation builtin
+
+
+
Karol Herbst (1):
+
+ nvc0: write 0 to pipeline_statistics.cs_invocations
+
+
+
Michael Olbrich (1):
+
+ egl/dri2: only destroy created objects
+
+
+
Ray Strode (1):
+
+ gallivm: correct channel shift logic on big endian
+
+
+
Roland Scheidegger (1):
+
+ st/mesa: fix view template initialization in try_pbo_readpixels
+
+
+
+
+
+
diff -Nru mesa-17.2.4/docs/relnotes/17.2.4.html mesa-17.3.3/docs/relnotes/17.2.4.html
--- mesa-17.2.4/docs/relnotes/17.2.4.html 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/docs/relnotes/17.2.4.html 1970-01-01 00:00:00.000000000 +0000
@@ -1,131 +0,0 @@
-
-
-
-
- Mesa Release Notes
-
-
-
-
-
-
-
-
-
-
Mesa 17.2.4 Release Notes / October 30, 2017
-
-
-Mesa 17.2.4 is a bug fix release which fixes bugs found since the 17.2.3 release.
-
-
-Mesa 17.2.4 implements the OpenGL 4.5 API, but the version reported by
-glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
-glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
-Some drivers don't support all the features required in OpenGL 4.5. OpenGL
-4.5 is only available if requested at context creation
-because compatibility contexts are not supported.
-
-
-
-
SHA256 checksums
-
-TBD
-
-
-
-
New features
-
None
-
-
-
Bug fixes
-
-
-Bug 102774 - [BDW] [Bisected] Absolute constant buffers break VAAPI in mpv
-
-Bug 103388 - Linking libcltgsi.la (llvm/codegen/libclllvm_la-common.lo) fails with "error: no match for 'operator-'" with GCC-7, Mesa from Git and current LLVM revisions
-
-
-
-
-
Changes
-
Andres Gomez (8):
-
- cherry-ignore: configure.ac: rework llvm detection and handling
- cherry-ignore: glsl: fix derived cs variables
- cherry-ignore: added 17.3 nominations.
- cherry-ignore: radv: Don't use vgpr indexing for outputs on GFX9.
- cherry-ignore: radv: Disallow indirect outputs for GS on GFX9 as well.
- cherry-ignore: mesa/bufferobj: don't double negate the range
- cherry-ignore: broadcom/vc5: Propagate vc4 aliasing fix to vc5.
- Update version to 17.2.4
-
-
-
Bas Nieuwenhuizen (1):
-
- ac/nir: Fix nir_texop_lod on GFX for 1D arrays.
-
-
-
Dave Airlie (1):
-
- radv/image: bump all the offset to uint64_t.
-
-
-
Emil Velikov (1):
-
- docs: add sha256 checksums for 17.2.3
-
-
-
Henri Verbeet (1):
-
- vulkan/wsi: Free the event in x11_manage_fifo_queues().
-
-
-
Jan Vesely (1):
-
- clover: Fix compilation after clang r315871
-
-
-
Jason Ekstrand (4):
-
- nir/intrinsics: Set the correct num_indices for load_output
- intel/fs: Handle flag read/write aliasing in needs_src_copy
- anv/pipeline: Call nir_lower_system_valaues after brw_preprocess_nir
- intel/eu: Use EXECUTE_1 for JMPI
-
-
-
Kenneth Graunke (1):
-
- i965: Revert absolute mode for constant buffer pointers.
-
-
-
Marek Olšák (1):
-
- Revert "mesa: fix texture updates for ATI_fragment_shader"
-
-
-
Matthew Nicholls (1):
-
- ac/nir: generate correct instruction for atomic min/max on unsigned images
-
-
-
Michel Dänzer (1):
-
- st/mesa: Initialize textures array in st_framebuffer_validate
-
-
-
Samuel Pitoiset (1):
-
- radv: add the draw count buffer to the list of buffers
-
-
-
Stefan Schake (1):
-
- broadcom/vc4: Fix aliasing issue
-
-
-
-
-
-
diff -Nru mesa-17.2.4/docs/relnotes/17.3.0.html mesa-17.3.3/docs/relnotes/17.3.0.html
--- mesa-17.2.4/docs/relnotes/17.3.0.html 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/docs/relnotes/17.3.0.html 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,246 @@
+
+
+
+
+ Mesa Release Notes
+
+
+
+
+
+
+
+
+
+
Mesa 17.3.0 Release Notes / December 8, 2017
+
+
+Mesa 17.3.0 is a new development release.
+People who are concerned with stability and reliability should stick
+with a previous release or wait for Mesa 17.3.1.
+
+
+Mesa 17.3.0 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5. OpenGL
+4.5 is only available if requested at context creation
+because compatibility contexts are not supported.
+
+
+
+
SHA256 checksums
+
+0cb1ffe2b4637d80f08df3bdfeb300352dcffd8ff4f6711278639b084e3f07f9 mesa-17.3.0.tar.gz
+29a0a3a6c39990d491a1a58ed5c692e596b3bfc6c01d0b45e0b787116c50c6d9 mesa-17.3.0.tar.xz
+
+
+
+
New features
+
+
+Note: some of the new features are only available with certain drivers.
+
+
+
+libtxc_dxtn is now integrated into Mesa. GL_EXT_texture_compression_s3tc and GL_ANGLE_texture_compression_dxt are now always enabled on drivers that support them
+GL_ARB_indirect_parameters on i965/gen7+
+GL_ARB_polygon_offset_clamp on i965, nv50, nvc0, r600, radeonsi, llvmpipe, swr
+GL_ARB_transform_feedback_overflow_query on radeonsi
+GL_ARB_texture_filter_anisotropic on i965, nv50, nvc0, r600, radeonsi
+GL_EXT_memory_object on radeonsi
+GL_EXT_memory_object_fd on radeonsi
+EGL_ANDROID_native_fence_sync on radeonsi with a future kernel (possibly 4.15)
+EGL_IMG_context_priority on i965
+
+
+
Bug fixes
+
+
+
+Bug 97532 - Regression: GLB 2.7 & Glmark-2 GLES versions segfault due to linker precision error (259fc505) on dead variable
+
+Bug 100438 - glsl/ir.cpp:1376: ir_dereference_variable::ir_dereference_variable(ir_variable*): Assertion `var != NULL' failed.
+
+Bug 100613 - Regression in Mesa 17 on s390x (zSystems)
+
+Bug 101334 - AMD SI cards: Some vulkan apps freeze the system
+
+Bug 101378 - interpolateAtSample check for input parameter is too strict
+
+Bug 101655 - Explicit sync support for android
+
+Bug 101691 - gfx corruption on windowed 3d-apps running on dGPU
+
+Bug 101709 - [llvmpipe] piglit gl-1.0-scissor-offscreen regression
+
+Bug 101766 - Assertion `!"invalid type"' failed when constant expression involves literal of different type
+
+Bug 101832 - [PATCH][regression][bisect] Xorg fails to start after f50aa21456d82c8cb6fbaa565835f1acc1720a5d
+
+Bug 101851 - [regression] libEGL_common.a undefined reference to '__gxx_personality_v0'
+
+Bug 101867 - Launch options window renders black in Feral Games in current Mesa trunk
+
+Bug 101876 - SIGSEGV when launching Steam
+
+Bug 101910 - [BYT] ES31-CTS.functional.copy_image.non_compressed.viewclass_96_bits.rgb32f_rgb32f
+
+Bug 101925 - playstore/webview crash
+
+Bug 101941 - Getting different output depending on attribute declaration order
+
+Bug 101961 - Serious Sam Fusion hangs system completely
+
+Bug 101981 - Commit ddc32537d6db69198e88ef0dfe19770bf9daa536 breaks rendering in multiple applications
+
+Bug 101982 - Weston crashes when running an OpenGL program on i965
+
+Bug 101983 - [G33] ES2-CTS.functional.shaders.struct.uniform.sampler_nested* regression
+
+Bug 101989 - ES3-CTS.functional.state_query.integers.viewport_getinteger regression
+
+Bug 102006 - gstreamer vaapih264enc segfault
+
+Bug 102014 - Mesa git build broken by commit bc7f41e11d325280db12e7b9444501357bc13922
+
+Bug 102015 - [Regression,bisected]: Segfaults with various programs
+
+Bug 102024 - FORMAT_FEATURE_SAMPLED_IMAGE_BIT not supported for D16_UNORM and D32_SFLOAT
+
+Bug 102038 - assertion failure in update_framebuffer_size
+
+Bug 102050 - commit b4f639d02a causes build breakage on Android 32bit builds
+
+Bug 102052 - No package 'expat' found
+
+Bug 102062 - Segfault at eglCreateContext in android-x86
+
+Bug 102125 - [softpipe] piglit arb_texture_view-targets regression
+
+Bug 102148 - Crash when running qopenglwidget example on mesa llvmpipe win32
+
+Bug 102177 - [SKL] ES31-CTS.core.sepshaderobjs.StateInteraction fails sporadically
+
+Bug 102201 - [regression, SI] GPU crash in Unigine Valley
+
+Bug 102241 - gallium/wgl: SwapBuffers freezing regularly with swap interval enabled
+
+Bug 102274 - assertion failure in ir_validate.cpp:240
+
+Bug 102308 - segfault in glCompressedTextureSubImage3D
+
+Bug 102358 - WarThunder freezes at start, with activated vsync (vblank_mode=2)
+
+Bug 102377 - PIPE_*_4BYTE_ALIGNED_ONLY caps crashing
+
+Bug 102429 - [regression, SI] Performance decrease in Unigine Valley & Heaven
+
+Bug 102435 - [skl,kbl] [drm] GPU HANG: ecode 9:0:0x86df7cf9, in csgo_linux64 [4947], reason: Hang on rcs, action: reset
+
+Bug 102454 - glibc 2.26 doesn't provide anymore xlocale.h
+
+Bug 102461 - [llvmpipe] piglit glean fragprog1 XPD test 1 regression
+
+Bug 102467 - src/mesa/state_tracker/st_cb_readpixels.c:178]: (warning) Redundant assignment
+
+Bug 102496 - Frontbuffer rendering corruption on mesa master
+
+Bug 102502 - [bisected] Kodi crashes since commit 707d2e8b - gallium: fold u_trim_pipe_prim call from st/mesa to drivers
+
+Bug 102530 - [bisected] Kodi crashes when launching a stream - commit bd2662bf
+
+Bug 102552 - Null dereference due to not checking return value of util_format_description
+
+Bug 102565 - u_debug_stack.c:114: undefined reference to `_Ux86_64_getcontext'
+
+Bug 102573 - fails to build on armel
+
+Bug 102665 - test_glsl_to_tgsi_lifetime.cpp:53:67: error: ‘>>’ should be ‘> >’ within a nested template argument list
+
+Bug 102677 - [OpenGL CTS] KHR-GL45.CommonBugs.CommonBug_PerVertexValidation fails
+
+Bug 102680 - [OpenGL CTS] KHR-GL45.shader_ballot_tests.ShaderBallotBitmasks fails
+
+Bug 102685 - piglit.spec.glsl-1_50.compiler.vs-redeclares-pervertex-out-before-global-redeclaration
+
+Bug 102774 - [BDW] [Bisected] Absolute constant buffers break VAAPI in mpv
+
+Bug 102809 - Rust shadows(?) flash random colours
+
+Bug 102844 - memory leak with glDeleteProgram for shader program type GL_COMPUTE_SHADER
+
+Bug 102847 - swr fail to build with llvm-5.0.0
+
+Bug 102852 - Scons: Support the new Scons 3.0.0
+
+Bug 102904 - piglit and gl45 cts linker tests regressed
+
+Bug 102924 - mesa (git version) images too dark
+
+Bug 102940 - Regression: Vulkan KMS rendering crashes since 17.2
+
+Bug 102955 - HyperZ related rendering issue in ARK: Survival Evolved
+
+Bug 102999 - [BISECTED,REGRESSION] Failing Android EGL dEQP with RGBA configs
+
+Bug 103002 - string_buffer_test.cpp:43: error: ISO C++ forbids initialization of member ‘str1’
+
+Bug 103085 - [ivb byt hsw] piglit.spec.arb_indirect_parameters.tf-count-arrays
+
+Bug 103098 - [OpenGL CTS] KHR-GL45.enhanced_layouts.varying_structure_locations fails
+
+Bug 103101 - [SKL][bisected] DiRT Rally GPU hang
+
+Bug 103115 - [BSW BXT GLK] dEQP-VK.spirv_assembly.instruction.compute.sconvert.int32_to_int64
+
+Bug 103128 - [softpipe] piglit fs-ldexp regression
+
+Bug 103142 - R600g+sb: optimizer apparently stuck in an endless loop
+
+Bug 103214 - GLES CTS functional.state_query.indexed.atomic_counter regression
+
+Bug 103227 - [G965 G45 ILK] ES2-CTS.gtf.GL2ExtensionTests.texture_float.texture_float regression
+
+Bug 103247 - Performance regression: car chase, manhattan
+
+Bug 103253 - blob.h:138:1: error: unknown type name 'ssize_t'
+
+Bug 103265 - [llvmpipe] piglit depth-tex-compare regression
+
+Bug 103323 - Possible unintended error message in file pixel.c line 286
+
+Bug 103388 - Linking libcltgsi.la (llvm/codegen/libclllvm_la-common.lo) fails with "error: no match for 'operator-'" with GCC-7, Mesa from Git and current LLVM revisions
+
+Bug 103393 - glDispatchComputeGroupSizeARB : gl_GlobalInvocationID.x != gl_WorkGroupID.x * gl_LocalGroupSizeARB.x + gl_LocalInvocationID.x
+
+Bug 103412 - gallium/wgl: Another fix to context creation without prior SetPixelFormat()
+
+Bug 103519 - wayland egl apps crash on start with mesa 17.2
+
+Bug 103529 - [GM45] GPU hang with mpv fullscreen (bisected)
+
+Bug 103537 - i965: Shadow of Mordor broken since commit 379b24a40d3d34ffdaaeb1b328f50e28ecb01468 on Haswell
+
+Bug 103544 - Graphical glitches r600 in game this war of mine linux native
+
+Bug 103616 - Increased difference from reference image in shaders
+
+Bug 103628 - [BXT, GLK, BSW] KHR-GL46.shader_ballot_tests.ShaderBallotBitmasks
+
+Bug 103759 - plasma desktop corrupted rendering
+
+Bug 103787 - [BDW,BSW] gpu hang on spec.arb_pipeline_statistics_query.arb_pipeline_statistics_query-comp
+
+Bug 103909 - anv_allocator.c:113:1: error: static declaration of ‘memfd_create’ follows non-static declaration
+
+
+
+
Changes
+
+
+
+
+
diff -Nru mesa-17.2.4/docs/relnotes/17.3.1.html mesa-17.3.3/docs/relnotes/17.3.1.html
--- mesa-17.2.4/docs/relnotes/17.3.1.html 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/docs/relnotes/17.3.1.html 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,191 @@
+
+
+
+
+ Mesa Release Notes
+
+
+
+
+
+
+
+
+
+
Mesa 17.3.1 Release Notes / December 21, 2017
+
+
+Mesa 17.3.1 is a bug fix release which fixes bugs found since the 17.3.0 release.
+
+
+Mesa 17.3.1 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5. OpenGL
+4.5 is only available if requested at context creation
+because compatibility contexts are not supported.
+
+
+
+
SHA256 checksums
+
+b0bb0419dbe3043ed4682a28eaf95721f427ca3f23a3c2a7dc77dbe8a3b6384d mesa-17.3.1.tar.gz
+9ae607e0998a586fb2c866cfc8e45e6f52d1c56cb1b41288253ea83eada824c1 mesa-17.3.1.tar.xz
+
+
+
+
New features
+
None
+
+
+
Bug fixes
+
+
+
+Bug 94739 - Mesa 11.1.2 implementation error: bad format MESA_FORMAT_Z_FLOAT32 in _mesa_unpack_uint_24_8_depth_stencil_row
+
+Bug 102710 - vkCmdBlitImage with arrayLayers > 1 fails
+
+Bug 103579 - Vertex shader causes compiler to crash in SPIRV-to-NIR
+
+Bug 103966 - Mesa 17.2.5 implementation error: bad format MESA_FORMAT_Z_FLOAT32 in _mesa_unpack_uint_24_8_depth_stencil_row
+
+Bug 104119 - radv: OpBitFieldInsert produces 0 with a loop counter for Insert
+
+Bug 104143 - r600/sb: clobbers gl_Position -> gl_FragCoord
+
+
+
+
+
Changes
+
+
Alex Smith (1):
+
+ radv: Add LLVM version to the device name string
+
+
+
Bas Nieuwenhuizen (3):
+
+ spirv: Fix loading an entire block at once.
+ radv: Don't advertise VK_EXT_debug_report.
+ radv: Fix multi-layer blits.
+
+
+
Ben Crocker (1):
+
+ docs/llvmpipe: document ppc64le as alternative architecture to x86.
+
+
+
Brian Paul (2):
+
+ xlib: call _mesa_warning() instead of fprintf()
+ gallium/aux: include nr_samples in util_resource_size() computation
+
+
+
Bruce Cherniak (1):
+
+ swr: Fix KNOB_MAX_WORKER_THREADS thread creation override.
+
+
+
Dave Airlie (1):
+
+ radv: port merge tess info from anv
+
+
+
Emil Velikov (5):
+
+ docs: add sha256 checksums for 17.3.0
+ util: scons: wire up the sha1 test
+ cherry-ignore: meson: fix strtof locale support check
+ cherry-ignore: util: add mesa-sha1 test to meson
+ Update version to 17.3.1
+
+
+
Eric Anholt (1):
+
+ broadcom/vc4: Fix handling of GFXH-515 workaround with a start vertex count.
+
+
+
Eric Engestrom (1):
+
+ compiler: use NDEBUG to guard asserts
+
+
+
Fabian Bieler (2):
+
+ glsl: Match order of gl_LightSourceParameters elements.
+ glsl: Fix gl_NormalScale.
+
+
+
Gert Wollny (1):
+
+ r600/sb: do not convert if-blocks that contain indirect array access
+
+
+
James Legg (1):
+
+ nir/opcodes: Fix constant-folding of bitfield_insert
+
+
+
Jason Ekstrand (1):
+
+ i965: Switch over to fully external-or-not MOCS scheme
+
+
+
Juan A. Suarez Romero (1):
+
+ travis: disable Meson build
+
+
+
Kenneth Graunke (2):
+
+ meta: Initialize depth/clear values on declaration.
+ meta: Fix ClearTexture with GL_DEPTH_COMPONENT.
+
+
+
Leo Liu (1):
+
+ radeon/vce: move destroy command before feedback command
+
+
+
Marek Olšák (4):
+
+ radeonsi: flush the context after resource_copy_region for buffer exports
+ radeonsi: allow DMABUF exports for local buffers
+ winsys/amdgpu: disable local BOs again due to worse performance
+ radeonsi: don't call force_dcc_off for buffers
+
+
+
Matt Turner (2):
+
+ util: Assume little endian in the absence of platform-specific handling
+ util: Add a SHA1 unit test program
+
+
+
Nicolai Hähnle (1):
+
+ radeonsi: fix the R600_RESOURCE_FLAG_UNMAPPABLE check
+
+
+
Pierre Moreau (1):
+
+ nvc0/ir: Properly lower 64-bit shifts when the shift value is >32
+
+
+
Timothy Arceri (1):
+
+ glsl: get correct member type when processing xfb ifc arrays
+
+
+
Vadym Shovkoplias (2):
+
+ glx/dri3: Remove unused deviceName variable
+ util/disk_cache: Remove unneeded free() on always null string
+
+
+
+
+
+
diff -Nru mesa-17.2.4/docs/relnotes/17.3.2.html mesa-17.3.3/docs/relnotes/17.3.2.html
--- mesa-17.2.4/docs/relnotes/17.3.2.html 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/docs/relnotes/17.3.2.html 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,109 @@
+
+
+
+
+ Mesa Release Notes
+
+
+
+
+
+
+
+
+
+
Mesa 17.3.2 Release Notes / January 8, 2018
+
+
+Mesa 17.3.2 is a bug fix release which fixes bugs found since the 17.3.1 release.
+
+
+Mesa 17.3.2 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5. OpenGL
+4.5 is only available if requested at context creation
+because compatibility contexts are not supported.
+
+
+
+
SHA256 checksums
+
+f997e80f14c385f9a2ba827c2b74aebf1b7426712ca4a81c631ef9f78e437bf4 mesa-17.3.2.tar.gz
+e2844a13f2d6f8f24bee65804a51c42d8dc6ae9c36cff7ee61d0940e796d64c6 mesa-17.3.2.tar.xz
+
+
+
+
New features
+
None
+
+
+
Bug fixes
+
+
+
+Bug 97852 - Unreal Engine corrupted preview viewport
+
+Bug 103801 - [i965] >Observer_ issue
+
+Bug 104288 - Steamroll needs allow_glsl_cross_stage_interpolation_mismatch=true
+
+
+
+
+
Changes
+
+
Bas Nieuwenhuizen (1):
+
+ radv: Fix DCC compatible formats.
+
+
+
Brendan King (1):
+
+ egl: link libEGL against the dynamic version of libglapi
+
+
+
Dave Airlie (6):
+
+ radv/gfx9: add support for 3d images to blit 2d paths
+ radv: handle depth/stencil image copy with layouts better. (v3.1)
+ radv/meta: fix blit paths for depth/stencil (v2.1)
+ radv: fix issue with multisample positions and interp_var_at_sample.
+ radv/gfx9: add 3d sampler image->buffer copy shader. (v3)
+ radv: don't do format replacement on tc compat htile surfaces.
+
+
+
Emil Velikov (2):
+
+ docs: add sha256 checksums for 17.3.1
+ Update version to 17.3.2
+
+
+
Eric Engestrom (1):
+
+ egl: let each platform decided how to handle LIBGL_ALWAYS_SOFTWARE
+
+
+
Rob Herring (1):
+
+ egl/android: Fix build break with dri2_initialize_android _EGLDisplay parameter
+
+
+
Samuel Pitoiset (2):
+
+ radv/gfx9: fix primitive topology when adjacency is used
+ radv: use a faster version for nir_op_pack_half_2x16
+
+
+
Tapani Pälli (2):
+
+ mesa: add AllowGLSLCrossStageInterpolationMismatch workaround
+ drirc: set allow_glsl_cross_stage_interpolation_mismatch for more games
+
+
+
+
+
+
diff -Nru mesa-17.2.4/docs/relnotes/17.3.3.html mesa-17.3.3/docs/relnotes/17.3.3.html
--- mesa-17.2.4/docs/relnotes/17.3.3.html 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/docs/relnotes/17.3.3.html 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,150 @@
+
+
+
+
+ Mesa Release Notes
+
+
+
+
+
+
+
+
+
+
Mesa 17.3.3 Release Notes / January 18, 2018
+
+
+Mesa 17.3.3 is a bug fix release which fixes bugs found since the 17.3.2 release.
+
+
+Mesa 17.3.3 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5. OpenGL
+4.5 is only available if requested at context creation
+because compatibility contexts are not supported.
+
+
+
+
SHA256 checksums
+
+TBD
+
+
+
+
New features
+
None
+
+
+
Bug fixes
+
+
+
+Bug 104214 - Dota crashes when switching from game to desktop
+
+Bug 104492 - Compute Shader: Wrong alignment when assigning struct value to structured SSBO
+
+Bug 104551 - Check if Mako templates for Python are installed
+
+
+
+
+
Changes
+
+
Alex Smith (3):
+
+ anv: Add missing unlock in anv_scratch_pool_alloc
+ anv: Take write mask into account in has_color_buffer_write_enabled
+ anv: Make sure state on primary is correct after CmdExecuteCommands
+
+
+
Andres Gomez (1):
+
+ anv: Import mako templates only during execution of anv_extensions
+
+
+
Bas Nieuwenhuizen (11):
+
+ radv: Invert condition for all samples identical during resolve.
+ radv: Flush caches before subpass resolve.
+ radv: Fix fragment resolve destination offset.
+ radv: Use correct framebuffer size for partial FS resolves.
+ radv: Always use fragment resolve if dest uses DCC.
+ Revert "radv/gfx9: fix block compression texture views."
+ radv: Use correct HTILE expanded words.
+ radv: Allow writing 0 scissors.
+ ac/nir: Handle loading data from compact arrays.
+ radv: Invalidate L1 for VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT.
+ ac/nir: Sanitize location_frac for local variables.
+
+
+
Dave Airlie (8):
+
+ radv: fix events on compute queues.
+ radv: fix pipeline statistics end query on compute queue
+ radv/gfx9: fix 3d image to image transfers on compute queues.
+ radv/gfx9: fix 3d image clears on compute queues
+ radv/gfx9: fix buffer to image for 3d images on compute queues
+ radv/gfx9: fix block compression texture views.
+ radv/gfx9: use a bigger hammer to flush cb/db caches.
+ radv/gfx9: use correct swizzle parameter to work out border swizzle.
+
+
+
Emil Velikov (1):
+
+ docs: add sha256 checksums for 17.3.2
+
+
+
Florian Will (1):
+
+ glsl: Respect std430 layout in lower_buffer_access
+
+
+
Juan A. Suarez Romero (6):
+
+ cherry-ignore: intel/fs: Use the original destination region for int MUL lowering
+ cherry-ignore: i965/fs: Use UW types when using V immediates
+ cherry-ignore: main: Clear shader program data whenever ProgramBinary is called
+ cherry-ignore: egl: pass the dri2_dpy to the $plat_teardown functions
+ cherry-ignore: vulkan/wsi: free cmd pools
+ Update version to 17.3.3
+
+
+
Józef Kucia (1):
+
+ radeonsi: fix alpha-to-coverage if color writes are disabled
+
+
+
Kenneth Graunke (2):
+
+ i965: Require space for MI_BATCHBUFFER_END.
+ i965: Torch public intel_batchbuffer_emit_dword/float helpers.
+
+
+
Lucas Stach (1):
+
+ etnaviv: disable in-place resolve for non-supertiled surfaces
+
+
+
Samuel Iglesias Gonsálvez (1):
+
+ anv: VkDescriptorSetLayoutBinding can have descriptorCount == 0
+
+
+
Thomas Hellstrom (1):
+
+ loader/dri3: Avoid freeing renderbuffers in use
+
+
+
Tim Rowley (1):
+
+ swr/rast: fix invalid sign masks in avx512 simdlib code
+
+
+
+
+
+
diff -Nru mesa-17.2.4/docs/relnotes.html mesa-17.3.3/docs/relnotes.html
--- mesa-17.2.4/docs/relnotes.html 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/docs/relnotes.html 2018-01-18 21:30:28.000000000 +0000
@@ -21,6 +21,15 @@
Note, path set must exist before running for dumping or replacing to work.
When both are set, these paths should be different so the dumped shaders do
-not clobber the replacement shaders.
+not clobber the replacement shaders. Also, the filenames of the replacement shaders
+should match the filenames of the corresponding dumped shaders.
+
+
+Capturing Shaders
+
+
+Setting MESA_SHADER_CAPTURE_PATH to a directory will cause the compiler
+to write .shader_test files for use with
+shader-db , a tool
+which compiler developers can use to gather statistics about shaders
+(instructions, cycles, memory accesses, and so on).
+
+
+Notably, this captures linked GLSL shaders - with all stages together -
+as well as ARB programs.
GLSL Version
diff -Nru mesa-17.2.4/docs/sourcetree.html mesa-17.3.3/docs/sourcetree.html
--- mesa-17.2.4/docs/sourcetree.html 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/docs/sourcetree.html 2018-01-18 21:30:28.000000000 +0000
@@ -145,7 +145,7 @@
xvmc - XvMC state tracker
vdpau - VDPAU state tracker
va - VA-API state tracker
- omx - OpenMAX state tracker
+ omx_bellagio - OpenMAX Bellagio state tracker
winsys -
diff -Nru mesa-17.2.4/docs/specs/enums.txt mesa-17.3.3/docs/specs/enums.txt
--- mesa-17.2.4/docs/specs/enums.txt 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/docs/specs/enums.txt 2018-01-18 21:30:28.000000000 +0000
@@ -46,14 +46,14 @@
GL_DEBUG_ASSERT_MESA 0x875B
GL_MESA_program_debug: (obsolete)
- GL_FRAGMENT_PROGRAM_CALLBACK_MESA 0x????
- GL_VERTEX_PROGRAM_CALLBACK_MESA 0x????
- GL_FRAGMENT_PROGRAM_POSITION_MESA 0x????
- GL_VERTEX_PROGRAM_POSITION_MESA 0x????
- GL_FRAGMENT_PROGRAM_CALLBACK_FUNC_MESA 0x????
- GL_FRAGMENT_PROGRAM_CALLBACK_DATA_MESA 0x????
- GL_VERTEX_PROGRAM_CALLBACK_FUNC_MESA 0x????
- GL_VERTEX_PROGRAM_CALLBACK_DATA_MESA 0x????
+ GL_FRAGMENT_PROGRAM_POSITION_MESA 0x8BB0
+ GL_FRAGMENT_PROGRAM_CALLBACK_MESA 0x8BB1
+ GL_FRAGMENT_PROGRAM_CALLBACK_FUNC_MESA 0x8BB2
+ GL_FRAGMENT_PROGRAM_CALLBACK_DATA_MESA 0x8BB3
+ GL_VERTEX_PROGRAM_POSITION_MESA 0x8BB4
+ GL_VERTEX_PROGRAM_CALLBACK_MESA 0x8BB5
+ GL_VERTEX_PROGRAM_CALLBACK_FUNC_MESA 0x8BB6
+ GL_VERTEX_PROGRAM_CALLBACK_DATA_MESA 0x8BB7
GL_MESAX_texture_stack:
GL_TEXTURE_1D_STACK_MESAX 0x8759
@@ -63,6 +63,11 @@
GL_TEXTURE_1D_STACK_BINDING_MESAX 0x875D
GL_TEXTURE_2D_STACK_BINDING_MESAX 0x875E
+GL_MESA_tile_raster_order
+ GL_TILE_RASTER_ORDER_FIXED_MESA 0x8BB8
+ GL_TILE_RASTER_ORDER_INCREASING_X_MESA 0x8BB9
+ GL_TILE_RASTER_ORDER_INCREASING_Y_MESA 0x8BBA
+
EGL_MESA_drm_image
EGL_DRM_BUFFER_FORMAT_MESA 0x31D0
EGL_DRM_BUFFER_USE_MESA 0x31D1
diff -Nru mesa-17.2.4/docs/specs/OLD/MESA_program_debug.spec mesa-17.3.3/docs/specs/OLD/MESA_program_debug.spec
--- mesa-17.2.4/docs/specs/OLD/MESA_program_debug.spec 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/docs/specs/OLD/MESA_program_debug.spec 2018-01-18 21:30:28.000000000 +0000
@@ -133,7 +133,7 @@
GetFloatv and GetIntegerv:
FRAGMENT_PROGRAM_POSITION_MESA 0x8bb0
- VERTEX_PROGRAM_POSITION_MESA 0x8bb5
+ VERTEX_PROGRAM_POSITION_MESA 0x8bb4
Accepted by the parameter of GetPointerv:
diff -Nru mesa-17.2.4/docs/submittingpatches.html mesa-17.3.3/docs/submittingpatches.html
--- mesa-17.2.4/docs/submittingpatches.html 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/docs/submittingpatches.html 2018-01-18 21:30:28.000000000 +0000
@@ -151,6 +151,18 @@
to check for regressions.
+
+As mentioned at the beginning, patches should be bisectable.
+A good way to test this is to make use of the `git rebase` command,
+to run your tests on each commit. Assuming your branch is based off
+origin/master
, you can run:
+
+$ git rebase --interactive --exec "make check" origin/master
+
+replacing "make check"
with whatever other test you want to
+run.
+
+
Mailing Patches
diff -Nru mesa-17.2.4/include/drm-uapi/drm_fourcc.h mesa-17.3.3/include/drm-uapi/drm_fourcc.h
--- mesa-17.2.4/include/drm-uapi/drm_fourcc.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/include/drm-uapi/drm_fourcc.h 2018-01-18 21:30:28.000000000 +0000
@@ -185,6 +185,8 @@
#define DRM_FORMAT_MOD_VENDOR_BROADCOM 0x07
/* add more to the end as needed */
+#define DRM_FORMAT_RESERVED ((1ULL << 56) - 1)
+
#define fourcc_mod_code(vendor, val) \
((((__u64)DRM_FORMAT_MOD_VENDOR_## vendor) << 56) | (val & 0x00ffffffffffffffULL))
@@ -197,6 +199,15 @@
*/
/*
+ * Invalid Modifier
+ *
+ * This modifier can be used as a sentinel to terminate the format modifiers
+ * list, or to initialize a variable with an invalid modifier. It might also be
+ * used to report an error back to userspace for certain APIs.
+ */
+#define DRM_FORMAT_MOD_INVALID fourcc_mod_code(NONE, DRM_FORMAT_RESERVED)
+
+/*
* Linear Layout
*
* Just plain linear layout. Note that this is different from no specifying any
@@ -253,6 +264,26 @@
#define I915_FORMAT_MOD_Yf_TILED fourcc_mod_code(INTEL, 3)
/*
+ * Intel color control surface (CCS) for render compression
+ *
+ * The framebuffer format must be one of the 8:8:8:8 RGB formats.
+ * The main surface will be plane index 0 and must be Y/Yf-tiled,
+ * the CCS will be plane index 1.
+ *
+ * Each CCS tile matches a 1024x512 pixel area of the main surface.
+ * To match certain aspects of the 3D hardware the CCS is
+ * considered to be made up of normal 128Bx32 Y tiles, Thus
+ * the CCS pitch must be specified in multiples of 128 bytes.
+ *
+ * In reality the CCS tile appears to be a 64Bx64 Y tile, composed
+ * of QWORD (8 bytes) chunks instead of OWORD (16 bytes) chunks.
+ * But that fact is not relevant unless the memory is accessed
+ * directly.
+ */
+#define I915_FORMAT_MOD_Y_TILED_CCS fourcc_mod_code(INTEL, 4)
+#define I915_FORMAT_MOD_Yf_TILED_CCS fourcc_mod_code(INTEL, 5)
+
+/*
* Tiled, NV12MT, grouped in 64 (pixels) x 32 (lines) -sized macroblocks
*
* Macroblocks are laid in a Z-shape, and each pixel data is following the
diff -Nru mesa-17.2.4/include/drm-uapi/drm.h mesa-17.3.3/include/drm-uapi/drm.h
--- mesa-17.2.4/include/drm-uapi/drm.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/include/drm-uapi/drm.h 2018-01-18 21:30:28.000000000 +0000
@@ -694,6 +694,7 @@
struct drm_syncobj_create {
__u32 handle;
+#define DRM_SYNCOBJ_CREATE_SIGNALED (1 << 0)
__u32 flags;
};
@@ -712,6 +713,24 @@
__u32 pad;
};
+#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
+#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
+struct drm_syncobj_wait {
+ __u64 handles;
+ /* absolute timeout */
+ __s64 timeout_nsec;
+ __u32 count_handles;
+ __u32 flags;
+ __u32 first_signaled; /* only valid when not waiting all */
+ __u32 pad;
+};
+
+struct drm_syncobj_array {
+ __u64 handles;
+ __u32 count_handles;
+ __u32 pad;
+};
+
#if defined(__cplusplus)
}
#endif
@@ -834,6 +853,9 @@
#define DRM_IOCTL_SYNCOBJ_DESTROY DRM_IOWR(0xC0, struct drm_syncobj_destroy)
#define DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD DRM_IOWR(0xC1, struct drm_syncobj_handle)
#define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE DRM_IOWR(0xC2, struct drm_syncobj_handle)
+#define DRM_IOCTL_SYNCOBJ_WAIT DRM_IOWR(0xC3, struct drm_syncobj_wait)
+#define DRM_IOCTL_SYNCOBJ_RESET DRM_IOWR(0xC4, struct drm_syncobj_array)
+#define DRM_IOCTL_SYNCOBJ_SIGNAL DRM_IOWR(0xC5, struct drm_syncobj_array)
/**
* Device specific ioctls should only be in their respective headers
diff -Nru mesa-17.2.4/include/drm-uapi/drm_mode.h mesa-17.3.3/include/drm-uapi/drm_mode.h
--- mesa-17.2.4/include/drm-uapi/drm_mode.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/include/drm-uapi/drm_mode.h 2018-01-18 21:30:28.000000000 +0000
@@ -712,6 +712,56 @@
__u64 user_data;
};
+struct drm_format_modifier_blob {
+#define FORMAT_BLOB_CURRENT 1
+ /* Version of this blob format */
+ __u32 version;
+
+ /* Flags */
+ __u32 flags;
+
+ /* Number of fourcc formats supported */
+ __u32 count_formats;
+
+ /* Where in this blob the formats exist (in bytes) */
+ __u32 formats_offset;
+
+ /* Number of drm_format_modifiers */
+ __u32 count_modifiers;
+
+ /* Where in this blob the modifiers exist (in bytes) */
+ __u32 modifiers_offset;
+
+ /* __u32 formats[] */
+ /* struct drm_format_modifier modifiers[] */
+};
+
+struct drm_format_modifier {
+ /* Bitmask of formats in get_plane format list this info applies to. The
+ * offset allows a sliding window of which 64 formats (bits).
+ *
+ * Some examples:
+ * In today's world with < 65 formats, and formats 0, and 2 are
+ * supported
+ * 0x0000000000000005
+ * ^-offset = 0, formats = 5
+ *
+ * If the number formats grew to 128, and formats 98-102 are
+ * supported with the modifier:
+ *
+ * 0x0000003c00000000 0000000000000000
+ * ^
+ * |__offset = 64, formats = 0x3c00000000
+ *
+ */
+ __u64 formats;
+ __u32 offset;
+ __u32 pad;
+
+ /* The modifier that applies to the >get_plane format list bitmask. */
+ __u64 modifier;
+};
+
/**
* Create a new 'blob' data property, copying length bytes from data pointer,
* and returning new blob ID.
diff -Nru mesa-17.2.4/include/drm-uapi/i915_drm.h mesa-17.3.3/include/drm-uapi/i915_drm.h
--- mesa-17.2.4/include/drm-uapi/i915_drm.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/include/drm-uapi/i915_drm.h 2018-01-18 21:30:28.000000000 +0000
@@ -260,6 +260,8 @@
#define DRM_I915_GEM_CONTEXT_GETPARAM 0x34
#define DRM_I915_GEM_CONTEXT_SETPARAM 0x35
#define DRM_I915_PERF_OPEN 0x36
+#define DRM_I915_PERF_ADD_CONFIG 0x37
+#define DRM_I915_PERF_REMOVE_CONFIG 0x38
#define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
#define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -315,6 +317,8 @@
#define DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_GETPARAM, struct drm_i915_gem_context_param)
#define DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_SETPARAM, struct drm_i915_gem_context_param)
#define DRM_IOCTL_I915_PERF_OPEN DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_OPEN, struct drm_i915_perf_open_param)
+#define DRM_IOCTL_I915_PERF_ADD_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_ADD_CONFIG, struct drm_i915_perf_oa_config)
+#define DRM_IOCTL_I915_PERF_REMOVE_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_REMOVE_CONFIG, __u64)
/* Allow drivers to submit batchbuffers directly to hardware, relying
* on the security mechanisms provided by hardware.
@@ -393,10 +397,20 @@
#define I915_PARAM_MIN_EU_IN_POOL 39
#define I915_PARAM_MMAP_GTT_VERSION 40
-/* Query whether DRM_I915_GEM_EXECBUFFER2 supports user defined execution
+/*
+ * Query whether DRM_I915_GEM_EXECBUFFER2 supports user defined execution
* priorities and the driver will attempt to execute batches in priority order.
+ * The param returns a capability bitmask, nonzero implies that the scheduler
+ * is enabled, with different features present according to the mask.
+ *
+ * The initial priority for each batch is supplied by the context and is
+ * controlled via I915_CONTEXT_PARAM_PRIORITY.
*/
#define I915_PARAM_HAS_SCHEDULER 41
+#define I915_SCHEDULER_CAP_ENABLED (1ul << 0)
+#define I915_SCHEDULER_CAP_PRIORITY (1ul << 1)
+#define I915_SCHEDULER_CAP_PREEMPTION (1ul << 2)
+
#define I915_PARAM_HUC_STATUS 42
/* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to opt-out of
@@ -431,6 +445,11 @@
*/
#define I915_PARAM_HAS_EXEC_BATCH_FIRST 48
+/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of
+ * drm_i915_gem_exec_fence structures. See I915_EXEC_FENCE_ARRAY.
+ */
+#define I915_PARAM_HAS_EXEC_FENCE_ARRAY 49
+
typedef struct drm_i915_getparam {
__s32 param;
/*
@@ -812,6 +831,17 @@
__u64 rsvd2;
};
+struct drm_i915_gem_exec_fence {
+ /**
+ * User's handle for a drm_syncobj to wait on or signal.
+ */
+ __u32 handle;
+
+#define I915_EXEC_FENCE_WAIT (1<<0)
+#define I915_EXEC_FENCE_SIGNAL (1<<1)
+ __u32 flags;
+};
+
struct drm_i915_gem_execbuffer2 {
/**
* List of gem_exec_object2 structs
@@ -826,7 +856,11 @@
__u32 DR1;
__u32 DR4;
__u32 num_cliprects;
- /** This is a struct drm_clip_rect *cliprects */
+ /**
+ * This is a struct drm_clip_rect *cliprects if I915_EXEC_FENCE_ARRAY
+ * is not set. If I915_EXEC_FENCE_ARRAY is set, then this is a
+ * struct drm_i915_gem_exec_fence *fences.
+ */
__u64 cliprects_ptr;
#define I915_EXEC_RING_MASK (7<<0)
#define I915_EXEC_DEFAULT (0<<0)
@@ -927,7 +961,14 @@
* element).
*/
#define I915_EXEC_BATCH_FIRST (1<<18)
-#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_BATCH_FIRST<<1))
+
+/* Setting I915_EXEC_FENCE_ARRAY implies that num_cliprects and cliprects_ptr
+ * define an array of drm_i915_gem_exec_fence structures which specify a set of
+ * dma fences to wait upon or signal.
+ */
+#define I915_EXEC_FENCE_ARRAY (1<<19)
+
+#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_ARRAY<<1))
#define I915_EXEC_CONTEXT_ID_MASK (0xffffffff)
#define i915_execbuffer2_set_context_id(eb2, context) \
@@ -1277,14 +1318,16 @@
* be specified
*/
__u64 offset;
+#define I915_REG_READ_8B_WA (1ul << 0)
+
__u64 val; /* Return value */
};
/* Known registers:
*
* Render engine timestamp - 0x2358 + 64bit - gen7+
* - Note this register returns an invalid value if using the default
- * single instruction 8byte read, in order to workaround that use
- * offset (0x2538 | 1) instead.
+ * single instruction 8byte read, in order to workaround that pass
+ * flag I915_REG_READ_8B_WA in offset field.
*
*/
@@ -1327,6 +1370,10 @@
#define I915_CONTEXT_PARAM_GTT_SIZE 0x3
#define I915_CONTEXT_PARAM_NO_ERROR_CAPTURE 0x4
#define I915_CONTEXT_PARAM_BANNABLE 0x5
+#define I915_CONTEXT_PARAM_PRIORITY 0x6
+#define I915_CONTEXT_MAX_USER_PRIORITY 1023 /* inclusive */
+#define I915_CONTEXT_DEFAULT_PRIORITY 0
+#define I915_CONTEXT_MIN_USER_PRIORITY -1023 /* inclusive */
__u64 value;
};
@@ -1467,6 +1514,27 @@
DRM_I915_PERF_RECORD_MAX /* non-ABI */
};
+/**
+ * Structure to upload perf dynamic configuration into the kernel.
+ */
+struct drm_i915_perf_oa_config {
+ /** String formatted like "%08x-%04x-%04x-%04x-%012x" */
+ char uuid[36];
+
+ __u32 n_mux_regs;
+ __u32 n_boolean_regs;
+ __u32 n_flex_regs;
+
+ /*
+ * These fields are pointers to tuples of u32 values (register
+ * address, value). For example the expected length of the buffer
+ * pointed to by mux_regs_ptr is (2 * sizeof(u32) * n_mux_regs).
+ */
+ __u64 mux_regs_ptr;
+ __u64 boolean_regs_ptr;
+ __u64 flex_regs_ptr;
+};
+
#if defined(__cplusplus)
}
#endif
diff -Nru mesa-17.2.4/include/drm-uapi/README mesa-17.3.3/include/drm-uapi/README
--- mesa-17.2.4/include/drm-uapi/README 2018-02-01 16:17:31.000000000 +0000
+++ mesa-17.3.3/include/drm-uapi/README 2018-02-01 16:17:32.000000000 +0000
@@ -3,6 +3,9 @@
Whenever either of those driver needs new definitions for new kernel
APIs, these files should be updated.
+These files in master should only be updated once the changes have landed
+in the drm-next tree.
+
You can copy files installed after running this from the kernel
repository, at version the drivers require :
@@ -10,9 +13,9 @@
The last update was done at the following kernel commit :
-commit 6d61e70ccc21606ffb8a0a03bd3aba24f659502b
-Merge: 338ffbf7cb5e c0bc126f97fb
+commit 7846b12fe0b5feab5446d892f41b5140c1419109
+Merge: 7ebdb0d d78acfe
Author: Dave Airlie
-Date: Tue Jun 27 07:24:49 2017 +1000
+Date: Tue Aug 29 10:38:14 2017 +1000
- Backmerge tag 'v4.12-rc7' into drm-next
+ Merge branch 'drm-vmwgfx-next' of git://people.freedesktop.org/~syeh/repos_linux into drm-next
diff -Nru mesa-17.2.4/include/drm-uapi/vc4_drm.h mesa-17.3.3/include/drm-uapi/vc4_drm.h
--- mesa-17.2.4/include/drm-uapi/vc4_drm.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/include/drm-uapi/vc4_drm.h 2018-01-18 21:30:28.000000000 +0000
@@ -40,6 +40,7 @@
#define DRM_VC4_GET_PARAM 0x07
#define DRM_VC4_SET_TILING 0x08
#define DRM_VC4_GET_TILING 0x09
+#define DRM_VC4_LABEL_BO 0x0a
#define DRM_IOCTL_VC4_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_SUBMIT_CL, struct drm_vc4_submit_cl)
#define DRM_IOCTL_VC4_WAIT_SEQNO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_WAIT_SEQNO, struct drm_vc4_wait_seqno)
@@ -51,6 +52,7 @@
#define DRM_IOCTL_VC4_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GET_PARAM, struct drm_vc4_get_param)
#define DRM_IOCTL_VC4_SET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_SET_TILING, struct drm_vc4_set_tiling)
#define DRM_IOCTL_VC4_GET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GET_TILING, struct drm_vc4_get_tiling)
+#define DRM_IOCTL_VC4_LABEL_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_LABEL_BO, struct drm_vc4_label_bo)
struct drm_vc4_submit_rcl_surface {
__u32 hindex; /* Handle index, or ~0 if not present. */
@@ -153,6 +155,16 @@
__u32 pad:24;
#define VC4_SUBMIT_CL_USE_CLEAR_COLOR (1 << 0)
+/* By default, the kernel gets to choose the order that the tiles are
+ * rendered in. If this is set, then the tiles will be rendered in a
+ * raster order, with the right-to-left vs left-to-right and
+ * top-to-bottom vs bottom-to-top dictated by
+ * VC4_SUBMIT_CL_RCL_ORDER_INCREASING_*. This allows overlapping
+ * blits to be implemented using the 3D engine.
+ */
+#define VC4_SUBMIT_CL_FIXED_RCL_ORDER (1 << 1)
+#define VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X (1 << 2)
+#define VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y (1 << 3)
__u32 flags;
/* Returned value of the seqno of this render job (for the
@@ -292,6 +304,7 @@
#define DRM_VC4_PARAM_SUPPORTS_BRANCHES 3
#define DRM_VC4_PARAM_SUPPORTS_ETC1 4
#define DRM_VC4_PARAM_SUPPORTS_THREADED_FS 5
+#define DRM_VC4_PARAM_SUPPORTS_FIXED_RCL_ORDER 6
struct drm_vc4_get_param {
__u32 param;
@@ -311,6 +324,15 @@
__u64 modifier;
};
+/**
+ * struct drm_vc4_label_bo - Attach a name to a BO for debug purposes.
+ */
+struct drm_vc4_label_bo {
+ __u32 handle;
+ __u32 len;
+ __u64 name;
+};
+
#if defined(__cplusplus)
}
#endif
diff -Nru mesa-17.2.4/include/EGL/eglplatform.h mesa-17.3.3/include/EGL/eglplatform.h
--- mesa-17.2.4/include/EGL/eglplatform.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/include/EGL/eglplatform.h 2018-01-18 21:30:28.000000000 +0000
@@ -97,8 +97,7 @@
#elif defined(__ANDROID__) || defined(ANDROID)
-#include <android/native_window.h>
-
+struct ANativeWindow;
struct egl_native_pixmap_t;
typedef struct ANativeWindow* EGLNativeWindowType;
diff -Nru mesa-17.2.4/include/GL/glcorearb.h mesa-17.3.3/include/GL/glcorearb.h
--- mesa-17.2.4/include/GL/glcorearb.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/include/GL/glcorearb.h 2018-01-18 21:30:28.000000000 +0000
@@ -578,15 +578,17 @@
#define GL_TEXTURE_DEPTH_SIZE 0x884A
#define GL_TEXTURE_COMPARE_MODE 0x884C
#define GL_TEXTURE_COMPARE_FUNC 0x884D
-#define GL_FUNC_ADD 0x8006
-#define GL_FUNC_SUBTRACT 0x800A
-#define GL_FUNC_REVERSE_SUBTRACT 0x800B
-#define GL_MIN 0x8007
-#define GL_MAX 0x8008
+#define GL_BLEND_COLOR 0x8005
+#define GL_BLEND_EQUATION 0x8009
#define GL_CONSTANT_COLOR 0x8001
#define GL_ONE_MINUS_CONSTANT_COLOR 0x8002
#define GL_CONSTANT_ALPHA 0x8003
#define GL_ONE_MINUS_CONSTANT_ALPHA 0x8004
+#define GL_FUNC_ADD 0x8006
+#define GL_FUNC_REVERSE_SUBTRACT 0x800B
+#define GL_FUNC_SUBTRACT 0x800A
+#define GL_MIN 0x8007
+#define GL_MAX 0x8008
typedef void (APIENTRYP PFNGLBLENDFUNCSEPARATEPROC) (GLenum sfactorRGB, GLenum dfactorRGB, GLenum sfactorAlpha, GLenum dfactorAlpha);
typedef void (APIENTRYP PFNGLMULTIDRAWARRAYSPROC) (GLenum mode, const GLint *first, const GLsizei *count, GLsizei drawcount);
typedef void (APIENTRYP PFNGLMULTIDRAWELEMENTSPROC) (GLenum mode, const GLsizei *count, GLenum type, const void *const*indices, GLsizei drawcount);
@@ -2893,6 +2895,42 @@
#endif
#endif /* GL_VERSION_4_5 */
+#ifndef GL_VERSION_4_6
+#define GL_VERSION_4_6 1
+#define GL_SHADER_BINARY_FORMAT_SPIR_V 0x9551
+#define GL_SPIR_V_BINARY 0x9552
+#define GL_PARAMETER_BUFFER 0x80EE
+#define GL_PARAMETER_BUFFER_BINDING 0x80EF
+#define GL_CONTEXT_FLAG_NO_ERROR_BIT 0x00000008
+#define GL_VERTICES_SUBMITTED 0x82EE
+#define GL_PRIMITIVES_SUBMITTED 0x82EF
+#define GL_VERTEX_SHADER_INVOCATIONS 0x82F0
+#define GL_TESS_CONTROL_SHADER_PATCHES 0x82F1
+#define GL_TESS_EVALUATION_SHADER_INVOCATIONS 0x82F2
+#define GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED 0x82F3
+#define GL_FRAGMENT_SHADER_INVOCATIONS 0x82F4
+#define GL_COMPUTE_SHADER_INVOCATIONS 0x82F5
+#define GL_CLIPPING_INPUT_PRIMITIVES 0x82F6
+#define GL_CLIPPING_OUTPUT_PRIMITIVES 0x82F7
+#define GL_POLYGON_OFFSET_CLAMP 0x8E1B
+#define GL_SPIR_V_EXTENSIONS 0x9553
+#define GL_NUM_SPIR_V_EXTENSIONS 0x9554
+#define GL_TEXTURE_MAX_ANISOTROPY 0x84FE
+#define GL_MAX_TEXTURE_MAX_ANISOTROPY 0x84FF
+#define GL_TRANSFORM_FEEDBACK_OVERFLOW 0x82EC
+#define GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW 0x82ED
+typedef void (APIENTRYP PFNGLSPECIALIZESHADERPROC) (GLuint shader, const GLchar *pEntryPoint, GLuint numSpecializationConstants, const GLuint *pConstantIndex, const GLuint *pConstantValue);
+typedef void (APIENTRYP PFNGLMULTIDRAWARRAYSINDIRECTCOUNTPROC) (GLenum mode, const void *indirect, GLintptr drawcount, GLsizei maxdrawcount, GLsizei stride);
+typedef void (APIENTRYP PFNGLMULTIDRAWELEMENTSINDIRECTCOUNTPROC) (GLenum mode, GLenum type, const void *indirect, GLintptr drawcount, GLsizei maxdrawcount, GLsizei stride);
+typedef void (APIENTRYP PFNGLPOLYGONOFFSETCLAMPPROC) (GLfloat factor, GLfloat units, GLfloat clamp);
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glSpecializeShader (GLuint shader, const GLchar *pEntryPoint, GLuint numSpecializationConstants, const GLuint *pConstantIndex, const GLuint *pConstantValue);
+GLAPI void APIENTRY glMultiDrawArraysIndirectCount (GLenum mode, const void *indirect, GLintptr drawcount, GLsizei maxdrawcount, GLsizei stride);
+GLAPI void APIENTRY glMultiDrawElementsIndirectCount (GLenum mode, GLenum type, const void *indirect, GLintptr drawcount, GLsizei maxdrawcount, GLsizei stride);
+GLAPI void APIENTRY glPolygonOffsetClamp (GLfloat factor, GLfloat units, GLfloat clamp);
+#endif
+#endif /* GL_VERSION_4_6 */
+
#ifndef GL_ARB_ES2_compatibility
#define GL_ARB_ES2_compatibility 1
#endif /* GL_ARB_ES2_compatibility */
@@ -3306,19 +3344,17 @@
#ifndef GL_ARB_imaging
#define GL_ARB_imaging 1
-#define GL_BLEND_COLOR 0x8005
-#define GL_BLEND_EQUATION 0x8009
#endif /* GL_ARB_imaging */
#ifndef GL_ARB_indirect_parameters
#define GL_ARB_indirect_parameters 1
#define GL_PARAMETER_BUFFER_ARB 0x80EE
#define GL_PARAMETER_BUFFER_BINDING_ARB 0x80EF
-typedef void (APIENTRYP PFNGLMULTIDRAWARRAYSINDIRECTCOUNTARBPROC) (GLenum mode, GLintptr indirect, GLintptr drawcount, GLsizei maxdrawcount, GLsizei stride);
-typedef void (APIENTRYP PFNGLMULTIDRAWELEMENTSINDIRECTCOUNTARBPROC) (GLenum mode, GLenum type, GLintptr indirect, GLintptr drawcount, GLsizei maxdrawcount, GLsizei stride);
+typedef void (APIENTRYP PFNGLMULTIDRAWARRAYSINDIRECTCOUNTARBPROC) (GLenum mode, const void *indirect, GLintptr drawcount, GLsizei maxdrawcount, GLsizei stride);
+typedef void (APIENTRYP PFNGLMULTIDRAWELEMENTSINDIRECTCOUNTARBPROC) (GLenum mode, GLenum type, const void *indirect, GLintptr drawcount, GLsizei maxdrawcount, GLsizei stride);
#ifdef GL_GLEXT_PROTOTYPES
-GLAPI void APIENTRY glMultiDrawArraysIndirectCountARB (GLenum mode, GLintptr indirect, GLintptr drawcount, GLsizei maxdrawcount, GLsizei stride);
-GLAPI void APIENTRY glMultiDrawElementsIndirectCountARB (GLenum mode, GLenum type, GLintptr indirect, GLintptr drawcount, GLsizei maxdrawcount, GLsizei stride);
+GLAPI void APIENTRY glMultiDrawArraysIndirectCountARB (GLenum mode, const void *indirect, GLintptr drawcount, GLsizei maxdrawcount, GLsizei stride);
+GLAPI void APIENTRY glMultiDrawElementsIndirectCountARB (GLenum mode, GLenum type, const void *indirect, GLintptr drawcount, GLsizei maxdrawcount, GLsizei stride);
#endif
#endif /* GL_ARB_indirect_parameters */
@@ -3396,6 +3432,10 @@
#define GL_PIXEL_UNPACK_BUFFER_BINDING_ARB 0x88EF
#endif /* GL_ARB_pixel_buffer_object */
+#ifndef GL_ARB_polygon_offset_clamp
+#define GL_ARB_polygon_offset_clamp 1
+#endif /* GL_ARB_polygon_offset_clamp */
+
#ifndef GL_ARB_post_depth_coverage
#define GL_ARB_post_depth_coverage 1
#endif /* GL_ARB_post_depth_coverage */
@@ -3625,6 +3665,10 @@
#define GL_ARB_sparse_texture_clamp 1
#endif /* GL_ARB_sparse_texture_clamp */
+#ifndef GL_ARB_spirv_extensions
+#define GL_ARB_spirv_extensions 1
+#endif /* GL_ARB_spirv_extensions */
+
#ifndef GL_ARB_stencil_texturing
#define GL_ARB_stencil_texturing 1
#endif /* GL_ARB_stencil_texturing */
@@ -3690,6 +3734,10 @@
#define GL_UNSIGNED_INT_SAMPLER_CUBE_MAP_ARRAY_ARB 0x900F
#endif /* GL_ARB_texture_cube_map_array */
+#ifndef GL_ARB_texture_filter_anisotropic
+#define GL_ARB_texture_filter_anisotropic 1
+#endif /* GL_ARB_texture_filter_anisotropic */
+
#ifndef GL_ARB_texture_filter_minmax
#define GL_ARB_texture_filter_minmax 1
#define GL_TEXTURE_REDUCTION_MODE_ARB 0x9366
@@ -3851,6 +3899,16 @@
#define GL_CONTEXT_FLAG_NO_ERROR_BIT_KHR 0x00000008
#endif /* GL_KHR_no_error */
+#ifndef GL_KHR_parallel_shader_compile
+#define GL_KHR_parallel_shader_compile 1
+#define GL_MAX_SHADER_COMPILER_THREADS_KHR 0x91B0
+#define GL_COMPLETION_STATUS_KHR 0x91B1
+typedef void (APIENTRYP PFNGLMAXSHADERCOMPILERTHREADSKHRPROC) (GLuint count);
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glMaxShaderCompilerThreadsKHR (GLuint count);
+#endif
+#endif /* GL_KHR_parallel_shader_compile */
+
#ifndef GL_KHR_robust_buffer_access_behavior
#define GL_KHR_robust_buffer_access_behavior 1
#endif /* GL_KHR_robust_buffer_access_behavior */
@@ -4752,6 +4810,12 @@
#define GL_BLEND_ADVANCED_COHERENT_NV 0x9285
#endif /* GL_NV_blend_equation_advanced_coherent */
+#ifndef GL_NV_blend_minmax_factor
+#define GL_NV_blend_minmax_factor 1
+#define GL_FACTOR_MIN_AMD 0x901C
+#define GL_FACTOR_MAX_AMD 0x901D
+#endif /* GL_NV_blend_minmax_factor */
+
#ifndef GL_NV_clip_space_w_scaling
#define GL_NV_clip_space_w_scaling 1
#define GL_VIEWPORT_POSITION_W_SCALE_NV 0x937C
@@ -5442,6 +5506,10 @@
#endif
#endif /* GL_NV_texture_barrier */
+#ifndef GL_NV_texture_rectangle_compressed
+#define GL_NV_texture_rectangle_compressed 1
+#endif /* GL_NV_texture_rectangle_compressed */
+
#ifndef GL_NV_uniform_buffer_unified_memory
#define GL_NV_uniform_buffer_unified_memory 1
#define GL_UNIFORM_BUFFER_UNIFIED_NV 0x936E
diff -Nru mesa-17.2.4/include/GL/glext.h mesa-17.3.3/include/GL/glext.h
--- mesa-17.2.4/include/GL/glext.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/include/GL/glext.h 2018-01-18 21:30:28.000000000 +0000
@@ -51,7 +51,7 @@
#define GLAPI extern
#endif
-#define GL_GLEXT_VERSION 20170608
+#define GL_GLEXT_VERSION 20171010
/* Generated C header for:
* API: gl
@@ -353,15 +353,17 @@
#define GL_TEXTURE_FILTER_CONTROL 0x8500
#define GL_DEPTH_TEXTURE_MODE 0x884B
#define GL_COMPARE_R_TO_TEXTURE 0x884E
-#define GL_FUNC_ADD 0x8006
-#define GL_FUNC_SUBTRACT 0x800A
-#define GL_FUNC_REVERSE_SUBTRACT 0x800B
-#define GL_MIN 0x8007
-#define GL_MAX 0x8008
+#define GL_BLEND_COLOR 0x8005
+#define GL_BLEND_EQUATION 0x8009
#define GL_CONSTANT_COLOR 0x8001
#define GL_ONE_MINUS_CONSTANT_COLOR 0x8002
#define GL_CONSTANT_ALPHA 0x8003
#define GL_ONE_MINUS_CONSTANT_ALPHA 0x8004
+#define GL_FUNC_ADD 0x8006
+#define GL_FUNC_REVERSE_SUBTRACT 0x800B
+#define GL_FUNC_SUBTRACT 0x800A
+#define GL_MIN 0x8007
+#define GL_MAX 0x8008
typedef void (APIENTRYP PFNGLBLENDFUNCSEPARATEPROC) (GLenum sfactorRGB, GLenum dfactorRGB, GLenum sfactorAlpha, GLenum dfactorAlpha);
typedef void (APIENTRYP PFNGLMULTIDRAWARRAYSPROC) (GLenum mode, const GLint *first, const GLsizei *count, GLsizei drawcount);
typedef void (APIENTRYP PFNGLMULTIDRAWELEMENTSPROC) (GLenum mode, const GLsizei *count, GLenum type, const void *const*indices, GLsizei drawcount);
@@ -2865,6 +2867,42 @@
#endif
#endif /* GL_VERSION_4_5 */
+#ifndef GL_VERSION_4_6
+#define GL_VERSION_4_6 1
+#define GL_SHADER_BINARY_FORMAT_SPIR_V 0x9551
+#define GL_SPIR_V_BINARY 0x9552
+#define GL_PARAMETER_BUFFER 0x80EE
+#define GL_PARAMETER_BUFFER_BINDING 0x80EF
+#define GL_CONTEXT_FLAG_NO_ERROR_BIT 0x00000008
+#define GL_VERTICES_SUBMITTED 0x82EE
+#define GL_PRIMITIVES_SUBMITTED 0x82EF
+#define GL_VERTEX_SHADER_INVOCATIONS 0x82F0
+#define GL_TESS_CONTROL_SHADER_PATCHES 0x82F1
+#define GL_TESS_EVALUATION_SHADER_INVOCATIONS 0x82F2
+#define GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED 0x82F3
+#define GL_FRAGMENT_SHADER_INVOCATIONS 0x82F4
+#define GL_COMPUTE_SHADER_INVOCATIONS 0x82F5
+#define GL_CLIPPING_INPUT_PRIMITIVES 0x82F6
+#define GL_CLIPPING_OUTPUT_PRIMITIVES 0x82F7
+#define GL_POLYGON_OFFSET_CLAMP 0x8E1B
+#define GL_SPIR_V_EXTENSIONS 0x9553
+#define GL_NUM_SPIR_V_EXTENSIONS 0x9554
+#define GL_TEXTURE_MAX_ANISOTROPY 0x84FE
+#define GL_MAX_TEXTURE_MAX_ANISOTROPY 0x84FF
+#define GL_TRANSFORM_FEEDBACK_OVERFLOW 0x82EC
+#define GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW 0x82ED
+typedef void (APIENTRYP PFNGLSPECIALIZESHADERPROC) (GLuint shader, const GLchar *pEntryPoint, GLuint numSpecializationConstants, const GLuint *pConstantIndex, const GLuint *pConstantValue);
+typedef void (APIENTRYP PFNGLMULTIDRAWARRAYSINDIRECTCOUNTPROC) (GLenum mode, const void *indirect, GLintptr drawcount, GLsizei maxdrawcount, GLsizei stride);
+typedef void (APIENTRYP PFNGLMULTIDRAWELEMENTSINDIRECTCOUNTPROC) (GLenum mode, GLenum type, const void *indirect, GLintptr drawcount, GLsizei maxdrawcount, GLsizei stride);
+typedef void (APIENTRYP PFNGLPOLYGONOFFSETCLAMPPROC) (GLfloat factor, GLfloat units, GLfloat clamp);
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glSpecializeShader (GLuint shader, const GLchar *pEntryPoint, GLuint numSpecializationConstants, const GLuint *pConstantIndex, const GLuint *pConstantValue);
+GLAPI void APIENTRY glMultiDrawArraysIndirectCount (GLenum mode, const void *indirect, GLintptr drawcount, GLsizei maxdrawcount, GLsizei stride);
+GLAPI void APIENTRY glMultiDrawElementsIndirectCount (GLenum mode, GLenum type, const void *indirect, GLintptr drawcount, GLsizei maxdrawcount, GLsizei stride);
+GLAPI void APIENTRY glPolygonOffsetClamp (GLfloat factor, GLfloat units, GLfloat clamp);
+#endif
+#endif /* GL_VERSION_4_6 */
+
#ifndef GL_ARB_ES2_compatibility
#define GL_ARB_ES2_compatibility 1
#endif /* GL_ARB_ES2_compatibility */
@@ -3468,8 +3506,6 @@
#ifndef GL_ARB_imaging
#define GL_ARB_imaging 1
-#define GL_BLEND_COLOR 0x8005
-#define GL_BLEND_EQUATION 0x8009
#define GL_CONVOLUTION_1D 0x8010
#define GL_CONVOLUTION_2D 0x8011
#define GL_SEPARABLE_2D 0x8012
@@ -3606,11 +3642,11 @@
#define GL_ARB_indirect_parameters 1
#define GL_PARAMETER_BUFFER_ARB 0x80EE
#define GL_PARAMETER_BUFFER_BINDING_ARB 0x80EF
-typedef void (APIENTRYP PFNGLMULTIDRAWARRAYSINDIRECTCOUNTARBPROC) (GLenum mode, GLintptr indirect, GLintptr drawcount, GLsizei maxdrawcount, GLsizei stride);
-typedef void (APIENTRYP PFNGLMULTIDRAWELEMENTSINDIRECTCOUNTARBPROC) (GLenum mode, GLenum type, GLintptr indirect, GLintptr drawcount, GLsizei maxdrawcount, GLsizei stride);
+typedef void (APIENTRYP PFNGLMULTIDRAWARRAYSINDIRECTCOUNTARBPROC) (GLenum mode, const void *indirect, GLintptr drawcount, GLsizei maxdrawcount, GLsizei stride);
+typedef void (APIENTRYP PFNGLMULTIDRAWELEMENTSINDIRECTCOUNTARBPROC) (GLenum mode, GLenum type, const void *indirect, GLintptr drawcount, GLsizei maxdrawcount, GLsizei stride);
#ifdef GL_GLEXT_PROTOTYPES
-GLAPI void APIENTRY glMultiDrawArraysIndirectCountARB (GLenum mode, GLintptr indirect, GLintptr drawcount, GLsizei maxdrawcount, GLsizei stride);
-GLAPI void APIENTRY glMultiDrawElementsIndirectCountARB (GLenum mode, GLenum type, GLintptr indirect, GLintptr drawcount, GLsizei maxdrawcount, GLsizei stride);
+GLAPI void APIENTRY glMultiDrawArraysIndirectCountARB (GLenum mode, const void *indirect, GLintptr drawcount, GLsizei maxdrawcount, GLsizei stride);
+GLAPI void APIENTRY glMultiDrawElementsIndirectCountARB (GLenum mode, GLenum type, const void *indirect, GLintptr drawcount, GLsizei maxdrawcount, GLsizei stride);
#endif
#endif /* GL_ARB_indirect_parameters */
@@ -3887,6 +3923,10 @@
#define GL_COORD_REPLACE_ARB 0x8862
#endif /* GL_ARB_point_sprite */
+#ifndef GL_ARB_polygon_offset_clamp
+#define GL_ARB_polygon_offset_clamp 1
+#endif /* GL_ARB_polygon_offset_clamp */
+
#ifndef GL_ARB_post_depth_coverage
#define GL_ARB_post_depth_coverage 1
#endif /* GL_ARB_post_depth_coverage */
@@ -4290,6 +4330,10 @@
#define GL_ARB_sparse_texture_clamp 1
#endif /* GL_ARB_sparse_texture_clamp */
+#ifndef GL_ARB_spirv_extensions
+#define GL_ARB_spirv_extensions 1
+#endif /* GL_ARB_spirv_extensions */
+
#ifndef GL_ARB_stencil_texturing
#define GL_ARB_stencil_texturing 1
#endif /* GL_ARB_stencil_texturing */
@@ -4442,6 +4486,10 @@
#define GL_DOT3_RGBA_ARB 0x86AF
#endif /* GL_ARB_texture_env_dot3 */
+#ifndef GL_ARB_texture_filter_anisotropic
+#define GL_ARB_texture_filter_anisotropic 1
+#endif /* GL_ARB_texture_filter_anisotropic */
+
#ifndef GL_ARB_texture_filter_minmax
#define GL_ARB_texture_filter_minmax 1
#define GL_TEXTURE_REDUCTION_MODE_ARB 0x9366
@@ -4947,6 +4995,16 @@
#define GL_CONTEXT_FLAG_NO_ERROR_BIT_KHR 0x00000008
#endif /* GL_KHR_no_error */
+#ifndef GL_KHR_parallel_shader_compile
+#define GL_KHR_parallel_shader_compile 1
+#define GL_MAX_SHADER_COMPILER_THREADS_KHR 0x91B0
+#define GL_COMPLETION_STATUS_KHR 0x91B1
+typedef void (APIENTRYP PFNGLMAXSHADERCOMPILERTHREADSKHRPROC) (GLuint count);
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glMaxShaderCompilerThreadsKHR (GLuint count);
+#endif
+#endif /* GL_KHR_parallel_shader_compile */
+
#ifndef GL_KHR_robust_buffer_access_behavior
#define GL_KHR_robust_buffer_access_behavior 1
#endif /* GL_KHR_robust_buffer_access_behavior */
@@ -5651,6 +5709,10 @@
#define GL_AMD_shader_explicit_vertex_parameter 1
#endif /* GL_AMD_shader_explicit_vertex_parameter */
+#ifndef GL_AMD_shader_image_load_store_lod
+#define GL_AMD_shader_image_load_store_lod 1
+#endif /* GL_AMD_shader_image_load_store_lod */
+
#ifndef GL_AMD_shader_stencil_export
#define GL_AMD_shader_stencil_export 1
#endif /* GL_AMD_shader_stencil_export */
@@ -7245,6 +7307,17 @@
#endif
#endif /* GL_EXT_draw_range_elements */
+#ifndef GL_EXT_external_buffer
+#define GL_EXT_external_buffer 1
+typedef void *GLeglClientBufferEXT;
+typedef void (APIENTRYP PFNGLBUFFERSTORAGEEXTERNALEXTPROC) (GLenum target, GLintptr offset, GLsizeiptr size, GLeglClientBufferEXT clientBuffer, GLbitfield flags);
+typedef void (APIENTRYP PFNGLNAMEDBUFFERSTORAGEEXTERNALEXTPROC) (GLuint buffer, GLintptr offset, GLsizeiptr size, GLeglClientBufferEXT clientBuffer, GLbitfield flags);
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glBufferStorageExternalEXT (GLenum target, GLintptr offset, GLsizeiptr size, GLeglClientBufferEXT clientBuffer, GLbitfield flags);
+GLAPI void APIENTRY glNamedBufferStorageExternalEXT (GLuint buffer, GLintptr offset, GLsizeiptr size, GLeglClientBufferEXT clientBuffer, GLbitfield flags);
+#endif
+#endif /* GL_EXT_external_buffer */
+
#ifndef GL_EXT_fog_coord
#define GL_EXT_fog_coord 1
#define GL_FOG_COORDINATE_SOURCE_EXT 0x8450
@@ -9151,6 +9224,13 @@
#define GL_MESA_shader_integer_functions 1
#endif /* GL_MESA_shader_integer_functions */
+#ifndef GL_MESA_tile_raster_order
+#define GL_MESA_tile_raster_order 1
+#define GL_TILE_RASTER_ORDER_FIXED_MESA 0x8BB8
+#define GL_TILE_RASTER_ORDER_INCREASING_X_MESA 0x8BB9
+#define GL_TILE_RASTER_ORDER_INCREASING_Y_MESA 0x8BBA
+#endif /* GL_MESA_tile_raster_order */
+
#ifndef GL_MESA_window_pos
#define GL_MESA_window_pos 1
typedef void (APIENTRYP PFNGLWINDOWPOS2DMESAPROC) (GLdouble x, GLdouble y);
@@ -9377,6 +9457,10 @@
#define GL_BLEND_ADVANCED_COHERENT_NV 0x9285
#endif /* GL_NV_blend_equation_advanced_coherent */
+#ifndef GL_NV_blend_minmax_factor
+#define GL_NV_blend_minmax_factor 1
+#endif /* GL_NV_blend_minmax_factor */
+
#ifndef GL_NV_blend_square
#define GL_NV_blend_square 1
#endif /* GL_NV_blend_square */
@@ -10457,6 +10541,32 @@
#endif
#endif /* GL_NV_primitive_restart */
+#ifndef GL_NV_query_resource
+#define GL_NV_query_resource 1
+#define GL_QUERY_RESOURCE_TYPE_VIDMEM_ALLOC_NV 0x9540
+#define GL_QUERY_RESOURCE_MEMTYPE_VIDMEM_NV 0x9542
+#define GL_QUERY_RESOURCE_SYS_RESERVED_NV 0x9544
+#define GL_QUERY_RESOURCE_TEXTURE_NV 0x9545
+#define GL_QUERY_RESOURCE_RENDERBUFFER_NV 0x9546
+#define GL_QUERY_RESOURCE_BUFFEROBJECT_NV 0x9547
+typedef GLint (APIENTRYP PFNGLQUERYRESOURCENVPROC) (GLenum queryType, GLint tagId, GLuint bufSize, GLint *buffer);
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI GLint APIENTRY glQueryResourceNV (GLenum queryType, GLint tagId, GLuint bufSize, GLint *buffer);
+#endif
+#endif /* GL_NV_query_resource */
+
+#ifndef GL_NV_query_resource_tag
+#define GL_NV_query_resource_tag 1
+typedef void (APIENTRYP PFNGLGENQUERYRESOURCETAGNVPROC) (GLsizei n, GLint *tagIds);
+typedef void (APIENTRYP PFNGLDELETEQUERYRESOURCETAGNVPROC) (GLsizei n, const GLint *tagIds);
+typedef void (APIENTRYP PFNGLQUERYRESOURCETAGNVPROC) (GLint tagId, const GLchar *tagString);
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glGenQueryResourceTagNV (GLsizei n, GLint *tagIds);
+GLAPI void APIENTRY glDeleteQueryResourceTagNV (GLsizei n, const GLint *tagIds);
+GLAPI void APIENTRY glQueryResourceTagNV (GLint tagId, const GLchar *tagString);
+#endif
+#endif /* GL_NV_query_resource_tag */
+
#ifndef GL_NV_register_combiners
#define GL_NV_register_combiners 1
#define GL_REGISTER_COMBINERS_NV 0x8522
@@ -10733,6 +10843,10 @@
#define GL_MAX_RECTANGLE_TEXTURE_SIZE_NV 0x84F8
#endif /* GL_NV_texture_rectangle */
+#ifndef GL_NV_texture_rectangle_compressed
+#define GL_NV_texture_rectangle_compressed 1
+#endif /* GL_NV_texture_rectangle_compressed */
+
#ifndef GL_NV_texture_shader
#define GL_NV_texture_shader 1
#define GL_OFFSET_TEXTURE_RECTANGLE_NV 0x864C
diff -Nru mesa-17.2.4/include/GL/glxext.h mesa-17.3.3/include/GL/glxext.h
--- mesa-17.2.4/include/GL/glxext.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/include/GL/glxext.h 2018-01-18 21:30:28.000000000 +0000
@@ -34,7 +34,7 @@
** https://github.com/KhronosGroup/OpenGL-Registry
*/
-#define GLX_GLXEXT_VERSION 20170606
+#define GLX_GLXEXT_VERSION 20170728
/* Generated C header for:
* API: glx
diff -Nru mesa-17.2.4/include/GL/internal/dri_interface.h mesa-17.3.3/include/GL/internal/dri_interface.h
--- mesa-17.2.4/include/GL/internal/dri_interface.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/include/GL/internal/dri_interface.h 2018-01-18 21:30:28.000000000 +0000
@@ -48,6 +48,8 @@
typedef struct drm_clip_rect drm_clip_rect_t;
#endif
+#include <stdint.h>
+
/**
* \name DRI interface structures
*
@@ -724,11 +726,26 @@
#define __DRI_ATTRIB_TEXTURE_2D_BIT 0x02
#define __DRI_ATTRIB_TEXTURE_RECTANGLE_BIT 0x04
+/* __DRI_ATTRIB_SWAP_METHOD */
+/* Note that with the exception of __DRI_ATTRIB_SWAP_NONE, we need to define
+ * the same tokens as GLX. This is because old and current X servers will
+ * transmit the driconf value grabbed from the AIGLX driver untranslated as
+ * the GLX fbconfig value. __DRI_ATTRIB_SWAP_NONE is only used by dri drivers
+ * to signal to the dri core that the driconfig is single-buffer.
+ */
+#define __DRI_ATTRIB_SWAP_NONE 0x0000
+#define __DRI_ATTRIB_SWAP_EXCHANGE 0x8061
+#define __DRI_ATTRIB_SWAP_COPY 0x8062
+#define __DRI_ATTRIB_SWAP_UNDEFINED 0x8063
+
/**
* This extension defines the core DRI functionality.
+ *
+ * Version >= 2 indicates that getConfigAttrib with __DRI_ATTRIB_SWAP_METHOD
+ * returns a reliable value.
*/
#define __DRI_CORE "DRI_Core"
-#define __DRI_CORE_VERSION 1
+#define __DRI_CORE_VERSION 2
struct __DRIcoreExtensionRec {
__DRIextension base;
@@ -967,7 +984,15 @@
};
#define __DRI_DRI2_LOADER "DRI_DRI2Loader"
-#define __DRI_DRI2_LOADER_VERSION 3
+#define __DRI_DRI2_LOADER_VERSION 4
+
+enum dri_loader_cap {
+ /* Whether the loader handles RGBA channel ordering correctly. If not,
+ * only BGRA ordering can be exposed.
+ */
+ DRI_LOADER_CAP_RGBA_ORDERING,
+};
+
struct __DRIdri2LoaderExtensionRec {
__DRIextension base;
@@ -1017,6 +1042,18 @@
int *width, int *height,
unsigned int *attachments, int count,
int *out_count, void *loaderPrivate);
+
+ /**
+ * Return a loader capability value. If the loader doesn't know the enum,
+ * it will return 0.
+ *
+ * \param loaderPrivate The last parameter of createNewScreen or
+ * createNewScreen2.
+ * \param cap See the enum.
+ *
+ * \since 4
+ */
+ unsigned (*getCapability)(void *loaderPrivate, enum dri_loader_cap cap);
};
/**
@@ -1063,6 +1100,12 @@
#define __DRI_CTX_RESET_LOSE_CONTEXT 1
/*@}*/
+#define __DRI_CTX_ATTRIB_PRIORITY 4
+
+#define __DRI_CTX_PRIORITY_LOW 0
+#define __DRI_CTX_PRIORITY_MEDIUM 1
+#define __DRI_CTX_PRIORITY_HIGH 2
+
/**
* \name Reasons that __DRIdri2Extension::createContextAttribs might fail
*/
@@ -1143,7 +1186,7 @@
* extensions.
*/
#define __DRI_IMAGE "DRI_IMAGE"
-#define __DRI_IMAGE_VERSION 15
+#define __DRI_IMAGE_VERSION 17
/**
* These formats correspond to the similarly named MESA_FORMAT_*
@@ -1155,7 +1198,7 @@
* by the driver (YUV planar formats) but serve as a base image for
* creating sub-images for the different planes within the image.
*
- * R8, GR88 and NONE should not be used with createImageFormName or
+ * R8, GR88 and NONE should not be used with createImageFromName or
* createImage, and are returned by query from sub images created with
* createImageFromNames (NONE, see above) and fromPlane (R8 & GR88).
*/
@@ -1176,7 +1219,7 @@
#define __DRI_IMAGE_USE_SHARE 0x0001
#define __DRI_IMAGE_USE_SCANOUT 0x0002
-#define __DRI_IMAGE_USE_CURSOR 0x0004 /* Depricated */
+#define __DRI_IMAGE_USE_CURSOR 0x0004 /* Deprecated */
#define __DRI_IMAGE_USE_LINEAR 0x0008
/* The buffer will only be read by an external process after SwapBuffers,
* in contrary to gbm buffers, front buffers and fake front buffers, which
@@ -1323,6 +1366,13 @@
#define __BLIT_FLAG_FLUSH 0x0001
#define __BLIT_FLAG_FINISH 0x0002
+/**
+ * queryDmaBufFormatModifierAttribs attributes
+ */
+
+/* Available in version 16 */
+#define __DRI_IMAGE_FORMAT_MODIFIER_ATTRIB_PLANE_COUNT 0x0001
+
typedef struct __DRIimageRec __DRIimage;
typedef struct __DRIimageExtensionRec __DRIimageExtension;
struct __DRIimageExtensionRec {
@@ -1333,6 +1383,7 @@
int name, int pitch,
void *loaderPrivate);
+ /* Deprecated since version 17; see createImageFromRenderbuffer2 */
__DRIimage *(*createImageFromRenderbuffer)(__DRIcontext *context,
int renderbuffer,
void *loaderPrivate);
@@ -1563,6 +1614,40 @@
int max, uint64_t *modifiers,
unsigned int *external_only,
int *count);
+
+ /**
+ * dmabuf format modifier attribute query for a given format and modifier.
+ *
+ * \param fourcc The format to query. If this format is not supported by
+ * the driver, return false.
+ * \param modifier The modifier to query. If this format+modifier is not
+ * supported by the driver, return false.
+ * \param attrib The __DRI_IMAGE_FORMAT_MODIFIER_ATTRIB to query.
+ * \param value A pointer to where to store the result of the query.
+ *
+ * Returns true upon success.
+ *
+ * \since 16
+ */
+ GLboolean (*queryDmaBufFormatModifierAttribs)(__DRIscreen *screen,
+ uint32_t fourcc, uint64_t modifier,
+ int attrib, uint64_t *value);
+
+ /**
+ * Create a DRI image from the given renderbuffer.
+ *
+ * \param context the current DRI context
+ * \param renderbuffer the GL name of the renderbuffer
+ * \param loaderPrivate for callbacks into the loader related to the image
+ * \param error will be set to one of __DRI_IMAGE_ERROR_xxx
+ * \return the newly created image on success, or NULL otherwise
+ *
+ * \since 17
+ */
+ __DRIimage *(*createImageFromRenderbuffer2)(__DRIcontext *context,
+ int renderbuffer,
+ void *loaderPrivate,
+ unsigned *error);
};
@@ -1635,13 +1720,29 @@
*
* This extension provides the XML string containing driver options for use by
* the loader in supporting the driconf application.
+ *
+ * v2:
+ * - Add the getXml getter function which allows the driver more flexibility in
+ * how the XML is provided.
+ * - Deprecate the direct xml pointer. It is only provided as a fallback for
+ * older versions of libGL and must not be used by clients that are aware of
+ * the newer version. Future driver versions may set it to NULL.
*/
#define __DRI_CONFIG_OPTIONS "DRI_ConfigOptions"
-#define __DRI_CONFIG_OPTIONS_VERSION 1
+#define __DRI_CONFIG_OPTIONS_VERSION 2
typedef struct __DRIconfigOptionsExtensionRec {
__DRIextension base;
- const char *xml;
+ const char *xml; /**< deprecated since v2, use getXml instead */
+
+ /**
+ * Get an XML string that describes available driver options for use by a
+ * config application.
+ *
+ * The returned string must be heap-allocated. The caller is responsible for
+ * freeing it.
+ */
+ char *(*getXml)(const char *driver_name);
} __DRIconfigOptionsExtension;
/**
@@ -1687,6 +1788,14 @@
*/
#define __DRI2_RENDERER_HAS_FRAMEBUFFER_SRGB 0x000c
+/* Bitmask of supported/available context priorities - must match
+ * __EGL_CONTEXT_PRIORITY_LOW_BIT et al
+ */
+#define __DRI2_RENDERER_HAS_CONTEXT_PRIORITY 0x000d
+#define __DRI2_RENDERER_HAS_CONTEXT_PRIORITY_LOW (1 << 0)
+#define __DRI2_RENDERER_HAS_CONTEXT_PRIORITY_MEDIUM (1 << 1)
+#define __DRI2_RENDERER_HAS_CONTEXT_PRIORITY_HIGH (1 << 2)
+
typedef struct __DRI2rendererQueryExtensionRec __DRI2rendererQueryExtension;
struct __DRI2rendererQueryExtensionRec {
__DRIextension base;
@@ -1711,7 +1820,7 @@
};
#define __DRI_IMAGE_LOADER "DRI_IMAGE_LOADER"
-#define __DRI_IMAGE_LOADER_VERSION 1
+#define __DRI_IMAGE_LOADER_VERSION 3
struct __DRIimageLoaderExtensionRec {
__DRIextension base;
@@ -1747,6 +1856,28 @@
* into __DRIdri2ExtensionRec::createNewDrawable
*/
void (*flushFrontBuffer)(__DRIdrawable *driDrawable, void *loaderPrivate);
+
+ /**
+ * Return a loader capability value. If the loader doesn't know the enum,
+ * it will return 0.
+ *
+ * \since 2
+ */
+ unsigned (*getCapability)(void *loaderPrivate, enum dri_loader_cap cap);
+
+ /**
+ * Flush swap buffers
+ *
+ * Make sure any outstanding swap buffers have been submitted to the
+ * device.
+ *
+ * \param driDrawable Drawable whose swaps need to be flushed
+ * \param loaderPrivate Loader's private data that was previously passed
+ * into __DRIdri2ExtensionRec::createNewDrawable
+ *
+ * \since 3
+ */
+ void (*flushSwapBuffers)(__DRIdrawable *driDrawable, void *loaderPrivate);
};
/**
diff -Nru mesa-17.2.4/include/GL/mesa_glinterop.h mesa-17.3.3/include/GL/mesa_glinterop.h
--- mesa-17.2.4/include/GL/mesa_glinterop.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/include/GL/mesa_glinterop.h 2018-01-18 21:30:28.000000000 +0000
@@ -58,12 +58,16 @@
#endif
/* Forward declarations to avoid inclusion of GL/glx.h */
+#ifndef GLX_H
struct _XDisplay;
struct __GLXcontextRec;
+#endif
/* Forward declarations to avoid inclusion of EGL/egl.h */
+#ifndef __egl_h_
typedef void *EGLDisplay;
typedef void *EGLContext;
+#endif
/** Returned error codes. */
enum {
diff -Nru mesa-17.2.4/include/GL/wglext.h mesa-17.3.3/include/GL/wglext.h
--- mesa-17.2.4/include/GL/wglext.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/include/GL/wglext.h 2018-01-18 21:30:28.000000000 +0000
@@ -39,7 +39,7 @@
#include <windows.h>
#endif
-#define WGL_WGLEXT_VERSION 20170606
+#define WGL_WGLEXT_VERSION 20170817
/* Generated C header for:
* API: wgl
diff -Nru mesa-17.2.4/include/GLES2/gl2ext.h mesa-17.3.3/include/GLES2/gl2ext.h
--- mesa-17.2.4/include/GLES2/gl2ext.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/include/GLES2/gl2ext.h 2018-01-18 21:30:28.000000000 +0000
@@ -38,7 +38,7 @@
#define GL_APIENTRYP GL_APIENTRY*
#endif
-/* Generated on date 20170606 */
+/* Generated on date 20170804 */
/* Generated C header for:
* API: gles2
@@ -324,12 +324,12 @@
typedef void (GL_APIENTRYP PFNGLDRAWELEMENTSBASEVERTEXOESPROC) (GLenum mode, GLsizei count, GLenum type, const void *indices, GLint basevertex);
typedef void (GL_APIENTRYP PFNGLDRAWRANGEELEMENTSBASEVERTEXOESPROC) (GLenum mode, GLuint start, GLuint end, GLsizei count, GLenum type, const void *indices, GLint basevertex);
typedef void (GL_APIENTRYP PFNGLDRAWELEMENTSINSTANCEDBASEVERTEXOESPROC) (GLenum mode, GLsizei count, GLenum type, const void *indices, GLsizei instancecount, GLint basevertex);
-typedef void (GL_APIENTRYP PFNGLMULTIDRAWELEMENTSBASEVERTEXOESPROC) (GLenum mode, const GLsizei *count, GLenum type, const void *const*indices, GLsizei primcount, const GLint *basevertex);
+typedef void (GL_APIENTRYP PFNGLMULTIDRAWELEMENTSBASEVERTEXEXTPROC) (GLenum mode, const GLsizei *count, GLenum type, const void *const*indices, GLsizei primcount, const GLint *basevertex);
#ifdef GL_GLEXT_PROTOTYPES
GL_APICALL void GL_APIENTRY glDrawElementsBaseVertexOES (GLenum mode, GLsizei count, GLenum type, const void *indices, GLint basevertex);
GL_APICALL void GL_APIENTRY glDrawRangeElementsBaseVertexOES (GLenum mode, GLuint start, GLuint end, GLsizei count, GLenum type, const void *indices, GLint basevertex);
GL_APICALL void GL_APIENTRY glDrawElementsInstancedBaseVertexOES (GLenum mode, GLsizei count, GLenum type, const void *indices, GLsizei instancecount, GLint basevertex);
-GL_APICALL void GL_APIENTRY glMultiDrawElementsBaseVertexOES (GLenum mode, const GLsizei *count, GLenum type, const void *const*indices, GLsizei primcount, const GLint *basevertex);
+GL_APICALL void GL_APIENTRY glMultiDrawElementsBaseVertexEXT (GLenum mode, const GLsizei *count, GLenum type, const void *const*indices, GLsizei primcount, const GLint *basevertex);
#endif
#endif /* GL_OES_draw_elements_base_vertex */
@@ -1311,12 +1311,10 @@
typedef void (GL_APIENTRYP PFNGLDRAWELEMENTSBASEVERTEXEXTPROC) (GLenum mode, GLsizei count, GLenum type, const void *indices, GLint basevertex);
typedef void (GL_APIENTRYP PFNGLDRAWRANGEELEMENTSBASEVERTEXEXTPROC) (GLenum mode, GLuint start, GLuint end, GLsizei count, GLenum type, const void *indices, GLint basevertex);
typedef void (GL_APIENTRYP PFNGLDRAWELEMENTSINSTANCEDBASEVERTEXEXTPROC) (GLenum mode, GLsizei count, GLenum type, const void *indices, GLsizei instancecount, GLint basevertex);
-typedef void (GL_APIENTRYP PFNGLMULTIDRAWELEMENTSBASEVERTEXEXTPROC) (GLenum mode, const GLsizei *count, GLenum type, const void *const*indices, GLsizei primcount, const GLint *basevertex);
#ifdef GL_GLEXT_PROTOTYPES
GL_APICALL void GL_APIENTRY glDrawElementsBaseVertexEXT (GLenum mode, GLsizei count, GLenum type, const void *indices, GLint basevertex);
GL_APICALL void GL_APIENTRY glDrawRangeElementsBaseVertexEXT (GLenum mode, GLuint start, GLuint end, GLsizei count, GLenum type, const void *indices, GLint basevertex);
GL_APICALL void GL_APIENTRY glDrawElementsInstancedBaseVertexEXT (GLenum mode, GLsizei count, GLenum type, const void *indices, GLsizei instancecount, GLint basevertex);
-GL_APICALL void GL_APIENTRY glMultiDrawElementsBaseVertexEXT (GLenum mode, const GLsizei *count, GLenum type, const void *const*indices, GLsizei primcount, const GLint *basevertex);
#endif
#endif /* GL_EXT_draw_elements_base_vertex */
@@ -1340,6 +1338,17 @@
#endif
#endif /* GL_EXT_draw_transform_feedback */
+#ifndef GL_EXT_external_buffer
+#define GL_EXT_external_buffer 1
+typedef void *GLeglClientBufferEXT;
+typedef void (GL_APIENTRYP PFNGLBUFFERSTORAGEEXTERNALEXTPROC) (GLenum target, GLintptr offset, GLsizeiptr size, GLeglClientBufferEXT clientBuffer, GLbitfield flags);
+typedef void (GL_APIENTRYP PFNGLNAMEDBUFFERSTORAGEEXTERNALEXTPROC) (GLuint buffer, GLintptr offset, GLsizeiptr size, GLeglClientBufferEXT clientBuffer, GLbitfield flags);
+#ifdef GL_GLEXT_PROTOTYPES
+GL_APICALL void GL_APIENTRY glBufferStorageExternalEXT (GLenum target, GLintptr offset, GLsizeiptr size, GLeglClientBufferEXT clientBuffer, GLbitfield flags);
+GL_APICALL void GL_APIENTRY glNamedBufferStorageExternalEXT (GLuint buffer, GLintptr offset, GLsizeiptr size, GLeglClientBufferEXT clientBuffer, GLbitfield flags);
+#endif
+#endif /* GL_EXT_external_buffer */
+
#ifndef GL_EXT_float_blend
#define GL_EXT_float_blend 1
#endif /* GL_EXT_float_blend */
@@ -2001,18 +2010,42 @@
#define GL_TEXTURE_ASTC_DECODE_PRECISION_EXT 0x8F69
#endif /* GL_EXT_texture_compression_astc_decode_mode */
+#ifndef GL_EXT_texture_compression_bptc
+#define GL_EXT_texture_compression_bptc 1
+#define GL_COMPRESSED_RGBA_BPTC_UNORM_EXT 0x8E8C
+#define GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM_EXT 0x8E8D
+#define GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT_EXT 0x8E8E
+#define GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT_EXT 0x8E8F
+#endif /* GL_EXT_texture_compression_bptc */
+
#ifndef GL_EXT_texture_compression_dxt1
#define GL_EXT_texture_compression_dxt1 1
#define GL_COMPRESSED_RGB_S3TC_DXT1_EXT 0x83F0
#define GL_COMPRESSED_RGBA_S3TC_DXT1_EXT 0x83F1
#endif /* GL_EXT_texture_compression_dxt1 */
+#ifndef GL_EXT_texture_compression_rgtc
+#define GL_EXT_texture_compression_rgtc 1
+#define GL_COMPRESSED_RED_RGTC1_EXT 0x8DBB
+#define GL_COMPRESSED_SIGNED_RED_RGTC1_EXT 0x8DBC
+#define GL_COMPRESSED_RED_GREEN_RGTC2_EXT 0x8DBD
+#define GL_COMPRESSED_SIGNED_RED_GREEN_RGTC2_EXT 0x8DBE
+#endif /* GL_EXT_texture_compression_rgtc */
+
#ifndef GL_EXT_texture_compression_s3tc
#define GL_EXT_texture_compression_s3tc 1
#define GL_COMPRESSED_RGBA_S3TC_DXT3_EXT 0x83F2
#define GL_COMPRESSED_RGBA_S3TC_DXT5_EXT 0x83F3
#endif /* GL_EXT_texture_compression_s3tc */
+#ifndef GL_EXT_texture_compression_s3tc_srgb
+#define GL_EXT_texture_compression_s3tc_srgb 1
+#define GL_COMPRESSED_SRGB_S3TC_DXT1_EXT 0x8C4C
+#define GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT 0x8C4D
+#define GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT 0x8C4E
+#define GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT 0x8C4F
+#endif /* GL_EXT_texture_compression_s3tc_srgb */
+
#ifndef GL_EXT_texture_cube_map_array
#define GL_EXT_texture_cube_map_array 1
#define GL_TEXTURE_CUBE_MAP_ARRAY_EXT 0x9009
@@ -2405,6 +2438,12 @@
#define GL_BLEND_ADVANCED_COHERENT_NV 0x9285
#endif /* GL_NV_blend_equation_advanced_coherent */
+#ifndef GL_NV_blend_minmax_factor
+#define GL_NV_blend_minmax_factor 1
+#define GL_FACTOR_MIN_AMD 0x901C
+#define GL_FACTOR_MAX_AMD 0x901D
+#endif /* GL_NV_blend_minmax_factor */
+
#ifndef GL_NV_conditional_render
#define GL_NV_conditional_render 1
#define GL_QUERY_WAIT_NV 0x8E13
@@ -3078,6 +3117,14 @@
#define GL_SHARED_EDGE_NV 0xC0
#endif /* GL_NV_path_rendering_shared_edge */
+#ifndef GL_NV_pixel_buffer_object
+#define GL_NV_pixel_buffer_object 1
+#define GL_PIXEL_PACK_BUFFER_NV 0x88EB
+#define GL_PIXEL_UNPACK_BUFFER_NV 0x88EC
+#define GL_PIXEL_PACK_BUFFER_BINDING_NV 0x88ED
+#define GL_PIXEL_UNPACK_BUFFER_BINDING_NV 0x88EF
+#endif /* GL_NV_pixel_buffer_object */
+
#ifndef GL_NV_polygon_mode
#define GL_NV_polygon_mode 1
#define GL_POLYGON_MODE_NV 0x0B40
diff -Nru mesa-17.2.4/include/meson.build mesa-17.3.3/include/meson.build
--- mesa-17.2.4/include/meson.build 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/include/meson.build 2018-02-01 16:17:32.000000000 +0000
@@ -0,0 +1,68 @@
+# Copyright © 2017 Intel Corporation
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+inc_drm_uapi = include_directories('drm-uapi')
+inc_vulkan = include_directories('vulkan')
+
+if with_gles1
+ install_headers(
+ 'GLES/egl.h', 'GLES/gl.h', 'GLES/glext.h', 'GLES/glplatform.h',
+ subdir : 'GLES',
+ )
+endif
+
+if with_gles2  # the GLES3 headers are installed together with GLES2
+ install_headers(
+ 'GLES2/gl2.h', 'GLES2/gl2ext.h', 'GLES2/gl2platform.h',
+ subdir : 'GLES2',
+ )
+ install_headers(
+ 'GLES3/gl3.h', 'GLES3/gl31.h', 'GLES3/gl32.h', 'GLES3/gl3ext.h',
+ 'GLES3/gl3platform.h',
+ subdir : 'GLES3',
+ )
+endif
+
+if with_gles1 or with_gles2 # or with_egl
+ install_headers('KHR/khrplatform.h', subdir : 'KHR')
+endif
+
+if with_opengl
+ install_headers(
+ 'GL/gl.h', 'GL/glext.h', 'GL/glcorearb.h', 'GL/gl_mangle.h',
+ subdir : 'GL',
+ )
+endif
+
+if with_glx != 'disabled'
+ install_headers('GL/glx.h', 'GL/glext.h', 'GL/glx_mangle.h', subdir : 'GL')
+endif
+
+if with_osmesa
+ install_headers('GL/osmesa.h', subdir : 'GL')
+endif
+
+if with_egl
+ install_headers(
+ 'EGL/eglext.h', 'EGL/egl.h', 'EGL/eglextchromium.h', 'EGL/eglmesaext.h',
+ 'EGL/eglplatform.h',
+ subdir : 'EGL',
+ )
+endif
diff -Nru mesa-17.2.4/include/pci_ids/i965_pci_ids.h mesa-17.3.3/include/pci_ids/i965_pci_ids.h
--- mesa-17.2.4/include/pci_ids/i965_pci_ids.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/include/pci_ids/i965_pci_ids.h 2018-01-18 21:30:28.000000000 +0000
@@ -151,7 +151,7 @@
CHIPSET(0x590E, kbl_gt1, "Intel(R) Kabylake GT1")
CHIPSET(0x5913, kbl_gt1_5, "Intel(R) Kabylake GT1.5")
CHIPSET(0x5915, kbl_gt1_5, "Intel(R) Kabylake GT1.5")
-CHIPSET(0x5917, kbl_gt1_5, "Intel(R) Kabylake GT1.5")
+CHIPSET(0x5917, kbl_gt2, "Intel(R) UHD Graphics 620 (Kabylake GT2)")
CHIPSET(0x5912, kbl_gt2, "Intel(R) HD Graphics 630 (Kaby Lake GT2)")
CHIPSET(0x5916, kbl_gt2, "Intel(R) HD Graphics 620 (Kaby Lake GT2)")
CHIPSET(0x591A, kbl_gt2, "Intel(R) HD Graphics P630 (Kaby Lake GT2)")
@@ -160,8 +160,8 @@
CHIPSET(0x591E, kbl_gt2, "Intel(R) HD Graphics 615 (Kaby Lake GT2)")
CHIPSET(0x5921, kbl_gt2, "Intel(R) Kabylake GT2F")
CHIPSET(0x5923, kbl_gt3, "Intel(R) Kabylake GT3")
-CHIPSET(0x5926, kbl_gt3, "Intel(R) Iris Plus Graphics 640 (Kaby Lake GT3)")
-CHIPSET(0x5927, kbl_gt3, "Intel(R) Iris Plus Graphics 650 (Kaby Lake GT3)")
+CHIPSET(0x5926, kbl_gt3, "Intel(R) Iris Plus Graphics 640 (Kaby Lake GT3e)")
+CHIPSET(0x5927, kbl_gt3, "Intel(R) Iris Plus Graphics 650 (Kaby Lake GT3e)")
CHIPSET(0x593B, kbl_gt4, "Intel(R) Kabylake GT4")
CHIPSET(0x3184, glk, "Intel(R) HD Graphics (Geminilake)")
CHIPSET(0x3185, glk_2x6, "Intel(R) HD Graphics (Geminilake 2x6)")
diff -Nru mesa-17.2.4/include/vulkan/vk_android_native_buffer.h mesa-17.3.3/include/vulkan/vk_android_native_buffer.h
--- mesa-17.2.4/include/vulkan/vk_android_native_buffer.h 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/include/vulkan/vk_android_native_buffer.h 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,96 @@
+/*
+ * Copyright 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __VK_ANDROID_NATIVE_BUFFER_H__
+#define __VK_ANDROID_NATIVE_BUFFER_H__
+
+/* MESA: A hack to avoid #ifdefs in driver code. */
+#ifdef ANDROID
+#include
+#include
+#else
+typedef void *buffer_handle_t;
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define VK_ANDROID_native_buffer 1
+
+#define VK_ANDROID_NATIVE_BUFFER_EXTENSION_NUMBER 11
+#define VK_ANDROID_NATIVE_BUFFER_SPEC_VERSION 5
+#define VK_ANDROID_NATIVE_BUFFER_EXTENSION_NAME "VK_ANDROID_native_buffer"
+
+#define VK_ANDROID_NATIVE_BUFFER_ENUM(type,id) ((type)(1000000000 + (1000 * (VK_ANDROID_NATIVE_BUFFER_EXTENSION_NUMBER - 1)) + (id)))
+#define VK_STRUCTURE_TYPE_NATIVE_BUFFER_ANDROID VK_ANDROID_NATIVE_BUFFER_ENUM(VkStructureType, 0)
+
+typedef struct {
+ VkStructureType sType; // must be VK_STRUCTURE_TYPE_NATIVE_BUFFER_ANDROID
+ const void* pNext;
+
+ // Buffer handle and stride returned from gralloc alloc()
+ buffer_handle_t handle;
+ int stride;
+
+ // Gralloc format and usage requested when the buffer was allocated.
+ int format;
+ int usage;
+} VkNativeBufferANDROID;
+
+typedef VkResult (VKAPI_PTR *PFN_vkGetSwapchainGrallocUsageANDROID)(VkDevice device, VkFormat format, VkImageUsageFlags imageUsage, int* grallocUsage);
+typedef VkResult (VKAPI_PTR *PFN_vkAcquireImageANDROID)(VkDevice device, VkImage image, int nativeFenceFd, VkSemaphore semaphore, VkFence fence);
+typedef VkResult (VKAPI_PTR *PFN_vkQueueSignalReleaseImageANDROID)(VkQueue queue, uint32_t waitSemaphoreCount, const VkSemaphore* pWaitSemaphores, VkImage image, int* pNativeFenceFd);
+
+#ifndef VK_NO_PROTOTYPES
+VKAPI_ATTR VkResult VKAPI_CALL vkGetSwapchainGrallocUsageANDROID(
+ VkDevice device,
+ VkFormat format,
+ VkImageUsageFlags imageUsage,
+ int* grallocUsage
+);
+VKAPI_ATTR VkResult VKAPI_CALL vkAcquireImageANDROID(
+ VkDevice device,
+ VkImage image,
+ int nativeFenceFd,
+ VkSemaphore semaphore,
+ VkFence fence
+);
+VKAPI_ATTR VkResult VKAPI_CALL vkQueueSignalReleaseImageANDROID(
+ VkQueue queue,
+ uint32_t waitSemaphoreCount,
+ const VkSemaphore* pWaitSemaphores,
+ VkImage image,
+ int* pNativeFenceFd
+);
+// -- DEPRECATED --
+VKAPI_ATTR VkResult VKAPI_CALL vkImportNativeFenceANDROID(
+ VkDevice device,
+ VkSemaphore semaphore,
+ int nativeFenceFd
+);
+VKAPI_ATTR VkResult VKAPI_CALL vkQueueSignalNativeFenceANDROID(
+ VkQueue queue,
+ int* pNativeFenceFd
+);
+// ----------------
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // __VK_ANDROID_NATIVE_BUFFER_H__
diff -Nru mesa-17.2.4/include/vulkan/vulkan.h mesa-17.3.3/include/vulkan/vulkan.h
--- mesa-17.2.4/include/vulkan/vulkan.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/include/vulkan/vulkan.h 2018-01-18 21:30:28.000000000 +0000
@@ -34,16 +34,16 @@
(((major) << 22) | ((minor) << 12) | (patch))
// DEPRECATED: This define has been removed. Specific version defines (e.g. VK_API_VERSION_1_0), or the VK_MAKE_VERSION macro, should be used instead.
-//#define VK_API_VERSION VK_MAKE_VERSION(1, 0, 0)
+//#define VK_API_VERSION VK_MAKE_VERSION(1, 0, 0) // Patch version should always be set to 0
// Vulkan 1.0 version number
-#define VK_API_VERSION_1_0 VK_MAKE_VERSION(1, 0, 0)
+#define VK_API_VERSION_1_0 VK_MAKE_VERSION(1, 0, 0)// Patch version should always be set to 0
#define VK_VERSION_MAJOR(version) ((uint32_t)(version) >> 22)
#define VK_VERSION_MINOR(version) (((uint32_t)(version) >> 12) & 0x3ff)
#define VK_VERSION_PATCH(version) ((uint32_t)(version) & 0xfff)
// Version of this file
-#define VK_HEADER_VERSION 54
+#define VK_HEADER_VERSION 63
#define VK_NULL_HANDLE 0
@@ -147,6 +147,7 @@
VK_ERROR_INVALID_SHADER_NV = -1000012000,
VK_ERROR_OUT_OF_POOL_MEMORY_KHR = -1000069000,
VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR = -1000072003,
+ VK_ERROR_NOT_PERMITTED_EXT = -1000174001,
VK_RESULT_BEGIN_RANGE = VK_ERROR_FRAGMENTED_POOL,
VK_RESULT_END_RANGE = VK_INCOMPLETE,
VK_RESULT_RANGE_SIZE = (VK_INCOMPLETE - VK_ERROR_FRAGMENTED_POOL + 1),
@@ -241,16 +242,16 @@
VK_STRUCTURE_TYPE_SPARSE_IMAGE_FORMAT_PROPERTIES_2_KHR = 1000059007,
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SPARSE_IMAGE_FORMAT_INFO_2_KHR = 1000059008,
VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO_KHX = 1000060000,
- VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO_KHX = 1000060001,
- VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO_KHX = 1000060002,
VK_STRUCTURE_TYPE_DEVICE_GROUP_RENDER_PASS_BEGIN_INFO_KHX = 1000060003,
VK_STRUCTURE_TYPE_DEVICE_GROUP_COMMAND_BUFFER_BEGIN_INFO_KHX = 1000060004,
VK_STRUCTURE_TYPE_DEVICE_GROUP_SUBMIT_INFO_KHX = 1000060005,
VK_STRUCTURE_TYPE_DEVICE_GROUP_BIND_SPARSE_INFO_KHX = 1000060006,
+ VK_STRUCTURE_TYPE_ACQUIRE_NEXT_IMAGE_INFO_KHX = 1000060010,
+ VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_DEVICE_GROUP_INFO_KHX = 1000060013,
+ VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_DEVICE_GROUP_INFO_KHX = 1000060014,
VK_STRUCTURE_TYPE_DEVICE_GROUP_PRESENT_CAPABILITIES_KHX = 1000060007,
VK_STRUCTURE_TYPE_IMAGE_SWAPCHAIN_CREATE_INFO_KHX = 1000060008,
VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_SWAPCHAIN_INFO_KHX = 1000060009,
- VK_STRUCTURE_TYPE_ACQUIRE_NEXT_IMAGE_INFO_KHX = 1000060010,
VK_STRUCTURE_TYPE_DEVICE_GROUP_PRESENT_INFO_KHX = 1000060011,
VK_STRUCTURE_TYPE_DEVICE_GROUP_SWAPCHAIN_CREATE_INFO_KHX = 1000060012,
VK_STRUCTURE_TYPE_VALIDATION_FLAGS_EXT = 1000061000,
@@ -293,7 +294,7 @@
VK_STRUCTURE_TYPE_DEVICE_GENERATED_COMMANDS_LIMITS_NVX = 1000086004,
VK_STRUCTURE_TYPE_DEVICE_GENERATED_COMMANDS_FEATURES_NVX = 1000086005,
VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_W_SCALING_STATE_CREATE_INFO_NV = 1000087000,
- VK_STRUCTURE_TYPE_SURFACE_CAPABILITIES2_EXT = 1000090000,
+ VK_STRUCTURE_TYPE_SURFACE_CAPABILITIES_2_EXT = 1000090000,
VK_STRUCTURE_TYPE_DISPLAY_POWER_INFO_EXT = 1000091000,
VK_STRUCTURE_TYPE_DEVICE_EVENT_INFO_EXT = 1000091001,
VK_STRUCTURE_TYPE_DISPLAY_EVENT_INFO_EXT = 1000091002,
@@ -313,6 +314,10 @@
VK_STRUCTURE_TYPE_FENCE_GET_WIN32_HANDLE_INFO_KHR = 1000114002,
VK_STRUCTURE_TYPE_IMPORT_FENCE_FD_INFO_KHR = 1000115000,
VK_STRUCTURE_TYPE_FENCE_GET_FD_INFO_KHR = 1000115001,
+ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES_KHR = 1000117000,
+ VK_STRUCTURE_TYPE_RENDER_PASS_INPUT_ATTACHMENT_ASPECT_CREATE_INFO_KHR = 1000117001,
+ VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO_KHR = 1000117002,
+ VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_DOMAIN_ORIGIN_STATE_CREATE_INFO_KHR = 1000117003,
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SURFACE_INFO_2_KHR = 1000119000,
VK_STRUCTURE_TYPE_SURFACE_CAPABILITIES_2_KHR = 1000119001,
VK_STRUCTURE_TYPE_SURFACE_FORMAT_2_KHR = 1000119002,
@@ -323,16 +328,33 @@
VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR = 1000127001,
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_FILTER_MINMAX_PROPERTIES_EXT = 1000130000,
VK_STRUCTURE_TYPE_SAMPLER_REDUCTION_MODE_CREATE_INFO_EXT = 1000130001,
+ VK_STRUCTURE_TYPE_SAMPLE_LOCATIONS_INFO_EXT = 1000143000,
+ VK_STRUCTURE_TYPE_RENDER_PASS_SAMPLE_LOCATIONS_BEGIN_INFO_EXT = 1000143001,
+ VK_STRUCTURE_TYPE_PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT = 1000143002,
+ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT = 1000143003,
+ VK_STRUCTURE_TYPE_MULTISAMPLE_PROPERTIES_EXT = 1000143004,
VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2_KHR = 1000146000,
VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2_KHR = 1000146001,
VK_STRUCTURE_TYPE_IMAGE_SPARSE_MEMORY_REQUIREMENTS_INFO_2_KHR = 1000146002,
VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2_KHR = 1000146003,
VK_STRUCTURE_TYPE_SPARSE_IMAGE_MEMORY_REQUIREMENTS_2_KHR = 1000146004,
+ VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO_KHR = 1000147000,
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BLEND_OPERATION_ADVANCED_FEATURES_EXT = 1000148000,
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BLEND_OPERATION_ADVANCED_PROPERTIES_EXT = 1000148001,
VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_ADVANCED_STATE_CREATE_INFO_EXT = 1000148002,
VK_STRUCTURE_TYPE_PIPELINE_COVERAGE_TO_COLOR_STATE_CREATE_INFO_NV = 1000149000,
VK_STRUCTURE_TYPE_PIPELINE_COVERAGE_MODULATION_STATE_CREATE_INFO_NV = 1000152000,
+ VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_CREATE_INFO_KHR = 1000156000,
+ VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_INFO_KHR = 1000156001,
+ VK_STRUCTURE_TYPE_BIND_IMAGE_PLANE_MEMORY_INFO_KHR = 1000156002,
+ VK_STRUCTURE_TYPE_IMAGE_PLANE_MEMORY_REQUIREMENTS_INFO_KHR = 1000156003,
+ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES_KHR = 1000156004,
+ VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_IMAGE_FORMAT_PROPERTIES_KHR = 1000156005,
+ VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO_KHR = 1000157000,
+ VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO_KHR = 1000157001,
+ VK_STRUCTURE_TYPE_VALIDATION_CACHE_CREATE_INFO_EXT = 1000160000,
+ VK_STRUCTURE_TYPE_SHADER_MODULE_VALIDATION_CACHE_CREATE_INFO_EXT = 1000160001,
+ VK_STRUCTURE_TYPE_DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT = 1000174000,
VK_STRUCTURE_TYPE_BEGIN_RANGE = VK_STRUCTURE_TYPE_APPLICATION_INFO,
VK_STRUCTURE_TYPE_END_RANGE = VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO,
VK_STRUCTURE_TYPE_RANGE_SIZE = (VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO - VK_STRUCTURE_TYPE_APPLICATION_INFO + 1),
@@ -553,6 +575,40 @@
VK_FORMAT_PVRTC1_4BPP_SRGB_BLOCK_IMG = 1000054005,
VK_FORMAT_PVRTC2_2BPP_SRGB_BLOCK_IMG = 1000054006,
VK_FORMAT_PVRTC2_4BPP_SRGB_BLOCK_IMG = 1000054007,
+ VK_FORMAT_G8B8G8R8_422_UNORM_KHR = 1000156000,
+ VK_FORMAT_B8G8R8G8_422_UNORM_KHR = 1000156001,
+ VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM_KHR = 1000156002,
+ VK_FORMAT_G8_B8R8_2PLANE_420_UNORM_KHR = 1000156003,
+ VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM_KHR = 1000156004,
+ VK_FORMAT_G8_B8R8_2PLANE_422_UNORM_KHR = 1000156005,
+ VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM_KHR = 1000156006,
+ VK_FORMAT_R10X6_UNORM_PACK16_KHR = 1000156007,
+ VK_FORMAT_R10X6G10X6_UNORM_2PACK16_KHR = 1000156008,
+ VK_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16_KHR = 1000156009,
+ VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16_KHR = 1000156010,
+ VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16_KHR = 1000156011,
+ VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16_KHR = 1000156012,
+ VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16_KHR = 1000156013,
+ VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16_KHR = 1000156014,
+ VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16_KHR = 1000156015,
+ VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16_KHR = 1000156016,
+ VK_FORMAT_R12X4_UNORM_PACK16_KHR = 1000156017,
+ VK_FORMAT_R12X4G12X4_UNORM_2PACK16_KHR = 1000156018,
+ VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16_KHR = 1000156019,
+ VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16_KHR = 1000156020,
+ VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16_KHR = 1000156021,
+ VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16_KHR = 1000156022,
+ VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16_KHR = 1000156023,
+ VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16_KHR = 1000156024,
+ VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16_KHR = 1000156025,
+ VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16_KHR = 1000156026,
+ VK_FORMAT_G16B16G16R16_422_UNORM_KHR = 1000156027,
+ VK_FORMAT_B16G16R16G16_422_UNORM_KHR = 1000156028,
+ VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM_KHR = 1000156029,
+ VK_FORMAT_G16_B16R16_2PLANE_420_UNORM_KHR = 1000156030,
+ VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM_KHR = 1000156031,
+ VK_FORMAT_G16_B16R16_2PLANE_422_UNORM_KHR = 1000156032,
+ VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM_KHR = 1000156033,
VK_FORMAT_BEGIN_RANGE = VK_FORMAT_UNDEFINED,
VK_FORMAT_END_RANGE = VK_FORMAT_ASTC_12x12_SRGB_BLOCK,
VK_FORMAT_RANGE_SIZE = (VK_FORMAT_ASTC_12x12_SRGB_BLOCK - VK_FORMAT_UNDEFINED + 1),
@@ -621,6 +677,8 @@
VK_IMAGE_LAYOUT_PREINITIALIZED = 8,
VK_IMAGE_LAYOUT_PRESENT_SRC_KHR = 1000001002,
VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR = 1000111000,
+ VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL_KHR = 1000117000,
+ VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL_KHR = 1000117001,
VK_IMAGE_LAYOUT_BEGIN_RANGE = VK_IMAGE_LAYOUT_UNDEFINED,
VK_IMAGE_LAYOUT_END_RANGE = VK_IMAGE_LAYOUT_PREINITIALIZED,
VK_IMAGE_LAYOUT_RANGE_SIZE = (VK_IMAGE_LAYOUT_PREINITIALIZED - VK_IMAGE_LAYOUT_UNDEFINED + 1),
@@ -851,6 +909,7 @@
VK_DYNAMIC_STATE_STENCIL_REFERENCE = 8,
VK_DYNAMIC_STATE_VIEWPORT_W_SCALING_NV = 1000087000,
VK_DYNAMIC_STATE_DISCARD_RECTANGLE_EXT = 1000099000,
+ VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT = 1000143000,
VK_DYNAMIC_STATE_BEGIN_RANGE = VK_DYNAMIC_STATE_VIEWPORT,
VK_DYNAMIC_STATE_END_RANGE = VK_DYNAMIC_STATE_STENCIL_REFERENCE,
VK_DYNAMIC_STATE_RANGE_SIZE = (VK_DYNAMIC_STATE_STENCIL_REFERENCE - VK_DYNAMIC_STATE_VIEWPORT + 1),
@@ -1009,6 +1068,8 @@
VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_KHR = 1000085000,
VK_OBJECT_TYPE_OBJECT_TABLE_NVX = 1000086000,
VK_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_NVX = 1000086001,
+ VK_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION_KHR = 1000156000,
+ VK_OBJECT_TYPE_VALIDATION_CACHE_EXT = 1000160000,
VK_OBJECT_TYPE_BEGIN_RANGE = VK_OBJECT_TYPE_UNKNOWN,
VK_OBJECT_TYPE_END_RANGE = VK_OBJECT_TYPE_COMMAND_POOL,
VK_OBJECT_TYPE_RANGE_SIZE = (VK_OBJECT_TYPE_COMMAND_POOL - VK_OBJECT_TYPE_UNKNOWN + 1),
@@ -1035,6 +1096,13 @@
VK_FORMAT_FEATURE_TRANSFER_SRC_BIT_KHR = 0x00004000,
VK_FORMAT_FEATURE_TRANSFER_DST_BIT_KHR = 0x00008000,
VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_MINMAX_BIT_EXT = 0x00010000,
+ VK_FORMAT_FEATURE_MIDPOINT_CHROMA_SAMPLES_BIT_KHR = 0x00020000,
+ VK_FORMAT_FEATURE_SAMPLED_IMAGE_YCBCR_CONVERSION_LINEAR_FILTER_BIT_KHR = 0x00040000,
+ VK_FORMAT_FEATURE_SAMPLED_IMAGE_YCBCR_CONVERSION_SEPARATE_RECONSTRUCTION_FILTER_BIT_KHR = 0x00080000,
+ VK_FORMAT_FEATURE_SAMPLED_IMAGE_YCBCR_CONVERSION_CHROMA_RECONSTRUCTION_EXPLICIT_BIT_KHR = 0x00100000,
+ VK_FORMAT_FEATURE_SAMPLED_IMAGE_YCBCR_CONVERSION_CHROMA_RECONSTRUCTION_EXPLICIT_FORCEABLE_BIT_KHR = 0x00200000,
+ VK_FORMAT_FEATURE_DISJOINT_BIT_KHR = 0x00400000,
+ VK_FORMAT_FEATURE_COSITED_CHROMA_SAMPLES_BIT_KHR = 0x00800000,
VK_FORMAT_FEATURE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
} VkFormatFeatureFlagBits;
typedef VkFlags VkFormatFeatureFlags;
@@ -1060,6 +1128,11 @@
VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT = 0x00000010,
VK_IMAGE_CREATE_BIND_SFR_BIT_KHX = 0x00000040,
VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT_KHR = 0x00000020,
+ VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT_KHR = 0x00000080,
+ VK_IMAGE_CREATE_EXTENDED_USAGE_BIT_KHR = 0x00000100,
+ VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT = 0x00001000,
+ VK_IMAGE_CREATE_DISJOINT_BIT_KHR = 0x00000200,
+ VK_IMAGE_CREATE_ALIAS_BIT_KHR = 0x00000400,
VK_IMAGE_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
} VkImageCreateFlagBits;
typedef VkFlags VkImageCreateFlags;
@@ -1133,6 +1206,9 @@
VK_IMAGE_ASPECT_DEPTH_BIT = 0x00000002,
VK_IMAGE_ASPECT_STENCIL_BIT = 0x00000004,
VK_IMAGE_ASPECT_METADATA_BIT = 0x00000008,
+ VK_IMAGE_ASPECT_PLANE_0_BIT_KHR = 0x00000010,
+ VK_IMAGE_ASPECT_PLANE_1_BIT_KHR = 0x00000020,
+ VK_IMAGE_ASPECT_PLANE_2_BIT_KHR = 0x00000040,
VK_IMAGE_ASPECT_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
} VkImageAspectFlagBits;
typedef VkFlags VkImageAspectFlags;
@@ -1366,6 +1442,27 @@
} VkStencilFaceFlagBits;
typedef VkFlags VkStencilFaceFlags;
+typedef struct VkApplicationInfo {
+ VkStructureType sType;
+ const void* pNext;
+ const char* pApplicationName;
+ uint32_t applicationVersion;
+ const char* pEngineName;
+ uint32_t engineVersion;
+ uint32_t apiVersion;
+} VkApplicationInfo;
+
+typedef struct VkInstanceCreateInfo {
+ VkStructureType sType;
+ const void* pNext;
+ VkInstanceCreateFlags flags;
+ const VkApplicationInfo* pApplicationInfo;
+ uint32_t enabledLayerCount;
+ const char* const* ppEnabledLayerNames;
+ uint32_t enabledExtensionCount;
+ const char* const* ppEnabledExtensionNames;
+} VkInstanceCreateInfo;
+
typedef void* (VKAPI_PTR *PFN_vkAllocationFunction)(
void* pUserData,
size_t size,
@@ -1395,29 +1492,6 @@
VkInternalAllocationType allocationType,
VkSystemAllocationScope allocationScope);
-typedef void (VKAPI_PTR *PFN_vkVoidFunction)(void);
-
-typedef struct VkApplicationInfo {
- VkStructureType sType;
- const void* pNext;
- const char* pApplicationName;
- uint32_t applicationVersion;
- const char* pEngineName;
- uint32_t engineVersion;
- uint32_t apiVersion;
-} VkApplicationInfo;
-
-typedef struct VkInstanceCreateInfo {
- VkStructureType sType;
- const void* pNext;
- VkInstanceCreateFlags flags;
- const VkApplicationInfo* pApplicationInfo;
- uint32_t enabledLayerCount;
- const char* const* ppEnabledLayerNames;
- uint32_t enabledExtensionCount;
- const char* const* ppEnabledExtensionNames;
-} VkInstanceCreateInfo;
-
typedef struct VkAllocationCallbacks {
void* pUserData;
PFN_vkAllocationFunction pfnAllocation;
@@ -1658,6 +1732,7 @@
VkMemoryHeap memoryHeaps[VK_MAX_MEMORY_HEAPS];
} VkPhysicalDeviceMemoryProperties;
+typedef void (VKAPI_PTR *PFN_vkVoidFunction)(void);
typedef struct VkDeviceQueueCreateInfo {
VkStructureType sType;
const void* pNext;
@@ -3433,6 +3508,7 @@
VK_COLOR_SPACE_ADOBERGB_LINEAR_EXT = 1000104011,
VK_COLOR_SPACE_ADOBERGB_NONLINEAR_EXT = 1000104012,
VK_COLOR_SPACE_PASS_THROUGH_EXT = 1000104013,
+ VK_COLOR_SPACE_EXTENDED_SRGB_NONLINEAR_EXT = 1000104014,
VK_COLOR_SPACE_BEGIN_RANGE_KHR = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR,
VK_COLOR_SPACE_END_RANGE_KHR = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR,
VK_COLOR_SPACE_RANGE_SIZE_KHR = (VK_COLOR_SPACE_SRGB_NONLINEAR_KHR - VK_COLOR_SPACE_SRGB_NONLINEAR_KHR + 1),
@@ -4774,6 +4850,62 @@
int* pFd);
#endif
+#define VK_KHR_maintenance2 1
+#define VK_KHR_MAINTENANCE2_SPEC_VERSION 1
+#define VK_KHR_MAINTENANCE2_EXTENSION_NAME "VK_KHR_maintenance2"
+
+
+typedef enum VkPointClippingBehaviorKHR {
+ VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES_KHR = 0,
+ VK_POINT_CLIPPING_BEHAVIOR_USER_CLIP_PLANES_ONLY_KHR = 1,
+ VK_POINT_CLIPPING_BEHAVIOR_BEGIN_RANGE_KHR = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES_KHR,
+ VK_POINT_CLIPPING_BEHAVIOR_END_RANGE_KHR = VK_POINT_CLIPPING_BEHAVIOR_USER_CLIP_PLANES_ONLY_KHR,
+ VK_POINT_CLIPPING_BEHAVIOR_RANGE_SIZE_KHR = (VK_POINT_CLIPPING_BEHAVIOR_USER_CLIP_PLANES_ONLY_KHR - VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES_KHR + 1),
+ VK_POINT_CLIPPING_BEHAVIOR_MAX_ENUM_KHR = 0x7FFFFFFF
+} VkPointClippingBehaviorKHR;
+
+typedef enum VkTessellationDomainOriginKHR {
+ VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT_KHR = 0,
+ VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT_KHR = 1,
+ VK_TESSELLATION_DOMAIN_ORIGIN_BEGIN_RANGE_KHR = VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT_KHR,
+ VK_TESSELLATION_DOMAIN_ORIGIN_END_RANGE_KHR = VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT_KHR,
+ VK_TESSELLATION_DOMAIN_ORIGIN_RANGE_SIZE_KHR = (VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT_KHR - VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT_KHR + 1),
+ VK_TESSELLATION_DOMAIN_ORIGIN_MAX_ENUM_KHR = 0x7FFFFFFF
+} VkTessellationDomainOriginKHR;
+
+typedef struct VkPhysicalDevicePointClippingPropertiesKHR {
+ VkStructureType sType;
+ void* pNext;
+ VkPointClippingBehaviorKHR pointClippingBehavior;
+} VkPhysicalDevicePointClippingPropertiesKHR;
+
+typedef struct VkInputAttachmentAspectReferenceKHR {
+ uint32_t subpass;
+ uint32_t inputAttachmentIndex;
+ VkImageAspectFlags aspectMask;
+} VkInputAttachmentAspectReferenceKHR;
+
+typedef struct VkRenderPassInputAttachmentAspectCreateInfoKHR {
+ VkStructureType sType;
+ const void* pNext;
+ uint32_t aspectReferenceCount;
+ const VkInputAttachmentAspectReferenceKHR* pAspectReferences;
+} VkRenderPassInputAttachmentAspectCreateInfoKHR;
+
+typedef struct VkImageViewUsageCreateInfoKHR {
+ VkStructureType sType;
+ const void* pNext;
+ VkImageUsageFlags usage;
+} VkImageViewUsageCreateInfoKHR;
+
+typedef struct VkPipelineTessellationDomainOriginStateCreateInfoKHR {
+ VkStructureType sType;
+ const void* pNext;
+ VkTessellationDomainOriginKHR domainOrigin;
+} VkPipelineTessellationDomainOriginStateCreateInfoKHR;
+
+
+
#define VK_KHR_get_surface_capabilities2 1
#define VK_KHR_GET_SURFACE_CAPABILITIES_2_SPEC_VERSION 1
#define VK_KHR_GET_SURFACE_CAPABILITIES_2_EXTENSION_NAME "VK_KHR_get_surface_capabilities2"
@@ -4827,7 +4959,7 @@
#define VK_KHR_dedicated_allocation 1
-#define VK_KHR_DEDICATED_ALLOCATION_SPEC_VERSION 1
+#define VK_KHR_DEDICATED_ALLOCATION_SPEC_VERSION 3
#define VK_KHR_DEDICATED_ALLOCATION_EXTENSION_NAME "VK_KHR_dedicated_allocation"
typedef struct VkMemoryDedicatedRequirementsKHR {
@@ -4851,6 +4983,11 @@
#define VK_KHR_STORAGE_BUFFER_STORAGE_CLASS_EXTENSION_NAME "VK_KHR_storage_buffer_storage_class"
+#define VK_KHR_relaxed_block_layout 1
+#define VK_KHR_RELAXED_BLOCK_LAYOUT_SPEC_VERSION 1
+#define VK_KHR_RELAXED_BLOCK_LAYOUT_EXTENSION_NAME "VK_KHR_relaxed_block_layout"
+
+
#define VK_KHR_get_memory_requirements2 1
#define VK_KHR_GET_MEMORY_REQUIREMENTS_2_SPEC_VERSION 1
#define VK_KHR_GET_MEMORY_REQUIREMENTS_2_EXTENSION_NAME "VK_KHR_get_memory_requirements2"
@@ -4908,6 +5045,152 @@
VkSparseImageMemoryRequirements2KHR* pSparseMemoryRequirements);
#endif
+#define VK_KHR_image_format_list 1
+#define VK_KHR_IMAGE_FORMAT_LIST_SPEC_VERSION 1
+#define VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME "VK_KHR_image_format_list"
+
+typedef struct VkImageFormatListCreateInfoKHR {
+ VkStructureType sType;
+ const void* pNext;
+ uint32_t viewFormatCount;
+ const VkFormat* pViewFormats;
+} VkImageFormatListCreateInfoKHR;
+
+
+
+#define VK_KHR_sampler_ycbcr_conversion 1
+VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkSamplerYcbcrConversionKHR)
+
+#define VK_KHR_SAMPLER_YCBCR_CONVERSION_SPEC_VERSION 1
+#define VK_KHR_SAMPLER_YCBCR_CONVERSION_EXTENSION_NAME "VK_KHR_sampler_ycbcr_conversion"
+
+
+typedef enum VkSamplerYcbcrModelConversionKHR {
+ VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY_KHR = 0,
+ VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY_KHR = 1,
+ VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_709_KHR = 2,
+ VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601_KHR = 3,
+ VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_2020_KHR = 4,
+ VK_SAMPLER_YCBCR_MODEL_CONVERSION_BEGIN_RANGE_KHR = VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY_KHR,
+ VK_SAMPLER_YCBCR_MODEL_CONVERSION_END_RANGE_KHR = VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_2020_KHR,
+ VK_SAMPLER_YCBCR_MODEL_CONVERSION_RANGE_SIZE_KHR = (VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_2020_KHR - VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY_KHR + 1),
+ VK_SAMPLER_YCBCR_MODEL_CONVERSION_MAX_ENUM_KHR = 0x7FFFFFFF
+} VkSamplerYcbcrModelConversionKHR;
+
+typedef enum VkSamplerYcbcrRangeKHR {
+ VK_SAMPLER_YCBCR_RANGE_ITU_FULL_KHR = 0,
+ VK_SAMPLER_YCBCR_RANGE_ITU_NARROW_KHR = 1,
+ VK_SAMPLER_YCBCR_RANGE_BEGIN_RANGE_KHR = VK_SAMPLER_YCBCR_RANGE_ITU_FULL_KHR,
+ VK_SAMPLER_YCBCR_RANGE_END_RANGE_KHR = VK_SAMPLER_YCBCR_RANGE_ITU_NARROW_KHR,
+ VK_SAMPLER_YCBCR_RANGE_RANGE_SIZE_KHR = (VK_SAMPLER_YCBCR_RANGE_ITU_NARROW_KHR - VK_SAMPLER_YCBCR_RANGE_ITU_FULL_KHR + 1),
+ VK_SAMPLER_YCBCR_RANGE_MAX_ENUM_KHR = 0x7FFFFFFF
+} VkSamplerYcbcrRangeKHR;
+
+typedef enum VkChromaLocationKHR {
+ VK_CHROMA_LOCATION_COSITED_EVEN_KHR = 0,
+ VK_CHROMA_LOCATION_MIDPOINT_KHR = 1,
+ VK_CHROMA_LOCATION_BEGIN_RANGE_KHR = VK_CHROMA_LOCATION_COSITED_EVEN_KHR,
+ VK_CHROMA_LOCATION_END_RANGE_KHR = VK_CHROMA_LOCATION_MIDPOINT_KHR,
+ VK_CHROMA_LOCATION_RANGE_SIZE_KHR = (VK_CHROMA_LOCATION_MIDPOINT_KHR - VK_CHROMA_LOCATION_COSITED_EVEN_KHR + 1),
+ VK_CHROMA_LOCATION_MAX_ENUM_KHR = 0x7FFFFFFF
+} VkChromaLocationKHR;
+
+typedef struct VkSamplerYcbcrConversionCreateInfoKHR {
+ VkStructureType sType;
+ const void* pNext;
+ VkFormat format;
+ VkSamplerYcbcrModelConversionKHR ycbcrModel;
+ VkSamplerYcbcrRangeKHR ycbcrRange;
+ VkComponentMapping components;
+ VkChromaLocationKHR xChromaOffset;
+ VkChromaLocationKHR yChromaOffset;
+ VkFilter chromaFilter;
+ VkBool32 forceExplicitReconstruction;
+} VkSamplerYcbcrConversionCreateInfoKHR;
+
+typedef struct VkSamplerYcbcrConversionInfoKHR {
+ VkStructureType sType;
+ const void* pNext;
+ VkSamplerYcbcrConversionKHR conversion;
+} VkSamplerYcbcrConversionInfoKHR;
+
+typedef struct VkBindImagePlaneMemoryInfoKHR {
+ VkStructureType sType;
+ const void* pNext;
+ VkImageAspectFlagBits planeAspect;
+} VkBindImagePlaneMemoryInfoKHR;
+
+typedef struct VkImagePlaneMemoryRequirementsInfoKHR {
+ VkStructureType sType;
+ const void* pNext;
+ VkImageAspectFlagBits planeAspect;
+} VkImagePlaneMemoryRequirementsInfoKHR;
+
+typedef struct VkPhysicalDeviceSamplerYcbcrConversionFeaturesKHR {
+ VkStructureType sType;
+ void* pNext;
+ VkBool32 samplerYcbcrConversion;
+} VkPhysicalDeviceSamplerYcbcrConversionFeaturesKHR;
+
+typedef struct VkSamplerYcbcrConversionImageFormatPropertiesKHR {
+ VkStructureType sType;
+ void* pNext;
+ uint32_t combinedImageSamplerDescriptorCount;
+} VkSamplerYcbcrConversionImageFormatPropertiesKHR;
+
+
+typedef VkResult (VKAPI_PTR *PFN_vkCreateSamplerYcbcrConversionKHR)(VkDevice device, const VkSamplerYcbcrConversionCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSamplerYcbcrConversionKHR* pYcbcrConversion);
+typedef void (VKAPI_PTR *PFN_vkDestroySamplerYcbcrConversionKHR)(VkDevice device, VkSamplerYcbcrConversionKHR ycbcrConversion, const VkAllocationCallbacks* pAllocator);
+
+#ifndef VK_NO_PROTOTYPES
+VKAPI_ATTR VkResult VKAPI_CALL vkCreateSamplerYcbcrConversionKHR(
+ VkDevice device,
+ const VkSamplerYcbcrConversionCreateInfoKHR* pCreateInfo,
+ const VkAllocationCallbacks* pAllocator,
+ VkSamplerYcbcrConversionKHR* pYcbcrConversion);
+
+VKAPI_ATTR void VKAPI_CALL vkDestroySamplerYcbcrConversionKHR(
+ VkDevice device,
+ VkSamplerYcbcrConversionKHR ycbcrConversion,
+ const VkAllocationCallbacks* pAllocator);
+#endif
+
+#define VK_KHR_bind_memory2 1
+#define VK_KHR_BIND_MEMORY_2_SPEC_VERSION 1
+#define VK_KHR_BIND_MEMORY_2_EXTENSION_NAME "VK_KHR_bind_memory2"
+
+typedef struct VkBindBufferMemoryInfoKHR {
+ VkStructureType sType;
+ const void* pNext;
+ VkBuffer buffer;
+ VkDeviceMemory memory;
+ VkDeviceSize memoryOffset;
+} VkBindBufferMemoryInfoKHR;
+
+typedef struct VkBindImageMemoryInfoKHR {
+ VkStructureType sType;
+ const void* pNext;
+ VkImage image;
+ VkDeviceMemory memory;
+ VkDeviceSize memoryOffset;
+} VkBindImageMemoryInfoKHR;
+
+
+typedef VkResult (VKAPI_PTR *PFN_vkBindBufferMemory2KHR)(VkDevice device, uint32_t bindInfoCount, const VkBindBufferMemoryInfoKHR* pBindInfos);
+typedef VkResult (VKAPI_PTR *PFN_vkBindImageMemory2KHR)(VkDevice device, uint32_t bindInfoCount, const VkBindImageMemoryInfoKHR* pBindInfos);
+
+#ifndef VK_NO_PROTOTYPES
+VKAPI_ATTR VkResult VKAPI_CALL vkBindBufferMemory2KHR(
+ VkDevice device,
+ uint32_t bindInfoCount,
+ const VkBindBufferMemoryInfoKHR* pBindInfos);
+
+VKAPI_ATTR VkResult VKAPI_CALL vkBindImageMemory2KHR(
+ VkDevice device,
+ uint32_t bindInfoCount,
+ const VkBindImageMemoryInfoKHR* pBindInfos);
+#endif
+
#define VK_EXT_debug_report 1
VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkDebugReportCallbackEXT)
@@ -4951,10 +5234,12 @@
VK_DEBUG_REPORT_OBJECT_TYPE_DISPLAY_MODE_KHR_EXT = 30,
VK_DEBUG_REPORT_OBJECT_TYPE_OBJECT_TABLE_NVX_EXT = 31,
VK_DEBUG_REPORT_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_NVX_EXT = 32,
+ VK_DEBUG_REPORT_OBJECT_TYPE_VALIDATION_CACHE_EXT = 33,
VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_KHR_EXT = 1000085000,
+ VK_DEBUG_REPORT_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION_KHR_EXT = 1000156000,
VK_DEBUG_REPORT_OBJECT_TYPE_BEGIN_RANGE_EXT = VK_DEBUG_REPORT_OBJECT_TYPE_UNKNOWN_EXT,
- VK_DEBUG_REPORT_OBJECT_TYPE_END_RANGE_EXT = VK_DEBUG_REPORT_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_NVX_EXT,
- VK_DEBUG_REPORT_OBJECT_TYPE_RANGE_SIZE_EXT = (VK_DEBUG_REPORT_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_NVX_EXT - VK_DEBUG_REPORT_OBJECT_TYPE_UNKNOWN_EXT + 1),
+ VK_DEBUG_REPORT_OBJECT_TYPE_END_RANGE_EXT = VK_DEBUG_REPORT_OBJECT_TYPE_VALIDATION_CACHE_EXT,
+ VK_DEBUG_REPORT_OBJECT_TYPE_RANGE_SIZE_EXT = (VK_DEBUG_REPORT_OBJECT_TYPE_VALIDATION_CACHE_EXT - VK_DEBUG_REPORT_OBJECT_TYPE_UNKNOWN_EXT + 1),
VK_DEBUG_REPORT_OBJECT_TYPE_MAX_ENUM_EXT = 0x7FFFFFFF
} VkDebugReportObjectTypeEXT;
@@ -4979,7 +5264,6 @@
const char* pMessage,
void* pUserData);
-
typedef struct VkDebugReportCallbackCreateInfoEXT {
VkStructureType sType;
const void* pNext;
@@ -5021,6 +5305,11 @@
#define VK_NV_GLSL_SHADER_EXTENSION_NAME "VK_NV_glsl_shader"
+#define VK_EXT_depth_range_unrestricted 1
+#define VK_EXT_DEPTH_RANGE_UNRESTRICTED_SPEC_VERSION 1
+#define VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME "VK_EXT_depth_range_unrestricted"
+
+
#define VK_IMG_filter_cubic 1
#define VK_IMG_FILTER_CUBIC_SPEC_VERSION 1
#define VK_IMG_FILTER_CUBIC_EXTENSION_NAME "VK_IMG_filter_cubic"
@@ -5088,31 +5377,31 @@
} VkDebugMarkerMarkerInfoEXT;
-typedef VkResult (VKAPI_PTR *PFN_vkDebugMarkerSetObjectTagEXT)(VkDevice device, VkDebugMarkerObjectTagInfoEXT* pTagInfo);
-typedef VkResult (VKAPI_PTR *PFN_vkDebugMarkerSetObjectNameEXT)(VkDevice device, VkDebugMarkerObjectNameInfoEXT* pNameInfo);
-typedef void (VKAPI_PTR *PFN_vkCmdDebugMarkerBeginEXT)(VkCommandBuffer commandBuffer, VkDebugMarkerMarkerInfoEXT* pMarkerInfo);
+typedef VkResult (VKAPI_PTR *PFN_vkDebugMarkerSetObjectTagEXT)(VkDevice device, const VkDebugMarkerObjectTagInfoEXT* pTagInfo);
+typedef VkResult (VKAPI_PTR *PFN_vkDebugMarkerSetObjectNameEXT)(VkDevice device, const VkDebugMarkerObjectNameInfoEXT* pNameInfo);
+typedef void (VKAPI_PTR *PFN_vkCmdDebugMarkerBeginEXT)(VkCommandBuffer commandBuffer, const VkDebugMarkerMarkerInfoEXT* pMarkerInfo);
typedef void (VKAPI_PTR *PFN_vkCmdDebugMarkerEndEXT)(VkCommandBuffer commandBuffer);
-typedef void (VKAPI_PTR *PFN_vkCmdDebugMarkerInsertEXT)(VkCommandBuffer commandBuffer, VkDebugMarkerMarkerInfoEXT* pMarkerInfo);
+typedef void (VKAPI_PTR *PFN_vkCmdDebugMarkerInsertEXT)(VkCommandBuffer commandBuffer, const VkDebugMarkerMarkerInfoEXT* pMarkerInfo);
#ifndef VK_NO_PROTOTYPES
VKAPI_ATTR VkResult VKAPI_CALL vkDebugMarkerSetObjectTagEXT(
VkDevice device,
- VkDebugMarkerObjectTagInfoEXT* pTagInfo);
+ const VkDebugMarkerObjectTagInfoEXT* pTagInfo);
VKAPI_ATTR VkResult VKAPI_CALL vkDebugMarkerSetObjectNameEXT(
VkDevice device,
- VkDebugMarkerObjectNameInfoEXT* pNameInfo);
+ const VkDebugMarkerObjectNameInfoEXT* pNameInfo);
VKAPI_ATTR void VKAPI_CALL vkCmdDebugMarkerBeginEXT(
VkCommandBuffer commandBuffer,
- VkDebugMarkerMarkerInfoEXT* pMarkerInfo);
+ const VkDebugMarkerMarkerInfoEXT* pMarkerInfo);
VKAPI_ATTR void VKAPI_CALL vkCmdDebugMarkerEndEXT(
VkCommandBuffer commandBuffer);
VKAPI_ATTR void VKAPI_CALL vkCmdDebugMarkerInsertEXT(
VkCommandBuffer commandBuffer,
- VkDebugMarkerMarkerInfoEXT* pMarkerInfo);
+ const VkDebugMarkerMarkerInfoEXT* pMarkerInfo);
#endif
#define VK_AMD_gcn_shader 1
@@ -5199,6 +5488,11 @@
+#define VK_AMD_shader_image_load_store_lod 1
+#define VK_AMD_SHADER_IMAGE_LOAD_STORE_LOD_SPEC_VERSION 1
+#define VK_AMD_SHADER_IMAGE_LOAD_STORE_LOD_EXTENSION_NAME "VK_AMD_shader_image_load_store_lod"
+
+
#define VK_KHX_multiview 1
#define VK_KHX_MULTIVIEW_SPEC_VERSION 1
#define VK_KHX_MULTIVIEW_EXTENSION_NAME "VK_KHX_multiview"
@@ -5350,9 +5644,9 @@
#endif /* VK_USE_PLATFORM_WIN32_KHR */
#define VK_KHX_device_group 1
-#define VK_MAX_DEVICE_GROUP_SIZE_KHX 32
-#define VK_KHX_DEVICE_GROUP_SPEC_VERSION 1
+#define VK_KHX_DEVICE_GROUP_SPEC_VERSION 2
#define VK_KHX_DEVICE_GROUP_EXTENSION_NAME "VK_KHX_device_group"
+#define VK_MAX_DEVICE_GROUP_SIZE_KHX 32
typedef enum VkPeerMemoryFeatureFlagBitsKHX {
@@ -5386,28 +5680,6 @@
uint32_t deviceMask;
} VkMemoryAllocateFlagsInfoKHX;
-typedef struct VkBindBufferMemoryInfoKHX {
- VkStructureType sType;
- const void* pNext;
- VkBuffer buffer;
- VkDeviceMemory memory;
- VkDeviceSize memoryOffset;
- uint32_t deviceIndexCount;
- const uint32_t* pDeviceIndices;
-} VkBindBufferMemoryInfoKHX;
-
-typedef struct VkBindImageMemoryInfoKHX {
- VkStructureType sType;
- const void* pNext;
- VkImage image;
- VkDeviceMemory memory;
- VkDeviceSize memoryOffset;
- uint32_t deviceIndexCount;
- const uint32_t* pDeviceIndices;
- uint32_t SFRRectCount;
- const VkRect2D* pSFRRects;
-} VkBindImageMemoryInfoKHX;
-
typedef struct VkDeviceGroupRenderPassBeginInfoKHX {
VkStructureType sType;
const void* pNext;
@@ -5440,6 +5712,22 @@
uint32_t memoryDeviceIndex;
} VkDeviceGroupBindSparseInfoKHX;
+typedef struct VkBindBufferMemoryDeviceGroupInfoKHX {
+ VkStructureType sType;
+ const void* pNext;
+ uint32_t deviceIndexCount;
+ const uint32_t* pDeviceIndices;
+} VkBindBufferMemoryDeviceGroupInfoKHX;
+
+typedef struct VkBindImageMemoryDeviceGroupInfoKHX {
+ VkStructureType sType;
+ const void* pNext;
+ uint32_t deviceIndexCount;
+ const uint32_t* pDeviceIndices;
+ uint32_t SFRRectCount;
+ const VkRect2D* pSFRRects;
+} VkBindImageMemoryDeviceGroupInfoKHX;
+
typedef struct VkDeviceGroupPresentCapabilitiesKHX {
VkStructureType sType;
const void* pNext;
@@ -5486,14 +5774,12 @@
typedef void (VKAPI_PTR *PFN_vkGetDeviceGroupPeerMemoryFeaturesKHX)(VkDevice device, uint32_t heapIndex, uint32_t localDeviceIndex, uint32_t remoteDeviceIndex, VkPeerMemoryFeatureFlagsKHX* pPeerMemoryFeatures);
-typedef VkResult (VKAPI_PTR *PFN_vkBindBufferMemory2KHX)(VkDevice device, uint32_t bindInfoCount, const VkBindBufferMemoryInfoKHX* pBindInfos);
-typedef VkResult (VKAPI_PTR *PFN_vkBindImageMemory2KHX)(VkDevice device, uint32_t bindInfoCount, const VkBindImageMemoryInfoKHX* pBindInfos);
typedef void (VKAPI_PTR *PFN_vkCmdSetDeviceMaskKHX)(VkCommandBuffer commandBuffer, uint32_t deviceMask);
+typedef void (VKAPI_PTR *PFN_vkCmdDispatchBaseKHX)(VkCommandBuffer commandBuffer, uint32_t baseGroupX, uint32_t baseGroupY, uint32_t baseGroupZ, uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ);
typedef VkResult (VKAPI_PTR *PFN_vkGetDeviceGroupPresentCapabilitiesKHX)(VkDevice device, VkDeviceGroupPresentCapabilitiesKHX* pDeviceGroupPresentCapabilities);
typedef VkResult (VKAPI_PTR *PFN_vkGetDeviceGroupSurfacePresentModesKHX)(VkDevice device, VkSurfaceKHR surface, VkDeviceGroupPresentModeFlagsKHX* pModes);
-typedef VkResult (VKAPI_PTR *PFN_vkAcquireNextImage2KHX)(VkDevice device, const VkAcquireNextImageInfoKHX* pAcquireInfo, uint32_t* pImageIndex);
-typedef void (VKAPI_PTR *PFN_vkCmdDispatchBaseKHX)(VkCommandBuffer commandBuffer, uint32_t baseGroupX, uint32_t baseGroupY, uint32_t baseGroupZ, uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ);
typedef VkResult (VKAPI_PTR *PFN_vkGetPhysicalDevicePresentRectanglesKHX)(VkPhysicalDevice physicalDevice, VkSurfaceKHR surface, uint32_t* pRectCount, VkRect2D* pRects);
+typedef VkResult (VKAPI_PTR *PFN_vkAcquireNextImage2KHX)(VkDevice device, const VkAcquireNextImageInfoKHX* pAcquireInfo, uint32_t* pImageIndex);
#ifndef VK_NO_PROTOTYPES
VKAPI_ATTR void VKAPI_CALL vkGetDeviceGroupPeerMemoryFeaturesKHX(
@@ -5503,34 +5789,10 @@
uint32_t remoteDeviceIndex,
VkPeerMemoryFeatureFlagsKHX* pPeerMemoryFeatures);
-VKAPI_ATTR VkResult VKAPI_CALL vkBindBufferMemory2KHX(
- VkDevice device,
- uint32_t bindInfoCount,
- const VkBindBufferMemoryInfoKHX* pBindInfos);
-
-VKAPI_ATTR VkResult VKAPI_CALL vkBindImageMemory2KHX(
- VkDevice device,
- uint32_t bindInfoCount,
- const VkBindImageMemoryInfoKHX* pBindInfos);
-
VKAPI_ATTR void VKAPI_CALL vkCmdSetDeviceMaskKHX(
VkCommandBuffer commandBuffer,
uint32_t deviceMask);
-VKAPI_ATTR VkResult VKAPI_CALL vkGetDeviceGroupPresentCapabilitiesKHX(
- VkDevice device,
- VkDeviceGroupPresentCapabilitiesKHX* pDeviceGroupPresentCapabilities);
-
-VKAPI_ATTR VkResult VKAPI_CALL vkGetDeviceGroupSurfacePresentModesKHX(
- VkDevice device,
- VkSurfaceKHR surface,
- VkDeviceGroupPresentModeFlagsKHX* pModes);
-
-VKAPI_ATTR VkResult VKAPI_CALL vkAcquireNextImage2KHX(
- VkDevice device,
- const VkAcquireNextImageInfoKHX* pAcquireInfo,
- uint32_t* pImageIndex);
-
VKAPI_ATTR void VKAPI_CALL vkCmdDispatchBaseKHX(
VkCommandBuffer commandBuffer,
uint32_t baseGroupX,
@@ -5540,11 +5802,25 @@
uint32_t groupCountY,
uint32_t groupCountZ);
+VKAPI_ATTR VkResult VKAPI_CALL vkGetDeviceGroupPresentCapabilitiesKHX(
+ VkDevice device,
+ VkDeviceGroupPresentCapabilitiesKHX* pDeviceGroupPresentCapabilities);
+
+VKAPI_ATTR VkResult VKAPI_CALL vkGetDeviceGroupSurfacePresentModesKHX(
+ VkDevice device,
+ VkSurfaceKHR surface,
+ VkDeviceGroupPresentModeFlagsKHX* pModes);
+
VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDevicePresentRectanglesKHX(
VkPhysicalDevice physicalDevice,
VkSurfaceKHR surface,
uint32_t* pRectCount,
VkRect2D* pRects);
+
+VKAPI_ATTR VkResult VKAPI_CALL vkAcquireNextImage2KHX(
+ VkDevice device,
+ const VkAcquireNextImageInfoKHX* pAcquireInfo,
+ uint32_t* pImageIndex);
#endif
#define VK_EXT_validation_flags 1
@@ -5639,7 +5915,7 @@
VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkObjectTableNVX)
VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkIndirectCommandsLayoutNVX)
-#define VK_NVX_DEVICE_GENERATED_COMMANDS_SPEC_VERSION 1
+#define VK_NVX_DEVICE_GENERATED_COMMANDS_SPEC_VERSION 3
#define VK_NVX_DEVICE_GENERATED_COMMANDS_EXTENSION_NAME "VK_NVX_device_generated_commands"
@@ -5929,6 +6205,7 @@
#define VK_EXT_display_surface_counter 1
#define VK_EXT_DISPLAY_SURFACE_COUNTER_SPEC_VERSION 1
#define VK_EXT_DISPLAY_SURFACE_COUNTER_EXTENSION_NAME "VK_EXT_display_surface_counter"
+#define VK_STRUCTURE_TYPE_SURFACE_CAPABILITIES2_EXT VK_STRUCTURE_TYPE_SURFACE_CAPABILITIES_2_EXT
typedef enum VkSurfaceCounterFlagBitsEXT {
@@ -6204,7 +6481,7 @@
#endif
#define VK_EXT_swapchain_colorspace 1
-#define VK_EXT_SWAPCHAIN_COLOR_SPACE_SPEC_VERSION 2
+#define VK_EXT_SWAPCHAIN_COLOR_SPACE_SPEC_VERSION 3
#define VK_EXT_SWAPCHAIN_COLOR_SPACE_EXTENSION_NAME "VK_EXT_swapchain_colorspace"
@@ -6328,6 +6605,96 @@
#define VK_AMD_GPU_SHADER_INT16_EXTENSION_NAME "VK_AMD_gpu_shader_int16"
+#define VK_AMD_mixed_attachment_samples 1
+#define VK_AMD_MIXED_ATTACHMENT_SAMPLES_SPEC_VERSION 1
+#define VK_AMD_MIXED_ATTACHMENT_SAMPLES_EXTENSION_NAME "VK_AMD_mixed_attachment_samples"
+
+
+#define VK_AMD_shader_fragment_mask 1
+#define VK_AMD_SHADER_FRAGMENT_MASK_SPEC_VERSION 1
+#define VK_AMD_SHADER_FRAGMENT_MASK_EXTENSION_NAME "VK_AMD_shader_fragment_mask"
+
+
+#define VK_EXT_shader_stencil_export 1
+#define VK_EXT_SHADER_STENCIL_EXPORT_SPEC_VERSION 1
+#define VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME "VK_EXT_shader_stencil_export"
+
+
+#define VK_EXT_sample_locations 1
+#define VK_EXT_SAMPLE_LOCATIONS_SPEC_VERSION 1
+#define VK_EXT_SAMPLE_LOCATIONS_EXTENSION_NAME "VK_EXT_sample_locations"
+
+typedef struct VkSampleLocationEXT {
+ float x;
+ float y;
+} VkSampleLocationEXT;
+
+typedef struct VkSampleLocationsInfoEXT {
+ VkStructureType sType;
+ const void* pNext;
+ VkSampleCountFlagBits sampleLocationsPerPixel;
+ VkExtent2D sampleLocationGridSize;
+ uint32_t sampleLocationsCount;
+ const VkSampleLocationEXT* pSampleLocations;
+} VkSampleLocationsInfoEXT;
+
+typedef struct VkAttachmentSampleLocationsEXT {
+ uint32_t attachmentIndex;
+ VkSampleLocationsInfoEXT sampleLocationsInfo;
+} VkAttachmentSampleLocationsEXT;
+
+typedef struct VkSubpassSampleLocationsEXT {
+ uint32_t subpassIndex;
+ VkSampleLocationsInfoEXT sampleLocationsInfo;
+} VkSubpassSampleLocationsEXT;
+
+typedef struct VkRenderPassSampleLocationsBeginInfoEXT {
+ VkStructureType sType;
+ const void* pNext;
+ uint32_t attachmentInitialSampleLocationsCount;
+ const VkAttachmentSampleLocationsEXT* pAttachmentInitialSampleLocations;
+ uint32_t postSubpassSampleLocationsCount;
+ const VkSubpassSampleLocationsEXT* pPostSubpassSampleLocations;
+} VkRenderPassSampleLocationsBeginInfoEXT;
+
+typedef struct VkPipelineSampleLocationsStateCreateInfoEXT {
+ VkStructureType sType;
+ const void* pNext;
+ VkBool32 sampleLocationsEnable;
+ VkSampleLocationsInfoEXT sampleLocationsInfo;
+} VkPipelineSampleLocationsStateCreateInfoEXT;
+
+typedef struct VkPhysicalDeviceSampleLocationsPropertiesEXT {
+ VkStructureType sType;
+ void* pNext;
+ VkSampleCountFlags sampleLocationSampleCounts;
+ VkExtent2D maxSampleLocationGridSize;
+ float sampleLocationCoordinateRange[2];
+ uint32_t sampleLocationSubPixelBits;
+ VkBool32 variableSampleLocations;
+} VkPhysicalDeviceSampleLocationsPropertiesEXT;
+
+typedef struct VkMultisamplePropertiesEXT {
+ VkStructureType sType;
+ void* pNext;
+ VkExtent2D maxSampleLocationGridSize;
+} VkMultisamplePropertiesEXT;
+
+
+typedef void (VKAPI_PTR *PFN_vkCmdSetSampleLocationsEXT)(VkCommandBuffer commandBuffer, const VkSampleLocationsInfoEXT* pSampleLocationsInfo);
+typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceMultisamplePropertiesEXT)(VkPhysicalDevice physicalDevice, VkSampleCountFlagBits samples, VkMultisamplePropertiesEXT* pMultisampleProperties);
+
+#ifndef VK_NO_PROTOTYPES
+VKAPI_ATTR void VKAPI_CALL vkCmdSetSampleLocationsEXT(
+ VkCommandBuffer commandBuffer,
+ const VkSampleLocationsInfoEXT* pSampleLocationsInfo);
+
+VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceMultisamplePropertiesEXT(
+ VkPhysicalDevice physicalDevice,
+ VkSampleCountFlagBits samples,
+ VkMultisamplePropertiesEXT* pMultisampleProperties);
+#endif
+
#define VK_EXT_blend_operation_advanced 1
#define VK_EXT_BLEND_OPERATION_ADVANCED_SPEC_VERSION 2
#define VK_EXT_BLEND_OPERATION_ADVANCED_EXTENSION_NAME "VK_EXT_blend_operation_advanced"
@@ -6421,6 +6788,102 @@
#define VK_NV_FILL_RECTANGLE_EXTENSION_NAME "VK_NV_fill_rectangle"
+#define VK_EXT_post_depth_coverage 1
+#define VK_EXT_POST_DEPTH_COVERAGE_SPEC_VERSION 1
+#define VK_EXT_POST_DEPTH_COVERAGE_EXTENSION_NAME "VK_EXT_post_depth_coverage"
+
+
+#define VK_EXT_validation_cache 1
+VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkValidationCacheEXT)
+
+#define VK_EXT_VALIDATION_CACHE_SPEC_VERSION 1
+#define VK_EXT_VALIDATION_CACHE_EXTENSION_NAME "VK_EXT_validation_cache"
+
+
+typedef enum VkValidationCacheHeaderVersionEXT {
+ VK_VALIDATION_CACHE_HEADER_VERSION_ONE_EXT = 1,
+ VK_VALIDATION_CACHE_HEADER_VERSION_BEGIN_RANGE_EXT = VK_VALIDATION_CACHE_HEADER_VERSION_ONE_EXT,
+ VK_VALIDATION_CACHE_HEADER_VERSION_END_RANGE_EXT = VK_VALIDATION_CACHE_HEADER_VERSION_ONE_EXT,
+ VK_VALIDATION_CACHE_HEADER_VERSION_RANGE_SIZE_EXT = (VK_VALIDATION_CACHE_HEADER_VERSION_ONE_EXT - VK_VALIDATION_CACHE_HEADER_VERSION_ONE_EXT + 1),
+ VK_VALIDATION_CACHE_HEADER_VERSION_MAX_ENUM_EXT = 0x7FFFFFFF
+} VkValidationCacheHeaderVersionEXT;
+
+typedef VkFlags VkValidationCacheCreateFlagsEXT;
+
+typedef struct VkValidationCacheCreateInfoEXT {
+ VkStructureType sType;
+ const void* pNext;
+ VkValidationCacheCreateFlagsEXT flags;
+ size_t initialDataSize;
+ const void* pInitialData;
+} VkValidationCacheCreateInfoEXT;
+
+typedef struct VkShaderModuleValidationCacheCreateInfoEXT {
+ VkStructureType sType;
+ const void* pNext;
+ VkValidationCacheEXT validationCache;
+} VkShaderModuleValidationCacheCreateInfoEXT;
+
+
+typedef VkResult (VKAPI_PTR *PFN_vkCreateValidationCacheEXT)(VkDevice device, const VkValidationCacheCreateInfoEXT* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkValidationCacheEXT* pValidationCache);
+typedef void (VKAPI_PTR *PFN_vkDestroyValidationCacheEXT)(VkDevice device, VkValidationCacheEXT validationCache, const VkAllocationCallbacks* pAllocator);
+typedef VkResult (VKAPI_PTR *PFN_vkMergeValidationCachesEXT)(VkDevice device, VkValidationCacheEXT dstCache, uint32_t srcCacheCount, const VkValidationCacheEXT* pSrcCaches);
+typedef VkResult (VKAPI_PTR *PFN_vkGetValidationCacheDataEXT)(VkDevice device, VkValidationCacheEXT validationCache, size_t* pDataSize, void* pData);
+
+#ifndef VK_NO_PROTOTYPES
+VKAPI_ATTR VkResult VKAPI_CALL vkCreateValidationCacheEXT(
+ VkDevice device,
+ const VkValidationCacheCreateInfoEXT* pCreateInfo,
+ const VkAllocationCallbacks* pAllocator,
+ VkValidationCacheEXT* pValidationCache);
+
+VKAPI_ATTR void VKAPI_CALL vkDestroyValidationCacheEXT(
+ VkDevice device,
+ VkValidationCacheEXT validationCache,
+ const VkAllocationCallbacks* pAllocator);
+
+VKAPI_ATTR VkResult VKAPI_CALL vkMergeValidationCachesEXT(
+ VkDevice device,
+ VkValidationCacheEXT dstCache,
+ uint32_t srcCacheCount,
+ const VkValidationCacheEXT* pSrcCaches);
+
+VKAPI_ATTR VkResult VKAPI_CALL vkGetValidationCacheDataEXT(
+ VkDevice device,
+ VkValidationCacheEXT validationCache,
+ size_t* pDataSize,
+ void* pData);
+#endif
+
+#define VK_EXT_shader_viewport_index_layer 1
+#define VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_SPEC_VERSION 1
+#define VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME "VK_EXT_shader_viewport_index_layer"
+
+
+#define VK_EXT_global_priority 1
+#define VK_EXT_GLOBAL_PRIORITY_SPEC_VERSION 1
+#define VK_EXT_GLOBAL_PRIORITY_EXTENSION_NAME "VK_EXT_global_priority"
+
+
+typedef enum VkQueueGlobalPriorityEXT {
+ VK_QUEUE_GLOBAL_PRIORITY_LOW = 128,
+ VK_QUEUE_GLOBAL_PRIORITY_MEDIUM = 256,
+ VK_QUEUE_GLOBAL_PRIORITY_HIGH = 512,
+ VK_QUEUE_GLOBAL_PRIORITY_REALTIME = 1024,
+ VK_QUEUE_GLOBAL_PRIORITY_BEGIN_RANGE_EXT = VK_QUEUE_GLOBAL_PRIORITY_LOW,
+ VK_QUEUE_GLOBAL_PRIORITY_END_RANGE_EXT = VK_QUEUE_GLOBAL_PRIORITY_REALTIME,
+ VK_QUEUE_GLOBAL_PRIORITY_RANGE_SIZE_EXT = (VK_QUEUE_GLOBAL_PRIORITY_REALTIME - VK_QUEUE_GLOBAL_PRIORITY_LOW + 1),
+ VK_QUEUE_GLOBAL_PRIORITY_MAX_ENUM_EXT = 0x7FFFFFFF
+} VkQueueGlobalPriorityEXT;
+
+typedef struct VkDeviceQueueGlobalPriorityCreateInfoEXT {
+ VkStructureType sType;
+ const void* pNext;
+ VkQueueGlobalPriorityEXT globalPriority;
+} VkDeviceQueueGlobalPriorityCreateInfoEXT;
+
+
+
#ifdef __cplusplus
}
#endif
diff -Nru mesa-17.2.4/m4/ax_check_compile_flag.m4 mesa-17.3.3/m4/ax_check_compile_flag.m4
--- mesa-17.2.4/m4/ax_check_compile_flag.m4 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/m4/ax_check_compile_flag.m4 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,74 @@
+# ===========================================================================
+# http://www.gnu.org/software/autoconf-archive/ax_check_compile_flag.html
+# ===========================================================================
+#
+# SYNOPSIS
+#
+# AX_CHECK_COMPILE_FLAG(FLAG, [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS], [INPUT])
+#
+# DESCRIPTION
+#
+# Check whether the given FLAG works with the current language's compiler
+# or gives an error. (Warnings, however, are ignored)
+#
+# ACTION-SUCCESS/ACTION-FAILURE are shell commands to execute on
+# success/failure.
+#
+# If EXTRA-FLAGS is defined, it is added to the current language's default
+# flags (e.g. CFLAGS) when the check is done. The check is thus made with
+# the flags: "CFLAGS EXTRA-FLAGS FLAG". This can for example be used to
+# force the compiler to issue an error when a bad flag is given.
+#
+# INPUT gives an alternative input source to AC_COMPILE_IFELSE.
+#
+# NOTE: Implementation based on AX_CFLAGS_GCC_OPTION. Please keep this
+# macro in sync with AX_CHECK_{PREPROC,LINK}_FLAG.
+#
+# LICENSE
+#
+# Copyright (c) 2008 Guido U. Draheim
+# Copyright (c) 2011 Maarten Bosmans
+#
+# This program is free software: you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation, either version 3 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# As a special exception, the respective Autoconf Macro's copyright owner
+# gives unlimited permission to copy, distribute and modify the configure
+# scripts that are the output of Autoconf when processing the Macro. You
+# need not follow the terms of the GNU General Public License when using
+# or distributing such scripts, even though portions of the text of the
+# Macro appear in them. The GNU General Public License (GPL) does govern
+# all other use of the material that constitutes the Autoconf Macro.
+#
+# This special exception to the GPL applies to versions of the Autoconf
+# Macro released by the Autoconf Archive. When you make and distribute a
+# modified version of the Autoconf Macro, you may extend this special
+# exception to the GPL to apply to your modified version as well.
+
+#serial 3
+
+AC_DEFUN([AX_CHECK_COMPILE_FLAG],
+[AC_PREREQ(2.59)dnl for _AC_LANG_PREFIX
+AS_VAR_PUSHDEF([CACHEVAR],[ax_cv_check_[]_AC_LANG_ABBREV[]flags_$4_$1])dnl
+AC_CACHE_CHECK([whether _AC_LANG compiler accepts $1], CACHEVAR, [
+ ax_check_save_flags=$[]_AC_LANG_PREFIX[]FLAGS
+ _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $4 $1"
+ AC_COMPILE_IFELSE([m4_default([$5],[AC_LANG_PROGRAM()])],
+ [AS_VAR_SET(CACHEVAR,[yes])],
+ [AS_VAR_SET(CACHEVAR,[no])])
+ _AC_LANG_PREFIX[]FLAGS=$ax_check_save_flags])
+AS_IF([test x"AS_VAR_GET(CACHEVAR)" = xyes],
+ [m4_default([$2], :)],
+ [m4_default([$3], :)])
+AS_VAR_POPDEF([CACHEVAR])dnl
+])dnl AX_CHECK_COMPILE_FLAGS
diff -Nru mesa-17.2.4/Makefile.am mesa-17.3.3/Makefile.am
--- mesa-17.2.4/Makefile.am 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/Makefile.am 2018-01-18 21:30:28.000000000 +0000
@@ -54,9 +54,11 @@
common.py \
docs \
doxygen \
- bin/git_sha1_gen.sh \
+ bin/git_sha1_gen.py \
scons \
- SConstruct
+ SConstruct \
+ build-support/conftest.dyn \
+ build-support/conftest.map
noinst_HEADERS = \
include/c99_alloca.h \
diff -Nru mesa-17.2.4/Makefile.in mesa-17.3.3/Makefile.in
--- mesa-17.2.4/Makefile.in 2017-10-30 14:49:58.000000000 +0000
+++ mesa-17.3.3/Makefile.in 2018-01-18 21:30:38.000000000 +0000
@@ -112,7 +112,8 @@
target_triplet = @target@
subdir = .
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_gnu_make.m4 \
+am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_compile_flag.m4 \
+ $(top_srcdir)/m4/ax_check_gnu_make.m4 \
$(top_srcdir)/m4/ax_check_python_mako_module.m4 \
$(top_srcdir)/m4/ax_gcc_builtin.m4 \
$(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
@@ -379,9 +380,9 @@
NVVIEUX_LIBS = @NVVIEUX_LIBS@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
-OMX_CFLAGS = @OMX_CFLAGS@
-OMX_LIBS = @OMX_LIBS@
-OMX_LIB_INSTALL_DIR = @OMX_LIB_INSTALL_DIR@
+OMX_BELLAGIO_CFLAGS = @OMX_BELLAGIO_CFLAGS@
+OMX_BELLAGIO_LIBS = @OMX_BELLAGIO_LIBS@
+OMX_BELLAGIO_LIB_INSTALL_DIR = @OMX_BELLAGIO_LIB_INSTALL_DIR@
OPENCL_LIBNAME = @OPENCL_LIBNAME@
OPENCL_VERSION = @OPENCL_VERSION@
OSMESA_LIB = @OSMESA_LIB@
@@ -435,6 +436,8 @@
VA_LIB_INSTALL_DIR = @VA_LIB_INSTALL_DIR@
VA_MAJOR = @VA_MAJOR@
VA_MINOR = @VA_MINOR@
+VC5_SIMULATOR_CFLAGS = @VC5_SIMULATOR_CFLAGS@
+VC5_SIMULATOR_LIBS = @VC5_SIMULATOR_LIBS@
VDPAU_CFLAGS = @VDPAU_CFLAGS@
VDPAU_LIBS = @VDPAU_LIBS@
VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@
@@ -446,12 +449,15 @@
VL_CFLAGS = @VL_CFLAGS@
VL_LIBS = @VL_LIBS@
VULKAN_ICD_INSTALL_DIR = @VULKAN_ICD_INSTALL_DIR@
-WAYLAND_CFLAGS = @WAYLAND_CFLAGS@
-WAYLAND_LIBS = @WAYLAND_LIBS@
+WAYLAND_CLIENT_CFLAGS = @WAYLAND_CLIENT_CFLAGS@
+WAYLAND_CLIENT_LIBS = @WAYLAND_CLIENT_LIBS@
WAYLAND_PROTOCOLS_DATADIR = @WAYLAND_PROTOCOLS_DATADIR@
WAYLAND_SCANNER = @WAYLAND_SCANNER@
WAYLAND_SCANNER_CFLAGS = @WAYLAND_SCANNER_CFLAGS@
WAYLAND_SCANNER_LIBS = @WAYLAND_SCANNER_LIBS@
+WAYLAND_SERVER_CFLAGS = @WAYLAND_SERVER_CFLAGS@
+WAYLAND_SERVER_LIBS = @WAYLAND_SERVER_LIBS@
+WNO_OVERRIDE_INIT = @WNO_OVERRIDE_INIT@
X11_INCLUDES = @X11_INCLUDES@
XA_MAJOR = @XA_MAJOR@
XA_MINOR = @XA_MINOR@
@@ -568,9 +574,11 @@
common.py \
docs \
doxygen \
- bin/git_sha1_gen.sh \
+ bin/git_sha1_gen.py \
scons \
- SConstruct
+ SConstruct \
+ build-support/conftest.dyn \
+ build-support/conftest.map
noinst_HEADERS = \
include/c99_alloca.h \
diff -Nru mesa-17.2.4/meson.build mesa-17.3.3/meson.build
--- mesa-17.2.4/meson.build 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/meson.build 2018-02-01 16:17:32.000000000 +0000
@@ -0,0 +1,815 @@
+# Copyright © 2017 Intel Corporation
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+project('mesa', ['c', 'cpp'], version : '17.3.0-devel', license : 'MIT',
+ default_options : ['c_std=c99', 'cpp_std=c++11'])
+
+error('The meson build is unsupported for building mesa 17.3.x releases.')
+
+# Arguments for the preprocessor, put these in a separate array from the C and
+# C++ (cpp in meson terminology) arguments since they need to be added to the
+# default arguments for both C and C++.
+pre_args = [
+ '-D__STDC_CONSTANT_MACROS',
+ '-D__STDC_FORMAT_MACROS',
+ '-D__STDC_LIMIT_MACROS',
+ '-DVERSION="@0@"'.format(meson.project_version()),
+ '-DPACKAGE_VERSION=VERSION',
+ '-DPACKAGE_BUGREPORT="https://bugs.freedesktop.org/enter_bug.cgi?product=Mesa"',
+ '-D_GNU_SOURCE',
+]
+
+with_vulkan_icd_dir = get_option('vulkan-icd-dir')
+with_tests = get_option('build-tests')
+with_valgrind = get_option('valgrind')
+with_asm = get_option('asm')
+with_llvm = get_option('llvm')
+if get_option('texture-float')
+ pre_args += '-DTEXTURE_FLOAT_ENABLED'
+ message('WARNING: Floating-point texture enabled. Please consult docs/patents.txt and your lawyer before building mesa.')
+endif
+
+# XXX: yeah, do these
+with_appledri = false
+with_windowsdri = false
+
+dri_drivers_path = get_option('dri-drivers-path')
+if dri_drivers_path == ''
+ dri_drivers_path = join_paths(get_option('libdir'), 'dri')
+endif
+
+with_gles1 = get_option('gles1')
+with_gles2 = get_option('gles2')
+with_opengl = get_option('opengl')
+with_any_opengl = with_opengl or with_gles1 or with_gles2
+# Only build shared_glapi if at least one OpenGL API is enabled
+with_shared_glapi = get_option('shared-glapi') and with_any_opengl
+
+# TODO: these will need options, but at the moment they just control header
+# installs
+with_osmesa = false
+
+# shared-glapi is required if at least two OpenGL APIs are being built
+if not with_shared_glapi
+ if ((with_gles1 and with_gles2) or (with_gles1 and with_opengl)
+ or (with_gles2 and with_opengl))
+ error('shared-glapi required for building two or more of OpenGL, OpenGL ES 1.x, OpenGL ES 2.x')
+ endif
+endif
+
+# We require OpenGL for OpenGL ES
+if (with_gles1 or with_gles2) and not with_opengl
+ error('building OpenGL ES without OpenGL is not supported.')
+endif
+
+with_dri = false
+with_dri_i915 = false
+with_dri_i965 = false
+with_dri_swrast = false
+_drivers = get_option('dri-drivers')
+if _drivers != ''
+ _split = _drivers.split(',')
+ with_dri_i915 = _split.contains('i915')
+ with_dri_i965 = _split.contains('i965')
+ with_dri_swrast = _split.contains('swrast')
+ with_dri = true
+endif
+
+with_gallium = false
+with_gallium_pl111 = false
+with_gallium_radeonsi = false
+with_gallium_nouveau = false
+with_gallium_softpipe = false
+with_gallium_vc4 = false
+with_gallium_vc5 = false
+_drivers = get_option('gallium-drivers')
+if _drivers != ''
+ _split = _drivers.split(',')
+ with_gallium_pl111 = _split.contains('pl111')
+ with_gallium_radeonsi = _split.contains('radeonsi')
+ with_gallium_nouveau = _split.contains('nouveau')
+ with_gallium_softpipe = _split.contains('swrast')
+ with_gallium_vc4 = _split.contains('vc4')
+ with_gallium_vc5 = _split.contains('vc5')
+ with_gallium = true
+ with_dri = true
+endif
+
+if not (with_dri or with_gallium)
+ with_gles1 = false
+ with_gles2 = false
+ with_opengl = false
+ with_any_opengl = false
+ with_shared_glapi = false
+endif
+
+if with_dri_swrast and with_gallium_softpipe
+ error('Only one swrast provider can be built')
+endif
+
+dep_libdrm_intel = []
+if with_dri_i915
+ dep_libdrm_intel = dependency('libdrm_intel', version : '>= 2.4.75')
+endif
+
+# TODO: other OSes
+with_dri_platform = 'drm'
+
+# TODO: android platform
+with_platform_wayland = false
+with_platform_x11 = false
+with_platform_drm = false
+with_platform_surfaceless = false
+egl_native_platform = ''
+_platforms = get_option('platforms')
+if _platforms != ''
+ _split = _platforms.split(',')
+ with_platform_x11 = _split.contains('x11')
+ with_platform_wayland = _split.contains('wayland')
+ with_platform_drm = _split.contains('drm')
+ with_platform_surfaceless = _split.contains('surfaceless')
+ egl_native_platform = _split[0]
+endif
+
+with_gbm = get_option('gbm')
+if with_gbm == 'auto' and with_dri # TODO: or gallium
+ with_gbm = host_machine.system() == 'linux'
+elif with_gbm == 'yes'
+ if not ['linux', 'bsd'].contains(host_machine.system())
+ error('GBM only supports unix-like platforms')
+ endif
+ with_gbm = true
+else
+ with_gbm = false
+endif
+
+_egl = get_option('egl')
+if _egl == 'auto'
+ with_egl = with_dri and with_shared_glapi and egl_native_platform != ''
+elif _egl == 'yes'
+ if not with_dri
+ error('EGL requires dri')
+ elif not with_shared_glapi
+ error('EGL requires shared-glapi')
+ elif egl_native_platform == ''
+ error('No platforms specified, consider -Dplatforms=drm,x11 at least')
+ endif
+ with_egl = true
+else
+ with_egl = false
+endif
+
+# TODO: or virgl
+if with_egl and with_gallium_radeonsi and not (with_platform_drm or with_platform_surfaceless)
+ error('RadeonSI requires drm or surfaceless platform when using EGL')
+endif
+
+pre_args += '-DGLX_USE_TLS'
+with_glx = get_option('glx')
+if with_glx != 'disabled'
+ if not (with_platform_x11 and with_any_opengl)
+ if with_glx == 'auto'
+ with_glx = 'disabled'
+ else
+ error('Cannot build GLX support without X11 platform support and at least one OpenGL API')
+ endif
+ elif with_glx == 'gallium-xlib'
+ if not with_gallium
+ error('Gallium-xlib based GLX requires at least one gallium driver')
+ elif with_dri
+ error('gallium-xlib conflicts with any dri driver')
+ endif
+ elif with_glx == 'dri' and not with_dri
+ error('dri based GLX requires at least one DRI driver')
+ elif with_glx == 'auto'
+ if with_dri
+ with_glx = 'dri'
+ elif with_gallium
+ with_glx = 'gallium-xlib'
+ elif with_platform_x11 and with_any_opengl
+ with_glx = 'xlib'
+ else
+ with_glx = 'disabled'
+ endif
+ endif
+endif
+
+with_glvnd = get_option('glvnd')
+if with_glvnd and with_glx != 'dri'
+ message('glvnd requires dri based glx')
+endif
+
+# TODO: toggle for this
+with_glx_direct = true
+
+if with_vulkan_icd_dir == ''
+ with_vulkan_icd_dir = join_paths(get_option('datadir'), 'vulkan/icd.d')
+endif
+
+with_intel_vk = false
+with_amd_vk = false
+with_any_vk = false
+_vulkan_drivers = get_option('vulkan-drivers')
+if _vulkan_drivers != ''
+ _split = _vulkan_drivers.split(',')
+ with_intel_vk = _split.contains('intel')
+ with_amd_vk = _split.contains('amd')
+ with_any_vk = with_amd_vk or with_intel_vk
+ if not (with_platform_x11 or with_platform_wayland)
+ error('Vulkan requires at least one platform (x11, wayland)')
+ endif
+endif
+
+with_dri2 = (with_dri or with_any_vk) and with_dri_platform == 'drm'
+with_dri3 = get_option('dri3')
+if with_dri3 == 'auto'
+ if host_machine.system() == 'linux' and with_dri2
+ with_dri3 = true
+ else
+ with_dri3 = false
+ endif
+elif with_dri3 == 'yes'
+ with_dri3 = true
+else
+ with_dri3 = false
+endif
+
+if with_any_vk and (with_platform_x11 and not with_dri3)
+ error('Vulkan drivers require dri3 for X11 support')
+endif
+if with_dri or with_gallium
+ if with_glx == 'disabled' and not with_egl
+ error('building dri or gallium drivers require at least one window system')
+ endif
+endif
+
+with_gallium_xvmc = false
+with_gallium_vdpau = false
+with_gallium_omx = false # this is bellagio
+with_gallium_va = false
+with_gallium_media = false
+dep_va = []
+_drivers = get_option('gallium-media')
+if _drivers != ''
+ _split = _drivers.split(',')
+ with_gallium_xvmc = _split.contains('xvmc')
+ with_gallium_vdpau = _split.contains('vdpau')
+ with_gallium_omx = _split.contains('omx')
+ with_gallium_va = _split.contains('va')
+ with_gallium_media = (with_gallium_xvmc or with_gallium_vdpau or
+ with_gallium_omx or with_gallium_va)
+endif
+
+gl_pkgconfig_c_flags = []
+if with_platform_x11
+ if with_any_vk or (with_glx == 'dri' and with_dri_platform == 'drm')
+ pre_args += '-DHAVE_X11_PLATFORM'
+ endif
+ if with_glx == 'xlib'
+ # TODO
+ error('TODO')
+ elif with_glx == 'gallium-xlib'
+ # TODO
+ error('TODO')
+ else
+ pre_args += '-DGLX_INDIRECT_RENDERING'
+ if with_glx_direct
+ pre_args += '-DGLX_DIRECT_RENDERING'
+ endif
+ if with_dri_platform == 'drm'
+ pre_args += '-DGLX_USE_DRM'
+ endif
+ endif
+else
+ pre_args += '-DMESA_EGL_NO_X11_HEADERS'
+ gl_pkgconfig_c_flags += '-DMESA_EGL_NO_X11_HEADERS'
+endif
+if with_platform_drm
+ if with_egl and not with_gbm
+ error('EGL drm platform requires gbm')
+ endif
+ pre_args += '-DHAVE_DRM_PLATFORM'
+endif
+if with_platform_surfaceless
+ pre_args += '-DHAVE_SURFACELESS_PLATFORM'
+endif
+
+prog_python2 = find_program('python2')
+has_mako = run_command(prog_python2, '-c', 'import mako')
+if has_mako.returncode() != 0
+ error('Python (2.x) mako module required to build mesa.')
+endif
+
+cc = meson.get_compiler('c')
+if cc.get_id() == 'gcc' and cc.version().version_compare('< 4.4.6')
+ error('When using GCC, version 4.4.6 or later is required.')
+endif
+
+# Define DEBUG for debug and debugoptimized builds
+if get_option('buildtype').startswith('debug')
+ pre_args += '-DDEBUG'
+endif
+
+if get_option('shader-cache')
+ pre_args += '-DENABLE_SHADER_CACHE'
+elif with_amd_vk
+ error('Radv requires shader cache support')
+endif
+
+# Check for GCC style builtins
+foreach b : ['bswap32', 'bswap64', 'clz', 'clzll', 'ctz', 'expect', 'ffs',
+ 'ffsll', 'popcount', 'popcountll', 'unreachable']
+ if cc.has_function(b)
+ pre_args += '-DHAVE___BUILTIN_@0@'.format(b.to_upper())
+ endif
+endforeach
+
+# Check for GCC __attribute__ support; each hit defines HAVE_FUNC_ATTRIBUTE_<NAME>
+foreach a : ['const', 'flatten', 'malloc', 'pure', 'unused',
+ 'warn_unused_result', 'weak',]
+ if cc.compiles('int foo(void) __attribute__((@0@));'.format(a),
+ name : '__attribute__((@0@))'.format(a))
+ pre_args += '-DHAVE_FUNC_ATTRIBUTE_@0@'.format(a.to_upper())
+ endif
+endforeach
+if cc.compiles('int foo(const char *p, ...) __attribute__((format(printf, 1, 2)));',
+ name : '__attribute__((format(...)))')
+ pre_args += '-DHAVE_FUNC_ATTRIBUTE_FORMAT'
+endif
+if cc.compiles('struct __attribute__((packed)) foo { int bar; };',
+ name : '__attribute__((packed))')
+ pre_args += '-DHAVE_FUNC_ATTRIBUTE_PACKED'
+endif
+if cc.compiles('int *foo(void) __attribute__((returns_nonnull));',
+ name : '__attribute__((returns_nonnull))')
+ pre_args += '-DHAVE_FUNC_ATTRIBUTE_RETURNS_NONNULL' # named after the attribute probed, not 'nonnull'
+endif
+if cc.compiles('''int foo_def(void) __attribute__((visibility("default")));
+ int foo_hid(void) __attribute__((visibility("hidden")));
+ int foo_int(void) __attribute__((visibility("internal")));
+ int foo_pro(void) __attribute__((visibility("protected")));''',
+ name : '__attribute__((visibility(...)))')
+ pre_args += '-DHAVE_FUNC_ATTRIBUTE_VISIBILITY'
+endif
+if cc.compiles('int foo(void) { return 0; } int bar(void) __attribute__((alias("foo")));',
+ name : '__attribute__((alias(...)))')
+ pre_args += '-DHAVE_FUNC_ATTRIBUTE_ALIAS'
+endif
+
+# TODO: this is very incomplete
+if host_machine.system() == 'linux'
+ pre_args += '-D_GNU_SOURCE'
+endif
+
+# Check for generic C arguments
+c_args = []
+foreach a : ['-Wall', '-Werror=implicit-function-declaration',
+ '-Werror=missing-prototypes', '-fno-math-errno',
+ '-fno-trapping-math', '-Qunused-arguments']
+ if cc.has_argument(a)
+ c_args += a
+ endif
+endforeach
+c_vis_args = []
+if cc.has_argument('-fvisibility=hidden')
+ c_vis_args += '-fvisibility=hidden'
+endif
+
+# Check for generic C++ arguments
+cpp = meson.get_compiler('cpp')
+cpp_args = []
+foreach a : ['-Wall', '-fno-math-errno', '-fno-trapping-math',
+ '-Qunused-arguments', '-Wno-non-virtual-dtor']
+ if cpp.has_argument(a)
+ cpp_args += a
+ endif
+endforeach
+cpp_vis_args = []
+if cpp.has_argument('-fvisibility=hidden')
+ cpp_vis_args += '-fvisibility=hidden'
+endif
+
+# Check for C and C++ arguments for MSVC2013 compatibility. These are only used
+# in parts of the mesa code base that need to compile with old versions of
+# MSVC, mainly common code
+c_msvc_compat_args = []
+cpp_msvc_compat_args = []
+foreach a : ['-Werror=pointer-arith', '-Werror=vla']
+ if cc.has_argument(a)
+ c_msvc_compat_args += a
+ endif
+ if cpp.has_argument(a)
+ cpp_msvc_compat_args += a
+ endif
+endforeach
+
+no_override_init_args = []
+foreach a : ['-Wno-override-init', '-Wno-initializer-overrides']
+ if cc.has_argument(a)
+ no_override_init_args += a
+ endif
+endforeach
+
+# TODO: SSE41 (which is only required for core mesa)
+
+# Check for GCC style atomics
+if cc.compiles('int main() { int n; return __atomic_load_n(&n, __ATOMIC_ACQUIRE); }',
+ name : 'GCC atomic builtins')
+ pre_args += '-DUSE_GCC_ATOMIC_BUILTINS'
+endif
+if not cc.links('''#include <stdint.h>
+ uint64_t v;
+ int main() {
+ return __sync_add_and_fetch(&v, (uint64_t)1);
+ }''',
+ name : 'GCC 64bit atomics')
+ pre_args += '-DMISSING_64_BIT_ATOMICS' # set only when the 64-bit __sync probe fails to link
+endif
+
+# TODO: endian
+# TODO: power8
+# TODO: shared/static? Is this even worth doing?
+
+# I don't think that I need to set any of the debug stuff, I think meson
+# handles that for us
+
+# TODO: ldflags
+
+# TODO: texture-float (gallium/mesa only)
+
+# TODO: cross-compiling. I don't think this is relevant to meson
+
+# FIXME: enable asm when cross compiler
+# This is doable (autotools does it), but it's not of immediate concern
+if meson.is_cross_build()
+ message('Cross compiling, disabling asm')
+ with_asm = false
+endif
+
+with_asm_arch = ''
+if with_asm
+ # TODO: SPARC and PPC
+ if host_machine.cpu_family() == 'x86'
+ if ['linux', 'bsd'].contains(host_machine.system()) # FIXME: hurd?
+ with_asm_arch = 'x86'
+ pre_args += ['-DUSE_X86_ASM', '-DUSE_MMX_ASM', '-DUSE_3DNOW_ASM',
+ '-DUSE_SSE_ASM']
+ endif
+ elif host_machine.cpu_family() == 'x86_64'
+ if host_machine.system() == 'linux'
+ with_asm_arch = 'x86_64'
+ pre_args += ['-DUSE_X86_64_ASM']
+ endif
+ elif host_machine.cpu_family() == 'arm'
+ if host_machine.system() == 'linux'
+ with_asm_arch = 'arm'
+ pre_args += ['-DUSE_ARM_ASM']
+ endif
+ elif host_machine.cpu_family() == 'aarch64'
+ if host_machine.system() == 'linux'
+ with_asm_arch = 'aarch64'
+ pre_args += ['-DUSE_AARCH64_ASM']
+ endif
+ endif
+endif
+
+# Check for standard headers and functions
+if cc.has_header_symbol('sys/sysmacros.h', 'major')
+ pre_args += '-DMAJOR_IN_SYSMACROS'
+elif cc.has_header_symbol('sys/mkdev.h', 'major')
+ pre_args += '-DMAJOR_IN_MKDEV'
+endif
+
+foreach h : ['xlocale.h', 'sys/sysctl.h']
+ if cc.has_header(h)
+ pre_args += '-DHAVE_@0@'.format(h.to_upper().underscorify())
+ endif
+endforeach
+
+foreach f : ['strtof', 'mkostemp', 'posix_memalign']
+ if cc.has_function(f)
+ pre_args += '-DHAVE_@0@'.format(f.to_upper())
+ endif
+endforeach
+
+# strtod locale support (pre_args is passed so HAVE_XLOCALE_H from the header check applies)
+if cc.links('''
+ #define _GNU_SOURCE
+ #include <stdlib.h>
+ #include <locale.h>
+ #ifdef HAVE_XLOCALE_H
+ #include <xlocale.h>
+ #endif
+ int main() {
+ locale_t loc = newlocale(LC_CTYPE_MASK, "C", NULL);
+ const char *s = "1.0";
+ char *end;
+ double d = strtod_l(s, &end, loc);
+ float f = strtod_l(s, &end, loc);
+ freelocale(loc);
+ return 0;
+ }''',
+ args : pre_args,
+ name : 'strtod has locale support')
+ pre_args += '-DHAVE_STRTOD_L'
+endif
+
+# Check for some linker flags
+ld_args_bsymbolic = []
+if cc.links('int main() { return 0; }', args : '-Wl,-Bsymbolic', name : 'Bsymbolic')
+ ld_args_bsymbolic += '-Wl,-Bsymbolic'
+endif
+ld_args_gc_sections = []
+if cc.links('static char unused() { return 5; } int main() { return 0; }',
+ args : '-Wl,--gc-sections', name : 'gc-sections')
+ ld_args_gc_sections += '-Wl,--gc-sections'
+endif
+with_ld_version_script = false
+if cc.links('int main() { return 0; }',
+ args : '-Wl,--version-script=@0@'.format(
+ join_paths(meson.source_root(), 'build-support/conftest.map')),
+ name : 'version-script')
+ with_ld_version_script = true
+endif
+with_ld_dynamic_list = false
+if cc.links('int main() { return 0; }',
+ args : '-Wl,--dynamic-list=@0@'.format(
+ join_paths(meson.source_root(), 'build-support/conftest.dyn')),
+ name : 'dynamic-list')
+ with_ld_dynamic_list = true
+endif
+
+# check for dl support
+if cc.has_function('dlopen')
+ dep_dl = []
+else
+ dep_dl = cc.find_library('dl')
+endif
+if cc.has_function('dladdr', dependencies : dep_dl)
+ # This is really only required for megadrivers
+ pre_args += '-DHAVE_DLADDR'
+endif
+
+if cc.has_function('dl_iterate_phdr')
+ pre_args += '-DHAVE_DL_ITERATE_PHDR'
+else
+ # TODO: this is required for vulkan
+endif
+
+# Determine whether or not the rt library is needed for time functions
+if cc.has_function('clock_gettime')
+ dep_clock = []
+else
+ dep_clock = cc.find_library('rt')
+endif
+
+with_gallium_drisw_kms = false
+dep_libdrm = dependency('libdrm', version : '>= 2.4.75',
+ required : with_dri2 or with_dri3)
+if dep_libdrm.found()
+ pre_args += '-DHAVE_LIBDRM'
+ if with_dri_platform == 'drm' and with_dri
+ with_gallium_drisw_kms = true
+ endif
+endif
+
+# TODO: some of these may be conditional
+dep_zlib = dependency('zlib', version : '>= 1.2.3')
+dep_thread = dependency('threads')
+if dep_thread.found() and host_machine.system() == 'linux'
+ pre_args += '-DHAVE_PTHREAD'
+endif
+dep_elf = dependency('libelf', required : false)
+if not dep_elf.found() and (with_amd_vk or with_gallium_radeonsi) # TODO: clover, r600
+ dep_elf = cc.find_library('elf')
+endif
+dep_expat = dependency('expat')
+# this only exists on linux so either this is linux and it will be found, or
+# it's not linux and it won't
+dep_m = cc.find_library('m', required : false)
+
+dep_libdrm_amdgpu = []
+dep_libdrm_radeon = []
+dep_libdrm_nouveau = []
+if with_amd_vk or with_gallium_radeonsi
+ dep_libdrm_amdgpu = dependency('libdrm_amdgpu', version : '>= 2.4.85')
+endif
+if with_gallium_radeonsi # older radeon too
+ dep_libdrm_radeon = dependency('libdrm_radeon', version : '>= 2.4.71')
+endif
+if with_gallium_nouveau
+ dep_libdrm_nouveau = dependency('libdrm_nouveau', version : '>= 2.4.66')
+endif
+
+llvm_modules = ['bitwriter', 'engine', 'mcdisassembler', 'mcjit']
+if with_amd_vk
+ llvm_modules += ['amdgpu', 'bitreader', 'ipo']
+endif
+dep_llvm = dependency(
+ 'llvm', version : '>= 3.9.0', required : with_amd_vk, modules : llvm_modules,
+)
+if with_llvm
+ if dep_llvm.found()
+ _llvm_version = dep_llvm.version().split('.')
+ # Development versions of LLVM have an 'svn' suffix, we don't want that for
+ # our version checks.
+ _llvm_patch = _llvm_version[2]
+ if _llvm_patch.endswith('svn')
+ _llvm_patch = _llvm_patch.split('s')[0]
+ endif
+ pre_args += [
+ '-DHAVE_LLVM=0x0@0@@1@@2@'.format(_llvm_version[0], _llvm_version[1], _llvm_patch),
+ '-DMESA_LLVM_VERSION_PATCH=@0@'.format(_llvm_patch),
+ ]
+ else
+ if with_gallium_softpipe
+ error('Cannot find LLVM to build LLVMPipe. If you wanted softpipe pass -Dllvm=false to meson')
+ elif with_amd_vk or with_gallium_radeonsi # etc
+ error('The following drivers requires LLVM: Radv, RadeonSI. One of these is enabled, but LLVM was not found.')
+ endif
+ endif
+elif with_amd_vk or with_gallium_radeonsi
+ error('The following drivers requires LLVM: Radv, RadeonSI. One of these is enabled, but LLVM is disabled.')
+endif
+
+dep_glvnd = []
+if with_glvnd
+ dep_glvnd = dependency('libglvnd', version : '>= 0.2.0')
+ pre_args += '-DUSE_LIBGLVND=1'
+endif
+
+# TODO: make this conditional
+dep_valgrind = dependency('valgrind', required : false)
+if dep_valgrind.found() and with_valgrind
+ pre_args += '-DHAVE_VALGRIND'
+endif
+
+# pthread stubs. Lets not and say we didn't
+
+prog_bison = find_program('bison', required : with_any_opengl)
+prog_flex = find_program('flex', required : with_any_opengl)
+
+# TODO: selinux
+dep_selinux = []
+
+# TODO: llvm-prefix and llvm-shared-libs
+
+dep_unwind = dependency('libunwind', required : false)
+if dep_unwind.found()
+ pre_args += '-DHAVE_LIBUNWIND'
+endif
+
+# TODO: flags for opengl, gles, dri
+
+# TODO: gallium-hud
+
+# TODO: glx provider
+
+# TODO: osmesa provider
+
+# TODO: symbol mangling
+
+if with_platform_wayland
+ prog_wl_scanner = find_program('wayland-scanner')
+ dep_wl_protocols = dependency('wayland-protocols', version : '>= 1.8')
+ dep_wayland_client = dependency('wayland-client', version : '>=1.11')
+ dep_wayland_server = dependency('wayland-server', version : '>=1.11')
+ wayland_dmabuf_xml = join_paths(
+ dep_wl_protocols.get_pkgconfig_variable('pkgdatadir'), 'unstable',
+ 'linux-dmabuf', 'linux-dmabuf-unstable-v1.xml'
+ )
+ pre_args += ['-DHAVE_WAYLAND_PLATFORM', '-DWL_HIDE_DEPRECATED']
+else
+ prog_wl_scanner = []
+ dep_wl_protocols = []
+ dep_wayland_client = []
+ dep_wayland_server = []
+ wayland_dmabuf_xml = ''
+endif
+
+dep_x11 = []
+dep_xext = []
+dep_xdamage = []
+dep_xfixes = []
+dep_x11_xcb = []
+dep_xcb_glx = []
+dep_xcb_dri2 = []
+dep_xcb_dri3 = []
+dep_dri2proto = []
+dep_glproto = []
+dep_xf86vm = []
+dep_xcb = [] # default for the conditional dependency('xcb') lookup below
+dep_xcb_present = []
+dep_xcb_sync = []
+dep_xcb_xfixes = []
+dep_xshmfence = []
+if with_platform_x11
+ if with_glx == 'dri' and with_dri_platform == 'drm'
+ dep_x11 = dependency('x11')
+ dep_xext = dependency('xext')
+ dep_xdamage = dependency('xdamage', version : '>= 1.1')
+ dep_xfixes = dependency('xfixes')
+ dep_xcb_glx = dependency('xcb-glx', version : '>= 1.8.1')
+ dep_xf86vm = dependency('xxf86vm', required : false)
+ endif
+ if with_any_vk or (with_glx == 'dri' and with_dri_platform == 'drm')
+ dep_xcb = dependency('xcb')
+ dep_x11_xcb = dependency('x11-xcb')
+ dep_xcb_dri2 = dependency('xcb-dri2', version : '>= 1.8')
+
+ if with_dri3
+ pre_args += '-DHAVE_DRI3'
+ dep_xcb_dri3 = dependency('xcb-dri3')
+ dep_xcb_present = dependency('xcb-present')
+ dep_xcb_sync = dependency('xcb-sync')
+ dep_xshmfence = dependency('xshmfence', version : '>= 1.1')
+ endif
+ endif
+ if with_glx != 'disabled'
+ dep_dri2proto = dependency('dri2proto', version : '>= 2.8')
+ dep_glproto = dependency('glproto', version : '>= 1.4.14')
+ endif
+ if with_egl
+ dep_xcb_xfixes = dependency('xcb-xfixes')
+ endif
+endif
+
+# TODO: osmesa
+
+# TODO: vallium G3DVL
+
+# TODO: nine
+
+# TODO: clover
+
+# TODO: gallium tests
+
+# TODO: various libdirs
+
+# TODO: swr
+
+# TODO: gallium driver dirs
+
+# FIXME: this is a workaround for #2326
+prog_touch = find_program('touch')
+dummy_cpp = custom_target(
+ 'dummy_cpp',
+ output : 'dummy.cpp',
+ command : [prog_touch, '@OUTPUT@'],
+)
+
+foreach a : pre_args
+ add_project_arguments(a, language : ['c', 'cpp'])
+endforeach
+foreach a : c_args
+ add_project_arguments(a, language : ['c'])
+endforeach
+foreach a : cpp_args
+ add_project_arguments(a, language : ['cpp'])
+endforeach
+
+inc_include = include_directories('include')
+
+gl_priv_reqs = [
+ 'x11', 'xext', 'xdamage >= 1.1', 'xfixes', 'x11-xcb', 'xcb',
+ 'xcb-glx >= 1.8.1', 'libdrm >= 2.4.75',
+]
+if dep_xf86vm != [] and dep_xf86vm.found()
+ gl_priv_reqs += 'xxf86vm' # same pkg-config name as the dependency('xxf86vm') lookup
+endif
+if with_dri_platform == 'drm'
+ gl_priv_reqs += 'xcb-dri2 >= 1.8'
+endif
+
+gl_priv_libs = []
+if dep_thread.found()
+ gl_priv_libs += ['-lpthread', '-pthread']
+endif
+if dep_m.found()
+ gl_priv_libs += '-lm'
+endif
+if dep_dl != [] and dep_dl.found() # dep_dl is [] when dlopen comes from libc
+ gl_priv_libs += '-ldl'
+endif
+
+pkg = import('pkgconfig')
+
+subdir('include')
+subdir('src')
diff -Nru mesa-17.2.4/meson_options.txt mesa-17.3.3/meson_options.txt
--- mesa-17.2.4/meson_options.txt 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/meson_options.txt 2018-02-01 16:17:32.000000000 +0000
@@ -0,0 +1,156 @@
+# Copyright © 2017 Intel Corporation
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+option(
+ 'platforms',
+ type : 'string',
+ value : 'x11,wayland,drm,surfaceless',
+ description : 'comma separated list of window systems to support. wayland, x11, surfaceless, drm, etc.'
+)
+option(
+ 'dri3',
+ type : 'combo',
+ value : 'auto',
+ choices : ['auto', 'yes', 'no'],
+ description : 'enable support for dri3'
+)
+option(
+ 'dri-drivers',
+ type : 'string',
+ value : 'i915,i965',
+ description : 'comma separated list of dri drivers to build.'
+)
+option(
+ 'dri-drivers-path',
+ type : 'string',
+ value : '',
+ description : 'Location of dri drivers. Default: $libdir/dri.'
+)
+option(
+ 'gallium-drivers',
+ type : 'string',
+ value : 'pl111,radeonsi,nouveau,swrast,vc4',
+ description : 'comma separated list of gallium drivers to build.'
+)
+option(
+ 'gallium-media',
+ type : 'string',
+ value : '',
+ description : 'comma separated list of gallium media APIs to build (omx,va,vdpau,xvmc).'
+)
+option(
+ 'vulkan-drivers',
+ type : 'string',
+ value : 'intel,amd',
+ description : 'comma separated list of vulkan drivers to build.'
+)
+option(
+ 'shader-cache',
+ type : 'boolean',
+ value : true,
+ description : 'Build with on-disk shader cache support'
+)
+option(
+ 'vulkan-icd-dir',
+ type : 'string',
+ value : '',
+ description : 'Location relative to prefix to put vulkan icds on install. Default: $datadir/vulkan/icd.d'
+)
+option(
+ 'shared-glapi',
+ type : 'boolean',
+ value : true,
+ description : 'Whether to build a shared or static glapi'
+)
+option(
+ 'gles1',
+ type : 'boolean',
+ value : true,
+ description : 'Build support for OpenGL ES 1.x'
+)
+option(
+ 'gles2',
+ type : 'boolean',
+ value : true,
+ description : 'Build support for OpenGL ES 2.x and 3.x'
+)
+option(
+ 'opengl',
+ type : 'boolean',
+ value : true,
+ description : 'Build support for OpenGL (all versions)'
+)
+option(
+ 'gbm',
+ type : 'combo',
+ value : 'auto',
+ choices : ['auto', 'yes', 'no'],
+ description : 'Build support for gbm platform'
+)
+option(
+ 'glx',
+ type : 'combo',
+ value : 'auto',
+ choices : ['auto', 'disabled', 'dri', 'xlib', 'gallium-xlib'],
+ description : 'Build support for GLX platform'
+)
+option(
+ 'egl',
+ type : 'combo',
+ value : 'auto',
+ choices : ['auto', 'yes', 'no'],
+ description : 'Build support for EGL platform'
+)
+option(
+ 'glvnd',
+ type : 'boolean',
+ value : false,
+ description : 'Enable GLVND support.'
+)
+option(
+ 'asm',
+ type : 'boolean',
+ value : true,
+ description : 'Build assembly code if possible'
+)
+option(
+ 'llvm',
+ type : 'boolean',
+ value : true,
+ description : 'Build with LLVM support.'
+)
+option(
+ 'valgrind',
+ type : 'boolean',
+ value : true,
+ description : 'Build with valgrind support if possible'
+)
+option(
+ 'build-tests',
+ type : 'boolean',
+ value : false,
+ description : 'Build unit tests. Currently this will build *all* unit tests, which may build more than expected.'
+)
+option(
+ 'texture-float',
+ type : 'boolean',
+ value : false,
+ description : 'Enable floating point textures and renderbuffers. This option may be patent encumbered, please read docs/patents.txt and consult with your lawyer before turning this on.'
+)
diff -Nru mesa-17.2.4/scons/crossmingw.py mesa-17.3.3/scons/crossmingw.py
--- mesa-17.2.4/scons/crossmingw.py 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/scons/crossmingw.py 2018-01-18 21:30:28.000000000 +0000
@@ -83,7 +83,7 @@
no_import_lib = env.get('no_import_lib', 0)
if not dll:
- raise SCons.Errors.UserError, "A shared library should have exactly one target with the suffix: %s" % env.subst("$SHLIBSUFFIX")
+ raise SCons.Errors.UserError("A shared library should have exactly one target with the suffix: %s" % env.subst("$SHLIBSUFFIX"))
if not no_import_lib and \
not env.FindIxes(target, 'LIBPREFIX', 'LIBSUFFIX'):
diff -Nru mesa-17.2.4/scons/custom.py mesa-17.3.3/scons/custom.py
--- mesa-17.2.4/scons/custom.py 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/scons/custom.py 2018-01-18 21:30:28.000000000 +0000
@@ -113,7 +113,7 @@
finder = modulefinder.ModuleFinder(path=path)
finder.run_script(node.abspath)
results = []
- for name, mod in finder.modules.iteritems():
+ for name, mod in finder.modules.items():
if mod.__file__ is None:
continue
assert os.path.exists(mod.__file__)
@@ -189,7 +189,7 @@
except OSError:
return
prefix = name + '_'
- for flag_name, flag_value in flags.iteritems():
+ for flag_name, flag_value in flags.items():
assert '_' not in flag_name
env[prefix + flag_name] = flag_value
@@ -222,7 +222,7 @@
raise Exception('Attempt to use unavailable module %s' % name)
flags = {}
- for flag_name, flag_value in env.Dictionary().iteritems():
+ for flag_name, flag_value in env.Dictionary().items():
if flag_name.startswith(prefix):
flag_name = flag_name[len(prefix):]
if '_' not in flag_name:
@@ -262,7 +262,7 @@
symbols = names
else:
- symbols = sym_table.keys()
+ symbols = list(sym_table.keys())
# convert the symbol table to source lists
src_lists = {}
diff -Nru mesa-17.2.4/scons/dxsdk.py mesa-17.3.3/scons/dxsdk.py
--- mesa-17.2.4/scons/dxsdk.py 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/scons/dxsdk.py 2018-01-18 21:30:28.000000000 +0000
@@ -51,7 +51,7 @@
elif env['machine'] == 'x86_64':
target_cpu = 'x64'
else:
- raise SCons.Errors.InternalError, "Unsupported target machine"
+ raise SCons.Errors.InternalError("Unsupported target machine")
include_dir = os.path.join(dxsdk_root, 'Include')
lib_dir = os.path.join(dxsdk_root, 'Lib', target_cpu)
diff -Nru mesa-17.2.4/scons/gallium.py mesa-17.3.3/scons/gallium.py
--- mesa-17.2.4/scons/gallium.py 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/scons/gallium.py 2018-01-18 21:30:28.000000000 +0000
@@ -193,15 +193,15 @@
env.Tool(env['toolchain'])
# Allow override compiler and specify additional flags from environment
- if os.environ.has_key('CC'):
+ if 'CC' in os.environ:
env['CC'] = os.environ['CC']
- if os.environ.has_key('CFLAGS'):
+ if 'CFLAGS' in os.environ:
env['CCFLAGS'] += SCons.Util.CLVar(os.environ['CFLAGS'])
- if os.environ.has_key('CXX'):
+ if 'CXX' in os.environ:
env['CXX'] = os.environ['CXX']
- if os.environ.has_key('CXXFLAGS'):
+ if 'CXXFLAGS' in os.environ:
env['CXXFLAGS'] += SCons.Util.CLVar(os.environ['CXXFLAGS'])
- if os.environ.has_key('LDFLAGS'):
+ if 'LDFLAGS' in os.environ:
env['LINKFLAGS'] += SCons.Util.CLVar(os.environ['LDFLAGS'])
# Detect gcc/clang not by executable name, but through pre-defined macros
diff -Nru mesa-17.2.4/scons/llvm.py mesa-17.3.3/scons/llvm.py
--- mesa-17.2.4/scons/llvm.py 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/scons/llvm.py 2018-01-18 21:30:28.000000000 +0000
@@ -50,7 +50,7 @@
llvm_dir = None
else:
if not os.path.isdir(llvm_dir):
- raise SCons.Errors.InternalError, "Specified LLVM directory not found"
+ raise SCons.Errors.InternalError("Specified LLVM directory not found")
if env['debug']:
llvm_subdir = 'Debug'
@@ -61,7 +61,7 @@
if not os.path.isdir(llvm_bin_dir):
llvm_bin_dir = os.path.join(llvm_dir, 'bin')
if not os.path.isdir(llvm_bin_dir):
- raise SCons.Errors.InternalError, "LLVM binary directory not found"
+ raise SCons.Errors.InternalError("LLVM binary directory not found")
env.PrependENVPath('PATH', llvm_bin_dir)
diff -Nru mesa-17.2.4/SConstruct mesa-17.3.3/SConstruct
--- mesa-17.2.4/SConstruct 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/SConstruct 2018-01-18 21:30:28.000000000 +0000
@@ -152,8 +152,7 @@
except ImportError:
pass
else:
- aliases = default_ans.keys()
- aliases.sort()
+ aliases = sorted(default_ans.keys())
env.Help('\n')
env.Help('Recognized targets:\n')
for alias in aliases:
diff -Nru mesa-17.2.4/src/amd/addrlib/addrinterface.cpp mesa-17.3.3/src/amd/addrlib/addrinterface.cpp
--- mesa-17.2.4/src/amd/addrlib/addrinterface.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/addrlib/addrinterface.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -1675,3 +1675,37 @@
return returnCode;
}
+/**
+****************************************************************************************************
+*   Addr2IsValidDisplaySwizzleMode
+*
+*   @brief
+*       Return whether the swizzle mode is supported by DCE / DCN.
+*
+*   @param hLib         Address library handle; resolved with V2::Lib::GetLib().
+*   @param swizzleMode  Swizzle mode to query.
+*   @param bpp          Forwarded as ADDR2_COMPUTE_SURFACE_INFO_INPUT::bpp.
+*   @param result       [out] Receives the answer; written only when ADDR_OK is returned.
+*
+*   @return
+*       ADDR_OK when the query was performed, ADDR_ERROR when hLib does not resolve to a
+*       V2 library or result is NULL.
+****************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API Addr2IsValidDisplaySwizzleMode(
+    ADDR_HANDLE     hLib,
+    AddrSwizzleMode swizzleMode,
+    UINT_32         bpp,
+    bool            *result)
+{
+    ADDR_E_RETURNCODE returnCode = ADDR_ERROR;
+
+    V2::Lib* pLib = V2::Lib::GetLib(hLib);
+
+    // Refuse a NULL output pointer instead of dereferencing it below.
+    if ((pLib != NULL) && (result != NULL))
+    {
+        // Zero the whole input so the query never sees indeterminate fields;
+        // only swizzleMode and bpp carry caller-supplied data.
+        ADDR2_COMPUTE_SURFACE_INFO_INPUT in = {};
+        in.swizzleMode = swizzleMode;
+        in.bpp         = bpp;
+
+        *result    = pLib->IsValidDisplaySwizzleMode(&in);
+        returnCode = ADDR_OK;
+    }
+
+    return returnCode;
+}
diff -Nru mesa-17.2.4/src/amd/addrlib/addrinterface.h mesa-17.3.3/src/amd/addrlib/addrinterface.h
--- mesa-17.2.4/src/amd/addrlib/addrinterface.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/addrlib/addrinterface.h 2018-01-18 21:30:28.000000000 +0000
@@ -3653,6 +3653,20 @@
const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut);
+/**
+****************************************************************************************************
+* Addr2IsValidDisplaySwizzleMode
+*
+* @brief
+* Return whether the swizzle mode is supported by DCE / DCN.
+*
+* @param hLib Address library handle.
+* @param swizzleMode Swizzle mode to query.
+* @param bpp Forwarded to the library as ADDR2_COMPUTE_SURFACE_INFO_INPUT::bpp.
+* @param result [out] Receives the answer; written only when ADDR_OK is returned.
+*
+* @return
+* ADDR_OK when the query was performed and *result was written, ADDR_ERROR otherwise.
+****************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API Addr2IsValidDisplaySwizzleMode(
+ ADDR_HANDLE hLib,
+ AddrSwizzleMode swizzleMode,
+ UINT_32 bpp,
+ bool *result);
+
#if defined(__cplusplus)
}
#endif
diff -Nru mesa-17.2.4/src/amd/addrlib/core/addrcommon.h mesa-17.3.3/src/amd/addrlib/core/addrcommon.h
--- mesa-17.2.4/src/amd/addrlib/core/addrcommon.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/addrlib/core/addrcommon.h 2018-01-18 21:30:28.000000000 +0000
@@ -36,22 +36,30 @@
#include "addrinterface.h"
-
// ADDR_LNX_KERNEL_BUILD is for internal build
// Moved from addrinterface.h so __KERNEL__ is not needed any more
#if ADDR_LNX_KERNEL_BUILD // || (defined(__GNUC__) && defined(__KERNEL__))
#include "lnx_common_defs.h" // ported from cmmqs
#elif !defined(__APPLE__) || defined(HAVE_TSERVER)
+ #include <assert.h>
#include
#include
#endif
+#if BRAHMA_BUILD && !defined(DEBUG)
+#ifdef NDEBUG
+#define DEBUG 0
+#else
+#define DEBUG 1
+#endif
+#endif
+
////////////////////////////////////////////////////////////////////////////////////////////////////
// Platform specific debug break defines
////////////////////////////////////////////////////////////////////////////////////////////////////
#if DEBUG
#if defined(__GNUC__)
- #define ADDR_DBG_BREAK()
+ #define ADDR_DBG_BREAK() assert(false)
#elif defined(__APPLE__)
#define ADDR_DBG_BREAK() { IOPanic("");}
#else
@@ -71,21 +79,21 @@
#define ADDR_ANALYSIS_ASSUME(expr) do { (void)(expr); } while (0)
#endif
-#if DEBUG
+#if BRAHMA_BUILD
+ #define ADDR_ASSERT(__e) assert(__e)
+#elif DEBUG
#define ADDR_ASSERT(__e) \
do { \
ADDR_ANALYSIS_ASSUME(__e); \
if ( !((__e) ? TRUE : FALSE)) { ADDR_DBG_BREAK(); } \
} while (0)
- #define ADDR_ASSERT_ALWAYS() ADDR_DBG_BREAK()
- #define ADDR_UNHANDLED_CASE() ADDR_ASSERT(!"Unhandled case")
- #define ADDR_NOT_IMPLEMENTED() ADDR_ASSERT(!"Not implemented");
#else //DEBUG
#define ADDR_ASSERT(__e) ADDR_ANALYSIS_ASSUME(__e)
- #define ADDR_ASSERT_ALWAYS()
- #define ADDR_UNHANDLED_CASE()
- #define ADDR_NOT_IMPLEMENTED()
#endif //DEBUG
+
+// Convenience wrappers, defined unconditionally: what they do in a given build follows from how
+// ADDR_DBG_BREAK and ADDR_ASSERT expand for that configuration.
+#define ADDR_ASSERT_ALWAYS() ADDR_DBG_BREAK()
+#define ADDR_UNHANDLED_CASE() ADDR_ASSERT(!"Unhandled case")
+// NOTE(review): this expansion already ends in ';', so writing `ADDR_NOT_IMPLEMENTED();` produces
+// an extra empty statement. That is harmless inside a braced block but would break an unbraced
+// if/else; confirm call sites before changing it.
+#define ADDR_NOT_IMPLEMENTED() ADDR_ASSERT(!"Not implemented");
////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////
diff -Nru mesa-17.2.4/src/amd/addrlib/core/addrlib2.h mesa-17.3.3/src/amd/addrlib/core/addrlib2.h
--- mesa-17.2.4/src/amd/addrlib/core/addrlib2.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/addrlib/core/addrlib2.h 2018-01-18 21:30:28.000000000 +0000
@@ -178,6 +178,13 @@
const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const;
+    /// Base-class fallback for libraries that do not override the display swizzle-mode query:
+    /// flags the call through ADDR_NOT_IMPLEMENTED() and reports the mode as not valid.
+    virtual BOOL_32 IsValidDisplaySwizzleMode(
+        const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
+    {
+        ADDR_NOT_IMPLEMENTED();
+        // The return type is a boolean; ADDR_NOTIMPLEMENTED is an ADDR_E_RETURNCODE value and
+        // must not be returned here, where callers would interpret it as a truth value.
+        return FALSE;
+    }
+
protected:
Lib(); // Constructor is protected
Lib(const Client* pClient);
diff -Nru mesa-17.2.4/src/amd/addrlib/gfx9/gfx9addrlib.cpp mesa-17.3.3/src/amd/addrlib/gfx9/gfx9addrlib.cpp
--- mesa-17.2.4/src/amd/addrlib/gfx9/gfx9addrlib.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/addrlib/gfx9/gfx9addrlib.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -268,7 +268,8 @@
ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut ///< [out] output structure
) const
{
- ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
+// TODO: Clarify with AddrLib team
+// ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);
UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->cMaskFlags.pipeAligned,
pIn->swizzleMode);
diff -Nru mesa-17.2.4/src/amd/addrlib/gfx9/gfx9addrlib.h mesa-17.3.3/src/amd/addrlib/gfx9/gfx9addrlib.h
--- mesa-17.2.4/src/amd/addrlib/gfx9/gfx9addrlib.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/addrlib/gfx9/gfx9addrlib.h 2018-01-18 21:30:28.000000000 +0000
@@ -97,6 +97,9 @@
return (pMem != NULL) ? new (pMem) Gfx9Lib(pClient) : NULL;
}
+ virtual BOOL_32 IsValidDisplaySwizzleMode(
+ const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const;
+
protected:
Gfx9Lib(const Client* pClient);
virtual ~Gfx9Lib();
@@ -405,8 +408,6 @@
UINT_32 mip0Width, UINT_32 mip0Height, UINT_32 mip0Depth,
UINT_32* pNumMetaBlkX, UINT_32* pNumMetaBlkY, UINT_32* pNumMetaBlkZ) const;
- BOOL_32 IsValidDisplaySwizzleMode(const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const;
-
Gfx9ChipSettings m_settings;
};
diff -Nru mesa-17.2.4/src/amd/addrlib/meson.build mesa-17.3.3/src/amd/addrlib/meson.build
--- mesa-17.2.4/src/amd/addrlib/meson.build 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/amd/addrlib/meson.build 2018-02-01 16:17:32.000000000 +0000
@@ -0,0 +1,63 @@
+# Copyright © 2017 Intel Corporation
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+files_addrlib = files(
+ 'addrinterface.cpp',
+ 'addrinterface.h',
+ 'addrtypes.h',
+ 'core/addrcommon.h',
+ 'core/addrelemlib.cpp',
+ 'core/addrelemlib.h',
+ 'core/addrlib.cpp',
+ 'core/addrlib.h',
+ 'core/addrlib1.cpp',
+ 'core/addrlib1.h',
+ 'core/addrlib2.cpp',
+ 'core/addrlib2.h',
+ 'core/addrobject.cpp',
+ 'core/addrobject.h',
+ 'gfx9/chip/gfx9_enum.h',
+ 'gfx9/coord.cpp',
+ 'gfx9/coord.h',
+ 'gfx9/gfx9addrlib.cpp',
+ 'gfx9/gfx9addrlib.h',
+ 'gfx9/rbmap.cpp',
+ 'gfx9/rbmap.h',
+ 'inc/chip/gfx9/gfx9_gb_reg.h',
+ 'inc/chip/r800/si_gb_reg.h',
+ 'inc/lnx_common_defs.h',
+ 'r800/chip/si_ci_vi_merged_enum.h',
+ 'r800/ciaddrlib.cpp',
+ 'r800/ciaddrlib.h',
+ 'r800/egbaddrlib.cpp',
+ 'r800/egbaddrlib.h',
+ 'r800/siaddrlib.cpp',
+ 'r800/siaddrlib.h',
+)
+
+libamdgpu_addrlib = static_library(
+ 'addrlib',
+ files_addrlib,
+ include_directories : include_directories(
+ 'core', 'inc/chip/gfx9', 'inc/chip/r800', 'gfx9/chip', 'r800/chip',
+ '../common', '../../'),
+ cpp_args : [cpp_vis_args, '-DBRAHMA_BUILD=1'],
+ build_by_default : false,
+)
diff -Nru mesa-17.2.4/src/amd/addrlib/r800/ciaddrlib.cpp mesa-17.3.3/src/amd/addrlib/r800/ciaddrlib.cpp
--- mesa-17.2.4/src/amd/addrlib/r800/ciaddrlib.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/addrlib/r800/ciaddrlib.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -747,7 +747,7 @@
SiLib::HwlComputeSurfaceInfo(&localIn, pOut);
- ADDR_ASSERT((MinDepth2DThinIndex <= pOut->tileIndex) && (MaxDepth2DThinIndex >= pOut->tileIndex));
+ ADDR_ASSERT(((MinDepth2DThinIndex <= pOut->tileIndex) && (MaxDepth2DThinIndex >= pOut->tileIndex)) || pOut->tileIndex == Depth1DThinIndex);
depthStencil2DTileConfigMatch = DepthStencilTileCfgMatch(pIn, pOut);
}
diff -Nru mesa-17.2.4/src/amd/common/ac_binary.c mesa-17.3.3/src/amd/common/ac_binary.c
--- mesa-17.2.4/src/amd/common/ac_binary.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/common/ac_binary.c 2018-01-18 21:30:28.000000000 +0000
@@ -252,6 +252,7 @@
case R_00B128_SPI_SHADER_PGM_RSRC1_VS:
case R_00B228_SPI_SHADER_PGM_RSRC1_GS:
case R_00B848_COMPUTE_PGM_RSRC1:
+ case R_00B428_SPI_SHADER_PGM_RSRC1_HS:
conf->num_sgprs = MAX2(conf->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8);
conf->num_vgprs = MAX2(conf->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4);
conf->float_mode = G_00B028_FLOAT_MODE(value);
diff -Nru mesa-17.2.4/src/amd/common/ac_binary.h mesa-17.3.3/src/amd/common/ac_binary.h
--- mesa-17.2.4/src/amd/common/ac_binary.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/common/ac_binary.h 2018-01-18 21:30:28.000000000 +0000
@@ -36,31 +36,32 @@
};
struct ac_shader_binary {
+ unsigned code_size;
+ unsigned config_size;
+ /** The number of bytes of config information for each global symbol.
+ */
+ unsigned config_size_per_symbol;
+ unsigned rodata_size;
+ unsigned global_symbol_count;
+ unsigned reloc_count;
+
/** Shader code */
unsigned char *code;
- unsigned code_size;
/** Config/Context register state that accompanies this shader.
* This is a stream of dword pairs. First dword contains the
* register address, the second dword contains the value.*/
unsigned char *config;
- unsigned config_size;
- /** The number of bytes of config information for each global symbol.
- */
- unsigned config_size_per_symbol;
/** Constant data accessed by the shader. This will be uploaded
* into a constant buffer. */
unsigned char *rodata;
- unsigned rodata_size;
/** List of symbol offsets for the shader */
uint64_t *global_symbol_offsets;
- unsigned global_symbol_count;
struct ac_shader_reloc *relocs;
- unsigned reloc_count;
/** Disassembled shader in a string. */
char *disasm_string;
diff -Nru mesa-17.2.4/src/amd/common/ac_debug.c mesa-17.3.3/src/amd/common/ac_debug.c
--- mesa-17.2.4/src/amd/common/ac_debug.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/common/ac_debug.c 2018-01-18 21:30:28.000000000 +0000
@@ -26,11 +26,24 @@
#include "ac_debug.h"
+#ifdef HAVE_VALGRIND
+#include <valgrind.h>
+#include <memcheck.h>
+#define VG(x) x
+#else
+#define VG(x)
+#endif
+
+#include
+
#include "sid.h"
#include "gfx9d.h"
#include "sid_tables.h"
#include "util/u_math.h"
#include "util/u_memory.h"
+#include "util/u_string.h"
+
+#include
/* Parsed IBs are difficult to read without colors. Use "less -R file" to
* read them, or use "aha -b -f file" to convert them to html.
@@ -43,6 +56,21 @@
#define INDENT_PKT 8
+/* State for dumping one IB; handed to ac_ib_get and the packet parsers. */
+struct ac_ib_parser {
+ /* Stream that ac_ib_get writes the raw dword markers to. */
+ FILE *f;
+ /* Dwords of the IB currently being read. */
+ uint32_t *ib;
+ /* Number of dwords in ib. */
+ unsigned num_dw;
+ /* Trace IDs compared against trace points found in the IB. The first
+  * entry is the one used for this IB; a nested IB is parsed with the
+  * pointer advanced by one. */
+ const int *trace_ids;
+ /* Entries left in trace_ids; 0 disables trace-point reporting. */
+ unsigned trace_id_count;
+ /* Passed to ac_dump_reg to select the register tables. */
+ enum chip_class chip_class;
+ /* Translates an IB address into a CPU pointer; may be NULL, in which
+  * case INDIRECT_BUFFER packets are not followed. */
+ ac_debug_addr_callback addr_callback;
+ /* First argument passed to addr_callback. */
+ void *addr_callback_data;
+
+ /* Index of the next dword to fetch. ac_ib_get advances it even past
+  * num_dw, which is how packets overrunning the IB are detected. */
+ unsigned cur_dw;
+};
+
+static void ac_do_parse_ib(FILE *f, struct ac_ib_parser *ib);
+
static void print_spaces(FILE *f, unsigned num)
{
fprintf(f, "%*s", num, "");
@@ -75,65 +103,108 @@
print_value(file, value, bits);
}
-void ac_dump_reg(FILE *file, unsigned offset, uint32_t value,
- uint32_t field_mask)
+static const struct si_reg *find_register(const struct si_reg *table,
+ unsigned table_size,
+ unsigned offset)
{
- int r, f;
+ for (unsigned i = 0; i < table_size; i++) {
+ const struct si_reg *reg = &table[i];
- for (r = 0; r < ARRAY_SIZE(sid_reg_table); r++) {
- const struct si_reg *reg = &sid_reg_table[r];
- const char *reg_name = sid_strings + reg->name_offset;
+ if (reg->offset == offset)
+ return reg;
+ }
- if (reg->offset == offset) {
- bool first_field = true;
+ return NULL;
+}
- print_spaces(file, INDENT_PKT);
- fprintf(file, COLOR_YELLOW "%s" COLOR_RESET " <- ",
- reg_name);
-
- if (!reg->num_fields) {
- print_value(file, value, 32);
- return;
- }
+void ac_dump_reg(FILE *file, enum chip_class chip_class, unsigned offset,
+ uint32_t value, uint32_t field_mask)
+{
+ const struct si_reg *reg = NULL;
- for (f = 0; f < reg->num_fields; f++) {
- const struct si_field *field = sid_fields_table + reg->fields_offset + f;
- const int *values_offsets = sid_strings_offsets + field->values_offset;
- uint32_t val = (value & field->mask) >>
- (ffs(field->mask) - 1);
-
- if (!(field->mask & field_mask))
- continue;
-
- /* Indent the field. */
- if (!first_field)
- print_spaces(file,
- INDENT_PKT + strlen(reg_name) + 4);
-
- /* Print the field. */
- fprintf(file, "%s = ", sid_strings + field->name_offset);
-
- if (val < field->num_values && values_offsets[val] >= 0)
- fprintf(file, "%s\n", sid_strings + values_offsets[val]);
- else
- print_value(file, val,
- util_bitcount(field->mask));
+ if (chip_class >= GFX9)
+ reg = find_register(gfx9d_reg_table, ARRAY_SIZE(gfx9d_reg_table), offset);
+ if (!reg)
+ reg = find_register(sid_reg_table, ARRAY_SIZE(sid_reg_table), offset);
- first_field = false;
- }
+ if (reg) {
+ const char *reg_name = sid_strings + reg->name_offset;
+ bool first_field = true;
+
+ print_spaces(file, INDENT_PKT);
+ fprintf(file, COLOR_YELLOW "%s" COLOR_RESET " <- ",
+ reg_name);
+
+ if (!reg->num_fields) {
+ print_value(file, value, 32);
return;
}
+
+ for (unsigned f = 0; f < reg->num_fields; f++) {
+ const struct si_field *field = sid_fields_table + reg->fields_offset + f;
+ const int *values_offsets = sid_strings_offsets + field->values_offset;
+ uint32_t val = (value & field->mask) >>
+ (ffs(field->mask) - 1);
+
+ if (!(field->mask & field_mask))
+ continue;
+
+ /* Indent the field. */
+ if (!first_field)
+ print_spaces(file,
+ INDENT_PKT + strlen(reg_name) + 4);
+
+ /* Print the field. */
+ fprintf(file, "%s = ", sid_strings + field->name_offset);
+
+ if (val < field->num_values && values_offsets[val] >= 0)
+ fprintf(file, "%s\n", sid_strings + values_offsets[val]);
+ else
+ print_value(file, val,
+ util_bitcount(field->mask));
+
+ first_field = false;
+ }
+ return;
}
print_spaces(file, INDENT_PKT);
fprintf(file, COLOR_YELLOW "0x%05x" COLOR_RESET " <- 0x%08x\n", offset, value);
}
-static void ac_parse_set_reg_packet(FILE *f, uint32_t *ib, unsigned count,
- unsigned reg_offset)
+/**
+ * Fetch the next dword of the IB and advance the read position.
+ *
+ * Every call starts a new output line tagged with the "\035#" marker, followed
+ * by the dword in hex, or by question marks when the read position is already
+ * past the end of the IB (in which case 0 is returned).
+ */
+static uint32_t ac_ib_get(struct ac_ib_parser *ib)
+{
+	/* The position is consumed even when it is out of range. */
+	const unsigned pos = ib->cur_dw++;
+
+	if (pos >= ib->num_dw) {
+		fprintf(ib->f, "\n\035#???????? ");
+		return 0;
+	}
+
+	uint32_t dw = ib->ib[pos];
+#ifdef HAVE_VALGRIND
+	/* Point out dwords that Valgrind considers undefined, to help find
+	 * where garbage was written into the IB.
+	 *
+	 * This could be done at emit time instead (see RADEON_VALGRIND), but
+	 * Valgrind client requests cost something even when Valgrind is not
+	 * running, and radeon_emit is performance sensitive.
+	 */
+	if (VALGRIND_CHECK_VALUE_IS_DEFINED(dw))
+		fprintf(ib->f, COLOR_RED "Valgrind: The next DWORD is garbage"
+			COLOR_RESET "\n");
+#endif
+	fprintf(ib->f, "\n\035#%08x ", dw);
+	return dw;
+}
+
+static void ac_parse_set_reg_packet(FILE *f, unsigned count, unsigned reg_offset,
+ struct ac_ib_parser *ib)
{
- unsigned reg = ((ib[1] & 0xFFFF) << 2) + reg_offset;
- unsigned index = ib[1] >> 28;
+ unsigned reg_dw = ac_ib_get(ib);
+ unsigned reg = ((reg_dw & 0xFFFF) << 2) + reg_offset;
+ unsigned index = reg_dw >> 28;
int i;
if (index != 0) {
@@ -142,17 +213,16 @@
}
for (i = 0; i < count; i++)
- ac_dump_reg(f, reg + i*4, ib[2+i], ~0);
+ ac_dump_reg(f, ib->chip_class, reg + i*4, ac_ib_get(ib), ~0);
}
-static uint32_t *ac_parse_packet3(FILE *f, uint32_t *ib, int *num_dw,
- int trace_id, enum chip_class chip_class,
- ac_debug_addr_callback addr_callback,
- void *addr_callback_data)
+static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib,
+ int *current_trace_id)
{
- unsigned count = PKT_COUNT_G(ib[0]);
- unsigned op = PKT3_IT_OPCODE_G(ib[0]);
- const char *predicate = PKT3_PREDICATE(ib[0]) ? "(predicate)" : "";
+ unsigned first_dw = ib->cur_dw;
+ int count = PKT_COUNT_G(header);
+ unsigned op = PKT3_IT_OPCODE_G(header);
+ const char *predicate = PKT3_PREDICATE(header) ? "(predicate)" : "";
int i;
/* Print the name first. */
@@ -179,180 +249,213 @@
/* Print the contents. */
switch (op) {
case PKT3_SET_CONTEXT_REG:
- ac_parse_set_reg_packet(f, ib, count, SI_CONTEXT_REG_OFFSET);
+ ac_parse_set_reg_packet(f, count, SI_CONTEXT_REG_OFFSET, ib);
break;
case PKT3_SET_CONFIG_REG:
- ac_parse_set_reg_packet(f, ib, count, SI_CONFIG_REG_OFFSET);
+ ac_parse_set_reg_packet(f, count, SI_CONFIG_REG_OFFSET, ib);
break;
case PKT3_SET_UCONFIG_REG:
- ac_parse_set_reg_packet(f, ib, count, CIK_UCONFIG_REG_OFFSET);
+ ac_parse_set_reg_packet(f, count, CIK_UCONFIG_REG_OFFSET, ib);
break;
case PKT3_SET_SH_REG:
- ac_parse_set_reg_packet(f, ib, count, SI_SH_REG_OFFSET);
+ ac_parse_set_reg_packet(f, count, SI_SH_REG_OFFSET, ib);
break;
case PKT3_ACQUIRE_MEM:
- ac_dump_reg(f, R_0301F0_CP_COHER_CNTL, ib[1], ~0);
- ac_dump_reg(f, R_0301F4_CP_COHER_SIZE, ib[2], ~0);
- ac_dump_reg(f, R_030230_CP_COHER_SIZE_HI, ib[3], ~0);
- ac_dump_reg(f, R_0301F8_CP_COHER_BASE, ib[4], ~0);
- ac_dump_reg(f, R_0301E4_CP_COHER_BASE_HI, ib[5], ~0);
- print_named_value(f, "POLL_INTERVAL", ib[6], 16);
+ ac_dump_reg(f, ib->chip_class, R_0301F0_CP_COHER_CNTL, ac_ib_get(ib), ~0);
+ ac_dump_reg(f, ib->chip_class, R_0301F4_CP_COHER_SIZE, ac_ib_get(ib), ~0);
+ ac_dump_reg(f, ib->chip_class, R_030230_CP_COHER_SIZE_HI, ac_ib_get(ib), ~0);
+ ac_dump_reg(f, ib->chip_class, R_0301F8_CP_COHER_BASE, ac_ib_get(ib), ~0);
+ ac_dump_reg(f, ib->chip_class, R_0301E4_CP_COHER_BASE_HI, ac_ib_get(ib), ~0);
+ print_named_value(f, "POLL_INTERVAL", ac_ib_get(ib), 16);
break;
case PKT3_SURFACE_SYNC:
- if (chip_class >= CIK) {
- ac_dump_reg(f, R_0301F0_CP_COHER_CNTL, ib[1], ~0);
- ac_dump_reg(f, R_0301F4_CP_COHER_SIZE, ib[2], ~0);
- ac_dump_reg(f, R_0301F8_CP_COHER_BASE, ib[3], ~0);
+ if (ib->chip_class >= CIK) {
+ ac_dump_reg(f, ib->chip_class, R_0301F0_CP_COHER_CNTL, ac_ib_get(ib), ~0);
+ ac_dump_reg(f, ib->chip_class, R_0301F4_CP_COHER_SIZE, ac_ib_get(ib), ~0);
+ ac_dump_reg(f, ib->chip_class, R_0301F8_CP_COHER_BASE, ac_ib_get(ib), ~0);
} else {
- ac_dump_reg(f, R_0085F0_CP_COHER_CNTL, ib[1], ~0);
- ac_dump_reg(f, R_0085F4_CP_COHER_SIZE, ib[2], ~0);
- ac_dump_reg(f, R_0085F8_CP_COHER_BASE, ib[3], ~0);
+ ac_dump_reg(f, ib->chip_class, R_0085F0_CP_COHER_CNTL, ac_ib_get(ib), ~0);
+ ac_dump_reg(f, ib->chip_class, R_0085F4_CP_COHER_SIZE, ac_ib_get(ib), ~0);
+ ac_dump_reg(f, ib->chip_class, R_0085F8_CP_COHER_BASE, ac_ib_get(ib), ~0);
}
- print_named_value(f, "POLL_INTERVAL", ib[4], 16);
+ print_named_value(f, "POLL_INTERVAL", ac_ib_get(ib), 16);
break;
- case PKT3_EVENT_WRITE:
- ac_dump_reg(f, R_028A90_VGT_EVENT_INITIATOR, ib[1],
+ case PKT3_EVENT_WRITE: {
+ uint32_t event_dw = ac_ib_get(ib);
+ ac_dump_reg(f, ib->chip_class, R_028A90_VGT_EVENT_INITIATOR, event_dw,
S_028A90_EVENT_TYPE(~0));
- print_named_value(f, "EVENT_INDEX", (ib[1] >> 8) & 0xf, 4);
- print_named_value(f, "INV_L2", (ib[1] >> 20) & 0x1, 1);
+ print_named_value(f, "EVENT_INDEX", (event_dw >> 8) & 0xf, 4);
+ print_named_value(f, "INV_L2", (event_dw >> 20) & 0x1, 1);
if (count > 0) {
- print_named_value(f, "ADDRESS_LO", ib[2], 32);
- print_named_value(f, "ADDRESS_HI", ib[3], 16);
+ print_named_value(f, "ADDRESS_LO", ac_ib_get(ib), 32);
+ print_named_value(f, "ADDRESS_HI", ac_ib_get(ib), 16);
}
break;
- case PKT3_EVENT_WRITE_EOP:
- ac_dump_reg(f, R_028A90_VGT_EVENT_INITIATOR, ib[1],
+ }
+ case PKT3_EVENT_WRITE_EOP: {
+ uint32_t event_dw = ac_ib_get(ib);
+ ac_dump_reg(f, ib->chip_class, R_028A90_VGT_EVENT_INITIATOR, event_dw,
S_028A90_EVENT_TYPE(~0));
- print_named_value(f, "EVENT_INDEX", (ib[1] >> 8) & 0xf, 4);
- print_named_value(f, "TCL1_VOL_ACTION_ENA", (ib[1] >> 12) & 0x1, 1);
- print_named_value(f, "TC_VOL_ACTION_ENA", (ib[1] >> 13) & 0x1, 1);
- print_named_value(f, "TC_WB_ACTION_ENA", (ib[1] >> 15) & 0x1, 1);
- print_named_value(f, "TCL1_ACTION_ENA", (ib[1] >> 16) & 0x1, 1);
- print_named_value(f, "TC_ACTION_ENA", (ib[1] >> 17) & 0x1, 1);
- print_named_value(f, "ADDRESS_LO", ib[2], 32);
- print_named_value(f, "ADDRESS_HI", ib[3], 16);
- print_named_value(f, "DST_SEL", (ib[3] >> 16) & 0x3, 2);
- print_named_value(f, "INT_SEL", (ib[3] >> 24) & 0x7, 3);
- print_named_value(f, "DATA_SEL", ib[3] >> 29, 3);
- print_named_value(f, "DATA_LO", ib[4], 32);
- print_named_value(f, "DATA_HI", ib[5], 32);
+ print_named_value(f, "EVENT_INDEX", (event_dw >> 8) & 0xf, 4);
+ print_named_value(f, "TCL1_VOL_ACTION_ENA", (event_dw >> 12) & 0x1, 1);
+ print_named_value(f, "TC_VOL_ACTION_ENA", (event_dw >> 13) & 0x1, 1);
+ print_named_value(f, "TC_WB_ACTION_ENA", (event_dw >> 15) & 0x1, 1);
+ print_named_value(f, "TCL1_ACTION_ENA", (event_dw >> 16) & 0x1, 1);
+ print_named_value(f, "TC_ACTION_ENA", (event_dw >> 17) & 0x1, 1);
+ print_named_value(f, "ADDRESS_LO", ac_ib_get(ib), 32);
+ uint32_t addr_hi_dw = ac_ib_get(ib);
+ print_named_value(f, "ADDRESS_HI", addr_hi_dw, 16);
+ print_named_value(f, "DST_SEL", (addr_hi_dw >> 16) & 0x3, 2);
+ print_named_value(f, "INT_SEL", (addr_hi_dw >> 24) & 0x7, 3);
+ print_named_value(f, "DATA_SEL", addr_hi_dw >> 29, 3);
+ print_named_value(f, "DATA_LO", ac_ib_get(ib), 32);
+ print_named_value(f, "DATA_HI", ac_ib_get(ib), 32);
break;
- case PKT3_RELEASE_MEM:
- ac_dump_reg(f, R_028A90_VGT_EVENT_INITIATOR, ib[1],
+ }
+ case PKT3_RELEASE_MEM: {
+ uint32_t event_dw = ac_ib_get(ib);
+ ac_dump_reg(f, ib->chip_class, R_028A90_VGT_EVENT_INITIATOR, event_dw,
S_028A90_EVENT_TYPE(~0));
- print_named_value(f, "EVENT_INDEX", (ib[1] >> 8) & 0xf, 4);
- print_named_value(f, "TCL1_VOL_ACTION_ENA", (ib[1] >> 12) & 0x1, 1);
- print_named_value(f, "TC_VOL_ACTION_ENA", (ib[1] >> 13) & 0x1, 1);
- print_named_value(f, "TC_WB_ACTION_ENA", (ib[1] >> 15) & 0x1, 1);
- print_named_value(f, "TCL1_ACTION_ENA", (ib[1] >> 16) & 0x1, 1);
- print_named_value(f, "TC_ACTION_ENA", (ib[1] >> 17) & 0x1, 1);
- print_named_value(f, "TC_NC_ACTION_ENA", (ib[1] >> 19) & 0x1, 1);
- print_named_value(f, "TC_WC_ACTION_ENA", (ib[1] >> 20) & 0x1, 1);
- print_named_value(f, "TC_MD_ACTION_ENA", (ib[1] >> 21) & 0x1, 1);
- print_named_value(f, "DST_SEL", (ib[2] >> 16) & 0x3, 2);
- print_named_value(f, "INT_SEL", (ib[2] >> 24) & 0x7, 3);
- print_named_value(f, "DATA_SEL", ib[2] >> 29, 3);
- print_named_value(f, "ADDRESS_LO", ib[3], 32);
- print_named_value(f, "ADDRESS_HI", ib[4], 32);
- print_named_value(f, "DATA_LO", ib[5], 32);
- print_named_value(f, "DATA_HI", ib[6], 32);
- print_named_value(f, "CTXID", ib[7], 32);
+ print_named_value(f, "EVENT_INDEX", (event_dw >> 8) & 0xf, 4);
+ print_named_value(f, "TCL1_VOL_ACTION_ENA", (event_dw >> 12) & 0x1, 1);
+ print_named_value(f, "TC_VOL_ACTION_ENA", (event_dw >> 13) & 0x1, 1);
+ print_named_value(f, "TC_WB_ACTION_ENA", (event_dw >> 15) & 0x1, 1);
+ print_named_value(f, "TCL1_ACTION_ENA", (event_dw >> 16) & 0x1, 1);
+ print_named_value(f, "TC_ACTION_ENA", (event_dw >> 17) & 0x1, 1);
+ print_named_value(f, "TC_NC_ACTION_ENA", (event_dw >> 19) & 0x1, 1);
+ print_named_value(f, "TC_WC_ACTION_ENA", (event_dw >> 20) & 0x1, 1);
+ print_named_value(f, "TC_MD_ACTION_ENA", (event_dw >> 21) & 0x1, 1);
+ uint32_t sel_dw = ac_ib_get(ib);
+ print_named_value(f, "DST_SEL", (sel_dw >> 16) & 0x3, 2);
+ print_named_value(f, "INT_SEL", (sel_dw >> 24) & 0x7, 3);
+ print_named_value(f, "DATA_SEL", sel_dw >> 29, 3);
+ print_named_value(f, "ADDRESS_LO", ac_ib_get(ib), 32);
+ print_named_value(f, "ADDRESS_HI", ac_ib_get(ib), 32);
+ print_named_value(f, "DATA_LO", ac_ib_get(ib), 32);
+ print_named_value(f, "DATA_HI", ac_ib_get(ib), 32);
+ print_named_value(f, "CTXID", ac_ib_get(ib), 32);
break;
+ }
case PKT3_WAIT_REG_MEM:
- print_named_value(f, "OP", ib[1], 32);
- print_named_value(f, "ADDRESS_LO", ib[2], 32);
- print_named_value(f, "ADDRESS_HI", ib[3], 32);
- print_named_value(f, "REF", ib[4], 32);
- print_named_value(f, "MASK", ib[5], 32);
- print_named_value(f, "POLL_INTERVAL", ib[6], 16);
+ print_named_value(f, "OP", ac_ib_get(ib), 32);
+ print_named_value(f, "ADDRESS_LO", ac_ib_get(ib), 32);
+ print_named_value(f, "ADDRESS_HI", ac_ib_get(ib), 32);
+ print_named_value(f, "REF", ac_ib_get(ib), 32);
+ print_named_value(f, "MASK", ac_ib_get(ib), 32);
+ print_named_value(f, "POLL_INTERVAL", ac_ib_get(ib), 16);
break;
case PKT3_DRAW_INDEX_AUTO:
- ac_dump_reg(f, R_030930_VGT_NUM_INDICES, ib[1], ~0);
- ac_dump_reg(f, R_0287F0_VGT_DRAW_INITIATOR, ib[2], ~0);
+ ac_dump_reg(f, ib->chip_class, R_030930_VGT_NUM_INDICES, ac_ib_get(ib), ~0);
+ ac_dump_reg(f, ib->chip_class, R_0287F0_VGT_DRAW_INITIATOR, ac_ib_get(ib), ~0);
break;
case PKT3_DRAW_INDEX_2:
- ac_dump_reg(f, R_028A78_VGT_DMA_MAX_SIZE, ib[1], ~0);
- ac_dump_reg(f, R_0287E8_VGT_DMA_BASE, ib[2], ~0);
- ac_dump_reg(f, R_0287E4_VGT_DMA_BASE_HI, ib[3], ~0);
- ac_dump_reg(f, R_030930_VGT_NUM_INDICES, ib[4], ~0);
- ac_dump_reg(f, R_0287F0_VGT_DRAW_INITIATOR, ib[5], ~0);
+ ac_dump_reg(f, ib->chip_class, R_028A78_VGT_DMA_MAX_SIZE, ac_ib_get(ib), ~0);
+ ac_dump_reg(f, ib->chip_class, R_0287E8_VGT_DMA_BASE, ac_ib_get(ib), ~0);
+ ac_dump_reg(f, ib->chip_class, R_0287E4_VGT_DMA_BASE_HI, ac_ib_get(ib), ~0);
+ ac_dump_reg(f, ib->chip_class, R_030930_VGT_NUM_INDICES, ac_ib_get(ib), ~0);
+ ac_dump_reg(f, ib->chip_class, R_0287F0_VGT_DRAW_INITIATOR, ac_ib_get(ib), ~0);
break;
case PKT3_INDEX_TYPE:
- ac_dump_reg(f, R_028A7C_VGT_DMA_INDEX_TYPE, ib[1], ~0);
+ ac_dump_reg(f, ib->chip_class, R_028A7C_VGT_DMA_INDEX_TYPE, ac_ib_get(ib), ~0);
break;
case PKT3_NUM_INSTANCES:
- ac_dump_reg(f, R_030934_VGT_NUM_INSTANCES, ib[1], ~0);
+ ac_dump_reg(f, ib->chip_class, R_030934_VGT_NUM_INSTANCES, ac_ib_get(ib), ~0);
break;
case PKT3_WRITE_DATA:
- ac_dump_reg(f, R_370_CONTROL, ib[1], ~0);
- ac_dump_reg(f, R_371_DST_ADDR_LO, ib[2], ~0);
- ac_dump_reg(f, R_372_DST_ADDR_HI, ib[3], ~0);
- for (i = 2; i < count; i++) {
- print_spaces(f, INDENT_PKT);
- fprintf(f, "0x%08x\n", ib[2+i]);
- }
+ ac_dump_reg(f, ib->chip_class, R_370_CONTROL, ac_ib_get(ib), ~0);
+ ac_dump_reg(f, ib->chip_class, R_371_DST_ADDR_LO, ac_ib_get(ib), ~0);
+ ac_dump_reg(f, ib->chip_class, R_372_DST_ADDR_HI, ac_ib_get(ib), ~0);
+ /* The payload is written automatically */
break;
case PKT3_CP_DMA:
- ac_dump_reg(f, R_410_CP_DMA_WORD0, ib[1], ~0);
- ac_dump_reg(f, R_411_CP_DMA_WORD1, ib[2], ~0);
- ac_dump_reg(f, R_412_CP_DMA_WORD2, ib[3], ~0);
- ac_dump_reg(f, R_413_CP_DMA_WORD3, ib[4], ~0);
- ac_dump_reg(f, R_414_COMMAND, ib[5], ~0);
+ ac_dump_reg(f, ib->chip_class, R_410_CP_DMA_WORD0, ac_ib_get(ib), ~0);
+ ac_dump_reg(f, ib->chip_class, R_411_CP_DMA_WORD1, ac_ib_get(ib), ~0);
+ ac_dump_reg(f, ib->chip_class, R_412_CP_DMA_WORD2, ac_ib_get(ib), ~0);
+ ac_dump_reg(f, ib->chip_class, R_413_CP_DMA_WORD3, ac_ib_get(ib), ~0);
+ ac_dump_reg(f, ib->chip_class, R_414_COMMAND, ac_ib_get(ib), ~0);
break;
case PKT3_DMA_DATA:
- ac_dump_reg(f, R_500_DMA_DATA_WORD0, ib[1], ~0);
- ac_dump_reg(f, R_501_SRC_ADDR_LO, ib[2], ~0);
- ac_dump_reg(f, R_502_SRC_ADDR_HI, ib[3], ~0);
- ac_dump_reg(f, R_503_DST_ADDR_LO, ib[4], ~0);
- ac_dump_reg(f, R_504_DST_ADDR_HI, ib[5], ~0);
- ac_dump_reg(f, R_414_COMMAND, ib[6], ~0);
+ ac_dump_reg(f, ib->chip_class, R_500_DMA_DATA_WORD0, ac_ib_get(ib), ~0);
+ ac_dump_reg(f, ib->chip_class, R_501_SRC_ADDR_LO, ac_ib_get(ib), ~0);
+ ac_dump_reg(f, ib->chip_class, R_502_SRC_ADDR_HI, ac_ib_get(ib), ~0);
+ ac_dump_reg(f, ib->chip_class, R_503_DST_ADDR_LO, ac_ib_get(ib), ~0);
+ ac_dump_reg(f, ib->chip_class, R_504_DST_ADDR_HI, ac_ib_get(ib), ~0);
+ ac_dump_reg(f, ib->chip_class, R_414_COMMAND, ac_ib_get(ib), ~0);
break;
case PKT3_INDIRECT_BUFFER_SI:
case PKT3_INDIRECT_BUFFER_CONST:
- case PKT3_INDIRECT_BUFFER_CIK:
- ac_dump_reg(f, R_3F0_IB_BASE_LO, ib[1], ~0);
- ac_dump_reg(f, R_3F1_IB_BASE_HI, ib[2], ~0);
- ac_dump_reg(f, R_3F2_CONTROL, ib[3], ~0);
-
- if (addr_callback) {
- uint64_t addr = ((uint64_t)ib[2] << 32) | ib[1];
- void *data = addr_callback(addr_callback_data, addr);
- const char *name = G_3F2_CHAIN(ib[3]) ? "chained" : "nested";
-
- if (data)
- ac_parse_ib(f, data, G_3F2_IB_SIZE(ib[3]),
- trace_id, name, chip_class,
- addr_callback, addr_callback_data);
+ case PKT3_INDIRECT_BUFFER_CIK: {
+ uint32_t base_lo_dw = ac_ib_get(ib);
+ ac_dump_reg(f, ib->chip_class, R_3F0_IB_BASE_LO, base_lo_dw, ~0);
+ uint32_t base_hi_dw = ac_ib_get(ib);
+ ac_dump_reg(f, ib->chip_class, R_3F1_IB_BASE_HI, base_hi_dw, ~0);
+ uint32_t control_dw = ac_ib_get(ib);
+ ac_dump_reg(f, ib->chip_class, R_3F2_CONTROL, control_dw, ~0);
+
+ if (!ib->addr_callback)
+ break;
+
+ uint64_t addr = ((uint64_t)base_hi_dw << 32) | base_lo_dw;
+ void *data = ib->addr_callback(ib->addr_callback_data, addr);
+ if (!data)
+ break;
+
+ if (G_3F2_CHAIN(control_dw)) {
+ ib->ib = data;
+ ib->num_dw = G_3F2_IB_SIZE(control_dw);
+ ib->cur_dw = 0;
+ return;
+ }
+
+ struct ac_ib_parser ib_recurse;
+ memcpy(&ib_recurse, ib, sizeof(ib_recurse));
+ ib_recurse.ib = data;
+ ib_recurse.num_dw = G_3F2_IB_SIZE(control_dw);
+ ib_recurse.cur_dw = 0;
+ if(ib_recurse.trace_id_count) {
+ if (*current_trace_id == *ib->trace_ids) {
+ ++ib_recurse.trace_ids;
+ --ib_recurse.trace_id_count;
+ } else {
+ ib_recurse.trace_id_count = 0;
+ }
}
+
+ fprintf(f, "\n\035>------------------ nested begin ------------------\n");
+ ac_do_parse_ib(f, &ib_recurse);
+ fprintf(f, "\n\035<------------------- nested end -------------------\n");
break;
+ }
case PKT3_CLEAR_STATE:
case PKT3_INCREMENT_DE_COUNTER:
case PKT3_PFP_SYNC_ME:
break;
case PKT3_NOP:
- if (ib[0] == 0xffff1000) {
+ if (header == 0xffff1000) {
count = -1; /* One dword NOP. */
- break;
- } else if (count == 0 && AC_IS_TRACE_POINT(ib[1])) {
- unsigned packet_id = AC_GET_TRACE_POINT_ID(ib[1]);
+ } else if (count == 0 && ib->cur_dw < ib->num_dw &&
+ AC_IS_TRACE_POINT(ib->ib[ib->cur_dw])) {
+ unsigned packet_id = AC_GET_TRACE_POINT_ID(ib->ib[ib->cur_dw]);
print_spaces(f, INDENT_PKT);
fprintf(f, COLOR_RED "Trace point ID: %u\n", packet_id);
- if (trace_id == -1)
+ if (!ib->trace_id_count)
break; /* tracing was disabled */
+ *current_trace_id = packet_id;
+
print_spaces(f, INDENT_PKT);
- if (packet_id < trace_id)
+ if (packet_id < *ib->trace_ids)
fprintf(f, COLOR_RED
"This trace point was reached by the CP."
COLOR_RESET "\n");
- else if (packet_id == trace_id)
+ else if (packet_id == *ib->trace_ids)
fprintf(f, COLOR_RED
"!!!!! This is the last trace point that "
"was reached by the CP !!!!!"
COLOR_RESET "\n");
- else if (packet_id+1 == trace_id)
+ else if (packet_id+1 == *ib->trace_ids)
fprintf(f, COLOR_RED
"!!!!! This is the first trace point that "
"was NOT been reached by the CP !!!!!"
@@ -364,17 +467,127 @@
COLOR_RESET "\n");
break;
}
- /* fall through, print all dwords */
- default:
- for (i = 0; i < count+1; i++) {
- print_spaces(f, INDENT_PKT);
- fprintf(f, "0x%08x\n", ib[1+i]);
+ break;
+ }
+
+ /* print additional dwords */
+ while (ib->cur_dw <= first_dw + count)
+ ac_ib_get(ib);
+
+ if (ib->cur_dw > first_dw + count + 1)
+ fprintf(f, COLOR_RED "\n!!!!! count in header too low !!!!!"
+ COLOR_RESET "\n");
+}
+
+/**
+ * Parse and print an IB into a file.
+ *
+ * Fetches one header per iteration with ac_ib_get() until cur_dw reaches
+ * num_dw. Type-3 packets are handed to ac_parse_packet3(); the type-2 header
+ * 0x80000000 is printed as a NOP; any other header is reported as unknown
+ * and the loop continues with the next header.
+ *
+ * \param f file to print to
+ * \param ib parser state (IB pointer, size in dwords, current position)
+ */
+static void ac_do_parse_ib(FILE *f, struct ac_ib_parser *ib)
+{
+ /* ID of the last trace point seen in this IB; -1 until one is parsed. */
+ int current_trace_id = -1;
+
+ while (ib->cur_dw < ib->num_dw) {
+ uint32_t header = ac_ib_get(ib);
+ unsigned type = PKT_TYPE_G(header);
+
+ switch (type) {
+ case 3:
+ ac_parse_packet3(f, header, ib, &current_trace_id);
+ break;
+ case 2:
+ /* type-2 nop */
+ if (header == 0x80000000) {
+ fprintf(f, COLOR_GREEN "NOP (type 2)" COLOR_RESET "\n");
+ break;
+ }
+ /* fall through */
+ default:
+ /* "type" is unsigned, so it is printed with %u. */
+ fprintf(f, "Unknown packet type %u\n", type);
+ break;
}
}
+}
- ib += count + 2;
- *num_dw -= count + 2;
- return ib;
+/**
+ * Copy the parser's intermediate text to \p f, adding indentation.
+ *
+ * A line may start with the control byte '\035' followed by one op character
+ * (a '\n' directly in front of the control byte is skipped):
+ * '>' raises the nesting depth for the lines that follow,
+ * '<' lowers the nesting depth starting with this line,
+ * '#' suppresses the extra 9 columns that every other line receives.
+ * Each nesting level adds 4 columns.
+ *
+ * \param f file to print to
+ * \param out text to format; processing stops at its terminating NUL
+ */
+static void format_ib_output(FILE *f, char *out)
+{
+ unsigned depth = 0;
+
+ for (;;) {
+ char op = 0;
+
+ /* Drop the newline that precedes a control sequence. */
+ if (out[0] == '\n' && out[1] == '\035')
+ out++;
+ if (out[0] == '\035') {
+ op = out[1];
+ out += 2;
+ }
+
+ /* A closing marker is already printed at the outer depth. */
+ if (op == '<')
+ depth--;
+
+ unsigned indent = 4 * depth;
+ if (op != '#')
+ indent += 9;
+
+ if (indent)
+ print_spaces(f, indent);
+
+ /* Emit one line. util_strchrnul presumably returns the '\n' or the
+ * terminating NUL; the !*end test below relies on that. */
+ char *end = util_strchrnul(out, '\n');
+ fwrite(out, end - out, 1, f);
+ fputc('\n', f); /* always end with a new line */
+ if (!*end)
+ break;
+
+ out = end + 1;
+
+ /* An opening marker only affects the lines after it. */
+ if (op == '>')
+ depth++;
+ }
+}
+
+/**
+ * Parse and print an IB into a file.
+ *
+ * The IB is first parsed into an in-memory stream; that text is then passed
+ * through format_ib_output(), which writes the indented result to \p f.
+ * If a packet runs past the end of the IB, a message is printed to stdout
+ * and the process exits with status 1.
+ *
+ * \param f file
+ * \param ib_ptr IB
+ * \param num_dw size of the IB
+ * \param trace_ids the last trace IDs that are known to have been reached
+ * and executed by the CP, typically read from a buffer
+ * \param trace_id_count The number of entries in the trace_ids array.
+ * \param chip_class chip class
+ * \param addr_callback Get a mapped pointer of the IB at a given address. Can
+ * be NULL.
+ * \param addr_callback_data user data for addr_callback
+ */
+void ac_parse_ib_chunk(FILE *f, uint32_t *ib_ptr, int num_dw, const int *trace_ids,
+ unsigned trace_id_count, enum chip_class chip_class,
+ ac_debug_addr_callback addr_callback, void *addr_callback_data)
+{
+ struct ac_ib_parser ib = {};
+ ib.ib = ib_ptr;
+ ib.num_dw = num_dw;
+ ib.trace_ids = trace_ids;
+ ib.trace_id_count = trace_id_count;
+ ib.chip_class = chip_class;
+ ib.addr_callback = addr_callback;
+ ib.addr_callback_data = addr_callback_data;
+
+ char *out = NULL;
+ size_t outsize = 0;
+ FILE *memf = open_memstream(&out, &outsize);
+ if (!memf) {
+ /* Without the capture stream nothing can be parsed; bail out
+ * rather than hand NULL to fprintf()/fclose(). */
+ fprintf(stderr, "%s: open_memstream failed\n", __func__);
+ return;
+ }
+ ib.f = memf;
+ ac_do_parse_ib(memf, &ib);
+ fclose(memf); /* "out" is only final once the stream is closed */
+
+ if (out) {
+ format_ib_output(f, out);
+ free(out);
+ }
+
+ if (ib.cur_dw > ib.num_dw) {
+ printf("\nPacket ends after the end of IB.\n");
+ exit(1);
+ }
}
/**
@@ -384,46 +597,203 @@
* \param ib IB
* \param num_dw size of the IB
* \param chip_class chip class
- * \param trace_id the last trace ID that is known to have been reached
+ * \param trace_ids the last trace IDs that are known to have been reached
* and executed by the CP, typically read from a buffer
+ * \param trace_id_count The number of entries in the trace_ids array.
* \param addr_callback Get a mapped pointer of the IB at a given address. Can
* be NULL.
* \param addr_callback_data user data for addr_callback
*/
-void ac_parse_ib(FILE *f, uint32_t *ib, int num_dw, int trace_id,
- const char *name, enum chip_class chip_class,
- ac_debug_addr_callback addr_callback, void *addr_callback_data)
+void ac_parse_ib(FILE *f, uint32_t *ib, int num_dw, const int *trace_ids,
+ unsigned trace_id_count, const char *name,
+ enum chip_class chip_class, ac_debug_addr_callback addr_callback,
+ void *addr_callback_data)
{
fprintf(f, "------------------ %s begin ------------------\n", name);
- while (num_dw > 0) {
- unsigned type = PKT_TYPE_G(ib[0]);
+ ac_parse_ib_chunk(f, ib, num_dw, trace_ids, trace_id_count,
+ chip_class, addr_callback, addr_callback_data);
- switch (type) {
- case 3:
- ib = ac_parse_packet3(f, ib, &num_dw, trace_id,
- chip_class, addr_callback,
- addr_callback_data);
+ fprintf(f, "------------------- %s end -------------------\n\n", name);
+}
+
+/**
+ * Parse dmesg and return TRUE if a VM fault has been detected.
+ *
+ * Only lines with a timestamp newer than *old_dmesg_timestamp are examined,
+ * and only the first fault found is reported. On return,
+ * *old_dmesg_timestamp is advanced to the newest timestamp seen.
+ *
+ * \param chip_class chip class
+ * \param old_dmesg_timestamp previous dmesg timestamp parsed at init time
+ * \param out_addr detected VM fault addr; pass NULL to only refresh the
+ * timestamp
+ */
+bool ac_vm_fault_occured(enum chip_class chip_class,
+ uint64_t *old_dmesg_timestamp, uint64_t *out_addr)
+{
+ char line[2000];
+ unsigned sec, usec;
+ int progress = 0;
+ uint64_t dmesg_timestamp = 0;
+ bool fault = false;
+
+ FILE *p = popen("dmesg", "r");
+ if (!p)
+ return false;
+
+ while (fgets(line, sizeof(line), p)) {
+ char *msg;
+
+ if (!line[0] || line[0] == '\n')
+ continue;
+
+ /* Get the timestamp. */
+ if (sscanf(line, "[%u.%u]", &sec, &usec) != 2) {
+ static bool hit = false;
+ if (!hit) {
+ fprintf(stderr, "%s: failed to parse line '%s'\n",
+ __func__, line);
+ hit = true;
+ }
+ continue;
+ }
+ dmesg_timestamp = sec * 1000000ull + usec;
+
+ /* If just updating the timestamp. */
+ if (!out_addr)
+ continue;
+
+ /* Process messages only if the timestamp is newer. */
+ if (dmesg_timestamp <= *old_dmesg_timestamp)
+ continue;
+
+ /* Only process the first VM fault. */
+ if (fault)
+ continue;
+
+ /* Remove trailing \n; size_t because a line can hold up to 1999 chars. */
+ size_t len = strlen(line);
+ if (len && line[len-1] == '\n')
+ line[len-1] = 0;
+
+ /* Get the message part. */
+ msg = strchr(line, ']');
+ if (!msg)
+ continue;
+ msg++;
+
+ const char *header_line, *addr_line_prefix, *addr_line_format;
+
+ if (chip_class >= GFX9) {
+ /* Match this:
+ * ..: [gfxhub] VMC page fault (src_id:0 ring:158 vm_id:2 pas_id:0)
+ * ..: at page 0x0000000219f8f000 from 27
+ * ..: VM_L2_PROTECTION_FAULT_STATUS:0x0020113C
+ */
+ header_line = "VMC page fault";
+ addr_line_prefix = " at page";
+ addr_line_format = "%"PRIx64;
+ } else {
+ header_line = "GPU fault detected:";
+ addr_line_prefix = "VM_CONTEXT1_PROTECTION_FAULT_ADDR";
+ addr_line_format = "%"PRIX64;
+ }
+
+ /* progress 0: looking for the fault header line;
+ * progress 1: the line after the header must carry the address. */
+ switch (progress) {
+ case 0:
+ if (strstr(msg, header_line))
+ progress = 1;
break;
- case 2:
- /* type-2 nop */
- if (ib[0] == 0x80000000) {
- fprintf(f, COLOR_GREEN "NOP (type 2)" COLOR_RESET "\n");
- ib++;
- num_dw--;
- break;
+ case 1:
+ msg = strstr(msg, addr_line_prefix);
+ if (msg) {
+ msg = strstr(msg, "0x");
+ if (msg) {
+ msg += 2;
+ if (sscanf(msg, addr_line_format, out_addr) == 1)
+ fault = true;
+ }
}
- /* fall through */
+ progress = 0;
+ break;
default:
- fprintf(f, "Unknown packet type %i\n", type);
- return;
+ progress = 0;
}
}
+ pclose(p);
+
+ if (dmesg_timestamp > *old_dmesg_timestamp)
+ *old_dmesg_timestamp = dmesg_timestamp;
+
+ return fault;
+}
+
+/* qsort() comparator: orders waves by PC, then SE, SH, CU, SIMD and wave.
+ * Returns -1, 0 or 1. */
+static int compare_wave(const void *p1, const void *p2)
+{
+ /* Keep the const qualifier of the qsort arguments. */
+ const struct ac_wave_info *w1 = (const struct ac_wave_info *)p1;
+ const struct ac_wave_info *w2 = (const struct ac_wave_info *)p2;
+
+ /* Sort waves according to PC and then SE, SH, CU, etc. */
+ if (w1->pc < w2->pc)
+ return -1;
+ if (w1->pc > w2->pc)
+ return 1;
+ if (w1->se < w2->se)
+ return -1;
+ if (w1->se > w2->se)
+ return 1;
+ if (w1->sh < w2->sh)
+ return -1;
+ if (w1->sh > w2->sh)
+ return 1;
+ if (w1->cu < w2->cu)
+ return -1;
+ if (w1->cu > w2->cu)
+ return 1;
+ if (w1->simd < w2->simd)
+ return -1;
+ if (w1->simd > w2->simd)
+ return 1;
+ if (w1->wave < w2->wave)
+ return -1;
+ if (w1->wave > w2->wave)
+ return 1;
+
+ return 0;
+}
+
+/* Return wave information. "waves" should be a large enough array.
+ *
+ * Runs "umr -wa" and parses its output: the first line must start with "SE",
+ * and every following line that yields all 12 fields becomes one entry.
+ * At most AC_MAX_WAVES_PER_CHIP entries are stored. The entries are sorted
+ * with compare_wave() and their count is returned (0 on failure).
+ */
+unsigned ac_get_wave_info(struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP])
+{
+ char line[2000];
+ unsigned num_waves = 0;
+
+ FILE *p = popen("umr -wa", "r");
+ if (!p)
+ return 0;
+
+ if (!fgets(line, sizeof(line), p) ||
+ strncmp(line, "SE", 2) != 0) {
+ pclose(p);
+ return 0;
+ }
- fprintf(f, "------------------- %s end -------------------\n", name);
- if (num_dw < 0) {
- printf("Packet ends after the end of IB.\n");
- exit(0);
+ while (fgets(line, sizeof(line), p)) {
+ struct ac_wave_info *w;
+ uint32_t pc_hi, pc_lo, exec_hi, exec_lo;
+
+ assert(num_waves < AC_MAX_WAVES_PER_CHIP);
+ /* assert() is compiled out with NDEBUG; never write past "waves". */
+ if (num_waves >= AC_MAX_WAVES_PER_CHIP)
+ break;
+ w = &waves[num_waves];
+
+ if (sscanf(line, "%u %u %u %u %u %x %x %x %x %x %x %x",
+ &w->se, &w->sh, &w->cu, &w->simd, &w->wave,
+ &w->status, &pc_hi, &pc_lo, &w->inst_dw0,
+ &w->inst_dw1, &exec_hi, &exec_lo) == 12) {
+ w->pc = ((uint64_t)pc_hi << 32) | pc_lo;
+ w->exec = ((uint64_t)exec_hi << 32) | exec_lo;
+ w->matched = false;
+ num_waves++;
+ }
}
- fprintf(f, "\n");
+
+ qsort(waves, num_waves, sizeof(struct ac_wave_info), compare_wave);
+
+ pclose(p);
+ return num_waves;
}
diff -Nru mesa-17.2.4/src/amd/common/ac_debug.h mesa-17.3.3/src/amd/common/ac_debug.h
--- mesa-17.2.4/src/amd/common/ac_debug.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/common/ac_debug.h 2018-01-18 21:30:28.000000000 +0000
@@ -28,6 +28,7 @@
#include <stdint.h>
#include <stdio.h>
+#include <stdbool.h>
#include "amd_family.h"
@@ -35,12 +36,36 @@
#define AC_IS_TRACE_POINT(x) (((x) & 0xcafe0000) == 0xcafe0000)
#define AC_GET_TRACE_POINT_ID(x) ((x) & 0xffff)
+#define AC_MAX_WAVES_PER_CHIP (64 * 40)
+
+/* One wave as filled in by ac_get_wave_info(). */
+struct ac_wave_info {
+ unsigned se; /* shader engine */
+ unsigned sh; /* shader array */
+ unsigned cu; /* compute unit */
+ unsigned simd;
+ unsigned wave;
+ uint32_t status;
+ uint64_t pc; /* program counter */
+ uint32_t inst_dw0;
+ uint32_t inst_dw1;
+ uint64_t exec;
+ bool matched; /* whether the wave is used by a currently-bound shader */
+};
+
typedef void *(*ac_debug_addr_callback)(void *data, uint64_t addr);
-void ac_dump_reg(FILE *file, unsigned offset, uint32_t value,
- uint32_t field_mask);
-void ac_parse_ib(FILE *f, uint32_t *ib, int num_dw, int trace_id,
- const char *name, enum chip_class chip_class,
+void ac_dump_reg(FILE *file, enum chip_class chip_class, unsigned offset,
+ uint32_t value, uint32_t field_mask);
+void ac_parse_ib_chunk(FILE *f, uint32_t *ib, int num_dw, const int *trace_ids,
+ unsigned trace_id_count, enum chip_class chip_class,
+ ac_debug_addr_callback addr_callback, void *addr_callback_data);
+void ac_parse_ib(FILE *f, uint32_t *ib, int num_dw, const int *trace_ids,
+ unsigned trace_id_count, const char *name, enum chip_class chip_class,
ac_debug_addr_callback addr_callback, void *addr_callback_data);
+bool ac_vm_fault_occured(enum chip_class chip_class,
+ uint64_t *old_dmesg_timestamp, uint64_t *out_addr);
+
+unsigned ac_get_wave_info(struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP]);
+
#endif
diff -Nru mesa-17.2.4/src/amd/common/ac_gpu_info.c mesa-17.3.3/src/amd/common/ac_gpu_info.c
--- mesa-17.2.4/src/amd/common/ac_gpu_info.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/common/ac_gpu_info.c 2018-01-18 21:30:28.000000000 +0000
@@ -100,7 +100,6 @@
struct amdgpu_heap_info vram, vram_vis, gtt;
struct drm_amdgpu_info_hw_ip dma = {}, compute = {}, uvd = {}, vce = {}, vcn_dec = {};
uint32_t vce_version = 0, vce_feature = 0, uvd_version = 0, uvd_feature = 0;
- uint32_t unused_feature;
int r, i, j;
drmDevicePtr devinfo;
@@ -176,21 +175,24 @@
}
r = amdgpu_query_firmware_version(dev, AMDGPU_INFO_FW_GFX_ME, 0, 0,
- &info->me_fw_version, &unused_feature);
+ &info->me_fw_version,
+ &info->me_fw_feature);
if (r) {
fprintf(stderr, "amdgpu: amdgpu_query_firmware_version(me) failed.\n");
return false;
}
r = amdgpu_query_firmware_version(dev, AMDGPU_INFO_FW_GFX_PFP, 0, 0,
- &info->pfp_fw_version, &unused_feature);
+ &info->pfp_fw_version,
+ &info->pfp_fw_feature);
if (r) {
fprintf(stderr, "amdgpu: amdgpu_query_firmware_version(pfp) failed.\n");
return false;
}
r = amdgpu_query_firmware_version(dev, AMDGPU_INFO_FW_GFX_CE, 0, 0,
- &info->ce_fw_version, &unused_feature);
+ &info->ce_fw_version,
+ &info->ce_fw_feature);
if (r) {
fprintf(stderr, "amdgpu: amdgpu_query_firmware_version(ce) failed.\n");
return false;
@@ -267,6 +269,8 @@
vce.available_rings ? vce_version : 0;
info->has_userptr = true;
info->has_syncobj = has_syncobj(fd);
+ info->has_sync_file = info->has_syncobj && info->drm_minor >= 21;
+ info->has_ctx_priority = info->drm_minor >= 22;
info->num_render_backends = amdinfo->rb_pipes;
info->clock_crystal_freq = amdinfo->gpu_counter_freq;
if (!info->clock_crystal_freq) {
@@ -314,3 +318,30 @@
return true;
}
+/* Fill "uuid" ("size" bytes) with the fixed string "AMD-MESA-DRV",
+ * zero-padded. "size" must be able to hold the string and its terminator. */
+void ac_compute_driver_uuid(char *uuid, size_t size)
+{
+ char amd_uuid[] = "AMD-MESA-DRV";
+
+ assert(size >= sizeof(amd_uuid));
+
+ /* Zero first so that every byte after the string is 0. */
+ memset(uuid, 0, size);
+ strncpy(uuid, amd_uuid, size);
+}
+
+/* Fill "uuid" ("size" bytes, at least 16) with the device's PCI domain, bus,
+ * device and function as four consecutive 32-bit values; the rest is 0. */
+void ac_compute_device_uuid(struct radeon_info *info, char *uuid, size_t size)
+{
+ const uint32_t pci_location[4] = {
+ info->pci_domain,
+ info->pci_bus,
+ info->pci_dev,
+ info->pci_func,
+ };
+
+ assert(size >= sizeof(pci_location));
+
+ /**
+ * Use the device info directly instead of using a sha1. GL/VK UUIDs
+ * are 16 byte vs 20 byte for sha1, and the truncation that would be
+ * required would get rid of part of the little entropy we have.
+ * */
+ memset(uuid, 0, size);
+ /* memcpy rather than a uint32_t* cast: "uuid" is a char buffer with no
+ * alignment guarantee. */
+ memcpy(uuid, pci_location, sizeof(pci_location));
+}
diff -Nru mesa-17.2.4/src/amd/common/ac_gpu_info.h mesa-17.3.3/src/amd/common/ac_gpu_info.h
--- mesa-17.2.4/src/amd/common/ac_gpu_info.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/common/ac_gpu_info.h 2018-01-18 21:30:28.000000000 +0000
@@ -26,6 +26,7 @@
#ifndef AC_GPU_INFO_H
#define AC_GPU_INFO_H
+#include <stddef.h>
#include <stdint.h>
#include <stdbool.h>
#include "amd_family.h"
@@ -65,8 +66,11 @@
uint32_t uvd_fw_version;
uint32_t vce_fw_version;
uint32_t me_fw_version;
+ uint32_t me_fw_feature;
uint32_t pfp_fw_version;
+ uint32_t pfp_fw_feature;
uint32_t ce_fw_version;
+ uint32_t ce_fw_feature;
uint32_t vce_harvest_config;
uint32_t clock_crystal_freq;
uint32_t tcc_cache_line_size;
@@ -77,6 +81,8 @@
uint32_t drm_patchlevel;
bool has_userptr;
bool has_syncobj;
+ bool has_sync_file;
+ bool has_ctx_priority;
/* Shader cores. */
uint32_t r600_max_quad_pipes; /* wave size / 16 */
@@ -106,6 +112,10 @@
struct radeon_info *info,
struct amdgpu_gpu_info *amdinfo);
+void ac_compute_driver_uuid(char *uuid, size_t size);
+
+void ac_compute_device_uuid(struct radeon_info *info, char *uuid, size_t size);
+
#ifdef __cplusplus
}
#endif
diff -Nru mesa-17.2.4/src/amd/common/ac_llvm_build.c mesa-17.3.3/src/amd/common/ac_llvm_build.c
--- mesa-17.2.4/src/amd/common/ac_llvm_build.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/common/ac_llvm_build.c 2018-01-18 21:30:28.000000000 +0000
@@ -36,6 +36,7 @@
#include "ac_exp_param.h"
#include "util/bitscan.h"
#include "util/macros.h"
+#include "util/u_atomic.h"
#include "sid.h"
#include "shader_enums.h"
@@ -91,6 +92,92 @@
ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0);
}
+/* Return the size of "type" in bytes.
+ *
+ * Integers use their bit width divided by 8 (so widths below 8 give 0),
+ * floats are 4, doubles and pointers are 8, and vectors/arrays are the
+ * element count times the element size. Any other kind asserts and
+ * returns 0.
+ */
+unsigned
+ac_get_type_size(LLVMTypeRef type)
+{
+ LLVMTypeKind kind = LLVMGetTypeKind(type);
+
+ switch (kind) {
+ case LLVMIntegerTypeKind:
+ return LLVMGetIntTypeWidth(type) / 8;
+ case LLVMFloatTypeKind:
+ return 4;
+ case LLVMDoubleTypeKind:
+ case LLVMPointerTypeKind:
+ /* NOTE(review): every pointer is counted as 8 bytes regardless of
+ * address space - confirm this holds for all callers. */
+ return 8;
+ case LLVMVectorTypeKind:
+ return LLVMGetVectorSize(type) *
+ ac_get_type_size(LLVMGetElementType(type));
+ case LLVMArrayTypeKind:
+ return LLVMGetArrayLength(type) *
+ ac_get_type_size(LLVMGetElementType(type));
+ default:
+ assert(0);
+ return 0;
+ }
+}
+
+/* Map a 16/32/64-bit scalar float or integer type to the integer type of the
+ * same width. Any other type hits unreachable(). */
+static LLVMTypeRef to_integer_type_scalar(struct ac_llvm_context *ctx, LLVMTypeRef t)
+{
+ if (t == ctx->f16 || t == ctx->i16)
+ return ctx->i16;
+ else if (t == ctx->f32 || t == ctx->i32)
+ return ctx->i32;
+ else if (t == ctx->f64 || t == ctx->i64)
+ return ctx->i64;
+ else
+ unreachable("Unhandled integer size");
+}
+
+/* Return the integer type matching "t": for a vector, a vector of the same
+ * length with integer elements; otherwise the scalar integer type. */
+LLVMTypeRef
+ac_to_integer_type(struct ac_llvm_context *ctx, LLVMTypeRef t)
+{
+ if (LLVMGetTypeKind(t) == LLVMVectorTypeKind) {
+ LLVMTypeRef elem_type = LLVMGetElementType(t);
+ return LLVMVectorType(to_integer_type_scalar(ctx, elem_type),
+ LLVMGetVectorSize(t));
+ }
+ return to_integer_type_scalar(ctx, t);
+}
+
+/* Bitcast "v" to the integer type chosen by ac_to_integer_type(). */
+LLVMValueRef
+ac_to_integer(struct ac_llvm_context *ctx, LLVMValueRef v)
+{
+ LLVMTypeRef type = LLVMTypeOf(v);
+ return LLVMBuildBitCast(ctx->builder, v, ac_to_integer_type(ctx, type), "");
+}
+
+/* Map a 16/32/64-bit scalar integer or float type to the float type of the
+ * same width. Any other type hits unreachable(). */
+static LLVMTypeRef to_float_type_scalar(struct ac_llvm_context *ctx, LLVMTypeRef t)
+{
+ if (t == ctx->i16 || t == ctx->f16)
+ return ctx->f16;
+ else if (t == ctx->i32 || t == ctx->f32)
+ return ctx->f32;
+ else if (t == ctx->i64 || t == ctx->f64)
+ return ctx->f64;
+ else
+ unreachable("Unhandled float size");
+}
+
+/* Return the float type matching "t": for a vector, a vector of the same
+ * length with float elements; otherwise the scalar float type. */
+LLVMTypeRef
+ac_to_float_type(struct ac_llvm_context *ctx, LLVMTypeRef t)
+{
+ if (LLVMGetTypeKind(t) == LLVMVectorTypeKind) {
+ LLVMTypeRef elem_type = LLVMGetElementType(t);
+ return LLVMVectorType(to_float_type_scalar(ctx, elem_type),
+ LLVMGetVectorSize(t));
+ }
+ return to_float_type_scalar(ctx, t);
+}
+
+/* Bitcast "v" to the float type chosen by ac_to_float_type(). */
+LLVMValueRef
+ac_to_float(struct ac_llvm_context *ctx, LLVMValueRef v)
+{
+ LLVMTypeRef type = LLVMTypeOf(v);
+ return LLVMBuildBitCast(ctx->builder, v, ac_to_float_type(ctx, type), "");
+}
+
+
LLVMValueRef
ac_build_intrinsic(struct ac_llvm_context *ctx, const char *name,
LLVMTypeRef return_type, LLVMValueRef *params,
@@ -128,20 +215,6 @@
return call;
}
-static LLVMValueRef bitcast_to_float(struct ac_llvm_context *ctx,
- LLVMValueRef value)
-{
- LLVMTypeRef type = LLVMTypeOf(value);
- LLVMTypeRef new_type;
-
- if (LLVMGetTypeKind(type) == LLVMVectorTypeKind)
- new_type = LLVMVectorType(ctx->f32, LLVMGetVectorSize(type));
- else
- new_type = ctx->f32;
-
- return LLVMBuildBitCast(ctx->builder, value, new_type, "");
-}
-
/**
* Given the i32 or vNi32 \p type, generate the textual name (e.g. for use with
* intrinsic names).
@@ -193,18 +266,117 @@
return phi;
}
+/* Prevent optimizations (at least of memory accesses) across the current
+ * point in the program by emitting empty inline assembly that is marked as
+ * having side effects.
+ *
+ * Optionally, a value can be passed through the inline assembly to prevent
+ * LLVM from hoisting calls to ReadNone functions.
+ *
+ * \param pvgpr NULL, or a pointer to a value whose size is a multiple of 4
+ * bytes; on return it holds the value after the barrier.
+ */
+void
+ac_build_optimization_barrier(struct ac_llvm_context *ctx,
+ LLVMValueRef *pvgpr)
+{
+ static int counter = 0;
+
+ LLVMBuilderRef builder = ctx->builder;
+ char code[16];
+
+ /* The asm text is just a comment carrying a fresh counter value.
+ * NOTE(review): presumably this keeps separate barriers from being
+ * treated as identical - confirm. */
+ snprintf(code, sizeof(code), "; %d", p_atomic_inc_return(&counter));
+
+ if (!pvgpr) {
+ LLVMTypeRef ftype = LLVMFunctionType(ctx->voidt, NULL, 0, false);
+ LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, "", true, false);
+ LLVMBuildCall(builder, inlineasm, NULL, 0, "");
+ } else {
+ LLVMTypeRef ftype = LLVMFunctionType(ctx->i32, &ctx->i32, 1, false);
+ LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, "=v,0", true, false);
+ LLVMValueRef vgpr = *pvgpr;
+ LLVMTypeRef vgpr_type = LLVMTypeOf(vgpr);
+ unsigned vgpr_size = ac_get_type_size(vgpr_type);
+ LLVMValueRef vgpr0;
+
+ assert(vgpr_size % 4 == 0);
+
+ /* View the value as a vector of i32 and route only element 0 through
+ * the asm, then restore the original type. */
+ vgpr = LLVMBuildBitCast(builder, vgpr, LLVMVectorType(ctx->i32, vgpr_size / 4), "");
+ vgpr0 = LLVMBuildExtractElement(builder, vgpr, ctx->i32_0, "");
+ vgpr0 = LLVMBuildCall(builder, inlineasm, &vgpr0, 1, "");
+ vgpr = LLVMBuildInsertElement(builder, vgpr, vgpr0, ctx->i32_0, "");
+ vgpr = LLVMBuildBitCast(builder, vgpr, vgpr_type, "");
+
+ *pvgpr = vgpr;
+ }
+}
+
+/* Emit llvm.amdgcn.icmp.i32 comparing "value" != 0 and return its i64 result.
+ * NOTE(review): presumably the result is a per-lane bitmask - confirm against
+ * the intrinsic's documentation. */
+LLVMValueRef
+ac_build_ballot(struct ac_llvm_context *ctx,
+ LLVMValueRef value)
+{
+ LLVMValueRef args[3] = {
+ value,
+ ctx->i32_0,
+ LLVMConstInt(ctx->i32, LLVMIntNE, 0)
+ };
+
+ /* We currently have no other way to prevent LLVM from lifting the icmp
+ * calls to a dominating basic block.
+ */
+ ac_build_optimization_barrier(ctx, &args[0]);
+
+ /* The intrinsic takes an i32; bitcast anything else. */
+ if (LLVMTypeOf(args[0]) != ctx->i32)
+ args[0] = LLVMBuildBitCast(ctx->builder, args[0], ctx->i32, "");
+
+ return ac_build_intrinsic(ctx,
+ "llvm.amdgcn.icmp.i32",
+ ctx->i64, args, 3,
+ AC_FUNC_ATTR_NOUNWIND |
+ AC_FUNC_ATTR_READNONE |
+ AC_FUNC_ATTR_CONVERGENT);
+}
+
+/* Return whether the ballot of "value" equals the ballot of the constant 1. */
+LLVMValueRef
+ac_build_vote_all(struct ac_llvm_context *ctx, LLVMValueRef value)
+{
+ LLVMValueRef active_set = ac_build_ballot(ctx, ctx->i32_1);
+ LLVMValueRef vote_set = ac_build_ballot(ctx, value);
+ return LLVMBuildICmp(ctx->builder, LLVMIntEQ, vote_set, active_set, "");
+}
+
+/* Return whether the ballot of "value" is non-zero. */
+LLVMValueRef
+ac_build_vote_any(struct ac_llvm_context *ctx, LLVMValueRef value)
+{
+ LLVMValueRef vote_set = ac_build_ballot(ctx, value);
+ return LLVMBuildICmp(ctx->builder, LLVMIntNE, vote_set,
+ LLVMConstInt(ctx->i64, 0, 0), "");
+}
+
+/* Return whether the ballot of "value" is either equal to the ballot of the
+ * constant 1 ("all") or zero ("none"). */
+LLVMValueRef
+ac_build_vote_eq(struct ac_llvm_context *ctx, LLVMValueRef value)
+{
+ LLVMValueRef active_set = ac_build_ballot(ctx, ctx->i32_1);
+ LLVMValueRef vote_set = ac_build_ballot(ctx, value);
+
+ LLVMValueRef all = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
+ vote_set, active_set, "");
+ LLVMValueRef none = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
+ vote_set,
+ LLVMConstInt(ctx->i64, 0, 0), "");
+ return LLVMBuildOr(ctx->builder, all, none, "");
+}
+
LLVMValueRef
ac_build_gather_values_extended(struct ac_llvm_context *ctx,
LLVMValueRef *values,
unsigned value_count,
unsigned value_stride,
- bool load)
+ bool load,
+ bool always_vector)
{
LLVMBuilderRef builder = ctx->builder;
LLVMValueRef vec = NULL;
unsigned i;
- if (value_count == 1) {
+ if (value_count == 1 && !always_vector) {
if (load)
return LLVMBuildLoad(builder, values[0], "");
return values[0];
@@ -229,7 +401,7 @@
LLVMValueRef *values,
unsigned value_count)
{
- return ac_build_gather_values_extended(ctx, values, value_count, 1, false);
+ return ac_build_gather_values_extended(ctx, values, value_count, 1, false, false);
}
LLVMValueRef
@@ -280,12 +452,13 @@
* selcoords.ma; i.e., a positive out_ma means that coords is pointed towards
* the selcoords major axis.
*/
-static void build_cube_select(LLVMBuilderRef builder,
+static void build_cube_select(struct ac_llvm_context *ctx,
const struct cube_selection_coords *selcoords,
const LLVMValueRef *coords,
LLVMValueRef *out_st,
LLVMValueRef *out_ma)
{
+ LLVMBuilderRef builder = ctx->builder;
LLVMTypeRef f32 = LLVMTypeOf(coords[0]);
LLVMValueRef is_ma_positive;
LLVMValueRef sgn_ma;
@@ -322,9 +495,9 @@
/* Select ma */
tmp = LLVMBuildSelect(builder, is_ma_z, coords[2],
LLVMBuildSelect(builder, is_ma_y, coords[1], coords[0], ""), "");
- sgn = LLVMBuildSelect(builder, is_ma_positive,
- LLVMConstReal(f32, 2.0), LLVMConstReal(f32, -2.0), "");
- *out_ma = LLVMBuildFMul(builder, tmp, sgn, "");
+ tmp = ac_build_intrinsic(ctx, "llvm.fabs.f32",
+ ctx->f32, &tmp, 1, AC_FUNC_ATTR_READNONE);
+ *out_ma = LLVMBuildFMul(builder, tmp, LLVMConstReal(f32, 2.0), "");
}
void
@@ -412,7 +585,7 @@
* seems awfully quiet about how textureGrad for cube
* maps should be handled.
*/
- build_cube_select(builder, &selcoords, &derivs_arg[axis * 3],
+ build_cube_select(ctx, &selcoords, &derivs_arg[axis * 3],
deriv_st, &deriv_ma);
deriv_ma = LLVMBuildFMul(builder, deriv_ma, invma, "");
@@ -544,32 +717,40 @@
* \param base_ptr Where the array starts.
* \param index The element index into the array.
* \param uniform Whether the base_ptr and index can be assumed to be
- * dynamically uniform
+ * dynamically uniform (i.e. load to an SGPR)
+ * \param invariant Whether the load is invariant (no other opcodes affect it)
*/
-LLVMValueRef
-ac_build_indexed_load(struct ac_llvm_context *ctx,
- LLVMValueRef base_ptr, LLVMValueRef index,
- bool uniform)
+static LLVMValueRef
+ac_build_load_custom(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
+ LLVMValueRef index, bool uniform, bool invariant)
{
- LLVMValueRef pointer;
+ LLVMValueRef pointer, result;
pointer = ac_build_gep0(ctx, base_ptr, index);
if (uniform)
LLVMSetMetadata(pointer, ctx->uniform_md_kind, ctx->empty_md);
- return LLVMBuildLoad(ctx->builder, pointer, "");
+ result = LLVMBuildLoad(ctx->builder, pointer, "");
+ if (invariant)
+ LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md);
+ return result;
}
-/**
- * Do a load from &base_ptr[index], but also add a flag that it's loading
- * a constant from a dynamically uniform index.
- */
-LLVMValueRef
-ac_build_indexed_load_const(struct ac_llvm_context *ctx,
- LLVMValueRef base_ptr, LLVMValueRef index)
+LLVMValueRef ac_build_load(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
+ LLVMValueRef index)
{
- LLVMValueRef result = ac_build_indexed_load(ctx, base_ptr, index, true);
- LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md);
- return result;
+ return ac_build_load_custom(ctx, base_ptr, index, false, false);
+}
+
+LLVMValueRef ac_build_load_invariant(struct ac_llvm_context *ctx,
+ LLVMValueRef base_ptr, LLVMValueRef index)
+{
+ return ac_build_load_custom(ctx, base_ptr, index, false, true);
+}
+
+LLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx,
+ LLVMValueRef base_ptr, LLVMValueRef index)
+{
+ return ac_build_load_custom(ctx, base_ptr, index, true, true);
}
/* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4.
@@ -587,10 +768,13 @@
bool glc,
bool slc,
bool writeonly_memory,
- bool has_add_tid)
+ bool swizzle_enable_hint)
{
- /* TODO: Fix stores with ADD_TID and remove the "has_add_tid" flag. */
- if (!has_add_tid) {
+ /* SWIZZLE_ENABLE requires that soffset isn't folded into voffset
+ * (voffset is swizzled, but soffset isn't swizzled).
+ * llvm.amdgcn.buffer.store doesn't have a separate soffset parameter.
+ */
+ if (!swizzle_enable_hint) {
/* Split 3 channel stores, becase LLVM doesn't support 3-channel
* intrinsics. */
if (num_channels == 3) {
@@ -604,11 +788,11 @@
ac_build_buffer_store_dword(ctx, rsrc, v01, 2, voffset,
soffset, inst_offset, glc, slc,
- writeonly_memory, has_add_tid);
+ writeonly_memory, swizzle_enable_hint);
ac_build_buffer_store_dword(ctx, rsrc, v[2], 1, voffset,
soffset, inst_offset + 8,
glc, slc,
- writeonly_memory, has_add_tid);
+ writeonly_memory, swizzle_enable_hint);
return;
}
@@ -624,7 +808,7 @@
offset = LLVMBuildAdd(ctx->builder, offset, voffset, "");
LLVMValueRef args[] = {
- bitcast_to_float(ctx, vdata),
+ ac_to_float(ctx, vdata),
LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
LLVMConstInt(ctx->i32, 0, 0),
offset,
@@ -841,7 +1025,6 @@
*/
LLVMValueRef
ac_build_ddxy(struct ac_llvm_context *ctx,
- bool has_ds_bpermute,
uint32_t mask,
int idx,
LLVMValueRef val)
@@ -849,7 +1032,7 @@
LLVMValueRef tl, trbl, args[2];
LLVMValueRef result;
- if (has_ds_bpermute) {
+ if (ctx->chip_class >= VI) {
LLVMValueRef thread_id, tl_tid, trbl_tid;
thread_id = ac_get_thread_id(ctx);
@@ -876,7 +1059,7 @@
AC_FUNC_ATTR_READNONE |
AC_FUNC_ATTR_CONVERGENT);
} else {
- uint32_t masks[2];
+ uint32_t masks[2] = {};
switch (mask) {
case AC_TID_MASK_TOP_LEFT:
@@ -895,6 +1078,8 @@
masks[0] = 0x80a0;
masks[1] = 0x80f5;
break;
+ default:
+ assert(0);
}
args[0] = val;
@@ -983,6 +1168,13 @@
LLVMConstInt(ctx->i32, -1, true), msb, "");
}
+/* Unsigned minimum: select "a" when a <= b (unsigned compare), else "b". */
+LLVMValueRef ac_build_umin(struct ac_llvm_context *ctx, LLVMValueRef a,
+ LLVMValueRef b)
+{
+ LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntULE, a, b, "");
+ return LLVMBuildSelect(ctx->builder, cmp, a, b, "");
+}
+
LLVMValueRef ac_build_clamp(struct ac_llvm_context *ctx, LLVMValueRef value)
{
if (HAVE_LLVM >= 0x0500) {
@@ -1074,7 +1266,7 @@
a->opcode == ac_image_get_lod;
if (sample)
- args[num_args++] = bitcast_to_float(ctx, a->addr);
+ args[num_args++] = ac_to_float(ctx, a->addr);
else
args[num_args++] = a->addr;
@@ -1542,3 +1734,11 @@
*num_param_exports = exports.num;
}
}
+
+/* Emit a call to llvm.amdgcn.init.exec with an all-ones 64-bit mask. */
+void ac_init_exec_full_mask(struct ac_llvm_context *ctx)
+{
+ LLVMValueRef full_mask = LLVMConstInt(ctx->i64, ~0ull, 0);
+ ac_build_intrinsic(ctx,
+ "llvm.amdgcn.init.exec", ctx->voidt,
+ &full_mask, 1, AC_FUNC_ATTR_CONVERGENT);
+}
diff -Nru mesa-17.2.4/src/amd/common/ac_llvm_build.h mesa-17.3.3/src/amd/common/ac_llvm_build.h
--- mesa-17.2.4/src/amd/common/ac_llvm_build.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/common/ac_llvm_build.h 2018-01-18 21:30:28.000000000 +0000
@@ -71,6 +71,13 @@
ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context,
enum chip_class chip_class);
+unsigned ac_get_type_size(LLVMTypeRef type);
+
+LLVMTypeRef ac_to_integer_type(struct ac_llvm_context *ctx, LLVMTypeRef t);
+LLVMValueRef ac_to_integer(struct ac_llvm_context *ctx, LLVMValueRef v);
+LLVMTypeRef ac_to_float_type(struct ac_llvm_context *ctx, LLVMTypeRef t);
+LLVMValueRef ac_to_float(struct ac_llvm_context *ctx, LLVMValueRef v);
+
LLVMValueRef
ac_build_intrinsic(struct ac_llvm_context *ctx, const char *name,
LLVMTypeRef return_type, LLVMValueRef *params,
@@ -83,12 +90,24 @@
unsigned count_incoming, LLVMValueRef *values,
LLVMBasicBlockRef *blocks);
+void ac_build_optimization_barrier(struct ac_llvm_context *ctx,
+ LLVMValueRef *pvgpr);
+
+LLVMValueRef ac_build_ballot(struct ac_llvm_context *ctx, LLVMValueRef value);
+
+LLVMValueRef ac_build_vote_all(struct ac_llvm_context *ctx, LLVMValueRef value);
+
+LLVMValueRef ac_build_vote_any(struct ac_llvm_context *ctx, LLVMValueRef value);
+
+LLVMValueRef ac_build_vote_eq(struct ac_llvm_context *ctx, LLVMValueRef value);
+
LLVMValueRef
ac_build_gather_values_extended(struct ac_llvm_context *ctx,
LLVMValueRef *values,
unsigned value_count,
unsigned value_stride,
- bool load);
+ bool load,
+ bool always_vector);
LLVMValueRef
ac_build_gather_values(struct ac_llvm_context *ctx,
LLVMValueRef *values,
@@ -131,14 +150,12 @@
LLVMValueRef base_ptr, LLVMValueRef index,
LLVMValueRef value);
-LLVMValueRef
-ac_build_indexed_load(struct ac_llvm_context *ctx,
- LLVMValueRef base_ptr, LLVMValueRef index,
- bool uniform);
-
-LLVMValueRef
-ac_build_indexed_load_const(struct ac_llvm_context *ctx,
- LLVMValueRef base_ptr, LLVMValueRef index);
+LLVMValueRef ac_build_load(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
+ LLVMValueRef index);
+LLVMValueRef ac_build_load_invariant(struct ac_llvm_context *ctx,
+ LLVMValueRef base_ptr, LLVMValueRef index);
+LLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx,
+ LLVMValueRef base_ptr, LLVMValueRef index);
void
ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
@@ -151,7 +168,7 @@
bool glc,
bool slc,
bool writeonly_memory,
- bool has_add_tid);
+ bool swizzle_enable_hint);
LLVMValueRef
ac_build_buffer_load(struct ac_llvm_context *ctx,
LLVMValueRef rsrc,
@@ -180,7 +197,6 @@
LLVMValueRef
ac_build_ddxy(struct ac_llvm_context *ctx,
- bool has_ds_bpermute,
uint32_t mask,
int idx,
LLVMValueRef val);
@@ -205,6 +221,7 @@
LLVMValueRef arg,
LLVMTypeRef dst_type);
+LLVMValueRef ac_build_umin(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b);
LLVMValueRef ac_build_clamp(struct ac_llvm_context *ctx, LLVMValueRef value);
struct ac_export_args {
@@ -264,6 +281,7 @@
uint8_t *vs_output_param_offset,
uint32_t num_outputs,
uint8_t *num_param_exports);
+void ac_init_exec_full_mask(struct ac_llvm_context *ctx);
#ifdef __cplusplus
}
#endif
diff -Nru mesa-17.2.4/src/amd/common/ac_nir_to_llvm.c mesa-17.3.3/src/amd/common/ac_nir_to_llvm.c
--- mesa-17.2.4/src/amd/common/ac_nir_to_llvm.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/common/ac_nir_to_llvm.c 2018-01-18 21:30:28.000000000 +0000
@@ -30,6 +30,7 @@
#include "../vulkan/radv_descriptor_set.h"
#include "util/bitscan.h"
#include
+#include "ac_shader_abi.h"
#include "ac_shader_info.h"
#include "ac_exp_param.h"
@@ -38,6 +39,7 @@
RADEON_LLVM_AMDGPU_GS = 88,
RADEON_LLVM_AMDGPU_PS = 89,
RADEON_LLVM_AMDGPU_CS = 90,
+ RADEON_LLVM_AMDGPU_HS = 93,
};
#define CONST_ADDR_SPACE 2
@@ -46,17 +48,37 @@
#define RADEON_LLVM_MAX_INPUTS (VARYING_SLOT_VAR31 + 1)
#define RADEON_LLVM_MAX_OUTPUTS (VARYING_SLOT_VAR31 + 1)
-enum desc_type {
- DESC_IMAGE,
- DESC_FMASK,
- DESC_SAMPLER,
- DESC_BUFFER,
+struct nir_to_llvm_context;
+
+/* NOTE(review): presumably the per-shader state used while translating NIR
+ * to LLVM IR; the field roles are inferred from their names only - confirm
+ * against the code that uses them. */
+struct ac_nir_context {
+ struct ac_llvm_context ac;
+ struct ac_shader_abi *abi;
+
+ gl_shader_stage stage;
+
+ struct hash_table *defs;
+ struct hash_table *phis;
+ struct hash_table *vars;
+
+ LLVMValueRef main_function;
+ LLVMBasicBlockRef continue_block;
+ LLVMBasicBlockRef break_block;
+
+ LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS * 4];
+
+ int num_locals;
+ LLVMValueRef *locals;
+
+ struct nir_to_llvm_context *nctx; /* TODO get rid of this */
};
struct nir_to_llvm_context {
struct ac_llvm_context ac;
const struct ac_nir_compiler_options *options;
struct ac_shader_variant_info *shader_info;
+ struct ac_shader_abi abi;
+ struct ac_nir_context *nir;
+
unsigned max_workgroup_size;
LLVMContextRef context;
LLVMModuleRef module;
@@ -65,24 +87,19 @@
struct hash_table *defs;
struct hash_table *phis;
- struct hash_table *vars;
LLVMValueRef descriptor_sets[AC_UD_MAX_SETS];
LLVMValueRef ring_offsets;
LLVMValueRef push_constants;
+ LLVMValueRef view_index;
LLVMValueRef num_work_groups;
LLVMValueRef workgroup_ids;
LLVMValueRef local_invocation_ids;
LLVMValueRef tg_size;
LLVMValueRef vertex_buffers;
- LLVMValueRef base_vertex;
- LLVMValueRef start_instance;
- LLVMValueRef draw_index;
- LLVMValueRef vertex_id;
LLVMValueRef rel_auto_id;
LLVMValueRef vs_prim_id;
- LLVMValueRef instance_id;
LLVMValueRef ls_out_layout;
LLVMValueRef es2gs_offset;
@@ -91,6 +108,7 @@
LLVMValueRef tcs_out_layout;
LLVMValueRef tcs_in_layout;
LLVMValueRef oc_lds;
+ LLVMValueRef merged_wave_info;
LLVMValueRef tess_factor_offset;
LLVMValueRef tcs_patch_id;
LLVMValueRef tcs_rel_ids;
@@ -115,13 +133,6 @@
LLVMValueRef sample_pos_offset;
LLVMValueRef persp_sample, persp_center, persp_centroid;
LLVMValueRef linear_sample, linear_center, linear_centroid;
- LLVMValueRef front_face;
- LLVMValueRef ancillary;
- LLVMValueRef sample_coverage;
- LLVMValueRef frag_pos[4];
-
- LLVMBasicBlockRef continue_block;
- LLVMBasicBlockRef break_block;
LLVMTypeRef i1;
LLVMTypeRef i8;
@@ -153,17 +164,12 @@
LLVMValueRef lds;
LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4];
- LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS * 4];
uint64_t input_mask;
uint64_t output_mask;
- int num_locals;
- LLVMValueRef *locals;
uint8_t num_output_clips;
uint8_t num_output_culls;
- bool has_ds_bpermute;
-
bool is_gs_copy_shader;
LLVMValueRef gs_next_vertex;
unsigned gs_max_out_vertices;
@@ -173,9 +179,18 @@
uint64_t tess_patch_outputs_written;
};
-static LLVMValueRef get_sampler_desc(struct nir_to_llvm_context *ctx,
+static inline struct nir_to_llvm_context *
+nir_to_llvm_context_from_abi(struct ac_shader_abi *abi)
+{
+ struct nir_to_llvm_context *ctx = NULL;
+ return container_of(abi, ctx, abi);
+}
+
+static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx,
const nir_deref_var *deref,
- enum desc_type desc_type);
+ enum ac_descriptor_type desc_type,
+ bool image, bool write);
+
static unsigned radeon_llvm_reg_index_soa(unsigned index, unsigned chan)
{
return (index * 4) + chan;
@@ -203,26 +218,6 @@
unreachable("illegal slot in get unique index\n");
}
-static unsigned llvm_get_type_size(LLVMTypeRef type)
-{
- LLVMTypeKind kind = LLVMGetTypeKind(type);
-
- switch (kind) {
- case LLVMIntegerTypeKind:
- return LLVMGetIntTypeWidth(type) / 8;
- case LLVMFloatTypeKind:
- return 4;
- case LLVMPointerTypeKind:
- return 8;
- case LLVMVectorTypeKind:
- return LLVMGetVectorSize(type) *
- llvm_get_type_size(LLVMGetElementType(type));
- default:
- assert(0);
- return 0;
- }
-}
-
static void set_llvm_calling_convention(LLVMValueRef func,
gl_shader_stage stage)
{
@@ -230,13 +225,15 @@
switch (stage) {
case MESA_SHADER_VERTEX:
- case MESA_SHADER_TESS_CTRL:
case MESA_SHADER_TESS_EVAL:
calling_conv = RADEON_LLVM_AMDGPU_VS;
break;
case MESA_SHADER_GEOMETRY:
calling_conv = RADEON_LLVM_AMDGPU_GS;
break;
+ case MESA_SHADER_TESS_CTRL:
+ calling_conv = HAVE_LLVM >= 0x0500 ? RADEON_LLVM_AMDGPU_HS : RADEON_LLVM_AMDGPU_VS;
+ break;
case MESA_SHADER_FRAGMENT:
calling_conv = RADEON_LLVM_AMDGPU_PS;
break;
@@ -278,7 +275,7 @@
LLVMTypeRef type, LLVMValueRef *param_ptr)
{
add_argument(info, type, param_ptr);
- info->num_sgprs_used += llvm_get_type_size(type) / 4;
+ info->num_sgprs_used += ac_get_type_size(type) / 4;
info->sgpr_count++;
}
@@ -288,7 +285,7 @@
LLVMValueRef *param_ptr)
{
add_sgpr_argument(info, type, param_ptr);
- info->num_user_sgprs_used += llvm_get_type_size(type) / 4;
+ info->num_user_sgprs_used += ac_get_type_size(type) / 4;
info->user_sgpr_count++;
}
@@ -298,7 +295,7 @@
LLVMValueRef *param_ptr)
{
add_argument(info, type, param_ptr);
- info->num_vgprs_used += llvm_get_type_size(type) / 4;
+ info->num_vgprs_used += ac_get_type_size(type) / 4;
}
static inline void
@@ -387,62 +384,6 @@
CONST_ADDR_SPACE);
}
-static LLVMTypeRef to_integer_type_scalar(struct ac_llvm_context *ctx, LLVMTypeRef t)
-{
- if (t == ctx->f16 || t == ctx->i16)
- return ctx->i16;
- else if (t == ctx->f32 || t == ctx->i32)
- return ctx->i32;
- else if (t == ctx->f64 || t == ctx->i64)
- return ctx->i64;
- else
- unreachable("Unhandled integer size");
-}
-
-static LLVMTypeRef to_integer_type(struct ac_llvm_context *ctx, LLVMTypeRef t)
-{
- if (LLVMGetTypeKind(t) == LLVMVectorTypeKind) {
- LLVMTypeRef elem_type = LLVMGetElementType(t);
- return LLVMVectorType(to_integer_type_scalar(ctx, elem_type),
- LLVMGetVectorSize(t));
- }
- return to_integer_type_scalar(ctx, t);
-}
-
-static LLVMValueRef to_integer(struct ac_llvm_context *ctx, LLVMValueRef v)
-{
- LLVMTypeRef type = LLVMTypeOf(v);
- return LLVMBuildBitCast(ctx->builder, v, to_integer_type(ctx, type), "");
-}
-
-static LLVMTypeRef to_float_type_scalar(struct ac_llvm_context *ctx, LLVMTypeRef t)
-{
- if (t == ctx->i16 || t == ctx->f16)
- return ctx->f16;
- else if (t == ctx->i32 || t == ctx->f32)
- return ctx->f32;
- else if (t == ctx->i64 || t == ctx->f64)
- return ctx->f64;
- else
- unreachable("Unhandled float size");
-}
-
-static LLVMTypeRef to_float_type(struct ac_llvm_context *ctx, LLVMTypeRef t)
-{
- if (LLVMGetTypeKind(t) == LLVMVectorTypeKind) {
- LLVMTypeRef elem_type = LLVMGetElementType(t);
- return LLVMVectorType(to_float_type_scalar(ctx, elem_type),
- LLVMGetVectorSize(t));
- }
- return to_float_type_scalar(ctx, t);
-}
-
-static LLVMValueRef to_float(struct ac_llvm_context *ctx, LLVMValueRef v)
-{
- LLVMTypeRef type = LLVMTypeOf(v);
- return LLVMBuildBitCast(ctx->builder, v, to_float_type(ctx, type), "");
-}
-
static int get_elem_bits(struct ac_llvm_context *ctx, LLVMTypeRef type)
{
if (LLVMGetTypeKind(type) == LLVMVectorTypeKind)
@@ -461,7 +402,7 @@
unreachable("Unhandled type kind in get_elem_bits");
}
-static LLVMValueRef unpack_param(struct nir_to_llvm_context *ctx,
+static LLVMValueRef unpack_param(struct ac_llvm_context *ctx,
LLVMValueRef param, unsigned rshift,
unsigned bitwidth)
{
@@ -482,7 +423,7 @@
{
switch (ctx->stage) {
case MESA_SHADER_TESS_CTRL:
- return unpack_param(ctx, ctx->tcs_rel_ids, 0, 8);
+ return unpack_param(&ctx->ac, ctx->tcs_rel_ids, 0, 8);
case MESA_SHADER_TESS_EVAL:
return ctx->tes_rel_patch_id;
break;
@@ -515,9 +456,9 @@
get_tcs_in_patch_stride(struct nir_to_llvm_context *ctx)
{
if (ctx->stage == MESA_SHADER_VERTEX)
- return unpack_param(ctx, ctx->ls_out_layout, 0, 13);
+ return unpack_param(&ctx->ac, ctx->ls_out_layout, 0, 13);
else if (ctx->stage == MESA_SHADER_TESS_CTRL)
- return unpack_param(ctx, ctx->tcs_in_layout, 0, 13);
+ return unpack_param(&ctx->ac, ctx->tcs_in_layout, 0, 13);
else {
assert(0);
return NULL;
@@ -527,14 +468,14 @@
static LLVMValueRef
get_tcs_out_patch_stride(struct nir_to_llvm_context *ctx)
{
- return unpack_param(ctx, ctx->tcs_out_layout, 0, 13);
+ return unpack_param(&ctx->ac, ctx->tcs_out_layout, 0, 13);
}
static LLVMValueRef
get_tcs_out_patch0_offset(struct nir_to_llvm_context *ctx)
{
return LLVMBuildMul(ctx->builder,
- unpack_param(ctx, ctx->tcs_out_offsets, 0, 16),
+ unpack_param(&ctx->ac, ctx->tcs_out_offsets, 0, 16),
LLVMConstInt(ctx->i32, 4, false), "");
}
@@ -542,7 +483,7 @@
get_tcs_out_patch0_patch_data_offset(struct nir_to_llvm_context *ctx)
{
return LLVMBuildMul(ctx->builder,
- unpack_param(ctx, ctx->tcs_out_offsets, 16, 16),
+ unpack_param(&ctx->ac, ctx->tcs_out_offsets, 16, 16),
LLVMConstInt(ctx->i32, 4, false), "");
}
@@ -687,36 +628,133 @@
}
}
-static void create_function(struct nir_to_llvm_context *ctx)
+static void
+radv_define_common_user_sgprs_phase1(struct nir_to_llvm_context *ctx,
+ gl_shader_stage stage,
+ bool has_previous_stage,
+ gl_shader_stage previous_stage,
+ const struct user_sgpr_info *user_sgpr_info,
+ struct arg_info *args,
+ LLVMValueRef *desc_sets)
{
unsigned num_sets = ctx->options->layout ? ctx->options->layout->num_sets : 0;
- uint8_t user_sgpr_idx;
- struct user_sgpr_info user_sgpr_info;
- struct arg_info args = {};
- LLVMValueRef desc_sets;
-
- allocate_user_sgprs(ctx, &user_sgpr_info);
- if (user_sgpr_info.need_ring_offsets && !ctx->options->supports_spill) {
- add_user_sgpr_argument(&args, const_array(ctx->v4i32, 16), &ctx->ring_offsets); /* address of rings */
- }
+ unsigned stage_mask = 1 << stage;
+ if (has_previous_stage)
+ stage_mask |= 1 << previous_stage;
/* 1 for each descriptor set */
- if (!user_sgpr_info.indirect_all_descriptor_sets) {
+ if (!user_sgpr_info->indirect_all_descriptor_sets) {
for (unsigned i = 0; i < num_sets; ++i) {
- if (ctx->options->layout->set[i].layout->shader_stages & (1 << ctx->stage)) {
- add_user_sgpr_array_argument(&args, const_array(ctx->i8, 1024 * 1024), &ctx->descriptor_sets[i]);
+ if (ctx->options->layout->set[i].layout->shader_stages & stage_mask) {
+ add_user_sgpr_array_argument(args, const_array(ctx->i8, 1024 * 1024), &ctx->descriptor_sets[i]);
}
}
} else
- add_user_sgpr_array_argument(&args, const_array(const_array(ctx->i8, 1024 * 1024), 32), &desc_sets);
+ add_user_sgpr_array_argument(args, const_array(const_array(ctx->i8, 1024 * 1024), 32), desc_sets);
if (ctx->shader_info->info.needs_push_constants) {
/* 1 for push constants and dynamic descriptors */
- add_user_sgpr_array_argument(&args, const_array(ctx->i8, 1024 * 1024), &ctx->push_constants);
+ add_user_sgpr_array_argument(args, const_array(ctx->i8, 1024 * 1024), &ctx->push_constants);
}
+}
- switch (ctx->stage) {
+static void
+radv_define_common_user_sgprs_phase2(struct nir_to_llvm_context *ctx,
+ gl_shader_stage stage,
+ bool has_previous_stage,
+ gl_shader_stage previous_stage,
+ const struct user_sgpr_info *user_sgpr_info,
+ LLVMValueRef desc_sets,
+ uint8_t *user_sgpr_idx)
+{
+ unsigned num_sets = ctx->options->layout ? ctx->options->layout->num_sets : 0;
+ unsigned stage_mask = 1 << stage;
+ if (has_previous_stage)
+ stage_mask |= 1 << previous_stage;
+
+ if (!user_sgpr_info->indirect_all_descriptor_sets) {
+ for (unsigned i = 0; i < num_sets; ++i) {
+ if (ctx->options->layout->set[i].layout->shader_stages & stage_mask) {
+ set_userdata_location(&ctx->shader_info->user_sgprs_locs.descriptor_sets[i], user_sgpr_idx, 2);
+ } else
+ ctx->descriptor_sets[i] = NULL;
+ }
+ } else {
+ uint32_t desc_sgpr_idx = *user_sgpr_idx;
+ set_userdata_location_shader(ctx, AC_UD_INDIRECT_DESCRIPTOR_SETS, user_sgpr_idx, 2);
+
+ for (unsigned i = 0; i < num_sets; ++i) {
+ if (ctx->options->layout->set[i].layout->shader_stages & stage_mask) {
+ set_userdata_location_indirect(&ctx->shader_info->user_sgprs_locs.descriptor_sets[i], desc_sgpr_idx, 2, i * 8);
+ ctx->descriptor_sets[i] = ac_build_load_to_sgpr(&ctx->ac, desc_sets, LLVMConstInt(ctx->i32, i, false));
+
+ } else
+ ctx->descriptor_sets[i] = NULL;
+ }
+ ctx->shader_info->need_indirect_descriptor_sets = true;
+ }
+
+ if (ctx->shader_info->info.needs_push_constants) {
+ set_userdata_location_shader(ctx, AC_UD_PUSH_CONSTANTS, user_sgpr_idx, 2);
+ }
+}
+
+static void
+radv_define_vs_user_sgprs_phase1(struct nir_to_llvm_context *ctx,
+ gl_shader_stage stage,
+ bool has_previous_stage,
+ gl_shader_stage previous_stage,
+ struct arg_info *args)
+{
+ if (!ctx->is_gs_copy_shader && (stage == MESA_SHADER_VERTEX || (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
+ if (ctx->shader_info->info.vs.has_vertex_buffers)
+ add_user_sgpr_argument(args, const_array(ctx->v4i32, 16), &ctx->vertex_buffers); /* vertex buffers */
+ add_user_sgpr_argument(args, ctx->i32, &ctx->abi.base_vertex); // base vertex
+ add_user_sgpr_argument(args, ctx->i32, &ctx->abi.start_instance);// start instance
+ if (ctx->shader_info->info.vs.needs_draw_id)
+ add_user_sgpr_argument(args, ctx->i32, &ctx->abi.draw_id); // draw id
+ }
+}
+
+static void
+radv_define_vs_user_sgprs_phase2(struct nir_to_llvm_context *ctx,
+ gl_shader_stage stage,
+ bool has_previous_stage,
+ gl_shader_stage previous_stage,
+ uint8_t *user_sgpr_idx)
+{
+ if (!ctx->is_gs_copy_shader && (stage == MESA_SHADER_VERTEX || (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
+ if (ctx->shader_info->info.vs.has_vertex_buffers) {
+ set_userdata_location_shader(ctx, AC_UD_VS_VERTEX_BUFFERS, user_sgpr_idx, 2);
+ }
+ unsigned vs_num = 2;
+ if (ctx->shader_info->info.vs.needs_draw_id)
+ vs_num++;
+
+ set_userdata_location_shader(ctx, AC_UD_VS_BASE_VERTEX_START_INSTANCE, user_sgpr_idx, vs_num);
+ }
+}
+
+
+static void create_function(struct nir_to_llvm_context *ctx,
+ gl_shader_stage stage,
+ bool has_previous_stage,
+ gl_shader_stage previous_stage)
+{
+ uint8_t user_sgpr_idx;
+ struct user_sgpr_info user_sgpr_info;
+ struct arg_info args = {};
+ LLVMValueRef desc_sets;
+
+ allocate_user_sgprs(ctx, &user_sgpr_info);
+
+ if (user_sgpr_info.need_ring_offsets && !ctx->options->supports_spill) {
+ add_user_sgpr_argument(&args, const_array(ctx->v4i32, 16), &ctx->ring_offsets); /* address of rings */
+ }
+
+ switch (stage) {
case MESA_SHADER_COMPUTE:
+ radv_define_common_user_sgprs_phase1(ctx, stage, has_previous_stage, previous_stage, &user_sgpr_info, &args, &desc_sets);
if (ctx->shader_info->info.cs.grid_components_used)
add_user_sgpr_argument(&args, LLVMVectorType(ctx->i32, ctx->shader_info->info.cs.grid_components_used), &ctx->num_work_groups); /* grid size */
add_sgpr_argument(&args, LLVMVectorType(ctx->i32, 3), &ctx->workgroup_ids);
@@ -724,37 +762,68 @@
add_vgpr_argument(&args, LLVMVectorType(ctx->i32, 3), &ctx->local_invocation_ids);
break;
case MESA_SHADER_VERTEX:
- if (!ctx->is_gs_copy_shader) {
- if (ctx->shader_info->info.vs.has_vertex_buffers)
- add_user_sgpr_argument(&args, const_array(ctx->v4i32, 16), &ctx->vertex_buffers); /* vertex buffers */
- add_user_sgpr_argument(&args, ctx->i32, &ctx->base_vertex); // base vertex
- add_user_sgpr_argument(&args, ctx->i32, &ctx->start_instance);// start instance
- if (ctx->shader_info->info.vs.needs_draw_id)
- add_user_sgpr_argument(&args, ctx->i32, &ctx->draw_index); // draw id
- }
+ radv_define_common_user_sgprs_phase1(ctx, stage, has_previous_stage, previous_stage, &user_sgpr_info, &args, &desc_sets);
+ radv_define_vs_user_sgprs_phase1(ctx, stage, has_previous_stage, previous_stage, &args);
+ if (ctx->shader_info->info.needs_multiview_view_index || (!ctx->options->key.vs.as_es && !ctx->options->key.vs.as_ls && ctx->options->key.has_multiview_view_index))
+ add_user_sgpr_argument(&args, ctx->i32, &ctx->view_index);
if (ctx->options->key.vs.as_es)
add_sgpr_argument(&args, ctx->i32, &ctx->es2gs_offset); // es2gs offset
else if (ctx->options->key.vs.as_ls)
add_user_sgpr_argument(&args, ctx->i32, &ctx->ls_out_layout); // ls out layout
- add_vgpr_argument(&args, ctx->i32, &ctx->vertex_id); // vertex id
+ add_vgpr_argument(&args, ctx->i32, &ctx->abi.vertex_id); // vertex id
if (!ctx->is_gs_copy_shader) {
add_vgpr_argument(&args, ctx->i32, &ctx->rel_auto_id); // rel auto id
add_vgpr_argument(&args, ctx->i32, &ctx->vs_prim_id); // vs prim id
- add_vgpr_argument(&args, ctx->i32, &ctx->instance_id); // instance id
+ add_vgpr_argument(&args, ctx->i32, &ctx->abi.instance_id); // instance id
}
break;
case MESA_SHADER_TESS_CTRL:
- add_user_sgpr_argument(&args, ctx->i32, &ctx->tcs_offchip_layout); // tcs offchip layout
- add_user_sgpr_argument(&args, ctx->i32, &ctx->tcs_out_offsets); // tcs out offsets
- add_user_sgpr_argument(&args, ctx->i32, &ctx->tcs_out_layout); // tcs out layout
- add_user_sgpr_argument(&args, ctx->i32, &ctx->tcs_in_layout); // tcs in layout
- add_sgpr_argument(&args, ctx->i32, &ctx->oc_lds); // param oc lds
- add_sgpr_argument(&args, ctx->i32, &ctx->tess_factor_offset); // tess factor offset
- add_vgpr_argument(&args, ctx->i32, &ctx->tcs_patch_id); // patch id
- add_vgpr_argument(&args, ctx->i32, &ctx->tcs_rel_ids); // rel ids;
+ if (has_previous_stage) {
+ // First 6 system regs
+ add_sgpr_argument(&args, ctx->i32, &ctx->oc_lds); // param oc lds
+ add_sgpr_argument(&args, ctx->i32, &ctx->merged_wave_info); // merged wave info
+ add_sgpr_argument(&args, ctx->i32, &ctx->tess_factor_offset); // tess factor offset
+
+ add_sgpr_argument(&args, ctx->i32, NULL); // scratch offset
+ add_sgpr_argument(&args, ctx->i32, NULL); // unknown
+ add_sgpr_argument(&args, ctx->i32, NULL); // unknown
+
+ radv_define_common_user_sgprs_phase1(ctx, stage, has_previous_stage, previous_stage, &user_sgpr_info, &args, &desc_sets);
+ radv_define_vs_user_sgprs_phase1(ctx, stage, has_previous_stage, previous_stage, &args);
+ add_user_sgpr_argument(&args, ctx->i32, &ctx->ls_out_layout); // ls out layout
+
+ add_user_sgpr_argument(&args, ctx->i32, &ctx->tcs_offchip_layout); // tcs offchip layout
+ add_user_sgpr_argument(&args, ctx->i32, &ctx->tcs_out_offsets); // tcs out offsets
+ add_user_sgpr_argument(&args, ctx->i32, &ctx->tcs_out_layout); // tcs out layout
+ add_user_sgpr_argument(&args, ctx->i32, &ctx->tcs_in_layout); // tcs in layout
+ if (ctx->shader_info->info.needs_multiview_view_index)
+ add_user_sgpr_argument(&args, ctx->i32, &ctx->view_index);
+
+ add_vgpr_argument(&args, ctx->i32, &ctx->tcs_patch_id); // patch id
+ add_vgpr_argument(&args, ctx->i32, &ctx->tcs_rel_ids); // rel ids;
+ add_vgpr_argument(&args, ctx->i32, &ctx->abi.vertex_id); // vertex id
+ add_vgpr_argument(&args, ctx->i32, &ctx->rel_auto_id); // rel auto id
+ add_vgpr_argument(&args, ctx->i32, &ctx->vs_prim_id); // vs prim id
+ add_vgpr_argument(&args, ctx->i32, &ctx->abi.instance_id); // instance id
+ } else {
+ radv_define_common_user_sgprs_phase1(ctx, stage, has_previous_stage, previous_stage, &user_sgpr_info, &args, &desc_sets);
+ add_user_sgpr_argument(&args, ctx->i32, &ctx->tcs_offchip_layout); // tcs offchip layout
+ add_user_sgpr_argument(&args, ctx->i32, &ctx->tcs_out_offsets); // tcs out offsets
+ add_user_sgpr_argument(&args, ctx->i32, &ctx->tcs_out_layout); // tcs out layout
+ add_user_sgpr_argument(&args, ctx->i32, &ctx->tcs_in_layout); // tcs in layout
+ if (ctx->shader_info->info.needs_multiview_view_index)
+ add_user_sgpr_argument(&args, ctx->i32, &ctx->view_index);
+ add_sgpr_argument(&args, ctx->i32, &ctx->oc_lds); // param oc lds
+ add_sgpr_argument(&args, ctx->i32, &ctx->tess_factor_offset); // tess factor offset
+ add_vgpr_argument(&args, ctx->i32, &ctx->tcs_patch_id); // patch id
+ add_vgpr_argument(&args, ctx->i32, &ctx->tcs_rel_ids); // rel ids;
+ }
break;
case MESA_SHADER_TESS_EVAL:
+ radv_define_common_user_sgprs_phase1(ctx, stage, has_previous_stage, previous_stage, &user_sgpr_info, &args, &desc_sets);
add_user_sgpr_argument(&args, ctx->i32, &ctx->tcs_offchip_layout); // tcs offchip layout
+ if (ctx->shader_info->info.needs_multiview_view_index || (!ctx->options->key.tes.as_es && ctx->options->key.has_multiview_view_index))
+ add_user_sgpr_argument(&args, ctx->i32, &ctx->view_index);
if (ctx->options->key.tes.as_es) {
add_sgpr_argument(&args, ctx->i32, &ctx->oc_lds); // OC LDS
add_sgpr_argument(&args, ctx->i32, NULL); //
@@ -769,20 +838,64 @@
add_vgpr_argument(&args, ctx->i32, &ctx->tes_patch_id); // tes patch id
break;
case MESA_SHADER_GEOMETRY:
- add_user_sgpr_argument(&args, ctx->i32, &ctx->gsvs_ring_stride); // gsvs stride
- add_user_sgpr_argument(&args, ctx->i32, &ctx->gsvs_num_entries); // gsvs num entires
- add_sgpr_argument(&args, ctx->i32, &ctx->gs2vs_offset); // gs2vs offset
- add_sgpr_argument(&args, ctx->i32, &ctx->gs_wave_id); // wave id
- add_vgpr_argument(&args, ctx->i32, &ctx->gs_vtx_offset[0]); // vtx0
- add_vgpr_argument(&args, ctx->i32, &ctx->gs_vtx_offset[1]); // vtx1
- add_vgpr_argument(&args, ctx->i32, &ctx->gs_prim_id); // prim id
- add_vgpr_argument(&args, ctx->i32, &ctx->gs_vtx_offset[2]);
- add_vgpr_argument(&args, ctx->i32, &ctx->gs_vtx_offset[3]);
- add_vgpr_argument(&args, ctx->i32, &ctx->gs_vtx_offset[4]);
- add_vgpr_argument(&args, ctx->i32, &ctx->gs_vtx_offset[5]);
- add_vgpr_argument(&args, ctx->i32, &ctx->gs_invocation_id);
+ if (has_previous_stage) {
+ // First 6 system regs
+ add_sgpr_argument(&args, ctx->i32, &ctx->gs2vs_offset); // gs2vs offset
+ add_sgpr_argument(&args, ctx->i32, &ctx->merged_wave_info); // merged wave info
+ add_sgpr_argument(&args, ctx->i32, &ctx->oc_lds); // param oc lds
+
+ add_sgpr_argument(&args, ctx->i32, NULL); // scratch offset
+ add_sgpr_argument(&args, ctx->i32, NULL); // unknown
+ add_sgpr_argument(&args, ctx->i32, NULL); // unknown
+
+ radv_define_common_user_sgprs_phase1(ctx, stage, has_previous_stage, previous_stage, &user_sgpr_info, &args, &desc_sets);
+ if (previous_stage == MESA_SHADER_TESS_EVAL)
+ add_user_sgpr_argument(&args, ctx->i32, &ctx->tcs_offchip_layout); // tcs offchip layout
+ else
+ radv_define_vs_user_sgprs_phase1(ctx, stage, has_previous_stage, previous_stage, &args);
+ add_user_sgpr_argument(&args, ctx->i32, &ctx->gsvs_ring_stride); // gsvs stride
+ add_user_sgpr_argument(&args, ctx->i32, &ctx->gsvs_num_entries); // gsvs num entries
+ if (ctx->shader_info->info.needs_multiview_view_index)
+ add_user_sgpr_argument(&args, ctx->i32, &ctx->view_index);
+
+ add_vgpr_argument(&args, ctx->i32, &ctx->gs_vtx_offset[0]); // vtx01
+ add_vgpr_argument(&args, ctx->i32, &ctx->gs_vtx_offset[2]); // vtx23
+ add_vgpr_argument(&args, ctx->i32, &ctx->gs_prim_id); // prim id
+ add_vgpr_argument(&args, ctx->i32, &ctx->gs_invocation_id);
+ add_vgpr_argument(&args, ctx->i32, &ctx->gs_vtx_offset[4]);
+
+ if (previous_stage == MESA_SHADER_VERTEX) {
+ add_vgpr_argument(&args, ctx->i32, &ctx->abi.vertex_id); // vertex id
+ add_vgpr_argument(&args, ctx->i32, &ctx->rel_auto_id); // rel auto id
+ add_vgpr_argument(&args, ctx->i32, &ctx->vs_prim_id); // vs prim id
+ add_vgpr_argument(&args, ctx->i32, &ctx->abi.instance_id); // instance id
+ } else {
+ add_vgpr_argument(&args, ctx->f32, &ctx->tes_u); // tes_u
+ add_vgpr_argument(&args, ctx->f32, &ctx->tes_v); // tes_v
+ add_vgpr_argument(&args, ctx->i32, &ctx->tes_rel_patch_id); // tes rel patch id
+ add_vgpr_argument(&args, ctx->i32, &ctx->tes_patch_id); // tes patch id
+ }
+ } else {
+ radv_define_common_user_sgprs_phase1(ctx, stage, has_previous_stage, previous_stage, &user_sgpr_info, &args, &desc_sets);
+ radv_define_vs_user_sgprs_phase1(ctx, stage, has_previous_stage, previous_stage, &args);
+ add_user_sgpr_argument(&args, ctx->i32, &ctx->gsvs_ring_stride); // gsvs stride
+ add_user_sgpr_argument(&args, ctx->i32, &ctx->gsvs_num_entries); // gsvs num entries
+ if (ctx->shader_info->info.needs_multiview_view_index)
+ add_user_sgpr_argument(&args, ctx->i32, &ctx->view_index);
+ add_sgpr_argument(&args, ctx->i32, &ctx->gs2vs_offset); // gs2vs offset
+ add_sgpr_argument(&args, ctx->i32, &ctx->gs_wave_id); // wave id
+ add_vgpr_argument(&args, ctx->i32, &ctx->gs_vtx_offset[0]); // vtx0
+ add_vgpr_argument(&args, ctx->i32, &ctx->gs_vtx_offset[1]); // vtx1
+ add_vgpr_argument(&args, ctx->i32, &ctx->gs_prim_id); // prim id
+ add_vgpr_argument(&args, ctx->i32, &ctx->gs_vtx_offset[2]);
+ add_vgpr_argument(&args, ctx->i32, &ctx->gs_vtx_offset[3]);
+ add_vgpr_argument(&args, ctx->i32, &ctx->gs_vtx_offset[4]);
+ add_vgpr_argument(&args, ctx->i32, &ctx->gs_vtx_offset[5]);
+ add_vgpr_argument(&args, ctx->i32, &ctx->gs_invocation_id);
+ }
break;
case MESA_SHADER_FRAGMENT:
+ radv_define_common_user_sgprs_phase1(ctx, stage, has_previous_stage, previous_stage, &user_sgpr_info, &args, &desc_sets);
if (ctx->shader_info->info.ps.needs_sample_positions)
add_user_sgpr_argument(&args, ctx->i32, &ctx->sample_pos_offset); /* sample position offset */
add_sgpr_argument(&args, ctx->i32, &ctx->prim_mask); /* prim mask */
@@ -794,13 +907,13 @@
add_vgpr_argument(&args, ctx->v2i32, &ctx->linear_center); /* linear center */
add_vgpr_argument(&args, ctx->v2i32, &ctx->linear_centroid); /* linear centroid */
add_vgpr_argument(&args, ctx->f32, NULL); /* line stipple tex */
- add_vgpr_argument(&args, ctx->f32, &ctx->frag_pos[0]); /* pos x float */
- add_vgpr_argument(&args, ctx->f32, &ctx->frag_pos[1]); /* pos y float */
- add_vgpr_argument(&args, ctx->f32, &ctx->frag_pos[2]); /* pos z float */
- add_vgpr_argument(&args, ctx->f32, &ctx->frag_pos[3]); /* pos w float */
- add_vgpr_argument(&args, ctx->i32, &ctx->front_face); /* front face */
- add_vgpr_argument(&args, ctx->i32, &ctx->ancillary); /* ancillary */
- add_vgpr_argument(&args, ctx->i32, &ctx->sample_coverage); /* sample coverage */
+ add_vgpr_argument(&args, ctx->f32, &ctx->abi.frag_pos[0]); /* pos x float */
+ add_vgpr_argument(&args, ctx->f32, &ctx->abi.frag_pos[1]); /* pos y float */
+ add_vgpr_argument(&args, ctx->f32, &ctx->abi.frag_pos[2]); /* pos z float */
+ add_vgpr_argument(&args, ctx->f32, &ctx->abi.frag_pos[3]); /* pos w float */
+ add_vgpr_argument(&args, ctx->i32, &ctx->abi.front_face); /* front face */
+ add_vgpr_argument(&args, ctx->i32, &ctx->abi.ancillary); /* ancillary */
+ add_vgpr_argument(&args, ctx->i32, &ctx->abi.sample_coverage); /* sample coverage */
add_vgpr_argument(&args, ctx->i32, NULL); /* fixed pt */
break;
default:
@@ -811,14 +924,12 @@
ctx->context, ctx->module, ctx->builder, NULL, 0, &args,
ctx->max_workgroup_size,
ctx->options->unsafe_math);
- set_llvm_calling_convention(ctx->main_function, ctx->stage);
+ set_llvm_calling_convention(ctx->main_function, stage);
ctx->shader_info->num_input_vgprs = 0;
- ctx->shader_info->num_input_sgprs = ctx->shader_info->num_user_sgprs =
- ctx->options->supports_spill ? 2 : 0;
+ ctx->shader_info->num_input_sgprs = ctx->options->supports_spill ? 2 : 0;
- ctx->shader_info->num_user_sgprs += args.num_user_sgprs_used;
ctx->shader_info->num_input_sgprs += args.num_sgprs_used;
if (ctx->stage != MESA_SHADER_FRAGMENT)
@@ -838,50 +949,24 @@
const_array(ctx->v4i32, 16), "");
}
}
+
+ /* For merged shaders the user SGPRs start at 8, with 8 system SGPRs in front (including
+ * the rw_buffers at s0/s1). With user SGPR0 = s8, let's restart the count from 0. */
+ if (has_previous_stage)
+ user_sgpr_idx = 0;
- if (!user_sgpr_info.indirect_all_descriptor_sets) {
- for (unsigned i = 0; i < num_sets; ++i) {
- if (ctx->options->layout->set[i].layout->shader_stages & (1 << ctx->stage)) {
- set_userdata_location(&ctx->shader_info->user_sgprs_locs.descriptor_sets[i], &user_sgpr_idx, 2);
- } else
- ctx->descriptor_sets[i] = NULL;
- }
- } else {
- uint32_t desc_sgpr_idx = user_sgpr_idx;
- set_userdata_location_shader(ctx, AC_UD_INDIRECT_DESCRIPTOR_SETS, &user_sgpr_idx, 2);
-
- for (unsigned i = 0; i < num_sets; ++i) {
- if (ctx->options->layout->set[i].layout->shader_stages & (1 << ctx->stage)) {
- set_userdata_location_indirect(&ctx->shader_info->user_sgprs_locs.descriptor_sets[i], desc_sgpr_idx, 2, i * 8);
- ctx->descriptor_sets[i] = ac_build_indexed_load_const(&ctx->ac, desc_sets, LLVMConstInt(ctx->i32, i, false));
-
- } else
- ctx->descriptor_sets[i] = NULL;
- }
- ctx->shader_info->need_indirect_descriptor_sets = true;
- }
-
- if (ctx->shader_info->info.needs_push_constants) {
- set_userdata_location_shader(ctx, AC_UD_PUSH_CONSTANTS, &user_sgpr_idx, 2);
- }
+ radv_define_common_user_sgprs_phase2(ctx, stage, has_previous_stage, previous_stage, &user_sgpr_info, desc_sets, &user_sgpr_idx);
- switch (ctx->stage) {
+ switch (stage) {
case MESA_SHADER_COMPUTE:
if (ctx->shader_info->info.cs.grid_components_used) {
set_userdata_location_shader(ctx, AC_UD_CS_GRID_SIZE, &user_sgpr_idx, ctx->shader_info->info.cs.grid_components_used);
}
break;
case MESA_SHADER_VERTEX:
- if (!ctx->is_gs_copy_shader) {
- if (ctx->shader_info->info.vs.has_vertex_buffers) {
- set_userdata_location_shader(ctx, AC_UD_VS_VERTEX_BUFFERS, &user_sgpr_idx, 2);
- }
- unsigned vs_num = 2;
- if (ctx->shader_info->info.vs.needs_draw_id)
- vs_num++;
-
- set_userdata_location_shader(ctx, AC_UD_VS_BASE_VERTEX_START_INSTANCE, &user_sgpr_idx, vs_num);
- }
+ radv_define_vs_user_sgprs_phase2(ctx, stage, has_previous_stage, previous_stage, &user_sgpr_idx);
+ if (ctx->view_index)
+ set_userdata_location_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
if (ctx->options->key.vs.as_ls) {
set_userdata_location_shader(ctx, AC_UD_VS_LS_TCS_IN_LAYOUT, &user_sgpr_idx, 1);
}
@@ -889,14 +974,31 @@
declare_tess_lds(ctx);
break;
case MESA_SHADER_TESS_CTRL:
+ radv_define_vs_user_sgprs_phase2(ctx, stage, has_previous_stage, previous_stage, &user_sgpr_idx);
+ if (has_previous_stage)
+ set_userdata_location_shader(ctx, AC_UD_VS_LS_TCS_IN_LAYOUT, &user_sgpr_idx, 1);
set_userdata_location_shader(ctx, AC_UD_TCS_OFFCHIP_LAYOUT, &user_sgpr_idx, 4);
+ if (ctx->view_index)
+ set_userdata_location_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
declare_tess_lds(ctx);
break;
case MESA_SHADER_TESS_EVAL:
set_userdata_location_shader(ctx, AC_UD_TES_OFFCHIP_LAYOUT, &user_sgpr_idx, 1);
+ if (ctx->view_index)
+ set_userdata_location_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
break;
case MESA_SHADER_GEOMETRY:
+ if (has_previous_stage) {
+ if (previous_stage == MESA_SHADER_VERTEX)
+ radv_define_vs_user_sgprs_phase2(ctx, stage, has_previous_stage, previous_stage, &user_sgpr_idx);
+ else
+ set_userdata_location_shader(ctx, AC_UD_TES_OFFCHIP_LAYOUT, &user_sgpr_idx, 1);
+ }
set_userdata_location_shader(ctx, AC_UD_GS_VS_RING_STRIDE_ENTRIES, &user_sgpr_idx, 2);
+ if (ctx->view_index)
+ set_userdata_location_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
+ if (has_previous_stage)
+ declare_tess_lds(ctx);
break;
case MESA_SHADER_FRAGMENT:
if (ctx->shader_info->info.ps.needs_sample_positions) {
@@ -906,6 +1008,8 @@
default:
unreachable("Shader stage not implemented");
}
+
+ ctx->shader_info->num_user_sgprs = user_sgpr_idx;
}
static void setup_types(struct nir_to_llvm_context *ctx)
@@ -957,7 +1061,7 @@
return num_components;
}
-static LLVMValueRef llvm_extract_elem(struct nir_to_llvm_context *ctx,
+static LLVMValueRef llvm_extract_elem(struct ac_llvm_context *ac,
LLVMValueRef value,
int index)
{
@@ -967,11 +1071,11 @@
if (count == 1)
return value;
- return LLVMBuildExtractElement(ctx->builder, value,
- LLVMConstInt(ctx->i32, index, false), "");
+ return LLVMBuildExtractElement(ac->builder, value,
+ LLVMConstInt(ac->i32, index, false), "");
}
-static LLVMValueRef trim_vector(struct nir_to_llvm_context *ctx,
+static LLVMValueRef trim_vector(struct ac_llvm_context *ctx,
LLVMValueRef value, unsigned count)
{
unsigned num_components = get_llvm_num_components(value);
@@ -991,54 +1095,49 @@
}
static void
-build_store_values_extended(struct nir_to_llvm_context *ctx,
+build_store_values_extended(struct ac_llvm_context *ac,
LLVMValueRef *values,
unsigned value_count,
unsigned value_stride,
LLVMValueRef vec)
{
- LLVMBuilderRef builder = ctx->builder;
+ LLVMBuilderRef builder = ac->builder;
unsigned i;
- if (value_count == 1) {
- LLVMBuildStore(builder, vec, values[0]);
- return;
- }
-
for (i = 0; i < value_count; i++) {
LLVMValueRef ptr = values[i * value_stride];
- LLVMValueRef index = LLVMConstInt(ctx->i32, i, false);
+ LLVMValueRef index = LLVMConstInt(ac->i32, i, false);
LLVMValueRef value = LLVMBuildExtractElement(builder, vec, index, "");
LLVMBuildStore(builder, value, ptr);
}
}
-static LLVMTypeRef get_def_type(struct nir_to_llvm_context *ctx,
+static LLVMTypeRef get_def_type(struct ac_nir_context *ctx,
const nir_ssa_def *def)
{
- LLVMTypeRef type = LLVMIntTypeInContext(ctx->context, def->bit_size);
+ LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, def->bit_size);
if (def->num_components > 1) {
type = LLVMVectorType(type, def->num_components);
}
return type;
}
-static LLVMValueRef get_src(struct nir_to_llvm_context *ctx, nir_src src)
+static LLVMValueRef get_src(struct ac_nir_context *nir, nir_src src)
{
assert(src.is_ssa);
- struct hash_entry *entry = _mesa_hash_table_search(ctx->defs, src.ssa);
+ struct hash_entry *entry = _mesa_hash_table_search(nir->defs, src.ssa);
return (LLVMValueRef)entry->data;
}
-static LLVMBasicBlockRef get_block(struct nir_to_llvm_context *ctx,
+static LLVMBasicBlockRef get_block(struct ac_nir_context *nir,
const struct nir_block *b)
{
- struct hash_entry *entry = _mesa_hash_table_search(ctx->defs, b);
+ struct hash_entry *entry = _mesa_hash_table_search(nir->defs, b);
return (LLVMBasicBlockRef)entry->data;
}
-static LLVMValueRef get_alu_src(struct nir_to_llvm_context *ctx,
+static LLVMValueRef get_alu_src(struct ac_nir_context *ctx,
nir_alu_src src,
unsigned num_components)
{
@@ -1059,20 +1158,20 @@
if (need_swizzle || num_components != src_components) {
LLVMValueRef masks[] = {
- LLVMConstInt(ctx->i32, src.swizzle[0], false),
- LLVMConstInt(ctx->i32, src.swizzle[1], false),
- LLVMConstInt(ctx->i32, src.swizzle[2], false),
- LLVMConstInt(ctx->i32, src.swizzle[3], false)};
+ LLVMConstInt(ctx->ac.i32, src.swizzle[0], false),
+ LLVMConstInt(ctx->ac.i32, src.swizzle[1], false),
+ LLVMConstInt(ctx->ac.i32, src.swizzle[2], false),
+ LLVMConstInt(ctx->ac.i32, src.swizzle[3], false)};
if (src_components > 1 && num_components == 1) {
- value = LLVMBuildExtractElement(ctx->builder, value,
+ value = LLVMBuildExtractElement(ctx->ac.builder, value,
masks[0], "");
} else if (src_components == 1 && num_components > 1) {
LLVMValueRef values[] = {value, value, value, value};
value = ac_build_gather_values(&ctx->ac, values, num_components);
} else {
LLVMValueRef swizzle = LLVMConstVector(masks, num_components);
- value = LLVMBuildShuffleVector(ctx->builder, value, value,
+ value = LLVMBuildShuffleVector(ctx->ac.builder, value, value,
swizzle, "");
}
}
@@ -1096,8 +1195,8 @@
LLVMValueRef src1)
{
LLVMValueRef result;
- src0 = to_float(ctx, src0);
- src1 = to_float(ctx, src1);
+ src0 = ac_to_float(ctx, src0);
+ src1 = ac_to_float(ctx, src1);
result = LLVMBuildFCmp(ctx->builder, pred, src0, src1, "");
return LLVMBuildSelect(ctx->builder, result,
LLVMConstInt(ctx->i32, 0xFFFFFFFF, false),
@@ -1111,7 +1210,7 @@
{
char name[64];
LLVMValueRef params[] = {
- to_float(ctx, src0),
+ ac_to_float(ctx, src0),
};
MAYBE_UNUSED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin,
@@ -1127,8 +1226,8 @@
{
char name[64];
LLVMValueRef params[] = {
- to_float(ctx, src0),
- to_float(ctx, src1),
+ ac_to_float(ctx, src0),
+ ac_to_float(ctx, src1),
};
MAYBE_UNUSED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin,
@@ -1144,9 +1243,9 @@
{
char name[64];
LLVMValueRef params[] = {
- to_float(ctx, src0),
- to_float(ctx, src1),
- to_float(ctx, src2),
+ ac_to_float(ctx, src0),
+ ac_to_float(ctx, src1),
+ ac_to_float(ctx, src2),
};
MAYBE_UNUSED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin,
@@ -1248,7 +1347,7 @@
LLVMValueRef src0)
{
const char *intr = "llvm.floor.f32";
- LLVMValueRef fsrc0 = to_float(ctx, src0);
+ LLVMValueRef fsrc0 = ac_to_float(ctx, src0);
LLVMValueRef params[] = {
fsrc0,
};
@@ -1286,7 +1385,7 @@
static LLVMValueRef emit_f2b(struct ac_llvm_context *ctx,
LLVMValueRef src0)
{
- src0 = to_float(ctx, src0);
+ src0 = ac_to_float(ctx, src0);
return LLVMBuildSExt(ctx->builder,
LLVMBuildFCmp(ctx->builder, LLVMRealUNE, src0, ctx->f32_0, ""),
ctx->i32, "");
@@ -1310,9 +1409,9 @@
LLVMValueRef src0)
{
LLVMValueRef result;
- LLVMValueRef cond;
+ LLVMValueRef cond = NULL;
- src0 = to_float(&ctx->ac, src0);
+ src0 = ac_to_float(&ctx->ac, src0);
result = LLVMBuildFPTrunc(ctx->builder, src0, ctx->f16, "");
if (ctx->options->chip_class >= VI) {
@@ -1418,23 +1517,13 @@
static LLVMValueRef emit_pack_half_2x16(struct ac_llvm_context *ctx,
LLVMValueRef src0)
{
- LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
- int i;
LLVMValueRef comp[2];
- src0 = to_float(ctx, src0);
+ src0 = ac_to_float(ctx, src0);
comp[0] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_0, "");
comp[1] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_1, "");
- for (i = 0; i < 2; i++) {
- comp[i] = LLVMBuildFPTrunc(ctx->builder, comp[i], ctx->f16, "");
- comp[i] = LLVMBuildBitCast(ctx->builder, comp[i], ctx->i16, "");
- comp[i] = LLVMBuildZExt(ctx->builder, comp[i], ctx->i32, "");
- }
-
- comp[1] = LLVMBuildShl(ctx->builder, comp[1], const16, "");
- comp[0] = LLVMBuildOr(ctx->builder, comp[0], comp[1], "");
- return comp[0];
+ return ac_build_cvt_pkrtz_f16(ctx, comp);
}
static LLVMValueRef emit_unpack_half_2x16(struct ac_llvm_context *ctx,
@@ -1459,7 +1548,7 @@
return result;
}
-static LLVMValueRef emit_ddxy(struct nir_to_llvm_context *ctx,
+static LLVMValueRef emit_ddxy(struct ac_nir_context *ctx,
nir_op op,
LLVMValueRef src0)
{
@@ -1482,9 +1571,7 @@
else
idx = 2;
- result = ac_build_ddxy(&ctx->ac, ctx->has_ds_bpermute,
- mask, idx,
- src0);
+ result = ac_build_ddxy(&ctx->ac, mask, idx, src0);
return result;
}
@@ -1494,22 +1581,22 @@
* it returns DDX(I), DDX(J), DDY(I), DDY(J).
*/
static LLVMValueRef emit_ddxy_interp(
- struct nir_to_llvm_context *ctx,
+ struct ac_nir_context *ctx,
LLVMValueRef interp_ij)
{
LLVMValueRef result[4], a;
unsigned i;
for (i = 0; i < 2; i++) {
- a = LLVMBuildExtractElement(ctx->builder, interp_ij,
- LLVMConstInt(ctx->i32, i, false), "");
+ a = LLVMBuildExtractElement(ctx->ac.builder, interp_ij,
+ LLVMConstInt(ctx->ac.i32, i, false), "");
result[i] = emit_ddxy(ctx, nir_op_fddx, a);
result[2+i] = emit_ddxy(ctx, nir_op_fddy, a);
}
return ac_build_gather_values(&ctx->ac, result, 4);
}
-static void visit_alu(struct nir_to_llvm_context *ctx, const nir_alu_instr *instr)
+static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
{
LLVMValueRef src[4], result = NULL;
unsigned num_components = instr->dest.dest.ssa.num_components;
@@ -1542,101 +1629,101 @@
result = src[0];
break;
case nir_op_fneg:
- src[0] = to_float(&ctx->ac, src[0]);
- result = LLVMBuildFNeg(ctx->builder, src[0], "");
+ src[0] = ac_to_float(&ctx->ac, src[0]);
+ result = LLVMBuildFNeg(ctx->ac.builder, src[0], "");
break;
case nir_op_ineg:
- result = LLVMBuildNeg(ctx->builder, src[0], "");
+ result = LLVMBuildNeg(ctx->ac.builder, src[0], "");
break;
case nir_op_inot:
- result = LLVMBuildNot(ctx->builder, src[0], "");
+ result = LLVMBuildNot(ctx->ac.builder, src[0], "");
break;
case nir_op_iadd:
- result = LLVMBuildAdd(ctx->builder, src[0], src[1], "");
+ result = LLVMBuildAdd(ctx->ac.builder, src[0], src[1], "");
break;
case nir_op_fadd:
- src[0] = to_float(&ctx->ac, src[0]);
- src[1] = to_float(&ctx->ac, src[1]);
- result = LLVMBuildFAdd(ctx->builder, src[0], src[1], "");
+ src[0] = ac_to_float(&ctx->ac, src[0]);
+ src[1] = ac_to_float(&ctx->ac, src[1]);
+ result = LLVMBuildFAdd(ctx->ac.builder, src[0], src[1], "");
break;
case nir_op_fsub:
- src[0] = to_float(&ctx->ac, src[0]);
- src[1] = to_float(&ctx->ac, src[1]);
- result = LLVMBuildFSub(ctx->builder, src[0], src[1], "");
+ src[0] = ac_to_float(&ctx->ac, src[0]);
+ src[1] = ac_to_float(&ctx->ac, src[1]);
+ result = LLVMBuildFSub(ctx->ac.builder, src[0], src[1], "");
break;
case nir_op_isub:
- result = LLVMBuildSub(ctx->builder, src[0], src[1], "");
+ result = LLVMBuildSub(ctx->ac.builder, src[0], src[1], "");
break;
case nir_op_imul:
- result = LLVMBuildMul(ctx->builder, src[0], src[1], "");
+ result = LLVMBuildMul(ctx->ac.builder, src[0], src[1], "");
break;
case nir_op_imod:
- result = LLVMBuildSRem(ctx->builder, src[0], src[1], "");
+ result = LLVMBuildSRem(ctx->ac.builder, src[0], src[1], "");
break;
case nir_op_umod:
- result = LLVMBuildURem(ctx->builder, src[0], src[1], "");
+ result = LLVMBuildURem(ctx->ac.builder, src[0], src[1], "");
break;
case nir_op_fmod:
- src[0] = to_float(&ctx->ac, src[0]);
- src[1] = to_float(&ctx->ac, src[1]);
+ src[0] = ac_to_float(&ctx->ac, src[0]);
+ src[1] = ac_to_float(&ctx->ac, src[1]);
result = ac_build_fdiv(&ctx->ac, src[0], src[1]);
result = emit_intrin_1f_param(&ctx->ac, "llvm.floor",
- to_float_type(&ctx->ac, def_type), result);
- result = LLVMBuildFMul(ctx->builder, src[1] , result, "");
- result = LLVMBuildFSub(ctx->builder, src[0], result, "");
+ ac_to_float_type(&ctx->ac, def_type), result);
+ result = LLVMBuildFMul(ctx->ac.builder, src[1] , result, "");
+ result = LLVMBuildFSub(ctx->ac.builder, src[0], result, "");
break;
case nir_op_frem:
- src[0] = to_float(&ctx->ac, src[0]);
- src[1] = to_float(&ctx->ac, src[1]);
- result = LLVMBuildFRem(ctx->builder, src[0], src[1], "");
+ src[0] = ac_to_float(&ctx->ac, src[0]);
+ src[1] = ac_to_float(&ctx->ac, src[1]);
+ result = LLVMBuildFRem(ctx->ac.builder, src[0], src[1], "");
break;
case nir_op_irem:
- result = LLVMBuildSRem(ctx->builder, src[0], src[1], "");
+ result = LLVMBuildSRem(ctx->ac.builder, src[0], src[1], "");
break;
case nir_op_idiv:
- result = LLVMBuildSDiv(ctx->builder, src[0], src[1], "");
+ result = LLVMBuildSDiv(ctx->ac.builder, src[0], src[1], "");
break;
case nir_op_udiv:
- result = LLVMBuildUDiv(ctx->builder, src[0], src[1], "");
+ result = LLVMBuildUDiv(ctx->ac.builder, src[0], src[1], "");
break;
case nir_op_fmul:
- src[0] = to_float(&ctx->ac, src[0]);
- src[1] = to_float(&ctx->ac, src[1]);
- result = LLVMBuildFMul(ctx->builder, src[0], src[1], "");
+ src[0] = ac_to_float(&ctx->ac, src[0]);
+ src[1] = ac_to_float(&ctx->ac, src[1]);
+ result = LLVMBuildFMul(ctx->ac.builder, src[0], src[1], "");
break;
case nir_op_fdiv:
- src[0] = to_float(&ctx->ac, src[0]);
- src[1] = to_float(&ctx->ac, src[1]);
+ src[0] = ac_to_float(&ctx->ac, src[0]);
+ src[1] = ac_to_float(&ctx->ac, src[1]);
result = ac_build_fdiv(&ctx->ac, src[0], src[1]);
break;
case nir_op_frcp:
- src[0] = to_float(&ctx->ac, src[0]);
- result = ac_build_fdiv(&ctx->ac, ctx->f32one, src[0]);
+ src[0] = ac_to_float(&ctx->ac, src[0]);
+ result = ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, src[0]);
break;
case nir_op_iand:
- result = LLVMBuildAnd(ctx->builder, src[0], src[1], "");
+ result = LLVMBuildAnd(ctx->ac.builder, src[0], src[1], "");
break;
case nir_op_ior:
- result = LLVMBuildOr(ctx->builder, src[0], src[1], "");
+ result = LLVMBuildOr(ctx->ac.builder, src[0], src[1], "");
break;
case nir_op_ixor:
- result = LLVMBuildXor(ctx->builder, src[0], src[1], "");
+ result = LLVMBuildXor(ctx->ac.builder, src[0], src[1], "");
break;
case nir_op_ishl:
- result = LLVMBuildShl(ctx->builder, src[0],
- LLVMBuildZExt(ctx->builder, src[1],
+ result = LLVMBuildShl(ctx->ac.builder, src[0],
+ LLVMBuildZExt(ctx->ac.builder, src[1],
LLVMTypeOf(src[0]), ""),
"");
break;
case nir_op_ishr:
- result = LLVMBuildAShr(ctx->builder, src[0],
- LLVMBuildZExt(ctx->builder, src[1],
+ result = LLVMBuildAShr(ctx->ac.builder, src[0],
+ LLVMBuildZExt(ctx->ac.builder, src[1],
LLVMTypeOf(src[0]), ""),
"");
break;
case nir_op_ushr:
- result = LLVMBuildLShr(ctx->builder, src[0],
- LLVMBuildZExt(ctx->builder, src[1],
+ result = LLVMBuildLShr(ctx->ac.builder, src[0],
+ LLVMBuildZExt(ctx->ac.builder, src[1],
LLVMTypeOf(src[0]), ""),
"");
break;
@@ -1672,7 +1759,7 @@
break;
case nir_op_fabs:
result = emit_intrin_1f_param(&ctx->ac, "llvm.fabs",
- to_float_type(&ctx->ac, def_type), src[0]);
+ ac_to_float_type(&ctx->ac, def_type), src[0]);
break;
case nir_op_iabs:
result = emit_iabs(&ctx->ac, src[0]);
@@ -1693,76 +1780,76 @@
result = emit_isign(&ctx->ac, src[0]);
break;
case nir_op_fsign:
- src[0] = to_float(&ctx->ac, src[0]);
+ src[0] = ac_to_float(&ctx->ac, src[0]);
result = emit_fsign(&ctx->ac, src[0]);
break;
case nir_op_ffloor:
result = emit_intrin_1f_param(&ctx->ac, "llvm.floor",
- to_float_type(&ctx->ac, def_type), src[0]);
+ ac_to_float_type(&ctx->ac, def_type), src[0]);
break;
case nir_op_ftrunc:
result = emit_intrin_1f_param(&ctx->ac, "llvm.trunc",
- to_float_type(&ctx->ac, def_type), src[0]);
+ ac_to_float_type(&ctx->ac, def_type), src[0]);
break;
case nir_op_fceil:
result = emit_intrin_1f_param(&ctx->ac, "llvm.ceil",
- to_float_type(&ctx->ac, def_type), src[0]);
+ ac_to_float_type(&ctx->ac, def_type), src[0]);
break;
case nir_op_fround_even:
result = emit_intrin_1f_param(&ctx->ac, "llvm.rint",
- to_float_type(&ctx->ac, def_type),src[0]);
+ ac_to_float_type(&ctx->ac, def_type),src[0]);
break;
case nir_op_ffract:
result = emit_ffract(&ctx->ac, src[0]);
break;
case nir_op_fsin:
result = emit_intrin_1f_param(&ctx->ac, "llvm.sin",
- to_float_type(&ctx->ac, def_type), src[0]);
+ ac_to_float_type(&ctx->ac, def_type), src[0]);
break;
case nir_op_fcos:
result = emit_intrin_1f_param(&ctx->ac, "llvm.cos",
- to_float_type(&ctx->ac, def_type), src[0]);
+ ac_to_float_type(&ctx->ac, def_type), src[0]);
break;
case nir_op_fsqrt:
result = emit_intrin_1f_param(&ctx->ac, "llvm.sqrt",
- to_float_type(&ctx->ac, def_type), src[0]);
+ ac_to_float_type(&ctx->ac, def_type), src[0]);
break;
case nir_op_fexp2:
result = emit_intrin_1f_param(&ctx->ac, "llvm.exp2",
- to_float_type(&ctx->ac, def_type), src[0]);
+ ac_to_float_type(&ctx->ac, def_type), src[0]);
break;
case nir_op_flog2:
result = emit_intrin_1f_param(&ctx->ac, "llvm.log2",
- to_float_type(&ctx->ac, def_type), src[0]);
+ ac_to_float_type(&ctx->ac, def_type), src[0]);
break;
case nir_op_frsq:
result = emit_intrin_1f_param(&ctx->ac, "llvm.sqrt",
- to_float_type(&ctx->ac, def_type), src[0]);
- result = ac_build_fdiv(&ctx->ac, ctx->f32one, result);
+ ac_to_float_type(&ctx->ac, def_type), src[0]);
+ result = ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, result);
break;
case nir_op_fpow:
result = emit_intrin_2f_param(&ctx->ac, "llvm.pow",
- to_float_type(&ctx->ac, def_type), src[0], src[1]);
+ ac_to_float_type(&ctx->ac, def_type), src[0], src[1]);
break;
case nir_op_fmax:
result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum",
- to_float_type(&ctx->ac, def_type), src[0], src[1]);
+ ac_to_float_type(&ctx->ac, def_type), src[0], src[1]);
if (instr->dest.dest.ssa.bit_size == 32)
result = emit_intrin_1f_param(&ctx->ac, "llvm.canonicalize",
- to_float_type(&ctx->ac, def_type),
+ ac_to_float_type(&ctx->ac, def_type),
result);
break;
case nir_op_fmin:
result = emit_intrin_2f_param(&ctx->ac, "llvm.minnum",
- to_float_type(&ctx->ac, def_type), src[0], src[1]);
+ ac_to_float_type(&ctx->ac, def_type), src[0], src[1]);
if (instr->dest.dest.ssa.bit_size == 32)
result = emit_intrin_1f_param(&ctx->ac, "llvm.canonicalize",
- to_float_type(&ctx->ac, def_type),
+ ac_to_float_type(&ctx->ac, def_type),
result);
break;
case nir_op_ffma:
result = emit_intrin_3f_param(&ctx->ac, "llvm.fmuladd",
- to_float_type(&ctx->ac, def_type), src[0], src[1], src[2]);
+ ac_to_float_type(&ctx->ac, def_type), src[0], src[1], src[2]);
break;
case nir_op_ibitfield_extract:
result = emit_bitfield_extract(&ctx->ac, true, src);
@@ -1774,83 +1861,83 @@
result = emit_bitfield_insert(&ctx->ac, src[0], src[1], src[2], src[3]);
break;
case nir_op_bitfield_reverse:
- result = ac_build_intrinsic(&ctx->ac, "llvm.bitreverse.i32", ctx->i32, src, 1, AC_FUNC_ATTR_READNONE);
+ result = ac_build_intrinsic(&ctx->ac, "llvm.bitreverse.i32", ctx->ac.i32, src, 1, AC_FUNC_ATTR_READNONE);
break;
case nir_op_bit_count:
- result = ac_build_intrinsic(&ctx->ac, "llvm.ctpop.i32", ctx->i32, src, 1, AC_FUNC_ATTR_READNONE);
+ result = ac_build_intrinsic(&ctx->ac, "llvm.ctpop.i32", ctx->ac.i32, src, 1, AC_FUNC_ATTR_READNONE);
break;
case nir_op_vec2:
case nir_op_vec3:
case nir_op_vec4:
for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
- src[i] = to_integer(&ctx->ac, src[i]);
+ src[i] = ac_to_integer(&ctx->ac, src[i]);
result = ac_build_gather_values(&ctx->ac, src, num_components);
break;
case nir_op_f2i32:
case nir_op_f2i64:
- src[0] = to_float(&ctx->ac, src[0]);
- result = LLVMBuildFPToSI(ctx->builder, src[0], def_type, "");
+ src[0] = ac_to_float(&ctx->ac, src[0]);
+ result = LLVMBuildFPToSI(ctx->ac.builder, src[0], def_type, "");
break;
case nir_op_f2u32:
case nir_op_f2u64:
- src[0] = to_float(&ctx->ac, src[0]);
- result = LLVMBuildFPToUI(ctx->builder, src[0], def_type, "");
+ src[0] = ac_to_float(&ctx->ac, src[0]);
+ result = LLVMBuildFPToUI(ctx->ac.builder, src[0], def_type, "");
break;
case nir_op_i2f32:
case nir_op_i2f64:
- src[0] = to_integer(&ctx->ac, src[0]);
- result = LLVMBuildSIToFP(ctx->builder, src[0], to_float_type(&ctx->ac, def_type), "");
+ src[0] = ac_to_integer(&ctx->ac, src[0]);
+ result = LLVMBuildSIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
break;
case nir_op_u2f32:
case nir_op_u2f64:
- src[0] = to_integer(&ctx->ac, src[0]);
- result = LLVMBuildUIToFP(ctx->builder, src[0], to_float_type(&ctx->ac, def_type), "");
+ src[0] = ac_to_integer(&ctx->ac, src[0]);
+ result = LLVMBuildUIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
break;
case nir_op_f2f64:
- result = LLVMBuildFPExt(ctx->builder, src[0], to_float_type(&ctx->ac, def_type), "");
+ result = LLVMBuildFPExt(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
break;
case nir_op_f2f32:
- result = LLVMBuildFPTrunc(ctx->builder, src[0], to_float_type(&ctx->ac, def_type), "");
+ result = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
break;
case nir_op_u2u32:
case nir_op_u2u64:
- src[0] = to_integer(&ctx->ac, src[0]);
+ src[0] = ac_to_integer(&ctx->ac, src[0]);
if (get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < get_elem_bits(&ctx->ac, def_type))
- result = LLVMBuildZExt(ctx->builder, src[0], def_type, "");
+ result = LLVMBuildZExt(ctx->ac.builder, src[0], def_type, "");
else
- result = LLVMBuildTrunc(ctx->builder, src[0], def_type, "");
+ result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, "");
break;
case nir_op_i2i32:
case nir_op_i2i64:
- src[0] = to_integer(&ctx->ac, src[0]);
+ src[0] = ac_to_integer(&ctx->ac, src[0]);
if (get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < get_elem_bits(&ctx->ac, def_type))
- result = LLVMBuildSExt(ctx->builder, src[0], def_type, "");
+ result = LLVMBuildSExt(ctx->ac.builder, src[0], def_type, "");
else
- result = LLVMBuildTrunc(ctx->builder, src[0], def_type, "");
+ result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, "");
break;
case nir_op_bcsel:
result = emit_bcsel(&ctx->ac, src[0], src[1], src[2]);
break;
case nir_op_find_lsb:
- src[0] = to_integer(&ctx->ac, src[0]);
+ src[0] = ac_to_integer(&ctx->ac, src[0]);
result = emit_find_lsb(&ctx->ac, src[0]);
break;
case nir_op_ufind_msb:
- src[0] = to_integer(&ctx->ac, src[0]);
+ src[0] = ac_to_integer(&ctx->ac, src[0]);
result = emit_ufind_msb(&ctx->ac, src[0]);
break;
case nir_op_ifind_msb:
- src[0] = to_integer(&ctx->ac, src[0]);
+ src[0] = ac_to_integer(&ctx->ac, src[0]);
result = emit_ifind_msb(&ctx->ac, src[0]);
break;
case nir_op_uadd_carry:
- src[0] = to_integer(&ctx->ac, src[0]);
- src[1] = to_integer(&ctx->ac, src[1]);
+ src[0] = ac_to_integer(&ctx->ac, src[0]);
+ src[1] = ac_to_integer(&ctx->ac, src[1]);
result = emit_uint_carry(&ctx->ac, "llvm.uadd.with.overflow.i32", src[0], src[1]);
break;
case nir_op_usub_borrow:
- src[0] = to_integer(&ctx->ac, src[0]);
- src[1] = to_integer(&ctx->ac, src[1]);
+ src[0] = ac_to_integer(&ctx->ac, src[0]);
+ src[1] = ac_to_integer(&ctx->ac, src[1]);
result = emit_uint_carry(&ctx->ac, "llvm.usub.with.overflow.i32", src[0], src[1]);
break;
case nir_op_b2f:
@@ -1863,20 +1950,20 @@
result = emit_b2i(&ctx->ac, src[0]);
break;
case nir_op_i2b:
- src[0] = to_integer(&ctx->ac, src[0]);
+ src[0] = ac_to_integer(&ctx->ac, src[0]);
result = emit_i2b(&ctx->ac, src[0]);
break;
case nir_op_fquantize2f16:
- result = emit_f2f16(ctx, src[0]);
+ result = emit_f2f16(ctx->nctx, src[0]);
break;
case nir_op_umul_high:
- src[0] = to_integer(&ctx->ac, src[0]);
- src[1] = to_integer(&ctx->ac, src[1]);
+ src[0] = ac_to_integer(&ctx->ac, src[0]);
+ src[1] = ac_to_integer(&ctx->ac, src[1]);
result = emit_umul_high(&ctx->ac, src[0], src[1]);
break;
case nir_op_imul_high:
- src[0] = to_integer(&ctx->ac, src[0]);
- src[1] = to_integer(&ctx->ac, src[1]);
+ src[0] = ac_to_integer(&ctx->ac, src[0]);
+ src[1] = ac_to_integer(&ctx->ac, src[1]);
result = emit_imul_high(&ctx->ac, src[0], src[1]);
break;
case nir_op_pack_half_2x16:
@@ -1896,31 +1983,31 @@
case nir_op_unpack_64_2x32_split_x: {
assert(instr->src[0].src.ssa->num_components == 1);
- LLVMValueRef tmp = LLVMBuildBitCast(ctx->builder, src[0],
- LLVMVectorType(ctx->i32, 2),
+ LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0],
+ LLVMVectorType(ctx->ac.i32, 2),
"");
- result = LLVMBuildExtractElement(ctx->builder, tmp,
- ctx->i32zero, "");
+ result = LLVMBuildExtractElement(ctx->ac.builder, tmp,
+ ctx->ac.i32_0, "");
break;
}
case nir_op_unpack_64_2x32_split_y: {
assert(instr->src[0].src.ssa->num_components == 1);
- LLVMValueRef tmp = LLVMBuildBitCast(ctx->builder, src[0],
- LLVMVectorType(ctx->i32, 2),
+ LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0],
+ LLVMVectorType(ctx->ac.i32, 2),
"");
- result = LLVMBuildExtractElement(ctx->builder, tmp,
- ctx->i32one, "");
+ result = LLVMBuildExtractElement(ctx->ac.builder, tmp,
+ ctx->ac.i32_1, "");
break;
}
case nir_op_pack_64_2x32_split: {
- LLVMValueRef tmp = LLVMGetUndef(LLVMVectorType(ctx->i32, 2));
- tmp = LLVMBuildInsertElement(ctx->builder, tmp,
- src[0], ctx->i32zero, "");
- tmp = LLVMBuildInsertElement(ctx->builder, tmp,
- src[1], ctx->i32one, "");
- result = LLVMBuildBitCast(ctx->builder, tmp, ctx->i64, "");
+ LLVMValueRef tmp = LLVMGetUndef(LLVMVectorType(ctx->ac.i32, 2));
+ tmp = LLVMBuildInsertElement(ctx->ac.builder, tmp,
+ src[0], ctx->ac.i32_0, "");
+ tmp = LLVMBuildInsertElement(ctx->ac.builder, tmp,
+ src[1], ctx->ac.i32_1, "");
+ result = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->ac.i64, "");
break;
}
@@ -1933,18 +2020,18 @@
if (result) {
assert(instr->dest.dest.is_ssa);
- result = to_integer(&ctx->ac, result);
+ result = ac_to_integer(&ctx->ac, result);
_mesa_hash_table_insert(ctx->defs, &instr->dest.dest.ssa,
result);
}
}
-static void visit_load_const(struct nir_to_llvm_context *ctx,
+static void visit_load_const(struct ac_nir_context *ctx,
const nir_load_const_instr *instr)
{
LLVMValueRef values[4], value = NULL;
LLVMTypeRef element_type =
- LLVMIntTypeInContext(ctx->context, instr->def.bit_size);
+ LLVMIntTypeInContext(ctx->ac.context, instr->def.bit_size);
for (unsigned i = 0; i < instr->def.num_components; ++i) {
switch (instr->def.bit_size) {
@@ -1980,27 +2067,27 @@
}
static LLVMValueRef
-get_buffer_size(struct nir_to_llvm_context *ctx, LLVMValueRef descriptor, bool in_elements)
+get_buffer_size(struct ac_nir_context *ctx, LLVMValueRef descriptor, bool in_elements)
{
LLVMValueRef size =
- LLVMBuildExtractElement(ctx->builder, descriptor,
- LLVMConstInt(ctx->i32, 2, false), "");
+ LLVMBuildExtractElement(ctx->ac.builder, descriptor,
+ LLVMConstInt(ctx->ac.i32, 2, false), "");
/* VI only */
- if (ctx->options->chip_class >= VI && in_elements) {
+ if (ctx->ac.chip_class == VI && in_elements) {
/* On VI, the descriptor contains the size in bytes,
* but TXQ must return the size in elements.
* The stride is always non-zero for resources using TXQ.
*/
LLVMValueRef stride =
- LLVMBuildExtractElement(ctx->builder, descriptor,
- LLVMConstInt(ctx->i32, 1, false), "");
- stride = LLVMBuildLShr(ctx->builder, stride,
- LLVMConstInt(ctx->i32, 16, false), "");
- stride = LLVMBuildAnd(ctx->builder, stride,
- LLVMConstInt(ctx->i32, 0x3fff, false), "");
+ LLVMBuildExtractElement(ctx->ac.builder, descriptor,
+ LLVMConstInt(ctx->ac.i32, 1, false), "");
+ stride = LLVMBuildLShr(ctx->ac.builder, stride,
+ LLVMConstInt(ctx->ac.i32, 16, false), "");
+ stride = LLVMBuildAnd(ctx->ac.builder, stride,
+ LLVMConstInt(ctx->ac.i32, 0x3fff, false), "");
- size = LLVMBuildUDiv(ctx->builder, size, stride, "");
+ size = LLVMBuildUDiv(ctx->ac.builder, size, stride, "");
}
return size;
}
@@ -2022,14 +2109,14 @@
strcpy(buf, "i32");
}
-static LLVMValueRef radv_lower_gather4_integer(struct nir_to_llvm_context *ctx,
+static LLVMValueRef radv_lower_gather4_integer(struct ac_llvm_context *ctx,
struct ac_image_args *args,
const nir_tex_instr *instr)
{
enum glsl_base_type stype = glsl_get_sampler_result_type(instr->texture->var->type);
LLVMValueRef coord = args->addr;
LLVMValueRef half_texel[2];
- LLVMValueRef compare_cube_wa;
+ LLVMValueRef compare_cube_wa = NULL;
LLVMValueRef result;
int c;
unsigned coord_vgpr_index = (unsigned)args->offset + (unsigned)args->compare;
@@ -2041,15 +2128,15 @@
txq_args.da = instr->is_array || instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE;
txq_args.opcode = ac_image_get_resinfo;
txq_args.dmask = 0xf;
- txq_args.addr = ctx->i32zero;
+ txq_args.addr = ctx->i32_0;
txq_args.resource = args->resource;
- LLVMValueRef size = ac_build_image_opcode(&ctx->ac, &txq_args);
+ LLVMValueRef size = ac_build_image_opcode(ctx, &txq_args);
for (c = 0; c < 2; c++) {
half_texel[c] = LLVMBuildExtractElement(ctx->builder, size,
LLVMConstInt(ctx->i32, c, false), "");
half_texel[c] = LLVMBuildUIToFP(ctx->builder, half_texel[c], ctx->f32, "");
- half_texel[c] = ac_build_fdiv(&ctx->ac, ctx->f32one, half_texel[c]);
+ half_texel[c] = ac_build_fdiv(ctx, ctx->f32_1, half_texel[c]);
half_texel[c] = LLVMBuildFMul(ctx->builder, half_texel[c],
LLVMConstReal(ctx->f32, -0.5), "");
}
@@ -2085,11 +2172,11 @@
/* workaround 8/8/8/8 uint/sint cube gather bug */
/* first detect it then change to a scaled read and f2i */
- tmp = LLVMBuildExtractElement(ctx->builder, args->resource, ctx->i32one, "");
+ tmp = LLVMBuildExtractElement(ctx->builder, args->resource, ctx->i32_1, "");
tmp2 = tmp;
/* extract the DATA_FORMAT */
- tmp = ac_build_bfe(&ctx->ac, tmp, LLVMConstInt(ctx->i32, 20, false),
+ tmp = ac_build_bfe(ctx, tmp, LLVMConstInt(ctx->i32, 20, false),
LLVMConstInt(ctx->i32, 6, false), false);
/* is the DATA_FORMAT == 8_8_8_8 */
@@ -2108,13 +2195,13 @@
tmp2 = LLVMBuildAnd(ctx->builder, tmp2, LLVMConstInt(ctx->i32, C_008F14_NUM_FORMAT_GFX6, false), "");
tmp2 = LLVMBuildOr(ctx->builder, tmp2, tmp, "");
- args->resource = LLVMBuildInsertElement(ctx->builder, args->resource, tmp2, ctx->i32one, "");
+ args->resource = LLVMBuildInsertElement(ctx->builder, args->resource, tmp2, ctx->i32_1, "");
/* don't modify the coordinates for this case */
coord = LLVMBuildSelect(ctx->builder, compare_cube_wa, orig_coords, coord, "");
}
args->addr = coord;
- result = ac_build_image_opcode(&ctx->ac, args);
+ result = ac_build_image_opcode(ctx, args);
if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
LLVMValueRef tmp, tmp2;
@@ -2136,7 +2223,7 @@
return result;
}
-static LLVMValueRef build_tex_intrinsic(struct nir_to_llvm_context *ctx,
+static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx,
const nir_tex_instr *instr,
bool lod_is_zero,
struct ac_image_args *args)
@@ -2145,7 +2232,7 @@
return ac_build_buffer_load_format(&ctx->ac,
args->resource,
args->addr,
- LLVMConstInt(ctx->i32, 0, false),
+ LLVMConstInt(ctx->ac.i32, 0, false),
true);
}
@@ -2193,10 +2280,10 @@
break;
}
- if (instr->op == nir_texop_tg4 && ctx->options->chip_class <= VI) {
+ if (instr->op == nir_texop_tg4 && ctx->ac.chip_class <= VI) {
enum glsl_base_type stype = glsl_get_sampler_result_type(instr->texture->var->type);
if (stype == GLSL_TYPE_UINT || stype == GLSL_TYPE_INT) {
- return radv_lower_gather4_integer(ctx, args, instr);
+ return radv_lower_gather4_integer(&ctx->ac, args, instr);
}
}
return ac_build_image_opcode(&ctx->ac, args);
@@ -2205,7 +2292,7 @@
static LLVMValueRef visit_vulkan_resource_index(struct nir_to_llvm_context *ctx,
nir_intrinsic_instr *instr)
{
- LLVMValueRef index = get_src(ctx, instr->src[0]);
+ LLVMValueRef index = get_src(ctx->nir, instr->src[0]);
unsigned desc_set = nir_intrinsic_desc_set(instr);
unsigned binding = nir_intrinsic_binding(instr);
LLVMValueRef desc_ptr = ctx->descriptor_sets[desc_set];
@@ -2241,47 +2328,46 @@
LLVMValueRef ptr, addr;
addr = LLVMConstInt(ctx->i32, nir_intrinsic_base(instr), 0);
- addr = LLVMBuildAdd(ctx->builder, addr, get_src(ctx, instr->src[0]), "");
+ addr = LLVMBuildAdd(ctx->builder, addr, get_src(ctx->nir, instr->src[0]), "");
ptr = ac_build_gep0(&ctx->ac, ctx->push_constants, addr);
- ptr = cast_ptr(ctx, ptr, get_def_type(ctx, &instr->dest.ssa));
+ ptr = cast_ptr(ctx, ptr, get_def_type(ctx->nir, &instr->dest.ssa));
return LLVMBuildLoad(ctx->builder, ptr, "");
}
-static LLVMValueRef visit_get_buffer_size(struct nir_to_llvm_context *ctx,
+static LLVMValueRef visit_get_buffer_size(struct ac_nir_context *ctx,
const nir_intrinsic_instr *instr)
{
LLVMValueRef desc = get_src(ctx, instr->src[0]);
return get_buffer_size(ctx, desc, false);
}
-static void visit_store_ssbo(struct nir_to_llvm_context *ctx,
+static void visit_store_ssbo(struct ac_nir_context *ctx,
nir_intrinsic_instr *instr)
{
const char *store_name;
LLVMValueRef src_data = get_src(ctx, instr->src[0]);
- LLVMTypeRef data_type = ctx->f32;
+ LLVMTypeRef data_type = ctx->ac.f32;
int elem_size_mult = get_elem_bits(&ctx->ac, LLVMTypeOf(src_data)) / 32;
int components_32bit = elem_size_mult * instr->num_components;
unsigned writemask = nir_intrinsic_write_mask(instr);
LLVMValueRef base_data, base_offset;
LLVMValueRef params[6];
+ LLVMValueRef i1false = LLVMConstInt(ctx->ac.i1, 0, false);
- if (ctx->stage == MESA_SHADER_FRAGMENT)
- ctx->shader_info->fs.writes_memory = true;
-
- params[1] = get_src(ctx, instr->src[1]);
- params[2] = LLVMConstInt(ctx->i32, 0, false); /* vindex */
- params[4] = ctx->i1false; /* glc */
- params[5] = ctx->i1false; /* slc */
+ params[1] = ctx->abi->load_ssbo(ctx->abi,
+ get_src(ctx, instr->src[1]), true);
+ params[2] = LLVMConstInt(ctx->ac.i32, 0, false); /* vindex */
+ params[4] = i1false; /* glc */
+ params[5] = i1false; /* slc */
if (components_32bit > 1)
- data_type = LLVMVectorType(ctx->f32, components_32bit);
+ data_type = LLVMVectorType(ctx->ac.f32, components_32bit);
- base_data = to_float(&ctx->ac, src_data);
- base_data = trim_vector(ctx, base_data, instr->num_components);
- base_data = LLVMBuildBitCast(ctx->builder, base_data,
+ base_data = ac_to_float(&ctx->ac, src_data);
+ base_data = trim_vector(&ctx->ac, base_data, instr->num_components);
+ base_data = LLVMBuildBitCast(ctx->ac.builder, base_data,
data_type, "");
base_offset = get_src(ctx, instr->src[2]); /* voffset */
while (writemask) {
@@ -2310,22 +2396,24 @@
store_name = "llvm.amdgcn.buffer.store.v4f32";
data = base_data;
} else if (count == 2) {
- tmp = LLVMBuildExtractElement(ctx->builder,
- base_data, LLVMConstInt(ctx->i32, start, false), "");
- data = LLVMBuildInsertElement(ctx->builder, LLVMGetUndef(ctx->v2f32), tmp,
- ctx->i32zero, "");
-
- tmp = LLVMBuildExtractElement(ctx->builder,
- base_data, LLVMConstInt(ctx->i32, start + 1, false), "");
- data = LLVMBuildInsertElement(ctx->builder, data, tmp,
- ctx->i32one, "");
+ LLVMTypeRef v2f32 = LLVMVectorType(ctx->ac.f32, 2);
+
+ tmp = LLVMBuildExtractElement(ctx->ac.builder,
+ base_data, LLVMConstInt(ctx->ac.i32, start, false), "");
+ data = LLVMBuildInsertElement(ctx->ac.builder, LLVMGetUndef(v2f32), tmp,
+ ctx->ac.i32_0, "");
+
+ tmp = LLVMBuildExtractElement(ctx->ac.builder,
+ base_data, LLVMConstInt(ctx->ac.i32, start + 1, false), "");
+ data = LLVMBuildInsertElement(ctx->ac.builder, data, tmp,
+ ctx->ac.i32_1, "");
store_name = "llvm.amdgcn.buffer.store.v2f32";
} else {
assert(count == 1);
if (get_llvm_num_components(base_data) > 1)
- data = LLVMBuildExtractElement(ctx->builder, base_data,
- LLVMConstInt(ctx->i32, start, false), "");
+ data = LLVMBuildExtractElement(ctx->ac.builder, base_data,
+ LLVMConstInt(ctx->ac.i32, start, false), "");
else
data = base_data;
store_name = "llvm.amdgcn.buffer.store.f32";
@@ -2333,32 +2421,32 @@
offset = base_offset;
if (start != 0) {
- offset = LLVMBuildAdd(ctx->builder, offset, LLVMConstInt(ctx->i32, start * 4, false), "");
+ offset = LLVMBuildAdd(ctx->ac.builder, offset, LLVMConstInt(ctx->ac.i32, start * 4, false), "");
}
params[0] = data;
params[3] = offset;
ac_build_intrinsic(&ctx->ac, store_name,
- ctx->voidt, params, 6, 0);
+ ctx->ac.voidt, params, 6, 0);
}
}
-static LLVMValueRef visit_atomic_ssbo(struct nir_to_llvm_context *ctx,
+static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx,
const nir_intrinsic_instr *instr)
{
const char *name;
LLVMValueRef params[6];
int arg_count = 0;
- if (ctx->stage == MESA_SHADER_FRAGMENT)
- ctx->shader_info->fs.writes_memory = true;
if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {
- params[arg_count++] = llvm_extract_elem(ctx, get_src(ctx, instr->src[3]), 0);
+ params[arg_count++] = llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[3]), 0);
}
- params[arg_count++] = llvm_extract_elem(ctx, get_src(ctx, instr->src[2]), 0);
- params[arg_count++] = get_src(ctx, instr->src[0]);
- params[arg_count++] = LLVMConstInt(ctx->i32, 0, false); /* vindex */
+ params[arg_count++] = llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[2]), 0);
+ params[arg_count++] = ctx->abi->load_ssbo(ctx->abi,
+ get_src(ctx, instr->src[0]),
+ true);
+ params[arg_count++] = LLVMConstInt(ctx->ac.i32, 0, false); /* vindex */
params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */
- params[arg_count++] = ctx->i1false; /* slc */
+ params[arg_count++] = LLVMConstInt(ctx->ac.i1, 0, false); /* slc */
switch (instr->intrinsic) {
case nir_intrinsic_ssbo_atomic_add:
@@ -2395,10 +2483,10 @@
abort();
}
- return ac_build_intrinsic(&ctx->ac, name, ctx->i32, params, arg_count, 0);
+ return ac_build_intrinsic(&ctx->ac, name, ctx->ac.i32, params, arg_count, 0);
}
-static LLVMValueRef visit_load_buffer(struct nir_to_llvm_context *ctx,
+static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx,
const nir_intrinsic_instr *instr)
{
LLVMValueRef results[2];
@@ -2410,14 +2498,14 @@
for (int i = 0; i < num_components; i += load_components) {
load_components = MIN2(num_components - i, 4);
const char *load_name;
- LLVMTypeRef data_type = ctx->f32;
- LLVMValueRef offset = LLVMConstInt(ctx->i32, i * 4, false);
- offset = LLVMBuildAdd(ctx->builder, get_src(ctx, instr->src[1]), offset, "");
+ LLVMTypeRef data_type = ctx->ac.f32;
+ LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, i * 4, false);
+ offset = LLVMBuildAdd(ctx->ac.builder, get_src(ctx, instr->src[1]), offset, "");
if (load_components == 3)
- data_type = LLVMVectorType(ctx->f32, 4);
+ data_type = LLVMVectorType(ctx->ac.f32, 4);
else if (load_components > 1)
- data_type = LLVMVectorType(ctx->f32, load_components);
+ data_type = LLVMVectorType(ctx->ac.f32, load_components);
if (load_components >= 3)
load_name = "llvm.amdgcn.buffer.load.v4f32";
@@ -2428,12 +2516,15 @@
else
unreachable("unhandled number of components");
+ LLVMValueRef i1false = LLVMConstInt(ctx->ac.i1, 0, false);
LLVMValueRef params[] = {
- get_src(ctx, instr->src[0]),
- LLVMConstInt(ctx->i32, 0, false),
+ ctx->abi->load_ssbo(ctx->abi,
+ get_src(ctx, instr->src[0]),
+ false),
+ LLVMConstInt(ctx->ac.i32, 0, false),
offset,
- ctx->i1false,
- ctx->i1false,
+ i1false,
+ i1false,
};
results[i] = ac_build_intrinsic(&ctx->ac, load_name, data_type, params, 5, 0);
@@ -2443,22 +2534,22 @@
LLVMValueRef ret = results[0];
if (num_components > 4 || num_components == 3) {
LLVMValueRef masks[] = {
- LLVMConstInt(ctx->i32, 0, false), LLVMConstInt(ctx->i32, 1, false),
- LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false),
- LLVMConstInt(ctx->i32, 4, false), LLVMConstInt(ctx->i32, 5, false),
- LLVMConstInt(ctx->i32, 6, false), LLVMConstInt(ctx->i32, 7, false)
+ LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false),
+ LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false),
+ LLVMConstInt(ctx->ac.i32, 4, false), LLVMConstInt(ctx->ac.i32, 5, false),
+ LLVMConstInt(ctx->ac.i32, 6, false), LLVMConstInt(ctx->ac.i32, 7, false)
};
LLVMValueRef swizzle = LLVMConstVector(masks, num_components);
- ret = LLVMBuildShuffleVector(ctx->builder, results[0],
+ ret = LLVMBuildShuffleVector(ctx->ac.builder, results[0],
results[num_components > 4 ? 1 : 0], swizzle, "");
}
- return LLVMBuildBitCast(ctx->builder, ret,
+ return LLVMBuildBitCast(ctx->ac.builder, ret,
get_def_type(ctx, &instr->dest.ssa), "");
}
-static LLVMValueRef visit_load_ubo_buffer(struct nir_to_llvm_context *ctx,
+static LLVMValueRef visit_load_ubo_buffer(struct ac_nir_context *ctx,
const nir_intrinsic_instr *instr)
{
LLVMValueRef results[8], ret;
@@ -2466,16 +2557,19 @@
LLVMValueRef offset = get_src(ctx, instr->src[1]);
int num_components = instr->num_components;
+ if (ctx->abi->load_ubo)
+ rsrc = ctx->abi->load_ubo(ctx->abi, rsrc);
+
if (instr->dest.ssa.bit_size == 64)
num_components *= 2;
for (unsigned i = 0; i < num_components; ++i) {
LLVMValueRef params[] = {
rsrc,
- LLVMBuildAdd(ctx->builder, LLVMConstInt(ctx->i32, 4 * i, 0),
+ LLVMBuildAdd(ctx->ac.builder, LLVMConstInt(ctx->ac.i32, 4 * i, 0),
offset, "")
};
- results[i] = ac_build_intrinsic(&ctx->ac, "llvm.SI.load.const.v4i32", ctx->f32,
+ results[i] = ac_build_intrinsic(&ctx->ac, "llvm.SI.load.const.v4i32", ctx->ac.f32,
params, 2,
AC_FUNC_ATTR_READNONE |
AC_FUNC_ATTR_LEGACY);
@@ -2483,15 +2577,15 @@
ret = ac_build_gather_values(&ctx->ac, results, instr->num_components);
- return LLVMBuildBitCast(ctx->builder, ret,
+ return LLVMBuildBitCast(ctx->ac.builder, ret,
get_def_type(ctx, &instr->dest.ssa), "");
}
static void
-radv_get_deref_offset(struct nir_to_llvm_context *ctx, nir_deref_var *deref,
- bool vs_in, unsigned *vertex_index_out,
- LLVMValueRef *vertex_index_ref,
- unsigned *const_out, LLVMValueRef *indir_out)
+get_deref_offset(struct ac_nir_context *ctx, nir_deref_var *deref,
+ bool vs_in, unsigned *vertex_index_out,
+ LLVMValueRef *vertex_index_ref,
+ unsigned *const_out, LLVMValueRef *indir_out)
{
unsigned const_offset = 0;
nir_deref *tail = &deref->deref;
@@ -2504,9 +2598,9 @@
*vertex_index_out = deref_array->base_offset;
if (vertex_index_ref) {
- LLVMValueRef vtx = LLVMConstInt(ctx->i32, deref_array->base_offset, false);
+ LLVMValueRef vtx = LLVMConstInt(ctx->ac.i32, deref_array->base_offset, false);
if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
- vtx = LLVMBuildAdd(ctx->builder, vtx, get_src(ctx, deref_array->indirect), "");
+ vtx = LLVMBuildAdd(ctx->ac.builder, vtx, get_src(ctx, deref_array->indirect), "");
}
*vertex_index_ref = vtx;
}
@@ -2538,11 +2632,11 @@
assert(deref_array->deref_array_type == nir_deref_array_type_indirect);
index = get_src(ctx, deref_array->indirect);
- stride = LLVMConstInt(ctx->i32, size, 0);
- local_offset = LLVMBuildMul(ctx->builder, stride, index, "");
+ stride = LLVMConstInt(ctx->ac.i32, size, 0);
+ local_offset = LLVMBuildMul(ctx->ac.builder, stride, index, "");
if (offset)
- offset = LLVMBuildAdd(ctx->builder, offset, local_offset, "");
+ offset = LLVMBuildAdd(ctx->ac.builder, offset, local_offset, "");
else
offset = local_offset;
} else if (tail->deref_type == nir_deref_type_struct) {
@@ -2558,8 +2652,8 @@
}
out:
if (const_offset && offset)
- offset = LLVMBuildAdd(ctx->builder, offset,
- LLVMConstInt(ctx->i32, const_offset, 0),
+ offset = LLVMBuildAdd(ctx->ac.builder, offset,
+ LLVMConstInt(ctx->ac.i32, const_offset, 0),
"");
*const_out = const_offset;
@@ -2571,7 +2665,7 @@
LLVMValueRef dw_addr)
{
LLVMValueRef value;
- value = ac_build_indexed_load(&ctx->ac, ctx->lds, dw_addr, false);
+ value = ac_build_load(&ctx->ac, ctx->lds, dw_addr);
return value;
}
@@ -2610,8 +2704,8 @@
LLVMValueRef param_stride, constant16;
LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);
- vertices_per_patch = unpack_param(ctx, ctx->tcs_offchip_layout, 9, 6);
- num_patches = unpack_param(ctx, ctx->tcs_offchip_layout, 0, 9);
+ vertices_per_patch = unpack_param(&ctx->ac, ctx->tcs_offchip_layout, 9, 6);
+ num_patches = unpack_param(&ctx->ac, ctx->tcs_offchip_layout, 0, 9);
total_vertices = LLVMBuildMul(ctx->builder, vertices_per_patch,
num_patches, "");
@@ -2637,7 +2731,7 @@
if (!vertex_index) {
LLVMValueRef patch_data_offset =
- unpack_param(ctx, ctx->tcs_offchip_layout, 16, 16);
+ unpack_param(&ctx->ac, ctx->tcs_offchip_layout, 16, 16);
base_addr = LLVMBuildAdd(ctx->builder, base_addr,
patch_data_offset, "");
@@ -2725,11 +2819,11 @@
const bool per_vertex = nir_is_per_vertex_io(instr->variables[0]->var, ctx->stage);
const bool is_compact = instr->variables[0]->var->data.compact;
param = shader_io_get_unique_index(instr->variables[0]->var->data.location);
- radv_get_deref_offset(ctx, instr->variables[0],
- false, NULL, per_vertex ? &vertex_index : NULL,
- &const_index, &indir_index);
+ get_deref_offset(ctx->nir, instr->variables[0],
+ false, NULL, per_vertex ? &vertex_index : NULL,
+ &const_index, &indir_index);
- stride = unpack_param(ctx, ctx->tcs_in_layout, 13, 8);
+ stride = unpack_param(&ctx->ac, ctx->tcs_in_layout, 13, 8);
dw_addr = get_tcs_in_current_patch_offset(ctx);
dw_addr = get_dw_address(ctx, dw_addr, param, const_index, is_compact, vertex_index, stride,
indir_index);
@@ -2740,7 +2834,7 @@
ctx->i32one, "");
}
result = ac_build_gather_values(&ctx->ac, value, instr->num_components);
- result = LLVMBuildBitCast(ctx->builder, result, get_def_type(ctx, &instr->dest.ssa), "");
+ result = LLVMBuildBitCast(ctx->builder, result, get_def_type(ctx->nir, &instr->dest.ssa), "");
return result;
}
@@ -2748,7 +2842,8 @@
load_tcs_output(struct nir_to_llvm_context *ctx,
nir_intrinsic_instr *instr)
{
- LLVMValueRef dw_addr, stride;
+ LLVMValueRef dw_addr;
+ LLVMValueRef stride = NULL;
LLVMValueRef value[4], result;
LLVMValueRef vertex_index = NULL;
LLVMValueRef indir_index = NULL;
@@ -2757,12 +2852,12 @@
const bool per_vertex = nir_is_per_vertex_io(instr->variables[0]->var, ctx->stage);
const bool is_compact = instr->variables[0]->var->data.compact;
param = shader_io_get_unique_index(instr->variables[0]->var->data.location);
- radv_get_deref_offset(ctx, instr->variables[0],
- false, NULL, per_vertex ? &vertex_index : NULL,
- &const_index, &indir_index);
+ get_deref_offset(ctx->nir, instr->variables[0],
+ false, NULL, per_vertex ? &vertex_index : NULL,
+ &const_index, &indir_index);
if (!instr->variables[0]->var->data.patch) {
- stride = unpack_param(ctx, ctx->tcs_out_layout, 13, 8);
+ stride = unpack_param(&ctx->ac, ctx->tcs_out_layout, 13, 8);
dw_addr = get_tcs_out_current_patch_offset(ctx);
} else {
dw_addr = get_tcs_out_current_patch_data_offset(ctx);
@@ -2777,7 +2872,7 @@
ctx->i32one, "");
}
result = ac_build_gather_values(&ctx->ac, value, instr->num_components);
- result = LLVMBuildBitCast(ctx->builder, result, get_def_type(ctx, &instr->dest.ssa), "");
+ result = LLVMBuildBitCast(ctx->builder, result, get_def_type(ctx->nir, &instr->dest.ssa), "");
return result;
}
@@ -2787,7 +2882,8 @@
LLVMValueRef src,
unsigned writemask)
{
- LLVMValueRef stride, dw_addr;
+ LLVMValueRef dw_addr;
+ LLVMValueRef stride = NULL;
LLVMValueRef buf_addr = NULL;
LLVMValueRef vertex_index = NULL;
LLVMValueRef indir_index = NULL;
@@ -2796,9 +2892,9 @@
const bool per_vertex = nir_is_per_vertex_io(instr->variables[0]->var, ctx->stage);
const bool is_compact = instr->variables[0]->var->data.compact;
- radv_get_deref_offset(ctx, instr->variables[0],
- false, NULL, per_vertex ? &vertex_index : NULL,
- &const_index, &indir_index);
+ get_deref_offset(ctx->nir, instr->variables[0],
+ false, NULL, per_vertex ? &vertex_index : NULL,
+ &const_index, &indir_index);
param = shader_io_get_unique_index(instr->variables[0]->var->data.location);
if (instr->variables[0]->var->data.location == VARYING_SLOT_CLIP_DIST0 &&
@@ -2808,7 +2904,7 @@
}
if (!instr->variables[0]->var->data.patch) {
- stride = unpack_param(ctx, ctx->tcs_out_layout, 13, 8);
+ stride = unpack_param(&ctx->ac, ctx->tcs_out_layout, 13, 8);
dw_addr = get_tcs_out_current_patch_offset(ctx);
} else {
dw_addr = get_tcs_out_current_patch_data_offset(ctx);
@@ -2821,19 +2917,19 @@
buf_addr = get_tcs_tes_buffer_address_params(ctx, param, const_index, is_compact,
vertex_index, indir_index);
+ bool is_tess_factor = false;
+ if (instr->variables[0]->var->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
+ instr->variables[0]->var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)
+ is_tess_factor = true;
+
unsigned base = is_compact ? const_index : 0;
for (unsigned chan = 0; chan < 8; chan++) {
- bool is_tess_factor = false;
if (!(writemask & (1 << chan)))
continue;
- LLVMValueRef value = llvm_extract_elem(ctx, src, chan);
+ LLVMValueRef value = llvm_extract_elem(&ctx->ac, src, chan);
lds_store(ctx, dw_addr, value);
- if (instr->variables[0]->var->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
- instr->variables[0]->var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)
- is_tess_factor = true;
-
if (!is_tess_factor && writemask != 0xF)
ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, value, 1,
buf_addr, ctx->oc_lds,
@@ -2863,9 +2959,9 @@
const bool per_vertex = nir_is_per_vertex_io(instr->variables[0]->var, ctx->stage);
const bool is_compact = instr->variables[0]->var->data.compact;
- radv_get_deref_offset(ctx, instr->variables[0],
- false, NULL, per_vertex ? &vertex_index : NULL,
- &const_index, &indir_index);
+ get_deref_offset(ctx->nir, instr->variables[0],
+ false, NULL, per_vertex ? &vertex_index : NULL,
+ &const_index, &indir_index);
param = shader_io_get_unique_index(instr->variables[0]->var->data.location);
if (instr->variables[0]->var->data.location == VARYING_SLOT_CLIP_DIST0 &&
is_compact && const_index > 3) {
@@ -2877,8 +2973,8 @@
result = ac_build_buffer_load(&ctx->ac, ctx->hs_ring_tess_offchip, instr->num_components, NULL,
buf_addr, ctx->oc_lds, is_compact ? (4 * const_index) : 0, 1, 0, true, false);
- result = trim_vector(ctx, result, instr->num_components);
- result = LLVMBuildBitCast(ctx->builder, result, get_def_type(ctx, &instr->dest.ssa), "");
+ result = trim_vector(&ctx->ac, result, instr->num_components);
+ result = LLVMBuildBitCast(ctx->builder, result, get_def_type(ctx->nir, &instr->dest.ssa), "");
return result;
}
@@ -2892,9 +2988,9 @@
unsigned param, vtx_offset_param;
LLVMValueRef value[4], result;
unsigned vertex_index;
- radv_get_deref_offset(ctx, instr->variables[0],
- false, &vertex_index, NULL,
- &const_index, &indir_index);
+ get_deref_offset(ctx->nir, instr->variables[0],
+ false, &vertex_index, NULL,
+ &const_index, &indir_index);
vtx_offset_param = vertex_index;
assert(vtx_offset_param < 6);
vtx_offset = LLVMBuildMul(ctx->builder, ctx->gs_vtx_offset[vtx_offset_param],
@@ -2902,21 +2998,27 @@
param = shader_io_get_unique_index(instr->variables[0]->var->data.location);
for (unsigned i = 0; i < instr->num_components; i++) {
-
- args[0] = ctx->esgs_ring;
- args[1] = vtx_offset;
- args[2] = LLVMConstInt(ctx->i32, (param * 4 + i + const_index) * 256, false);
- args[3] = ctx->i32zero;
- args[4] = ctx->i32one; /* OFFEN */
- args[5] = ctx->i32zero; /* IDXEN */
- args[6] = ctx->i32one; /* GLC */
- args[7] = ctx->i32zero; /* SLC */
- args[8] = ctx->i32zero; /* TFE */
-
- value[i] = ac_build_intrinsic(&ctx->ac, "llvm.SI.buffer.load.dword.i32.i32",
- ctx->i32, args, 9,
- AC_FUNC_ATTR_READONLY |
- AC_FUNC_ATTR_LEGACY);
+ if (ctx->ac.chip_class >= GFX9) {
+ LLVMValueRef dw_addr = ctx->gs_vtx_offset[vtx_offset_param];
+ dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr,
+ LLVMConstInt(ctx->ac.i32, param * 4 + i + const_index, 0), "");
+ value[i] = lds_load(ctx, dw_addr);
+ } else {
+ args[0] = ctx->esgs_ring;
+ args[1] = vtx_offset;
+ args[2] = LLVMConstInt(ctx->i32, (param * 4 + i + const_index) * 256, false);
+ args[3] = ctx->i32zero;
+ args[4] = ctx->i32one; /* OFFEN */
+ args[5] = ctx->i32zero; /* IDXEN */
+ args[6] = ctx->i32one; /* GLC */
+ args[7] = ctx->i32zero; /* SLC */
+ args[8] = ctx->i32zero; /* TFE */
+
+ value[i] = ac_build_intrinsic(&ctx->ac, "llvm.SI.buffer.load.dword.i32.i32",
+ ctx->i32, args, 9,
+ AC_FUNC_ATTR_READONLY |
+ AC_FUNC_ATTR_LEGACY);
+ }
}
result = ac_build_gather_values(&ctx->ac, value, instr->num_components);
@@ -2924,7 +3026,7 @@
}
static LLVMValueRef
-build_gep_for_deref(struct nir_to_llvm_context *ctx,
+build_gep_for_deref(struct ac_nir_context *ctx,
nir_deref_var *deref)
{
struct hash_entry *entry = _mesa_hash_table_search(ctx->vars, deref->var);
@@ -2936,10 +3038,10 @@
switch (tail->deref_type) {
case nir_deref_type_array: {
nir_deref_array *array = nir_deref_as_array(tail);
- offset = LLVMConstInt(ctx->i32, array->base_offset, 0);
+ offset = LLVMConstInt(ctx->ac.i32, array->base_offset, 0);
if (array->deref_array_type ==
nir_deref_array_type_indirect) {
- offset = LLVMBuildAdd(ctx->builder, offset,
+ offset = LLVMBuildAdd(ctx->ac.builder, offset,
get_src(ctx,
array->indirect),
"");
@@ -2949,7 +3051,7 @@
case nir_deref_type_struct: {
nir_deref_struct *deref_struct =
nir_deref_as_struct(tail);
- offset = LLVMConstInt(ctx->i32,
+ offset = LLVMConstInt(ctx->ac.i32,
deref_struct->index, 0);
break;
}
@@ -2962,7 +3064,7 @@
return val;
}
-static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx,
+static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,
nir_intrinsic_instr *instr)
{
LLVMValueRef values[8];
@@ -2971,9 +3073,10 @@
LLVMValueRef indir_index;
LLVMValueRef ret;
unsigned const_index;
+ unsigned stride = instr->variables[0]->var->data.compact ? 1 : 4;
bool vs_in = ctx->stage == MESA_SHADER_VERTEX &&
instr->variables[0]->var->data.mode == nir_var_shader_in;
- radv_get_deref_offset(ctx, instr->variables[0], vs_in, NULL, NULL,
+ get_deref_offset(ctx, instr->variables[0], vs_in, NULL, NULL,
&const_index, &indir_index);
if (instr->dest.ssa.bit_size == 64)
@@ -2982,11 +3085,11 @@
switch (instr->variables[0]->var->data.mode) {
case nir_var_shader_in:
if (ctx->stage == MESA_SHADER_TESS_CTRL)
- return load_tcs_input(ctx, instr);
+ return load_tcs_input(ctx->nctx, instr);
if (ctx->stage == MESA_SHADER_TESS_EVAL)
- return load_tes_input(ctx, instr);
+ return load_tes_input(ctx->nctx, instr);
if (ctx->stage == MESA_SHADER_GEOMETRY) {
- return load_gs_input(ctx, instr);
+ return load_gs_input(ctx->nctx, instr);
}
for (unsigned chan = 0; chan < ve; chan++) {
if (indir_index) {
@@ -2995,14 +3098,14 @@
ctx->stage == MESA_SHADER_VERTEX);
count -= chan / 4;
LLVMValueRef tmp_vec = ac_build_gather_values_extended(
- &ctx->ac, ctx->inputs + idx + chan, count,
- 4, false);
+ &ctx->ac, ctx->abi->inputs + idx + chan, count,
+ stride, false, true);
- values[chan] = LLVMBuildExtractElement(ctx->builder,
+ values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
tmp_vec,
indir_index, "");
} else
- values[chan] = ctx->inputs[idx + chan + const_index * 4];
+ values[chan] = ctx->abi->inputs[idx + chan + const_index * stride];
}
break;
case nir_var_local:
@@ -3013,27 +3116,27 @@
count -= chan / 4;
LLVMValueRef tmp_vec = ac_build_gather_values_extended(
&ctx->ac, ctx->locals + idx + chan, count,
- 4, true);
+ stride, true, true);
- values[chan] = LLVMBuildExtractElement(ctx->builder,
+ values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
tmp_vec,
indir_index, "");
} else {
- values[chan] = LLVMBuildLoad(ctx->builder, ctx->locals[idx + chan + const_index * 4], "");
+ values[chan] = LLVMBuildLoad(ctx->ac.builder, ctx->locals[idx + chan + const_index * stride], "");
}
}
break;
case nir_var_shared: {
LLVMValueRef address = build_gep_for_deref(ctx,
instr->variables[0]);
- LLVMValueRef val = LLVMBuildLoad(ctx->builder, address, "");
- return LLVMBuildBitCast(ctx->builder, val,
+ LLVMValueRef val = LLVMBuildLoad(ctx->ac.builder, address, "");
+ return LLVMBuildBitCast(ctx->ac.builder, val,
get_def_type(ctx, &instr->dest.ssa),
"");
}
case nir_var_shader_out:
if (ctx->stage == MESA_SHADER_TESS_CTRL)
- return load_tcs_output(ctx, instr);
+ return load_tcs_output(ctx->nctx, instr);
for (unsigned chan = 0; chan < ve; chan++) {
if (indir_index) {
unsigned count = glsl_count_attribute_slots(
@@ -3041,14 +3144,14 @@
count -= chan / 4;
LLVMValueRef tmp_vec = ac_build_gather_values_extended(
&ctx->ac, ctx->outputs + idx + chan, count,
- 4, true);
+ stride, true, true);
- values[chan] = LLVMBuildExtractElement(ctx->builder,
+ values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
tmp_vec,
indir_index, "");
} else {
- values[chan] = LLVMBuildLoad(ctx->builder,
- ctx->outputs[idx + chan + const_index * 4],
+ values[chan] = LLVMBuildLoad(ctx->ac.builder,
+ ctx->outputs[idx + chan + const_index * stride],
"");
}
}
@@ -3057,27 +3160,27 @@
unreachable("unhandle variable mode");
}
ret = ac_build_gather_values(&ctx->ac, values, ve);
- return LLVMBuildBitCast(ctx->builder, ret, get_def_type(ctx, &instr->dest.ssa), "");
+ return LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, &instr->dest.ssa), "");
}
static void
-visit_store_var(struct nir_to_llvm_context *ctx,
- nir_intrinsic_instr *instr)
+visit_store_var(struct ac_nir_context *ctx,
+ nir_intrinsic_instr *instr)
{
LLVMValueRef temp_ptr, value;
int idx = instr->variables[0]->var->data.driver_location;
- LLVMValueRef src = to_float(&ctx->ac, get_src(ctx, instr->src[0]));
+ LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[0]));
int writemask = instr->const_index[0];
LLVMValueRef indir_index;
unsigned const_index;
- radv_get_deref_offset(ctx, instr->variables[0], false,
- NULL, NULL, &const_index, &indir_index);
+ get_deref_offset(ctx, instr->variables[0], false,
+ NULL, NULL, &const_index, &indir_index);
if (get_elem_bits(&ctx->ac, LLVMTypeOf(src)) == 64) {
int old_writemask = writemask;
- src = LLVMBuildBitCast(ctx->builder, src,
- LLVMVectorType(ctx->f32, get_llvm_num_components(src) * 2),
+ src = LLVMBuildBitCast(ctx->ac.builder, src,
+ LLVMVectorType(ctx->ac.f32, get_llvm_num_components(src) * 2),
"");
writemask = 0;
@@ -3091,7 +3194,7 @@
case nir_var_shader_out:
if (ctx->stage == MESA_SHADER_TESS_CTRL) {
- store_tcs_output(ctx, instr, src, writemask);
+ store_tcs_output(ctx->nctx, instr, src, writemask);
return;
}
@@ -3100,7 +3203,7 @@
if (!(writemask & (1 << chan)))
continue;
- value = llvm_extract_elem(ctx, src, chan);
+ value = llvm_extract_elem(&ctx->ac, src, chan);
if (instr->variables[0]->var->data.compact)
stride = 1;
@@ -3110,20 +3213,17 @@
count -= chan / 4;
LLVMValueRef tmp_vec = ac_build_gather_values_extended(
&ctx->ac, ctx->outputs + idx + chan, count,
- stride, true);
+ stride, true, true);
- if (get_llvm_num_components(tmp_vec) > 1) {
- tmp_vec = LLVMBuildInsertElement(ctx->builder, tmp_vec,
- value, indir_index, "");
- } else
- tmp_vec = value;
- build_store_values_extended(ctx, ctx->outputs + idx + chan,
+ tmp_vec = LLVMBuildInsertElement(ctx->ac.builder, tmp_vec,
+ value, indir_index, "");
+ build_store_values_extended(&ctx->ac, ctx->outputs + idx + chan,
count, stride, tmp_vec);
} else {
temp_ptr = ctx->outputs[idx + chan + const_index * stride];
- LLVMBuildStore(ctx->builder, value, temp_ptr);
+ LLVMBuildStore(ctx->ac.builder, value, temp_ptr);
}
}
break;
@@ -3132,23 +3232,23 @@
if (!(writemask & (1 << chan)))
continue;
- value = llvm_extract_elem(ctx, src, chan);
+ value = llvm_extract_elem(&ctx->ac, src, chan);
if (indir_index) {
unsigned count = glsl_count_attribute_slots(
instr->variables[0]->var->type, false);
count -= chan / 4;
LLVMValueRef tmp_vec = ac_build_gather_values_extended(
&ctx->ac, ctx->locals + idx + chan, count,
- 4, true);
+ 4, true, true);
- tmp_vec = LLVMBuildInsertElement(ctx->builder, tmp_vec,
+ tmp_vec = LLVMBuildInsertElement(ctx->ac.builder, tmp_vec,
value, indir_index, "");
- build_store_values_extended(ctx, ctx->locals + idx + chan,
+ build_store_values_extended(&ctx->ac, ctx->locals + idx + chan,
count, 4, tmp_vec);
} else {
temp_ptr = ctx->locals[idx + chan + const_index * 4];
- LLVMBuildStore(ctx->builder, value, temp_ptr);
+ LLVMBuildStore(ctx->ac.builder, value, temp_ptr);
}
}
break;
@@ -3162,22 +3262,22 @@
nir_deref_tail(&instr->variables[0]->deref)->type);
if (writemask == (1 << components) - 1) {
val = LLVMBuildBitCast(
- ctx->builder, val,
+ ctx->ac.builder, val,
LLVMGetElementType(LLVMTypeOf(address)), "");
- LLVMBuildStore(ctx->builder, val, address);
+ LLVMBuildStore(ctx->ac.builder, val, address);
} else {
for (unsigned chan = 0; chan < 4; chan++) {
if (!(writemask & (1 << chan)))
continue;
LLVMValueRef ptr =
- LLVMBuildStructGEP(ctx->builder,
+ LLVMBuildStructGEP(ctx->ac.builder,
address, chan, "");
- LLVMValueRef src = llvm_extract_elem(ctx, val,
+ LLVMValueRef src = llvm_extract_elem(&ctx->ac, val,
chan);
src = LLVMBuildBitCast(
- ctx->builder, src,
+ ctx->ac.builder, src,
LLVMGetElementType(LLVMTypeOf(ptr)), "");
- LLVMBuildStore(ctx->builder, src, ptr);
+ LLVMBuildStore(ctx->ac.builder, src, ptr);
}
}
break;
@@ -3228,7 +3328,7 @@
* The sample index should be adjusted as follows:
* sample_index = (fmask >> (sample_index * 4)) & 0xF;
*/
-static LLVMValueRef adjust_sample_index_using_fmask(struct nir_to_llvm_context *ctx,
+static LLVMValueRef adjust_sample_index_using_fmask(struct ac_llvm_context *ctx,
LLVMValueRef coord_x, LLVMValueRef coord_y,
LLVMValueRef coord_z,
LLVMValueRef sample_index,
@@ -3250,17 +3350,17 @@
args.da = coord_z ? true : false;
args.resource = fmask_desc_ptr;
args.dmask = 0xf;
- args.addr = ac_build_gather_values(&ctx->ac, fmask_load_address, coord_z ? 4 : 2);
+ args.addr = ac_build_gather_values(ctx, fmask_load_address, coord_z ? 4 : 2);
- res = ac_build_image_opcode(&ctx->ac, &args);
+ res = ac_build_image_opcode(ctx, &args);
- res = to_integer(&ctx->ac, res);
+ res = ac_to_integer(ctx, res);
LLVMValueRef four = LLVMConstInt(ctx->i32, 4, false);
LLVMValueRef F = LLVMConstInt(ctx->i32, 0xf, false);
LLVMValueRef fmask = LLVMBuildExtractElement(ctx->builder,
res,
- ctx->i32zero, "");
+ ctx->i32_0, "");
LLVMValueRef sample_index4 =
LLVMBuildMul(ctx->builder, sample_index, four, "");
@@ -3278,11 +3378,11 @@
LLVMValueRef fmask_word1 =
LLVMBuildExtractElement(ctx->builder, fmask_desc,
- ctx->i32one, "");
+ ctx->i32_1, "");
LLVMValueRef word1_is_nonzero =
LLVMBuildICmp(ctx->builder, LLVMIntNE,
- fmask_word1, ctx->i32zero, "");
+ fmask_word1, ctx->i32_0, "");
/* Replace the MSAA sample index. */
sample_index =
@@ -3291,7 +3391,7 @@
return sample_index;
}
-static LLVMValueRef get_image_coords(struct nir_to_llvm_context *ctx,
+static LLVMValueRef get_image_coords(struct ac_nir_context *ctx,
const nir_intrinsic_instr *instr)
{
const struct glsl_type *type = instr->variables[0]->var->type;
@@ -3301,11 +3401,11 @@
LLVMValueRef src0 = get_src(ctx, instr->src[0]);
LLVMValueRef coords[4];
LLVMValueRef masks[] = {
- LLVMConstInt(ctx->i32, 0, false), LLVMConstInt(ctx->i32, 1, false),
- LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false),
+ LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false),
+ LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false),
};
LLVMValueRef res;
- LLVMValueRef sample_index = llvm_extract_elem(ctx, get_src(ctx, instr->src[1]), 0);
+ LLVMValueRef sample_index = llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[1]), 0);
int count;
enum glsl_sampler_dim dim = glsl_get_sampler_dim(type);
@@ -3314,33 +3414,37 @@
dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
bool is_ms = (dim == GLSL_SAMPLER_DIM_MS ||
dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
- bool gfx9_1d = ctx->options->chip_class >= GFX9 && dim == GLSL_SAMPLER_DIM_1D;
+ bool gfx9_1d = ctx->ac.chip_class >= GFX9 && dim == GLSL_SAMPLER_DIM_1D;
count = image_type_to_components_count(dim, is_array);
if (is_ms) {
LLVMValueRef fmask_load_address[3];
int chan;
- fmask_load_address[0] = LLVMBuildExtractElement(ctx->builder, src0, masks[0], "");
- fmask_load_address[1] = LLVMBuildExtractElement(ctx->builder, src0, masks[1], "");
+ fmask_load_address[0] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], "");
+ fmask_load_address[1] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[1], "");
if (is_array)
- fmask_load_address[2] = LLVMBuildExtractElement(ctx->builder, src0, masks[2], "");
+ fmask_load_address[2] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[2], "");
else
fmask_load_address[2] = NULL;
if (add_frag_pos) {
for (chan = 0; chan < 2; ++chan)
- fmask_load_address[chan] = LLVMBuildAdd(ctx->builder, fmask_load_address[chan], LLVMBuildFPToUI(ctx->builder, ctx->frag_pos[chan], ctx->i32, ""), "");
+ fmask_load_address[chan] =
+ LLVMBuildAdd(ctx->ac.builder, fmask_load_address[chan],
+ LLVMBuildFPToUI(ctx->ac.builder, ctx->abi->frag_pos[chan],
+ ctx->ac.i32, ""), "");
+ fmask_load_address[2] = ac_to_integer(&ctx->ac, ctx->abi->inputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)]);
}
- sample_index = adjust_sample_index_using_fmask(ctx,
+ sample_index = adjust_sample_index_using_fmask(&ctx->ac,
fmask_load_address[0],
fmask_load_address[1],
fmask_load_address[2],
sample_index,
- get_sampler_desc(ctx, instr->variables[0], DESC_FMASK));
+ get_sampler_desc(ctx, instr->variables[0], AC_DESC_FMASK, true, false));
}
if (count == 1 && !gfx9_1d) {
if (instr->src[0].ssa->num_components)
- res = LLVMBuildExtractElement(ctx->builder, src0, masks[0], "");
+ res = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], "");
else
res = src0;
} else {
@@ -3348,11 +3452,14 @@
if (is_ms)
count--;
for (chan = 0; chan < count; ++chan) {
- coords[chan] = llvm_extract_elem(ctx, src0, chan);
+ coords[chan] = llvm_extract_elem(&ctx->ac, src0, chan);
}
if (add_frag_pos) {
- for (chan = 0; chan < count; ++chan)
- coords[chan] = LLVMBuildAdd(ctx->builder, coords[chan], LLVMBuildFPToUI(ctx->builder, ctx->frag_pos[chan], ctx->i32, ""), "");
+ for (chan = 0; chan < 2; ++chan)
+ coords[chan] = LLVMBuildAdd(ctx->ac.builder, coords[chan], LLVMBuildFPToUI(ctx->ac.builder, ctx->abi->frag_pos[chan],
+ ctx->ac.i32, ""), "");
+ coords[2] = ac_to_integer(&ctx->ac, ctx->abi->inputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)]);
+ count++;
}
if (gfx9_1d) {
@@ -3370,7 +3477,7 @@
}
if (count == 3) {
- coords[3] = LLVMGetUndef(ctx->i32);
+ coords[3] = LLVMGetUndef(ctx->ac.i32);
count = 4;
}
res = ac_build_gather_values(&ctx->ac, coords, count);
@@ -3378,7 +3485,7 @@
return res;
}
-static LLVMValueRef visit_image_load(struct nir_to_llvm_context *ctx,
+static LLVMValueRef visit_image_load(struct ac_nir_context *ctx,
const nir_intrinsic_instr *instr)
{
LLVMValueRef params[7];
@@ -3386,39 +3493,44 @@
char intrinsic_name[64];
const nir_variable *var = instr->variables[0]->var;
const struct glsl_type *type = var->type;
+ LLVMValueRef i1false = LLVMConstInt(ctx->ac.i1, 0, false);
+ LLVMValueRef i1true = LLVMConstInt(ctx->ac.i1, 1, false);
+
if(instr->variables[0]->deref.child)
type = instr->variables[0]->deref.child->type;
type = glsl_without_array(type);
if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
- params[0] = get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER);
- params[1] = LLVMBuildExtractElement(ctx->builder, get_src(ctx, instr->src[0]),
- LLVMConstInt(ctx->i32, 0, false), ""); /* vindex */
- params[2] = LLVMConstInt(ctx->i32, 0, false); /* voffset */
- params[3] = ctx->i1false; /* glc */
- params[4] = ctx->i1false; /* slc */
- res = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.load.format.v4f32", ctx->v4f32,
+ params[0] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER, true, false);
+ params[1] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[0]),
+ ctx->ac.i32_0, ""); /* vindex */
+ params[2] = ctx->ac.i32_0; /* voffset */
+ params[3] = i1false; /* glc */
+ params[4] = i1false; /* slc */
+ res = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.load.format.v4f32", ctx->ac.v4f32,
params, 5, 0);
- res = trim_vector(ctx, res, instr->dest.ssa.num_components);
- res = to_integer(&ctx->ac, res);
+ res = trim_vector(&ctx->ac, res, instr->dest.ssa.num_components);
+ res = ac_to_integer(&ctx->ac, res);
} else {
bool is_da = glsl_sampler_type_is_array(type) ||
- glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE;
- LLVMValueRef da = is_da ? ctx->i1true : ctx->i1false;
- LLVMValueRef glc = ctx->i1false;
- LLVMValueRef slc = ctx->i1false;
+ glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE ||
+ glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_SUBPASS ||
+ glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_SUBPASS_MS;
+ LLVMValueRef da = is_da ? i1true : i1false;
+ LLVMValueRef glc = i1false;
+ LLVMValueRef slc = i1false;
params[0] = get_image_coords(ctx, instr);
- params[1] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE);
- params[2] = LLVMConstInt(ctx->i32, 15, false); /* dmask */
+ params[1] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_IMAGE, true, false);
+ params[2] = LLVMConstInt(ctx->ac.i32, 15, false); /* dmask */
if (HAVE_LLVM <= 0x0309) {
- params[3] = ctx->i1false; /* r128 */
+ params[3] = i1false; /* r128 */
params[4] = da;
params[5] = glc;
params[6] = slc;
} else {
- LLVMValueRef lwe = ctx->i1false;
+ LLVMValueRef lwe = i1false;
params[3] = glc;
params[4] = slc;
params[5] = lwe;
@@ -3426,58 +3538,58 @@
}
ac_get_image_intr_name("llvm.amdgcn.image.load",
- ctx->v4f32, /* vdata */
+ ctx->ac.v4f32, /* vdata */
LLVMTypeOf(params[0]), /* coords */
LLVMTypeOf(params[1]), /* rsrc */
intrinsic_name, sizeof(intrinsic_name));
- res = ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->v4f32,
+ res = ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->ac.v4f32,
params, 7, AC_FUNC_ATTR_READONLY);
}
- return to_integer(&ctx->ac, res);
+ return ac_to_integer(&ctx->ac, res);
}
-static void visit_image_store(struct nir_to_llvm_context *ctx,
+static void visit_image_store(struct ac_nir_context *ctx,
nir_intrinsic_instr *instr)
{
LLVMValueRef params[8];
char intrinsic_name[64];
const nir_variable *var = instr->variables[0]->var;
const struct glsl_type *type = glsl_without_array(var->type);
- LLVMValueRef glc = ctx->i1false;
- bool force_glc = ctx->options->chip_class == SI;
+ LLVMValueRef i1false = LLVMConstInt(ctx->ac.i1, 0, false);
+ LLVMValueRef i1true = LLVMConstInt(ctx->ac.i1, 1, false);
+ LLVMValueRef glc = i1false;
+ bool force_glc = ctx->ac.chip_class == SI;
if (force_glc)
- glc = ctx->i1true;
- if (ctx->stage == MESA_SHADER_FRAGMENT)
- ctx->shader_info->fs.writes_memory = true;
+ glc = i1true;
if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
- params[0] = to_float(&ctx->ac, get_src(ctx, instr->src[2])); /* data */
- params[1] = get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER);
- params[2] = LLVMBuildExtractElement(ctx->builder, get_src(ctx, instr->src[0]),
- LLVMConstInt(ctx->i32, 0, false), ""); /* vindex */
- params[3] = LLVMConstInt(ctx->i32, 0, false); /* voffset */
+ params[0] = ac_to_float(&ctx->ac, get_src(ctx, instr->src[2])); /* data */
+ params[1] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER, true, true);
+ params[2] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[0]),
+ ctx->ac.i32_0, ""); /* vindex */
+ params[3] = ctx->ac.i32_0; /* voffset */
params[4] = glc; /* glc */
- params[5] = ctx->i1false; /* slc */
- ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.store.format.v4f32", ctx->voidt,
+ params[5] = i1false; /* slc */
+ ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.store.format.v4f32", ctx->ac.voidt,
params, 6, 0);
} else {
bool is_da = glsl_sampler_type_is_array(type) ||
glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE;
- LLVMValueRef da = is_da ? ctx->i1true : ctx->i1false;
- LLVMValueRef slc = ctx->i1false;
+ LLVMValueRef da = is_da ? i1true : i1false;
+ LLVMValueRef slc = i1false;
- params[0] = to_float(&ctx->ac, get_src(ctx, instr->src[2]));
+ params[0] = ac_to_float(&ctx->ac, get_src(ctx, instr->src[2]));
params[1] = get_image_coords(ctx, instr); /* coords */
- params[2] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE);
- params[3] = LLVMConstInt(ctx->i32, 15, false); /* dmask */
+ params[2] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_IMAGE, true, true);
+ params[3] = LLVMConstInt(ctx->ac.i32, 15, false); /* dmask */
if (HAVE_LLVM <= 0x0309) {
- params[4] = ctx->i1false; /* r128 */
+ params[4] = i1false; /* r128 */
params[5] = da;
params[6] = glc;
params[7] = slc;
} else {
- LLVMValueRef lwe = ctx->i1false;
+ LLVMValueRef lwe = i1false;
params[4] = glc;
params[5] = slc;
params[6] = lwe;
@@ -3490,13 +3602,13 @@
LLVMTypeOf(params[2]), /* rsrc */
intrinsic_name, sizeof(intrinsic_name));
- ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->voidt,
+ ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->ac.voidt,
params, 8, 0);
}
}
-static LLVMValueRef visit_image_atomic(struct nir_to_llvm_context *ctx,
+static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx,
const nir_intrinsic_instr *instr)
{
LLVMValueRef params[7];
@@ -3506,11 +3618,10 @@
const char *atomic_name;
char intrinsic_name[41];
const struct glsl_type *type = glsl_without_array(var->type);
+ LLVMValueRef i1false = LLVMConstInt(ctx->ac.i1, 0, false);
+ LLVMValueRef i1true = LLVMConstInt(ctx->ac.i1, 1, false);
MAYBE_UNUSED int length;
- if (ctx->stage == MESA_SHADER_FRAGMENT)
- ctx->shader_info->fs.writes_memory = true;
-
bool is_unsigned = glsl_get_sampler_result_type(type) == GLSL_TYPE_UINT;
switch (instr->intrinsic) {
@@ -3547,11 +3658,12 @@
params[param_count++] = get_src(ctx, instr->src[2]);
if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
- params[param_count++] = get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER);
- params[param_count++] = LLVMBuildExtractElement(ctx->builder, get_src(ctx, instr->src[0]),
- LLVMConstInt(ctx->i32, 0, false), ""); /* vindex */
- params[param_count++] = ctx->i32zero; /* voffset */
- params[param_count++] = ctx->i1false; /* slc */
+ params[param_count++] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER,
+ true, true);
+ params[param_count++] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[0]),
+ ctx->ac.i32_0, ""); /* vindex */
+ params[param_count++] = ctx->ac.i32_0; /* voffset */
+ params[param_count++] = i1false; /* slc */
length = snprintf(intrinsic_name, sizeof(intrinsic_name),
"llvm.amdgcn.buffer.atomic.%s", atomic_name);
@@ -3562,10 +3674,11 @@
glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE;
LLVMValueRef coords = params[param_count++] = get_image_coords(ctx, instr);
- params[param_count++] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE);
- params[param_count++] = ctx->i1false; /* r128 */
- params[param_count++] = da ? ctx->i1true : ctx->i1false; /* da */
- params[param_count++] = ctx->i1false; /* slc */
+ params[param_count++] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_IMAGE,
+ true, true);
+ params[param_count++] = i1false; /* r128 */
+ params[param_count++] = da ? i1true : i1false; /* da */
+ params[param_count++] = i1false; /* slc */
build_int_type_name(LLVMTypeOf(coords),
coords_type, sizeof(coords_type));
@@ -3575,10 +3688,10 @@
}
assert(length < sizeof(intrinsic_name));
- return ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->i32, params, param_count, 0);
+ return ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->ac.i32, params, param_count, 0);
}
-static LLVMValueRef visit_image_size(struct nir_to_llvm_context *ctx,
+static LLVMValueRef visit_image_size(struct ac_nir_context *ctx,
const nir_intrinsic_instr *instr)
{
LLVMValueRef res;
@@ -3590,32 +3703,34 @@
type = instr->variables[0]->deref.child->type;
if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF)
- return get_buffer_size(ctx, get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER), true);
+ return get_buffer_size(ctx,
+ get_sampler_desc(ctx, instr->variables[0],
+ AC_DESC_BUFFER, true, false), true);
struct ac_image_args args = { 0 };
args.da = da;
args.dmask = 0xf;
- args.resource = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE);
+ args.resource = get_sampler_desc(ctx, instr->variables[0], AC_DESC_IMAGE, true, false);
args.opcode = ac_image_get_resinfo;
- args.addr = ctx->i32zero;
+ args.addr = ctx->ac.i32_0;
res = ac_build_image_opcode(&ctx->ac, &args);
- LLVMValueRef two = LLVMConstInt(ctx->i32, 2, false);
+ LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);
if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE &&
glsl_sampler_type_is_array(type)) {
- LLVMValueRef six = LLVMConstInt(ctx->i32, 6, false);
- LLVMValueRef z = LLVMBuildExtractElement(ctx->builder, res, two, "");
- z = LLVMBuildSDiv(ctx->builder, z, six, "");
- res = LLVMBuildInsertElement(ctx->builder, res, z, two, "");
+ LLVMValueRef six = LLVMConstInt(ctx->ac.i32, 6, false);
+ LLVMValueRef z = LLVMBuildExtractElement(ctx->ac.builder, res, two, "");
+ z = LLVMBuildSDiv(ctx->ac.builder, z, six, "");
+ res = LLVMBuildInsertElement(ctx->ac.builder, res, z, two, "");
}
- if (ctx->options->chip_class >= GFX9 &&
+ if (ctx->ac.chip_class >= GFX9 &&
glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_1D &&
glsl_sampler_type_is_array(type)) {
- LLVMValueRef layers = LLVMBuildExtractElement(ctx->builder, res, two, "");
- res = LLVMBuildInsertElement(ctx->builder, res, layers,
+ LLVMValueRef layers = LLVMBuildExtractElement(ctx->ac.builder, res, two, "");
+ res = LLVMBuildInsertElement(ctx->ac.builder, res, layers,
ctx->ac.i32_1, "");
}
@@ -3651,19 +3766,18 @@
ctx->voidt, NULL, 0, AC_FUNC_ATTR_CONVERGENT);
}
-static void emit_discard_if(struct nir_to_llvm_context *ctx,
+static void emit_discard_if(struct ac_nir_context *ctx,
const nir_intrinsic_instr *instr)
{
LLVMValueRef cond;
- ctx->shader_info->fs.can_discard = true;
- cond = LLVMBuildICmp(ctx->builder, LLVMIntNE,
+ cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE,
get_src(ctx, instr->src[0]),
- ctx->i32zero, "");
+ ctx->ac.i32_0, "");
- cond = LLVMBuildSelect(ctx->builder, cond,
- LLVMConstReal(ctx->f32, -1.0f),
- ctx->f32zero, "");
+ cond = LLVMBuildSelect(ctx->ac.builder, cond,
+ LLVMConstReal(ctx->ac.f32, -1.0f),
+ ctx->ac.f32_0, "");
ac_build_kill(&ctx->ac, cond);
}
@@ -3682,11 +3796,11 @@
const nir_intrinsic_instr *instr)
{
LLVMValueRef ptr, result;
- LLVMValueRef src = get_src(ctx, instr->src[0]);
- ptr = build_gep_for_deref(ctx, instr->variables[0]);
+ LLVMValueRef src = get_src(ctx->nir, instr->src[0]);
+ ptr = build_gep_for_deref(ctx->nir, instr->variables[0]);
if (instr->intrinsic == nir_intrinsic_var_atomic_comp_swap) {
- LLVMValueRef src1 = get_src(ctx, instr->src[1]);
+ LLVMValueRef src1 = get_src(ctx->nir, instr->src[1]);
result = LLVMBuildAtomicCmpXchg(ctx->builder,
ptr, src, src1,
LLVMAtomicOrderingSequentiallyConsistent,
@@ -3726,7 +3840,7 @@
return NULL;
}
- result = LLVMBuildAtomicRMW(ctx->builder, op, ptr, to_integer(&ctx->ac, src),
+ result = LLVMBuildAtomicRMW(ctx->builder, op, ptr, ac_to_integer(&ctx->ac, src),
LLVMAtomicOrderingSequentiallyConsistent,
false);
}
@@ -3775,17 +3889,17 @@
const_array(ctx->v2f32, 64), "");
sample_id = LLVMBuildAdd(ctx->builder, sample_id, ctx->sample_pos_offset, "");
- result = ac_build_indexed_load(&ctx->ac, ptr, sample_id, false);
+ result = ac_build_load_invariant(&ctx->ac, ptr, sample_id);
return result;
}
-static LLVMValueRef load_sample_pos(struct nir_to_llvm_context *ctx)
+static LLVMValueRef load_sample_pos(struct ac_nir_context *ctx)
{
LLVMValueRef values[2];
- values[0] = emit_ffract(&ctx->ac, ctx->frag_pos[0]);
- values[1] = emit_ffract(&ctx->ac, ctx->frag_pos[1]);
+ values[0] = emit_ffract(&ctx->ac, ctx->abi->frag_pos[0]);
+ values[1] = emit_ffract(&ctx->ac, ctx->abi->frag_pos[1]);
return ac_build_gather_values(&ctx->ac, values, 2);
}
@@ -3796,8 +3910,9 @@
LLVMValueRef interp_param, attr_number;
unsigned location;
unsigned chan;
- LLVMValueRef src_c0, src_c1;
- LLVMValueRef src0;
+ LLVMValueRef src_c0 = NULL;
+ LLVMValueRef src_c1 = NULL;
+ LLVMValueRef src0 = NULL;
int input_index = instr->variables[0]->var->data.location - VARYING_SLOT_VAR0;
switch (instr->intrinsic) {
case nir_intrinsic_interp_var_at_centroid:
@@ -3806,15 +3921,15 @@
case nir_intrinsic_interp_var_at_sample:
case nir_intrinsic_interp_var_at_offset:
location = INTERP_CENTER;
- src0 = get_src(ctx, instr->src[0]);
+ src0 = get_src(ctx->nir, instr->src[0]);
break;
default:
break;
}
if (instr->intrinsic == nir_intrinsic_interp_var_at_offset) {
- src_c0 = to_float(&ctx->ac, LLVMBuildExtractElement(ctx->builder, src0, ctx->i32zero, ""));
- src_c1 = to_float(&ctx->ac, LLVMBuildExtractElement(ctx->builder, src0, ctx->i32one, ""));
+ src_c0 = ac_to_float(&ctx->ac, LLVMBuildExtractElement(ctx->builder, src0, ctx->i32zero, ""));
+ src_c1 = ac_to_float(&ctx->ac, LLVMBuildExtractElement(ctx->builder, src0, ctx->i32one, ""));
} else if (instr->intrinsic == nir_intrinsic_interp_var_at_sample) {
LLVMValueRef sample_position;
LLVMValueRef halfval = LLVMConstReal(ctx->f32, 0.5f);
@@ -3830,9 +3945,9 @@
interp_param = lookup_interp_param(ctx, instr->variables[0]->var->data.interpolation, location);
attr_number = LLVMConstInt(ctx->i32, input_index, false);
- if (location == INTERP_SAMPLE || location == INTERP_CENTER) {
+ if (location == INTERP_CENTER) {
LLVMValueRef ij_out[2];
- LLVMValueRef ddxy_out = emit_ddxy_interp(ctx, interp_param);
+ LLVMValueRef ddxy_out = emit_ddxy_interp(ctx->nir, interp_param);
/*
* take the I then J parameters, and the DDX/Y for it, and
@@ -3923,7 +4038,7 @@
/* loop num outputs */
idx = 0;
for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
- LLVMValueRef *out_ptr = &ctx->outputs[i * 4];
+ LLVMValueRef *out_ptr = &ctx->nir->outputs[i * 4];
int length = 4;
int slot = idx;
int slot_inc = 1;
@@ -3985,86 +4100,95 @@
LLVMValueRef result = ac_build_gather_values(&ctx->ac, coord, instr->num_components);
return LLVMBuildBitCast(ctx->builder, result,
- get_def_type(ctx, &instr->dest.ssa), "");
+ get_def_type(ctx->nir, &instr->dest.ssa), "");
}
-static void visit_intrinsic(struct nir_to_llvm_context *ctx,
+static void visit_intrinsic(struct ac_nir_context *ctx,
nir_intrinsic_instr *instr)
{
LLVMValueRef result = NULL;
switch (instr->intrinsic) {
case nir_intrinsic_load_work_group_id: {
- result = ctx->workgroup_ids;
+ result = ctx->nctx->workgroup_ids;
break;
}
case nir_intrinsic_load_base_vertex: {
- result = ctx->base_vertex;
+ result = ctx->abi->base_vertex;
break;
}
case nir_intrinsic_load_vertex_id_zero_base: {
- result = ctx->vertex_id;
+ result = ctx->abi->vertex_id;
break;
}
case nir_intrinsic_load_local_invocation_id: {
- result = ctx->local_invocation_ids;
+ result = ctx->nctx->local_invocation_ids;
break;
}
case nir_intrinsic_load_base_instance:
- result = ctx->start_instance;
+ result = ctx->abi->start_instance;
break;
case nir_intrinsic_load_draw_id:
- result = ctx->draw_index;
+ result = ctx->abi->draw_id;
+ break;
+ case nir_intrinsic_load_view_index:
+ result = ctx->nctx->view_index ? ctx->nctx->view_index : ctx->ac.i32_0;
break;
case nir_intrinsic_load_invocation_id:
if (ctx->stage == MESA_SHADER_TESS_CTRL)
- result = unpack_param(ctx, ctx->tcs_rel_ids, 8, 5);
+ result = unpack_param(&ctx->ac, ctx->nctx->tcs_rel_ids, 8, 5);
else
- result = ctx->gs_invocation_id;
+ result = ctx->nctx->gs_invocation_id;
break;
case nir_intrinsic_load_primitive_id:
if (ctx->stage == MESA_SHADER_GEOMETRY) {
- ctx->shader_info->gs.uses_prim_id = true;
- result = ctx->gs_prim_id;
+ ctx->nctx->shader_info->gs.uses_prim_id = true;
+ result = ctx->nctx->gs_prim_id;
} else if (ctx->stage == MESA_SHADER_TESS_CTRL) {
- ctx->shader_info->tcs.uses_prim_id = true;
- result = ctx->tcs_patch_id;
+ ctx->nctx->shader_info->tcs.uses_prim_id = true;
+ result = ctx->nctx->tcs_patch_id;
} else if (ctx->stage == MESA_SHADER_TESS_EVAL) {
- ctx->shader_info->tcs.uses_prim_id = true;
- result = ctx->tes_patch_id;
+ ctx->nctx->shader_info->tcs.uses_prim_id = true;
+ result = ctx->nctx->tes_patch_id;
} else
fprintf(stderr, "Unknown primitive id intrinsic: %d", ctx->stage);
break;
case nir_intrinsic_load_sample_id:
- ctx->shader_info->fs.force_persample = true;
- result = unpack_param(ctx, ctx->ancillary, 8, 4);
+ result = unpack_param(&ctx->ac, ctx->abi->ancillary, 8, 4);
break;
case nir_intrinsic_load_sample_pos:
- ctx->shader_info->fs.force_persample = true;
result = load_sample_pos(ctx);
break;
case nir_intrinsic_load_sample_mask_in:
- result = ctx->sample_coverage;
+ result = ctx->abi->sample_coverage;
break;
+ case nir_intrinsic_load_frag_coord: {
+ LLVMValueRef values[4] = {
+ ctx->abi->frag_pos[0],
+ ctx->abi->frag_pos[1],
+ ctx->abi->frag_pos[2],
+ ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, ctx->abi->frag_pos[3])
+ };
+ result = ac_build_gather_values(&ctx->ac, values, 4);
+ break;
+ }
case nir_intrinsic_load_front_face:
- result = ctx->front_face;
+ result = ctx->abi->front_face;
break;
case nir_intrinsic_load_instance_id:
- result = ctx->instance_id;
- ctx->shader_info->vs.vgpr_comp_cnt = MAX2(3,
- ctx->shader_info->vs.vgpr_comp_cnt);
+ result = ctx->abi->instance_id;
break;
case nir_intrinsic_load_num_work_groups:
- result = ctx->num_work_groups;
+ result = ctx->nctx->num_work_groups;
break;
case nir_intrinsic_load_local_invocation_index:
- result = visit_load_local_invocation_index(ctx);
+ result = visit_load_local_invocation_index(ctx->nctx);
break;
case nir_intrinsic_load_push_constant:
- result = visit_load_push_constant(ctx, instr);
+ result = visit_load_push_constant(ctx->nctx, instr);
break;
case nir_intrinsic_vulkan_resource_index:
- result = visit_vulkan_resource_index(ctx, instr);
+ result = visit_vulkan_resource_index(ctx->nctx, instr);
break;
case nir_intrinsic_store_ssbo:
visit_store_ssbo(ctx, instr);
@@ -4116,19 +4240,18 @@
result = visit_image_size(ctx, instr);
break;
case nir_intrinsic_discard:
- ctx->shader_info->fs.can_discard = true;
ac_build_intrinsic(&ctx->ac, "llvm.AMDGPU.kilp",
- ctx->voidt,
+ LLVMVoidTypeInContext(ctx->ac.context),
NULL, 0, AC_FUNC_ATTR_LEGACY);
break;
case nir_intrinsic_discard_if:
emit_discard_if(ctx, instr);
break;
case nir_intrinsic_memory_barrier:
- emit_waitcnt(ctx, VM_CNT);
+ emit_waitcnt(ctx->nctx, VM_CNT);
break;
case nir_intrinsic_barrier:
- emit_barrier(ctx);
+ emit_barrier(ctx->nctx);
break;
case nir_intrinsic_var_atomic_add:
case nir_intrinsic_var_atomic_imin:
@@ -4140,24 +4263,24 @@
case nir_intrinsic_var_atomic_xor:
case nir_intrinsic_var_atomic_exchange:
case nir_intrinsic_var_atomic_comp_swap:
- result = visit_var_atomic(ctx, instr);
+ result = visit_var_atomic(ctx->nctx, instr);
break;
case nir_intrinsic_interp_var_at_centroid:
case nir_intrinsic_interp_var_at_sample:
case nir_intrinsic_interp_var_at_offset:
- result = visit_interp(ctx, instr);
+ result = visit_interp(ctx->nctx, instr);
break;
case nir_intrinsic_emit_vertex:
- visit_emit_vertex(ctx, instr);
+ visit_emit_vertex(ctx->nctx, instr);
break;
case nir_intrinsic_end_primitive:
- visit_end_primitive(ctx, instr);
+ visit_end_primitive(ctx->nctx, instr);
break;
case nir_intrinsic_load_tess_coord:
- result = visit_load_tess_coord(ctx, instr);
+ result = visit_load_tess_coord(ctx->nctx, instr);
break;
case nir_intrinsic_load_patch_vertices_in:
- result = LLVMConstInt(ctx->i32, ctx->options->key.tcs.input_vertices, false);
+ result = LLVMConstInt(ctx->ac.i32, ctx->nctx->options->key.tcs.input_vertices, false);
break;
default:
fprintf(stderr, "Unknown intrinsic: ");
@@ -4170,42 +4293,58 @@
}
}
-static LLVMValueRef get_sampler_desc(struct nir_to_llvm_context *ctx,
- const nir_deref_var *deref,
- enum desc_type desc_type)
+static LLVMValueRef radv_load_ssbo(struct ac_shader_abi *abi,
+ LLVMValueRef buffer, bool write)
{
- unsigned desc_set = deref->var->data.descriptor_set;
- LLVMValueRef list = ctx->descriptor_sets[desc_set];
- struct radv_descriptor_set_layout *layout = ctx->options->layout->set[desc_set].layout;
- struct radv_descriptor_set_binding_layout *binding = layout->binding + deref->var->data.binding;
+ struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
+
+ if (write && ctx->stage == MESA_SHADER_FRAGMENT)
+ ctx->shader_info->fs.writes_memory = true;
+
+ return buffer;
+}
+
+static LLVMValueRef radv_get_sampler_desc(struct ac_shader_abi *abi,
+ unsigned descriptor_set,
+ unsigned base_index,
+ unsigned constant_index,
+ LLVMValueRef index,
+ enum ac_descriptor_type desc_type,
+ bool image, bool write)
+{
+ struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
+ LLVMValueRef list = ctx->descriptor_sets[descriptor_set];
+ struct radv_descriptor_set_layout *layout = ctx->options->layout->set[descriptor_set].layout;
+ struct radv_descriptor_set_binding_layout *binding = layout->binding + base_index;
unsigned offset = binding->offset;
unsigned stride = binding->size;
unsigned type_size;
LLVMBuilderRef builder = ctx->builder;
LLVMTypeRef type;
- LLVMValueRef index = NULL;
- unsigned constant_index = 0;
- assert(deref->var->data.binding < layout->binding_count);
+ assert(base_index < layout->binding_count);
+
+ if (write && ctx->stage == MESA_SHADER_FRAGMENT)
+ ctx->shader_info->fs.writes_memory = true;
switch (desc_type) {
- case DESC_IMAGE:
+ case AC_DESC_IMAGE:
type = ctx->v8i32;
type_size = 32;
break;
- case DESC_FMASK:
+ case AC_DESC_FMASK:
type = ctx->v8i32;
offset += 32;
type_size = 32;
break;
- case DESC_SAMPLER:
+ case AC_DESC_SAMPLER:
type = ctx->v4i32;
if (binding->type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
offset += 64;
type_size = 16;
break;
- case DESC_BUFFER:
+ case AC_DESC_BUFFER:
type = ctx->v4i32;
type_size = 16;
break;
@@ -4213,19 +4352,9 @@
unreachable("invalid desc_type\n");
}
- if (deref->deref.child) {
- const nir_deref_array *child =
- (const nir_deref_array *)deref->deref.child;
+ offset += constant_index * stride;
- assert(child->deref_array_type != nir_deref_array_type_wildcard);
- offset += child->base_offset * stride;
- if (child->deref_array_type == nir_deref_array_type_indirect) {
- index = get_src(ctx, child->indirect);
- }
-
- constant_index = child->base_offset;
- }
- if (desc_type == DESC_SAMPLER && binding->immutable_samplers_offset &&
+ if (desc_type == AC_DESC_SAMPLER && binding->immutable_samplers_offset &&
(!index || binding->immutable_samplers_equal)) {
if (binding->immutable_samplers_equal)
constant_index = 0;
@@ -4251,10 +4380,52 @@
list = ac_build_gep0(&ctx->ac, list, LLVMConstInt(ctx->i32, offset, 0));
list = LLVMBuildPointerCast(builder, list, const_array(type, 0), "");
- return ac_build_indexed_load_const(&ctx->ac, list, index);
+ return ac_build_load_to_sgpr(&ctx->ac, list, index);
+}
+
+static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx,
+ const nir_deref_var *deref,
+ enum ac_descriptor_type desc_type,
+ bool image, bool write)
+{
+ LLVMValueRef index = NULL;
+ unsigned constant_index = 0;
+ const nir_deref *tail = &deref->deref;
+
+ while (tail->child) {
+ const nir_deref_array *child = nir_deref_as_array(tail->child);
+ unsigned array_size = glsl_get_aoa_size(tail->child->type);
+
+ if (!array_size)
+ array_size = 1;
+
+ assert(child->deref_array_type != nir_deref_array_type_wildcard);
+
+ if (child->deref_array_type == nir_deref_array_type_indirect) {
+ LLVMValueRef indirect = get_src(ctx, child->indirect);
+
+ indirect = LLVMBuildMul(ctx->ac.builder, indirect,
+ LLVMConstInt(ctx->ac.i32, array_size, false), "");
+
+ if (!index)
+ index = indirect;
+ else
+ index = LLVMBuildAdd(ctx->ac.builder, index, indirect, "");
+ }
+
+ constant_index += child->base_offset * array_size;
+
+ tail = &child->deref;
+ }
+
+ return ctx->abi->load_sampler_desc(ctx->abi,
+ deref->var->data.descriptor_set,
+ deref->var->data.binding,
+ constant_index, index,
+ desc_type, image, write);
}
-static void set_tex_fetch_args(struct nir_to_llvm_context *ctx,
+static void set_tex_fetch_args(struct ac_llvm_context *ctx,
struct ac_image_args *args,
const nir_tex_instr *instr,
nir_texop op,
@@ -4272,7 +4443,7 @@
param[count++] = LLVMGetUndef(ctx->i32);
if (count > 1)
- args->addr = ac_build_gather_values(&ctx->ac, param, count);
+ args->addr = ac_build_gather_values(ctx, param, count);
else
args->addr = param[0];
@@ -4300,56 +4471,56 @@
* VI:
* The ANISO_OVERRIDE sampler field enables this fix in TA.
*/
-static LLVMValueRef sici_fix_sampler_aniso(struct nir_to_llvm_context *ctx,
+static LLVMValueRef sici_fix_sampler_aniso(struct ac_nir_context *ctx,
LLVMValueRef res, LLVMValueRef samp)
{
- LLVMBuilderRef builder = ctx->builder;
+ LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef img7, samp0;
- if (ctx->options->chip_class >= VI)
+ if (ctx->ac.chip_class >= VI)
return samp;
img7 = LLVMBuildExtractElement(builder, res,
- LLVMConstInt(ctx->i32, 7, 0), "");
+ LLVMConstInt(ctx->ac.i32, 7, 0), "");
samp0 = LLVMBuildExtractElement(builder, samp,
- LLVMConstInt(ctx->i32, 0, 0), "");
+ LLVMConstInt(ctx->ac.i32, 0, 0), "");
samp0 = LLVMBuildAnd(builder, samp0, img7, "");
return LLVMBuildInsertElement(builder, samp, samp0,
- LLVMConstInt(ctx->i32, 0, 0), "");
+ LLVMConstInt(ctx->ac.i32, 0, 0), "");
}
-static void tex_fetch_ptrs(struct nir_to_llvm_context *ctx,
+static void tex_fetch_ptrs(struct ac_nir_context *ctx,
nir_tex_instr *instr,
LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr,
LLVMValueRef *fmask_ptr)
{
if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF)
- *res_ptr = get_sampler_desc(ctx, instr->texture, DESC_BUFFER);
+ *res_ptr = get_sampler_desc(ctx, instr->texture, AC_DESC_BUFFER, false, false);
else
- *res_ptr = get_sampler_desc(ctx, instr->texture, DESC_IMAGE);
+ *res_ptr = get_sampler_desc(ctx, instr->texture, AC_DESC_IMAGE, false, false);
if (samp_ptr) {
if (instr->sampler)
- *samp_ptr = get_sampler_desc(ctx, instr->sampler, DESC_SAMPLER);
+ *samp_ptr = get_sampler_desc(ctx, instr->sampler, AC_DESC_SAMPLER, false, false);
else
- *samp_ptr = get_sampler_desc(ctx, instr->texture, DESC_SAMPLER);
+ *samp_ptr = get_sampler_desc(ctx, instr->texture, AC_DESC_SAMPLER, false, false);
if (instr->sampler_dim < GLSL_SAMPLER_DIM_RECT)
*samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr);
}
if (fmask_ptr && !instr->sampler && (instr->op == nir_texop_txf_ms ||
instr->op == nir_texop_samples_identical))
- *fmask_ptr = get_sampler_desc(ctx, instr->texture, DESC_FMASK);
+ *fmask_ptr = get_sampler_desc(ctx, instr->texture, AC_DESC_FMASK, false, false);
}
-static LLVMValueRef apply_round_slice(struct nir_to_llvm_context *ctx,
+static LLVMValueRef apply_round_slice(struct ac_llvm_context *ctx,
LLVMValueRef coord)
{
- coord = to_float(&ctx->ac, coord);
- coord = ac_build_intrinsic(&ctx->ac, "llvm.rint.f32", ctx->f32, &coord, 1, 0);
- coord = to_integer(&ctx->ac, coord);
+ coord = ac_to_float(ctx, coord);
+ coord = ac_build_intrinsic(ctx, "llvm.rint.f32", ctx->f32, &coord, 1, 0);
+ coord = ac_to_integer(ctx, coord);
return coord;
}
-static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr)
+static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
{
LLVMValueRef result = NULL;
struct ac_image_args args = { 0 };
@@ -4364,6 +4535,7 @@
unsigned chan, count = 0;
unsigned const_src = 0, num_deriv_comp = 0;
bool lod_is_zero = false;
+
tex_fetch_ptrs(ctx, instr, &res_ptr, &samp_ptr, &fmask_ptr);
for (unsigned i = 0; i < instr->num_srcs; i++) {
@@ -4418,48 +4590,48 @@
if (instr->op == nir_texop_texture_samples) {
LLVMValueRef res, samples, is_msaa;
- res = LLVMBuildBitCast(ctx->builder, res_ptr, ctx->v8i32, "");
- samples = LLVMBuildExtractElement(ctx->builder, res,
- LLVMConstInt(ctx->i32, 3, false), "");
- is_msaa = LLVMBuildLShr(ctx->builder, samples,
- LLVMConstInt(ctx->i32, 28, false), "");
- is_msaa = LLVMBuildAnd(ctx->builder, is_msaa,
- LLVMConstInt(ctx->i32, 0xe, false), "");
- is_msaa = LLVMBuildICmp(ctx->builder, LLVMIntEQ, is_msaa,
- LLVMConstInt(ctx->i32, 0xe, false), "");
-
- samples = LLVMBuildLShr(ctx->builder, samples,
- LLVMConstInt(ctx->i32, 16, false), "");
- samples = LLVMBuildAnd(ctx->builder, samples,
- LLVMConstInt(ctx->i32, 0xf, false), "");
- samples = LLVMBuildShl(ctx->builder, ctx->i32one,
+ res = LLVMBuildBitCast(ctx->ac.builder, res_ptr, ctx->ac.v8i32, "");
+ samples = LLVMBuildExtractElement(ctx->ac.builder, res,
+ LLVMConstInt(ctx->ac.i32, 3, false), "");
+ is_msaa = LLVMBuildLShr(ctx->ac.builder, samples,
+ LLVMConstInt(ctx->ac.i32, 28, false), "");
+ is_msaa = LLVMBuildAnd(ctx->ac.builder, is_msaa,
+ LLVMConstInt(ctx->ac.i32, 0xe, false), "");
+ is_msaa = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, is_msaa,
+ LLVMConstInt(ctx->ac.i32, 0xe, false), "");
+
+ samples = LLVMBuildLShr(ctx->ac.builder, samples,
+ LLVMConstInt(ctx->ac.i32, 16, false), "");
+ samples = LLVMBuildAnd(ctx->ac.builder, samples,
+ LLVMConstInt(ctx->ac.i32, 0xf, false), "");
+ samples = LLVMBuildShl(ctx->ac.builder, ctx->ac.i32_1,
samples, "");
- samples = LLVMBuildSelect(ctx->builder, is_msaa, samples,
- ctx->i32one, "");
+ samples = LLVMBuildSelect(ctx->ac.builder, is_msaa, samples,
+ ctx->ac.i32_1, "");
result = samples;
goto write_result;
}
if (coord)
for (chan = 0; chan < instr->coord_components; chan++)
- coords[chan] = llvm_extract_elem(ctx, coord, chan);
+ coords[chan] = llvm_extract_elem(&ctx->ac, coord, chan);
if (offsets && instr->op != nir_texop_txf) {
LLVMValueRef offset[3], pack;
for (chan = 0; chan < 3; ++chan)
- offset[chan] = ctx->i32zero;
+ offset[chan] = ctx->ac.i32_0;
args.offset = true;
for (chan = 0; chan < get_llvm_num_components(offsets); chan++) {
- offset[chan] = llvm_extract_elem(ctx, offsets, chan);
- offset[chan] = LLVMBuildAnd(ctx->builder, offset[chan],
- LLVMConstInt(ctx->i32, 0x3f, false), "");
+ offset[chan] = llvm_extract_elem(&ctx->ac, offsets, chan);
+ offset[chan] = LLVMBuildAnd(ctx->ac.builder, offset[chan],
+ LLVMConstInt(ctx->ac.i32, 0x3f, false), "");
if (chan)
- offset[chan] = LLVMBuildShl(ctx->builder, offset[chan],
- LLVMConstInt(ctx->i32, chan * 8, false), "");
+ offset[chan] = LLVMBuildShl(ctx->ac.builder, offset[chan],
+ LLVMConstInt(ctx->ac.i32, chan * 8, false), "");
}
- pack = LLVMBuildOr(ctx->builder, offset[0], offset[1], "");
- pack = LLVMBuildOr(ctx->builder, pack, offset[2], "");
+ pack = LLVMBuildOr(ctx->ac.builder, offset[0], offset[1], "");
+ pack = LLVMBuildOr(ctx->ac.builder, pack, offset[2], "");
address[count++] = pack;
}
@@ -4470,7 +4642,20 @@
/* Pack depth comparison value */
if (instr->is_shadow && comparator) {
- address[count++] = llvm_extract_elem(ctx, comparator, 0);
+ LLVMValueRef z = ac_to_float(&ctx->ac,
+ llvm_extract_elem(&ctx->ac, comparator, 0));
+
+ /* TC-compatible HTILE on radeonsi promotes Z16 and Z24 to Z32_FLOAT,
+ * so the depth comparison value isn't clamped for Z16 and
+ * Z24 anymore. Do it manually here.
+ *
+ * It's unnecessary if the original texture format was
+ * Z32_FLOAT, but we don't know that here.
+ */
+ if (ctx->ac.chip_class == VI && ctx->abi->clamp_shadow_reference)
+ z = ac_build_clamp(&ctx->ac, z);
+
+ address[count++] = z;
}
/* pack derivatives */
@@ -4491,7 +4676,7 @@
break;
case GLSL_SAMPLER_DIM_1D:
num_src_deriv_channels = 1;
- if (ctx->options->chip_class >= GFX9) {
+ if (ctx->ac.chip_class >= GFX9) {
num_dest_deriv_channels = 2;
num_deriv_comp = 2;
} else {
@@ -4502,8 +4687,8 @@
}
for (unsigned i = 0; i < num_src_deriv_channels; i++) {
- derivs[i] = to_float(&ctx->ac, llvm_extract_elem(ctx, ddx, i));
- derivs[num_dest_deriv_channels + i] = to_float(&ctx->ac, llvm_extract_elem(ctx, ddy, i));
+ derivs[i] = ac_to_float(&ctx->ac, llvm_extract_elem(&ctx->ac, ddx, i));
+ derivs[num_dest_deriv_channels + i] = ac_to_float(&ctx->ac, llvm_extract_elem(&ctx->ac, ddy, i));
}
for (unsigned i = num_src_deriv_channels; i < num_dest_deriv_channels; i++) {
derivs[i] = ctx->ac.f32_0;
@@ -4513,9 +4698,9 @@
if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && coord) {
for (chan = 0; chan < instr->coord_components; chan++)
- coords[chan] = to_float(&ctx->ac, coords[chan]);
+ coords[chan] = ac_to_float(&ctx->ac, coords[chan]);
if (instr->coord_components == 3)
- coords[3] = LLVMGetUndef(ctx->f32);
+ coords[3] = LLVMGetUndef(ctx->ac.f32);
ac_prepare_cube_coords(&ctx->ac,
instr->op == nir_texop_txd, instr->is_array,
instr->op == nir_texop_lod, coords, derivs);
@@ -4533,7 +4718,7 @@
address[count++] = coords[0];
if (instr->coord_components > 1) {
if (instr->sampler_dim == GLSL_SAMPLER_DIM_1D && instr->is_array && instr->op != nir_texop_txf) {
- coords[1] = apply_round_slice(ctx, coords[1]);
+ coords[1] = apply_round_slice(&ctx->ac, coords[1]);
}
address[count++] = coords[1];
}
@@ -4542,17 +4727,17 @@
if (instr->sampler_dim != GLSL_SAMPLER_DIM_3D &&
instr->sampler_dim != GLSL_SAMPLER_DIM_CUBE &&
instr->op != nir_texop_txf) {
- coords[2] = apply_round_slice(ctx, coords[2]);
+ coords[2] = apply_round_slice(&ctx->ac, coords[2]);
}
address[count++] = coords[2];
}
- if (ctx->options->chip_class >= GFX9) {
+ if (ctx->ac.chip_class >= GFX9) {
LLVMValueRef filler;
if (instr->op == nir_texop_txf)
filler = ctx->ac.i32_0;
else
- filler = LLVMConstReal(ctx->f32, 0.5);
+ filler = LLVMConstReal(ctx->ac.f32, 0.5);
if (instr->sampler_dim == GLSL_SAMPLER_DIM_1D) {
/* No nir_texop_lod, because it does not take a slice
@@ -4578,12 +4763,12 @@
if (lod)
address[count++] = lod;
else
- address[count++] = ctx->i32zero;
+ address[count++] = ctx->ac.i32_0;
}
for (chan = 0; chan < count; chan++) {
- address[chan] = LLVMBuildBitCast(ctx->builder,
- address[chan], ctx->i32, "");
+ address[chan] = LLVMBuildBitCast(ctx->ac.builder,
+ address[chan], ctx->ac.i32, "");
}
if (instr->op == nir_texop_samples_identical) {
@@ -4593,24 +4778,24 @@
memcpy(txf_address, address, sizeof(txf_address));
if (!instr->is_array)
- txf_address[2] = ctx->i32zero;
- txf_address[3] = ctx->i32zero;
+ txf_address[2] = ctx->ac.i32_0;
+ txf_address[3] = ctx->ac.i32_0;
- set_tex_fetch_args(ctx, &txf_args, instr, nir_texop_txf,
+ set_tex_fetch_args(&ctx->ac, &txf_args, instr, nir_texop_txf,
fmask_ptr, NULL,
txf_address, txf_count, 0xf);
result = build_tex_intrinsic(ctx, instr, false, &txf_args);
- result = LLVMBuildExtractElement(ctx->builder, result, ctx->i32zero, "");
- result = emit_int_cmp(&ctx->ac, LLVMIntEQ, result, ctx->i32zero);
+ result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
+ result = emit_int_cmp(&ctx->ac, LLVMIntEQ, result, ctx->ac.i32_0);
goto write_result;
}
if (instr->sampler_dim == GLSL_SAMPLER_DIM_MS &&
instr->op != nir_texop_txs) {
unsigned sample_chan = instr->is_array ? 3 : 2;
- address[sample_chan] = adjust_sample_index_using_fmask(ctx,
+ address[sample_chan] = adjust_sample_index_using_fmask(&ctx->ac,
address[0],
address[1],
instr->is_array ? address[2] : NULL,
@@ -4625,13 +4810,13 @@
assert(const_offset);
num_offsets = MIN2(num_offsets, instr->coord_components);
if (num_offsets > 2)
- address[2] = LLVMBuildAdd(ctx->builder,
- address[2], LLVMConstInt(ctx->i32, const_offset->i32[2], false), "");
+ address[2] = LLVMBuildAdd(ctx->ac.builder,
+ address[2], LLVMConstInt(ctx->ac.i32, const_offset->i32[2], false), "");
if (num_offsets > 1)
- address[1] = LLVMBuildAdd(ctx->builder,
- address[1], LLVMConstInt(ctx->i32, const_offset->i32[1], false), "");
- address[0] = LLVMBuildAdd(ctx->builder,
- address[0], LLVMConstInt(ctx->i32, const_offset->i32[0], false), "");
+ address[1] = LLVMBuildAdd(ctx->ac.builder,
+ address[1], LLVMConstInt(ctx->ac.i32, const_offset->i32[1], false), "");
+ address[0] = LLVMBuildAdd(ctx->ac.builder,
+ address[0], LLVMConstInt(ctx->ac.i32, const_offset->i32[0], false), "");
}
@@ -4642,53 +4827,55 @@
else
dmask = 1 << instr->component;
}
- set_tex_fetch_args(ctx, &args, instr, instr->op,
+ set_tex_fetch_args(&ctx->ac, &args, instr, instr->op,
res_ptr, samp_ptr, address, count, dmask);
result = build_tex_intrinsic(ctx, instr, lod_is_zero, &args);
if (instr->op == nir_texop_query_levels)
- result = LLVMBuildExtractElement(ctx->builder, result, LLVMConstInt(ctx->i32, 3, false), "");
- else if (instr->is_shadow && instr->op != nir_texop_txs && instr->op != nir_texop_lod && instr->op != nir_texop_tg4)
- result = LLVMBuildExtractElement(ctx->builder, result, ctx->i32zero, "");
+ result = LLVMBuildExtractElement(ctx->ac.builder, result, LLVMConstInt(ctx->ac.i32, 3, false), "");
+ else if (instr->is_shadow && instr->is_new_style_shadow &&
+ instr->op != nir_texop_txs && instr->op != nir_texop_lod &&
+ instr->op != nir_texop_tg4)
+ result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
else if (instr->op == nir_texop_txs &&
instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
instr->is_array) {
- LLVMValueRef two = LLVMConstInt(ctx->i32, 2, false);
- LLVMValueRef six = LLVMConstInt(ctx->i32, 6, false);
- LLVMValueRef z = LLVMBuildExtractElement(ctx->builder, result, two, "");
- z = LLVMBuildSDiv(ctx->builder, z, six, "");
- result = LLVMBuildInsertElement(ctx->builder, result, z, two, "");
- } else if (ctx->options->chip_class >= GFX9 &&
+ LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);
+ LLVMValueRef six = LLVMConstInt(ctx->ac.i32, 6, false);
+ LLVMValueRef z = LLVMBuildExtractElement(ctx->ac.builder, result, two, "");
+ z = LLVMBuildSDiv(ctx->ac.builder, z, six, "");
+ result = LLVMBuildInsertElement(ctx->ac.builder, result, z, two, "");
+ } else if (ctx->ac.chip_class >= GFX9 &&
instr->op == nir_texop_txs &&
instr->sampler_dim == GLSL_SAMPLER_DIM_1D &&
instr->is_array) {
- LLVMValueRef two = LLVMConstInt(ctx->i32, 2, false);
- LLVMValueRef layers = LLVMBuildExtractElement(ctx->builder, result, two, "");
- result = LLVMBuildInsertElement(ctx->builder, result, layers,
+ LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);
+ LLVMValueRef layers = LLVMBuildExtractElement(ctx->ac.builder, result, two, "");
+ result = LLVMBuildInsertElement(ctx->ac.builder, result, layers,
ctx->ac.i32_1, "");
} else if (instr->dest.ssa.num_components != 4)
- result = trim_vector(ctx, result, instr->dest.ssa.num_components);
+ result = trim_vector(&ctx->ac, result, instr->dest.ssa.num_components);
write_result:
if (result) {
assert(instr->dest.is_ssa);
- result = to_integer(&ctx->ac, result);
+ result = ac_to_integer(&ctx->ac, result);
_mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
}
}
-static void visit_phi(struct nir_to_llvm_context *ctx, nir_phi_instr *instr)
+static void visit_phi(struct ac_nir_context *ctx, nir_phi_instr *instr)
{
LLVMTypeRef type = get_def_type(ctx, &instr->dest.ssa);
- LLVMValueRef result = LLVMBuildPhi(ctx->builder, type, "");
+ LLVMValueRef result = LLVMBuildPhi(ctx->ac.builder, type, "");
_mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
_mesa_hash_table_insert(ctx->phis, instr, result);
}
-static void visit_post_phi(struct nir_to_llvm_context *ctx,
+static void visit_post_phi(struct ac_nir_context *ctx,
nir_phi_instr *instr,
LLVMValueRef llvm_phi)
{
@@ -4700,7 +4887,7 @@
}
}
-static void phi_post_pass(struct nir_to_llvm_context *ctx)
+static void phi_post_pass(struct ac_nir_context *ctx)
{
struct hash_entry *entry;
hash_table_foreach(ctx->phis, entry) {
@@ -4710,31 +4897,31 @@
}
-static void visit_ssa_undef(struct nir_to_llvm_context *ctx,
+static void visit_ssa_undef(struct ac_nir_context *ctx,
const nir_ssa_undef_instr *instr)
{
unsigned num_components = instr->def.num_components;
LLVMValueRef undef;
if (num_components == 1)
- undef = LLVMGetUndef(ctx->i32);
+ undef = LLVMGetUndef(ctx->ac.i32);
else {
- undef = LLVMGetUndef(LLVMVectorType(ctx->i32, num_components));
+ undef = LLVMGetUndef(LLVMVectorType(ctx->ac.i32, num_components));
}
_mesa_hash_table_insert(ctx->defs, &instr->def, undef);
}
-static void visit_jump(struct nir_to_llvm_context *ctx,
+static void visit_jump(struct ac_nir_context *ctx,
const nir_jump_instr *instr)
{
switch (instr->type) {
case nir_jump_break:
- LLVMBuildBr(ctx->builder, ctx->break_block);
- LLVMClearInsertionPosition(ctx->builder);
+ LLVMBuildBr(ctx->ac.builder, ctx->break_block);
+ LLVMClearInsertionPosition(ctx->ac.builder);
break;
case nir_jump_continue:
- LLVMBuildBr(ctx->builder, ctx->continue_block);
- LLVMClearInsertionPosition(ctx->builder);
+ LLVMBuildBr(ctx->ac.builder, ctx->continue_block);
+ LLVMClearInsertionPosition(ctx->ac.builder);
break;
default:
fprintf(stderr, "Unknown NIR jump instr: ");
@@ -4744,12 +4931,12 @@
}
}
-static void visit_cf_list(struct nir_to_llvm_context *ctx,
+static void visit_cf_list(struct ac_nir_context *ctx,
struct exec_list *list);
-static void visit_block(struct nir_to_llvm_context *ctx, nir_block *block)
+static void visit_block(struct ac_nir_context *ctx, nir_block *block)
{
- LLVMBasicBlockRef llvm_block = LLVMGetInsertBlock(ctx->builder);
+ LLVMBasicBlockRef llvm_block = LLVMGetInsertBlock(ctx->ac.builder);
nir_foreach_instr(instr, block)
{
switch (instr->type) {
@@ -4785,61 +4972,63 @@
_mesa_hash_table_insert(ctx->defs, block, llvm_block);
}
-static void visit_if(struct nir_to_llvm_context *ctx, nir_if *if_stmt)
+static void visit_if(struct ac_nir_context *ctx, nir_if *if_stmt)
{
LLVMValueRef value = get_src(ctx, if_stmt->condition);
+ LLVMValueRef fn = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx->ac.builder));
LLVMBasicBlockRef merge_block =
- LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, "");
+ LLVMAppendBasicBlockInContext(ctx->ac.context, fn, "");
LLVMBasicBlockRef if_block =
- LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, "");
+ LLVMAppendBasicBlockInContext(ctx->ac.context, fn, "");
LLVMBasicBlockRef else_block = merge_block;
if (!exec_list_is_empty(&if_stmt->else_list))
else_block = LLVMAppendBasicBlockInContext(
- ctx->context, ctx->main_function, "");
+ ctx->ac.context, fn, "");
- LLVMValueRef cond = LLVMBuildICmp(ctx->builder, LLVMIntNE, value,
- LLVMConstInt(ctx->i32, 0, false), "");
- LLVMBuildCondBr(ctx->builder, cond, if_block, else_block);
+ LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, value,
+ LLVMConstInt(ctx->ac.i32, 0, false), "");
+ LLVMBuildCondBr(ctx->ac.builder, cond, if_block, else_block);
- LLVMPositionBuilderAtEnd(ctx->builder, if_block);
+ LLVMPositionBuilderAtEnd(ctx->ac.builder, if_block);
visit_cf_list(ctx, &if_stmt->then_list);
- if (LLVMGetInsertBlock(ctx->builder))
- LLVMBuildBr(ctx->builder, merge_block);
+ if (LLVMGetInsertBlock(ctx->ac.builder))
+ LLVMBuildBr(ctx->ac.builder, merge_block);
if (!exec_list_is_empty(&if_stmt->else_list)) {
- LLVMPositionBuilderAtEnd(ctx->builder, else_block);
+ LLVMPositionBuilderAtEnd(ctx->ac.builder, else_block);
visit_cf_list(ctx, &if_stmt->else_list);
- if (LLVMGetInsertBlock(ctx->builder))
- LLVMBuildBr(ctx->builder, merge_block);
+ if (LLVMGetInsertBlock(ctx->ac.builder))
+ LLVMBuildBr(ctx->ac.builder, merge_block);
}
- LLVMPositionBuilderAtEnd(ctx->builder, merge_block);
+ LLVMPositionBuilderAtEnd(ctx->ac.builder, merge_block);
}
-static void visit_loop(struct nir_to_llvm_context *ctx, nir_loop *loop)
+static void visit_loop(struct ac_nir_context *ctx, nir_loop *loop)
{
+ LLVMValueRef fn = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx->ac.builder));
LLVMBasicBlockRef continue_parent = ctx->continue_block;
LLVMBasicBlockRef break_parent = ctx->break_block;
ctx->continue_block =
- LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, "");
+ LLVMAppendBasicBlockInContext(ctx->ac.context, fn, "");
ctx->break_block =
- LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, "");
+ LLVMAppendBasicBlockInContext(ctx->ac.context, fn, "");
- LLVMBuildBr(ctx->builder, ctx->continue_block);
- LLVMPositionBuilderAtEnd(ctx->builder, ctx->continue_block);
+ LLVMBuildBr(ctx->ac.builder, ctx->continue_block);
+ LLVMPositionBuilderAtEnd(ctx->ac.builder, ctx->continue_block);
visit_cf_list(ctx, &loop->body);
- if (LLVMGetInsertBlock(ctx->builder))
- LLVMBuildBr(ctx->builder, ctx->continue_block);
- LLVMPositionBuilderAtEnd(ctx->builder, ctx->break_block);
+ if (LLVMGetInsertBlock(ctx->ac.builder))
+ LLVMBuildBr(ctx->ac.builder, ctx->continue_block);
+ LLVMPositionBuilderAtEnd(ctx->ac.builder, ctx->break_block);
ctx->continue_block = continue_parent;
ctx->break_block = break_parent;
}
-static void visit_cf_list(struct nir_to_llvm_context *ctx,
+static void visit_cf_list(struct ac_nir_context *ctx,
struct exec_list *list)
{
foreach_list_typed(nir_cf_node, node, node, list)
@@ -4879,18 +5068,18 @@
variable->data.driver_location = idx * 4;
if (ctx->options->key.vs.instance_rate_inputs & (1u << index)) {
- buffer_index = LLVMBuildAdd(ctx->builder, ctx->instance_id,
- ctx->start_instance, "");
+ buffer_index = LLVMBuildAdd(ctx->builder, ctx->abi.instance_id,
+ ctx->abi.start_instance, "");
ctx->shader_info->vs.vgpr_comp_cnt = MAX2(3,
ctx->shader_info->vs.vgpr_comp_cnt);
} else
- buffer_index = LLVMBuildAdd(ctx->builder, ctx->vertex_id,
- ctx->base_vertex, "");
+ buffer_index = LLVMBuildAdd(ctx->builder, ctx->abi.vertex_id,
+ ctx->abi.base_vertex, "");
for (unsigned i = 0; i < attrib_count; ++i, ++idx) {
t_offset = LLVMConstInt(ctx->i32, index + i, false);
- t_list = ac_build_indexed_load_const(&ctx->ac, t_list_ptr, t_offset);
+ t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset);
input = ac_build_buffer_load_format(&ctx->ac, t_list,
buffer_index,
@@ -4900,7 +5089,7 @@
for (unsigned chan = 0; chan < 4; chan++) {
LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, false);
ctx->inputs[radeon_llvm_reg_index_soa(idx, chan)] =
- to_integer(&ctx->ac, LLVMBuildExtractElement(ctx->builder,
+ ac_to_integer(&ctx->ac, LLVMBuildExtractElement(ctx->builder,
input, llvm_chan, ""));
}
}
@@ -4972,7 +5161,7 @@
unsigned interp_type;
if (variable->data.sample) {
interp_type = INTERP_SAMPLE;
- ctx->shader_info->fs.force_persample = true;
+ ctx->shader_info->info.ps.force_persample = true;
} else if (variable->data.centroid)
interp_type = INTERP_CENTROID;
else
@@ -4988,27 +5177,54 @@
}
static void
-handle_shader_input_decl(struct nir_to_llvm_context *ctx,
- struct nir_variable *variable)
-{
- switch (ctx->stage) {
- case MESA_SHADER_VERTEX:
+handle_vs_inputs(struct nir_to_llvm_context *ctx,
+ struct nir_shader *nir) {
+ nir_foreach_variable(variable, &nir->inputs)
handle_vs_input_decl(ctx, variable);
- break;
- case MESA_SHADER_FRAGMENT:
- handle_fs_input_decl(ctx, variable);
- break;
- default:
- break;
+}
+
+static void
+prepare_interp_optimize(struct nir_to_llvm_context *ctx,
+ struct nir_shader *nir)
+{
+ if (!ctx->options->key.fs.multisample)
+ return;
+
+ bool uses_center = false;
+ bool uses_centroid = false;
+ nir_foreach_variable(variable, &nir->inputs) {
+ if (glsl_get_base_type(glsl_without_array(variable->type)) != GLSL_TYPE_FLOAT ||
+ variable->data.sample)
+ continue;
+
+ if (variable->data.centroid)
+ uses_centroid = true;
+ else
+ uses_center = true;
}
+ if (uses_center && uses_centroid) {
+ LLVMValueRef sel = LLVMBuildICmp(ctx->builder, LLVMIntSLT, ctx->prim_mask, ctx->ac.i32_0, "");
+ ctx->persp_centroid = LLVMBuildSelect(ctx->builder, sel, ctx->persp_center, ctx->persp_centroid, "");
+ ctx->linear_centroid = LLVMBuildSelect(ctx->builder, sel, ctx->linear_center, ctx->linear_centroid, "");
+ }
}
static void
-handle_fs_inputs_pre(struct nir_to_llvm_context *ctx,
- struct nir_shader *nir)
+handle_fs_inputs(struct nir_to_llvm_context *ctx,
+ struct nir_shader *nir)
{
+ prepare_interp_optimize(ctx, nir);
+
+ nir_foreach_variable(variable, &nir->inputs)
+ handle_fs_input_decl(ctx, variable);
+
unsigned index = 0;
+
+ if (ctx->shader_info->info.ps.uses_input_attachments ||
+ ctx->shader_info->info.needs_multiview_view_index)
+ ctx->input_mask |= 1ull << VARYING_SLOT_LAYER;
+
for (unsigned i = 0; i < RADEON_LLVM_MAX_INPUTS; ++i) {
LLVMValueRef interp_param;
LLVMValueRef *inputs = ctx->inputs +radeon_llvm_reg_index_soa(i, 0);
@@ -5027,9 +5243,10 @@
++index;
} else if (i == VARYING_SLOT_POS) {
for(int i = 0; i < 3; ++i)
- inputs[i] = ctx->frag_pos[i];
+ inputs[i] = ctx->abi.frag_pos[i];
- inputs[3] = ac_build_fdiv(&ctx->ac, ctx->f32one, ctx->frag_pos[3]);
+ inputs[3] = ac_build_fdiv(&ctx->ac, ctx->f32one,
+ ctx->abi.frag_pos[3]);
}
}
ctx->shader_info->fs.num_interp = index;
@@ -5040,19 +5257,22 @@
if (ctx->input_mask & (1 << VARYING_SLOT_LAYER))
ctx->shader_info->fs.layer_input = true;
ctx->shader_info->fs.input_mask = ctx->input_mask >> VARYING_SLOT_VAR0;
+
+ if (ctx->shader_info->info.needs_multiview_view_index)
+ ctx->view_index = ctx->inputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)];
}
static LLVMValueRef
-ac_build_alloca(struct nir_to_llvm_context *ctx,
+ac_build_alloca(struct ac_llvm_context *ac,
LLVMTypeRef type,
const char *name)
{
- LLVMBuilderRef builder = ctx->builder;
+ LLVMBuilderRef builder = ac->builder;
LLVMBasicBlockRef current_block = LLVMGetInsertBlock(builder);
LLVMValueRef function = LLVMGetBasicBlockParent(current_block);
LLVMBasicBlockRef first_block = LLVMGetEntryBasicBlock(function);
LLVMValueRef first_instr = LLVMGetFirstInstruction(first_block);
- LLVMBuilderRef first_builder = LLVMCreateBuilderInContext(ctx->context);
+ LLVMBuilderRef first_builder = LLVMCreateBuilderInContext(ac->context);
LLVMValueRef res;
if (first_instr) {
@@ -5069,41 +5289,45 @@
return res;
}
-static LLVMValueRef si_build_alloca_undef(struct nir_to_llvm_context *ctx,
+static LLVMValueRef si_build_alloca_undef(struct ac_llvm_context *ac,
LLVMTypeRef type,
const char *name)
{
- LLVMValueRef ptr = ac_build_alloca(ctx, type, name);
- LLVMBuildStore(ctx->builder, LLVMGetUndef(type), ptr);
+ LLVMValueRef ptr = ac_build_alloca(ac, type, name);
+ LLVMBuildStore(ac->builder, LLVMGetUndef(type), ptr);
return ptr;
}
static void
-handle_shader_output_decl(struct nir_to_llvm_context *ctx,
- struct nir_variable *variable)
+scan_shader_output_decl(struct nir_to_llvm_context *ctx,
+ struct nir_variable *variable,
+ struct nir_shader *shader,
+ gl_shader_stage stage)
{
int idx = variable->data.location + variable->data.index;
unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
uint64_t mask_attribs;
+
variable->data.driver_location = idx * 4;
/* tess ctrl has it's own load/store paths for outputs */
- if (ctx->stage == MESA_SHADER_TESS_CTRL)
+ if (stage == MESA_SHADER_TESS_CTRL)
return;
mask_attribs = ((1ull << attrib_count) - 1) << idx;
- if (ctx->stage == MESA_SHADER_VERTEX ||
- ctx->stage == MESA_SHADER_TESS_EVAL ||
- ctx->stage == MESA_SHADER_GEOMETRY) {
+ if (stage == MESA_SHADER_VERTEX ||
+ stage == MESA_SHADER_TESS_EVAL ||
+ stage == MESA_SHADER_GEOMETRY) {
if (idx == VARYING_SLOT_CLIP_DIST0) {
- int length = ctx->num_output_clips + ctx->num_output_culls;
- if (ctx->stage == MESA_SHADER_VERTEX) {
- ctx->shader_info->vs.outinfo.clip_dist_mask = (1 << ctx->num_output_clips) - 1;
- ctx->shader_info->vs.outinfo.cull_dist_mask = (1 << ctx->num_output_culls) - 1;
+ int length = shader->info.clip_distance_array_size +
+ shader->info.cull_distance_array_size;
+ if (stage == MESA_SHADER_VERTEX) {
+ ctx->shader_info->vs.outinfo.clip_dist_mask = (1 << shader->info.clip_distance_array_size) - 1;
+ ctx->shader_info->vs.outinfo.cull_dist_mask = (1 << shader->info.cull_distance_array_size) - 1;
}
- if (ctx->stage == MESA_SHADER_TESS_EVAL) {
- ctx->shader_info->tes.outinfo.clip_dist_mask = (1 << ctx->num_output_clips) - 1;
- ctx->shader_info->tes.outinfo.cull_dist_mask = (1 << ctx->num_output_culls) - 1;
+ if (stage == MESA_SHADER_TESS_EVAL) {
+ ctx->shader_info->tes.outinfo.clip_dist_mask = (1 << shader->info.clip_distance_array_size) - 1;
+ ctx->shader_info->tes.outinfo.cull_dist_mask = (1 << shader->info.cull_distance_array_size) - 1;
}
if (length > 4)
@@ -5114,13 +5338,42 @@
}
}
+ ctx->output_mask |= mask_attribs;
+}
+
+static void
+handle_shader_output_decl(struct ac_nir_context *ctx,
+ struct nir_shader *nir,
+ struct nir_variable *variable)
+{
+ unsigned output_loc = variable->data.driver_location / 4;
+ unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
+
+ /* tess ctrl has it's own load/store paths for outputs */
+ if (ctx->stage == MESA_SHADER_TESS_CTRL)
+ return;
+
+ if (ctx->stage == MESA_SHADER_VERTEX ||
+ ctx->stage == MESA_SHADER_TESS_EVAL ||
+ ctx->stage == MESA_SHADER_GEOMETRY) {
+ int idx = variable->data.location + variable->data.index;
+ if (idx == VARYING_SLOT_CLIP_DIST0) {
+ int length = nir->info.clip_distance_array_size +
+ nir->info.cull_distance_array_size;
+
+ if (length > 4)
+ attrib_count = 2;
+ else
+ attrib_count = 1;
+ }
+ }
+
for (unsigned i = 0; i < attrib_count; ++i) {
for (unsigned chan = 0; chan < 4; chan++) {
- ctx->outputs[radeon_llvm_reg_index_soa(idx + i, chan)] =
- si_build_alloca_undef(ctx, ctx->f32, "");
+ ctx->outputs[radeon_llvm_reg_index_soa(output_loc + i, chan)] =
+ si_build_alloca_undef(&ctx->ac, ctx->ac.f32, "");
}
}
- ctx->output_mask |= mask_attribs;
}
static LLVMTypeRef
@@ -5186,7 +5439,7 @@
}
static void
-setup_locals(struct nir_to_llvm_context *ctx,
+setup_locals(struct ac_nir_context *ctx,
struct nir_function *func)
{
int i, j;
@@ -5194,6 +5447,7 @@
nir_foreach_variable(variable, &func->impl->locals) {
unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
variable->data.driver_location = ctx->num_locals * 4;
+ variable->data.location_frac = 0;
ctx->num_locals += attrib_count;
}
ctx->locals = malloc(4 * ctx->num_locals * sizeof(LLVMValueRef));
@@ -5203,19 +5457,19 @@
for (i = 0; i < ctx->num_locals; i++) {
for (j = 0; j < 4; j++) {
ctx->locals[i * 4 + j] =
- si_build_alloca_undef(ctx, ctx->f32, "temp");
+ si_build_alloca_undef(&ctx->ac, ctx->ac.f32, "temp");
}
}
}
static void
-setup_shared(struct nir_to_llvm_context *ctx,
+setup_shared(struct ac_nir_context *ctx,
struct nir_shader *nir)
{
nir_foreach_variable(variable, &nir->shared) {
LLVMValueRef shared =
LLVMAddGlobalInAddressSpace(
- ctx->module, glsl_to_llvm_type(ctx, variable->type),
+ ctx->ac.module, glsl_to_llvm_type(ctx->nctx, variable->type),
variable->name ? variable->name : "",
LOCAL_ADDR_SPACE);
_mesa_hash_table_insert(ctx->vars, variable, shared);
@@ -5225,9 +5479,9 @@
static LLVMValueRef
emit_float_saturate(struct ac_llvm_context *ctx, LLVMValueRef v, float lo, float hi)
{
- v = to_float(ctx, v);
- v = emit_intrin_2f_param(ctx, "llvm.maxnum.f32", ctx->f32, v, LLVMConstReal(ctx->f32, lo));
- return emit_intrin_2f_param(ctx, "llvm.minnum.f32", ctx->f32, v, LLVMConstReal(ctx->f32, hi));
+ v = ac_to_float(ctx, v);
+ v = emit_intrin_2f_param(ctx, "llvm.maxnum", ctx->f32, v, LLVMConstReal(ctx->f32, lo));
+ return emit_intrin_2f_param(ctx, "llvm.minnum", ctx->f32, v, LLVMConstReal(ctx->f32, hi));
}
@@ -5359,7 +5613,7 @@
LLVMValueRef max_alpha = !is_int10 ? max_rgb : LLVMConstInt(ctx->i32, 3, 0);
for (unsigned chan = 0; chan < 4; chan++) {
- val[chan] = to_integer(&ctx->ac, values[chan]);
+ val[chan] = ac_to_integer(&ctx->ac, values[chan]);
val[chan] = emit_minmax_int(&ctx->ac, LLVMIntULT, val[chan], chan == 3 ? max_alpha : max_rgb);
}
@@ -5379,7 +5633,7 @@
/* Clamp. */
for (unsigned chan = 0; chan < 4; chan++) {
- val[chan] = to_integer(&ctx->ac, values[chan]);
+ val[chan] = ac_to_integer(&ctx->ac, values[chan]);
val[chan] = emit_minmax_int(&ctx->ac, LLVMIntSLT, val[chan], chan == 3 ? max_alpha : max_rgb);
val[chan] = emit_minmax_int(&ctx->ac, LLVMIntSGT, val[chan], chan == 3 ? min_alpha : min_rgb);
}
@@ -5399,7 +5653,7 @@
memcpy(&args->out[0], values, sizeof(values[0]) * 4);
for (unsigned i = 0; i < 4; ++i)
- args->out[i] = to_float(&ctx->ac, args->out[i]);
+ args->out[i] = ac_to_float(&ctx->ac, args->out[i]);
}
static void
@@ -5414,6 +5668,18 @@
LLVMValueRef psize_value = NULL, layer_value = NULL, viewport_index_value = NULL;
int i;
+ if (ctx->options->key.has_multiview_view_index) {
+ LLVMValueRef* tmp_out = &ctx->nir->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)];
+ if(!*tmp_out) {
+ for(unsigned i = 0; i < 4; ++i)
+ ctx->nir->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, i)] =
+ si_build_alloca_undef(&ctx->ac, ctx->ac.f32, "");
+ }
+
+ LLVMBuildStore(ctx->builder, ac_to_float(&ctx->ac, ctx->view_index), *tmp_out);
+ ctx->output_mask |= 1ull << VARYING_SLOT_LAYER;
+ }
+
memset(outinfo->vs_output_param_offset, AC_EXP_PARAM_UNDEFINED,
sizeof(outinfo->vs_output_param_offset));
@@ -5426,8 +5692,8 @@
i = VARYING_SLOT_CLIP_DIST0;
for (j = 0; j < ctx->num_output_clips + ctx->num_output_culls; j++)
- slots[j] = to_float(&ctx->ac, LLVMBuildLoad(ctx->builder,
- ctx->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
+ slots[j] = ac_to_float(&ctx->ac, LLVMBuildLoad(ctx->builder,
+ ctx->nir->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
for (i = ctx->num_output_clips + ctx->num_output_culls; i < 8; i++)
slots[i] = LLVMGetUndef(ctx->f32);
@@ -5450,26 +5716,26 @@
if (ctx->output_mask & (1ull << VARYING_SLOT_POS)) {
for (unsigned j = 0; j < 4; j++)
pos_values[j] = LLVMBuildLoad(ctx->builder,
- ctx->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_POS, j)], "");
+ ctx->nir->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_POS, j)], "");
}
si_llvm_init_export_args(ctx, pos_values, V_008DFC_SQ_EXP_POS, &pos_args[0]);
if (ctx->output_mask & (1ull << VARYING_SLOT_PSIZ)) {
outinfo->writes_pointsize = true;
psize_value = LLVMBuildLoad(ctx->builder,
- ctx->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_PSIZ, 0)], "");
+ ctx->nir->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_PSIZ, 0)], "");
}
if (ctx->output_mask & (1ull << VARYING_SLOT_LAYER)) {
outinfo->writes_layer = true;
layer_value = LLVMBuildLoad(ctx->builder,
- ctx->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)], "");
+ ctx->nir->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)], "");
}
if (ctx->output_mask & (1ull << VARYING_SLOT_VIEWPORT)) {
outinfo->writes_viewport_index = true;
viewport_index_value = LLVMBuildLoad(ctx->builder,
- ctx->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_VIEWPORT, 0)], "");
+ ctx->nir->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_VIEWPORT, 0)], "");
}
if (outinfo->writes_pointsize ||
@@ -5496,14 +5762,14 @@
* index in out.z[19:16].
*/
LLVMValueRef v = viewport_index_value;
- v = to_integer(&ctx->ac, v);
+ v = ac_to_integer(&ctx->ac, v);
v = LLVMBuildShl(ctx->builder, v,
LLVMConstInt(ctx->i32, 16, false),
"");
v = LLVMBuildOr(ctx->builder, v,
- to_integer(&ctx->ac, pos_args[1].out[2]), "");
+ ac_to_integer(&ctx->ac, pos_args[1].out[2]), "");
- pos_args[1].out[2] = to_float(&ctx->ac, v);
+ pos_args[1].out[2] = ac_to_float(&ctx->ac, v);
pos_args[1].enabled_channels |= 1 << 2;
} else {
pos_args[1].out[3] = viewport_index_value;
@@ -5534,8 +5800,8 @@
continue;
for (unsigned j = 0; j < 4; j++)
- values[j] = to_float(&ctx->ac, LLVMBuildLoad(ctx->builder,
- ctx->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
+ values[j] = ac_to_float(&ctx->ac, LLVMBuildLoad(ctx->builder,
+ ctx->nir->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
if (i == VARYING_SLOT_LAYER) {
target = V_008DFC_SQ_EXP_PARAM + param_count;
@@ -5590,8 +5856,9 @@
{
int j;
uint64_t max_output_written = 0;
+ LLVMValueRef lds_base = NULL;
+
for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
- LLVMValueRef *out_ptr = &ctx->outputs[i * 4];
int param_index;
int length = 4;
@@ -5604,32 +5871,72 @@
param_index = shader_io_get_unique_index(i);
max_output_written = MAX2(param_index + (length > 4), max_output_written);
+ }
+
+ outinfo->esgs_itemsize = (max_output_written + 1) * 16;
+
+ if (ctx->ac.chip_class >= GFX9) {
+ unsigned itemsize_dw = outinfo->esgs_itemsize / 4;
+ LLVMValueRef vertex_idx = ac_get_thread_id(&ctx->ac);
+ LLVMValueRef wave_idx = ac_build_bfe(&ctx->ac, ctx->merged_wave_info,
+ LLVMConstInt(ctx->ac.i32, 24, false),
+ LLVMConstInt(ctx->ac.i32, 4, false), false);
+ vertex_idx = LLVMBuildOr(ctx->ac.builder, vertex_idx,
+ LLVMBuildMul(ctx->ac.builder, wave_idx,
+ LLVMConstInt(ctx->i32, 64, false), ""), "");
+ lds_base = LLVMBuildMul(ctx->ac.builder, vertex_idx,
+ LLVMConstInt(ctx->i32, itemsize_dw, 0), "");
+ }
+
+ for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
+ LLVMValueRef dw_addr;
+ LLVMValueRef *out_ptr = &ctx->nir->outputs[i * 4];
+ int param_index;
+ int length = 4;
+
+ if (!(ctx->output_mask & (1ull << i)))
+ continue;
+
+ if (i == VARYING_SLOT_CLIP_DIST0)
+ length = ctx->num_output_clips + ctx->num_output_culls;
+ param_index = shader_io_get_unique_index(i);
+
+ if (lds_base) {
+ dw_addr = LLVMBuildAdd(ctx->builder, lds_base,
+ LLVMConstInt(ctx->i32, param_index * 4, false),
+ "");
+ }
for (j = 0; j < length; j++) {
LLVMValueRef out_val = LLVMBuildLoad(ctx->builder, out_ptr[j], "");
out_val = LLVMBuildBitCast(ctx->builder, out_val, ctx->i32, "");
- ac_build_buffer_store_dword(&ctx->ac,
- ctx->esgs_ring,
- out_val, 1,
- NULL, ctx->es2gs_offset,
- (4 * param_index + j) * 4,
- 1, 1, true, true);
+ if (ctx->ac.chip_class >= GFX9) {
+ lds_store(ctx, dw_addr,
+ LLVMBuildLoad(ctx->builder, out_ptr[j], ""));
+ dw_addr = LLVMBuildAdd(ctx->builder, dw_addr, ctx->i32one, "");
+ } else {
+ ac_build_buffer_store_dword(&ctx->ac,
+ ctx->esgs_ring,
+ out_val, 1,
+ NULL, ctx->es2gs_offset,
+ (4 * param_index + j) * 4,
+ 1, 1, true, true);
+ }
}
}
- outinfo->esgs_itemsize = (max_output_written + 1) * 16;
}
static void
handle_ls_outputs_post(struct nir_to_llvm_context *ctx)
{
LLVMValueRef vertex_id = ctx->rel_auto_id;
- LLVMValueRef vertex_dw_stride = unpack_param(ctx, ctx->ls_out_layout, 13, 8);
+ LLVMValueRef vertex_dw_stride = unpack_param(&ctx->ac, ctx->ls_out_layout, 13, 8);
LLVMValueRef base_dw_addr = LLVMBuildMul(ctx->builder, vertex_id,
vertex_dw_stride, "");
for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
- LLVMValueRef *out_ptr = &ctx->outputs[i * 4];
+ LLVMValueRef *out_ptr = &ctx->nir->outputs[i * 4];
int length = 4;
if (!(ctx->output_mask & (1ull << i)))
@@ -5748,8 +6055,8 @@
{
unsigned stride, outer_comps, inner_comps;
struct ac_build_if_state if_ctx, inner_if_ctx;
- LLVMValueRef invocation_id = unpack_param(ctx, ctx->tcs_rel_ids, 8, 5);
- LLVMValueRef rel_patch_id = unpack_param(ctx, ctx->tcs_rel_ids, 0, 8);
+ LLVMValueRef invocation_id = unpack_param(&ctx->ac, ctx->tcs_rel_ids, 8, 5);
+ LLVMValueRef rel_patch_id = unpack_param(&ctx->ac, ctx->tcs_rel_ids, 0, 8);
unsigned tess_inner_index, tess_outer_index;
LLVMValueRef lds_base, lds_inner, lds_outer, byteoffset, buffer;
LLVMValueRef out[6], vec0, vec1, tf_base, inner[4], outer[4];
@@ -5829,29 +6136,34 @@
tf_base = ctx->tess_factor_offset;
byteoffset = LLVMBuildMul(ctx->builder, rel_patch_id,
LLVMConstInt(ctx->i32, 4 * stride, false), "");
+ unsigned tf_offset = 0;
- ac_nir_build_if(&inner_if_ctx, ctx,
- LLVMBuildICmp(ctx->builder, LLVMIntEQ,
- rel_patch_id, ctx->i32zero, ""));
-
- /* Store the dynamic HS control word. */
- ac_build_buffer_store_dword(&ctx->ac, buffer,
- LLVMConstInt(ctx->i32, 0x80000000, false),
- 1, ctx->i32zero, tf_base,
- 0, 1, 0, true, false);
- ac_nir_build_endif(&inner_if_ctx);
+ if (ctx->options->chip_class <= VI) {
+ ac_nir_build_if(&inner_if_ctx, ctx,
+ LLVMBuildICmp(ctx->builder, LLVMIntEQ,
+ rel_patch_id, ctx->i32zero, ""));
+
+ /* Store the dynamic HS control word. */
+ ac_build_buffer_store_dword(&ctx->ac, buffer,
+ LLVMConstInt(ctx->i32, 0x80000000, false),
+ 1, ctx->i32zero, tf_base,
+ 0, 1, 0, true, false);
+ tf_offset += 4;
+
+ ac_nir_build_endif(&inner_if_ctx);
+ }
/* Store the tessellation factors. */
ac_build_buffer_store_dword(&ctx->ac, buffer, vec0,
MIN2(stride, 4), byteoffset, tf_base,
- 4, 1, 0, true, false);
+ tf_offset, 1, 0, true, false);
if (vec1)
ac_build_buffer_store_dword(&ctx->ac, buffer, vec1,
stride - 4, byteoffset, tf_base,
- 20, 1, 0, true, false);
+ 16 + tf_offset, 1, 0, true, false);
- //TODO store to offchip for TES to read - only if TES reads them
- if (1) {
+ //store to offchip for TES to read - only if TES reads them
+ if (ctx->options->key.tcs.tes_reads_tess_factors) {
LLVMValueRef inner_vec, outer_vec, tf_outer_offset;
LLVMValueRef tf_inner_offset;
unsigned param_outer, param_inner;
@@ -5963,21 +6275,21 @@
if (i == FRAG_RESULT_DEPTH) {
ctx->shader_info->fs.writes_z = true;
- depth = to_float(&ctx->ac, LLVMBuildLoad(ctx->builder,
- ctx->outputs[radeon_llvm_reg_index_soa(i, 0)], ""));
+ depth = ac_to_float(&ctx->ac, LLVMBuildLoad(ctx->builder,
+ ctx->nir->outputs[radeon_llvm_reg_index_soa(i, 0)], ""));
} else if (i == FRAG_RESULT_STENCIL) {
ctx->shader_info->fs.writes_stencil = true;
- stencil = to_float(&ctx->ac, LLVMBuildLoad(ctx->builder,
- ctx->outputs[radeon_llvm_reg_index_soa(i, 0)], ""));
+ stencil = ac_to_float(&ctx->ac, LLVMBuildLoad(ctx->builder,
+ ctx->nir->outputs[radeon_llvm_reg_index_soa(i, 0)], ""));
} else if (i == FRAG_RESULT_SAMPLE_MASK) {
ctx->shader_info->fs.writes_sample_mask = true;
- samplemask = to_float(&ctx->ac, LLVMBuildLoad(ctx->builder,
- ctx->outputs[radeon_llvm_reg_index_soa(i, 0)], ""));
+ samplemask = ac_to_float(&ctx->ac, LLVMBuildLoad(ctx->builder,
+ ctx->nir->outputs[radeon_llvm_reg_index_soa(i, 0)], ""));
} else {
bool last = false;
for (unsigned j = 0; j < 4; j++)
- values[j] = to_float(&ctx->ac, LLVMBuildLoad(ctx->builder,
- ctx->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
+ values[j] = ac_to_float(&ctx->ac, LLVMBuildLoad(ctx->builder,
+ ctx->nir->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
if (!ctx->shader_info->fs.writes_z && !ctx->shader_info->fs.writes_stencil && !ctx->shader_info->fs.writes_sample_mask)
last = ctx->output_mask <= ((1ull << (i + 1)) - 1);
@@ -6007,8 +6319,11 @@
}
static void
-handle_shader_outputs_post(struct nir_to_llvm_context *ctx)
+handle_shader_outputs_post(struct ac_shader_abi *abi, unsigned max_outputs,
+ LLVMValueRef *addrs)
{
+ struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
+
switch (ctx->stage) {
case MESA_SHADER_VERTEX:
if (ctx->options->key.vs.as_ls)
@@ -6104,16 +6419,16 @@
{
if ((ctx->stage == MESA_SHADER_VERTEX && ctx->options->key.vs.as_es) ||
(ctx->stage == MESA_SHADER_TESS_EVAL && ctx->options->key.tes.as_es)) {
- ctx->esgs_ring = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_ESGS_VS, false));
+ ctx->esgs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_ESGS_VS, false));
}
if (ctx->is_gs_copy_shader) {
- ctx->gsvs_ring = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_GSVS_VS, false));
+ ctx->gsvs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_GSVS_VS, false));
}
if (ctx->stage == MESA_SHADER_GEOMETRY) {
LLVMValueRef tmp;
- ctx->esgs_ring = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_ESGS_GS, false));
- ctx->gsvs_ring = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_GSVS_GS, false));
+ ctx->esgs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_ESGS_GS, false));
+ ctx->gsvs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_GSVS_GS, false));
ctx->gsvs_ring = LLVMBuildBitCast(ctx->builder, ctx->gsvs_ring, ctx->v4i32, "");
@@ -6125,8 +6440,8 @@
if (ctx->stage == MESA_SHADER_TESS_CTRL ||
ctx->stage == MESA_SHADER_TESS_EVAL) {
- ctx->hs_ring_tess_offchip = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_HS_TESS_OFFCHIP, false));
- ctx->hs_ring_tess_factor = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_HS_TESS_FACTOR, false));
+ ctx->hs_ring_tess_offchip = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_HS_TESS_OFFCHIP, false));
+ ctx->hs_ring_tess_factor = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_HS_TESS_FACTOR, false));
}
}
@@ -6134,11 +6449,11 @@
ac_nir_get_max_workgroup_size(enum chip_class chip_class,
const struct nir_shader *nir)
{
- switch (nir->stage) {
+ switch (nir->info.stage) {
case MESA_SHADER_TESS_CTRL:
return chip_class >= CIK ? 128 : 64;
case MESA_SHADER_GEOMETRY:
- return 64;
+ return chip_class >= GFX9 ? 128 : 64;
case MESA_SHADER_COMPUTE:
break;
default:
@@ -6151,14 +6466,90 @@
return max_workgroup_size;
}
+/* Fixup the HW not emitting the TCS regs if there are no HS threads.
+ *
+ * A bitfield is extracted from merged_wave_info with ac_build_bfe (constants
+ * 8 and 8; presumably bit offset 8, width 8 -- TODO confirm the argument
+ * order against ac_build_bfe). When that field is zero ("hs_empty") the
+ * input values are remapped as follows; otherwise all four are left as-is:
+ *
+ *   abi.instance_id <- rel_auto_id
+ *   vs_prim_id      <- abi.vertex_id
+ *   rel_auto_id     <- tcs_rel_ids
+ *   abi.vertex_id   <- tcs_patch_id
+ *
+ * The selects are emitted in exactly this order so that rel_auto_id and
+ * abi.vertex_id are read before the statements that overwrite them.
+ */
+static void ac_nir_fixup_ls_hs_input_vgprs(struct nir_to_llvm_context *ctx)
+{
+ LLVMValueRef count = ac_build_bfe(&ctx->ac, ctx->merged_wave_info,
+ LLVMConstInt(ctx->ac.i32, 8, false),
+ LLVMConstInt(ctx->ac.i32, 8, false), false);
+ LLVMValueRef hs_empty = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, count,
+ LLVMConstInt(ctx->ac.i32, 0, false), "");
+ ctx->abi.instance_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->rel_auto_id, ctx->abi.instance_id, "");
+ ctx->vs_prim_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->abi.vertex_id, ctx->vs_prim_id, "");
+ ctx->rel_auto_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->tcs_rel_ids, ctx->rel_auto_id, "");
+ ctx->abi.vertex_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->tcs_patch_id, ctx->abi.vertex_id, "");
+}
+
+/* Unpack the GS inputs of a merged shader; ac_translate_nir_to_llvm calls
+ * this only for a geometry stage when shader_count >= 2.
+ *
+ * Every gs_vtx_offset[i] is replaced by a field extracted with ac_build_bfe
+ * from the even-indexed entry gs_vtx_offset[i & ~1], using the constants
+ * (i & 1) * 16 and 16 (presumably bit offset and width, i.e. the low half
+ * for even i and the high half for odd i -- TODO confirm against
+ * ac_build_bfe). gs_wave_id is extracted from merged_wave_info with the
+ * constants 16 and 8.
+ */
+static void prepare_gs_input_vgprs(struct nir_to_llvm_context *ctx)
+{
+ /* Count downwards: the odd entry must be extracted from its even
+ * neighbour before that neighbour is overwritten with its own field. */
+ for(int i = 5; i >= 0; --i) {
+ ctx->gs_vtx_offset[i] = ac_build_bfe(&ctx->ac, ctx->gs_vtx_offset[i & ~1],
+ LLVMConstInt(ctx->ac.i32, (i & 1) * 16, false),
+ LLVMConstInt(ctx->ac.i32, 16, false), false);
+ }
+
+ ctx->gs_wave_id = ac_build_bfe(&ctx->ac, ctx->merged_wave_info,
+ LLVMConstInt(ctx->ac.i32, 16, false),
+ LLVMConstInt(ctx->ac.i32, 8, false), false);
+}
+
+/* Translate one NIR shader into LLVM IR using a stack-local ac_nir_context.
+ *
+ * \param ac    LLVM context; copied by value into the local context
+ * \param abi   ABI description; its emit_outputs callback is invoked with
+ *              RADEON_LLVM_MAX_OUTPUTS and the context's output array once
+ *              the shader body has been visited
+ * \param nir   shader to translate; only the first entry of nir->functions
+ *              is visited
+ * \param nctx  may be NULL; when given, nctx->nir points at the local
+ *              context for the duration of the call and is reset to NULL
+ *              before returning
+ */
+void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi,
+ struct nir_shader *nir, struct nir_to_llvm_context *nctx)
+{
+ struct ac_nir_context ctx = {};
+ struct nir_function *func;
+
+ ctx.ac = *ac;
+ ctx.abi = abi;
+
+ ctx.nctx = nctx;
+ if (nctx)
+ nctx->nir = &ctx;
+
+ ctx.stage = nir->info.stage;
+
+ /* The function being built is whichever one owns the builder's
+ * current insert block. */
+ ctx.main_function = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx.ac.builder));
+
+ nir_foreach_variable(variable, &nir->outputs)
+ handle_shader_output_decl(&ctx, nir, variable);
+
+ /* Three pointer-keyed hash tables, released again below. */
+ ctx.defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+ ctx.phis = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+ ctx.vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+
+ func = (struct nir_function *)exec_list_get_head(&nir->functions);
+
+ setup_locals(&ctx, func);
+
+ if (nir->info.stage == MESA_SHADER_COMPUTE)
+ setup_shared(&ctx, nir);
+
+ visit_cf_list(&ctx, &func->impl->body);
+ phi_post_pass(&ctx);
+
+ ctx.abi->emit_outputs(ctx.abi, RADEON_LLVM_MAX_OUTPUTS,
+ ctx.outputs);
+
+ free(ctx.locals);
+ ralloc_free(ctx.defs);
+ ralloc_free(ctx.phis);
+ ralloc_free(ctx.vars);
+
+ /* ctx is a local of this function, so the back-pointer must not
+ * outlive the call. */
+ if (nctx)
+ nctx->nir = NULL;
+}
+
static
LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
- struct nir_shader *nir,
+ struct nir_shader *const *shaders,
+ int shader_count,
struct ac_shader_variant_info *shader_info,
const struct ac_nir_compiler_options *options)
{
struct nir_to_llvm_context ctx = {0};
- struct nir_function *func;
unsigned i;
ctx.options = options;
ctx.shader_info = shader_info;
@@ -6167,13 +6558,6 @@
ac_llvm_context_init(&ctx.ac, ctx.context, options->chip_class);
ctx.ac.module = ctx.module;
-
- ctx.has_ds_bpermute = ctx.options->chip_class >= VI;
-
- memset(shader_info, 0, sizeof(*shader_info));
-
- ac_nir_shader_info_pass(nir, options, &shader_info->info);
-
LLVMSetTarget(ctx.module, options->supports_spill ? "amdgcn-mesa-mesa3d" : "amdgcn--");
LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm);
@@ -6183,80 +6567,123 @@
LLVMDisposeMessage(data_layout_str);
setup_types(&ctx);
-
ctx.builder = LLVMCreateBuilderInContext(ctx.context);
ctx.ac.builder = ctx.builder;
- ctx.stage = nir->stage;
- ctx.max_workgroup_size = ac_nir_get_max_workgroup_size(ctx.options->chip_class, nir);
+
+ memset(shader_info, 0, sizeof(*shader_info));
+
+ for(int i = 0; i < shader_count; ++i)
+ ac_nir_shader_info_pass(shaders[i], options, &shader_info->info);
for (i = 0; i < AC_UD_MAX_SETS; i++)
shader_info->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1;
for (i = 0; i < AC_UD_MAX_UD; i++)
shader_info->user_sgprs_locs.shader_data[i].sgpr_idx = -1;
- create_function(&ctx);
-
- if (nir->stage == MESA_SHADER_GEOMETRY) {
- ctx.gs_next_vertex = ac_build_alloca(&ctx, ctx.i32, "gs_next_vertex");
-
- ctx.gs_max_out_vertices = nir->info.gs.vertices_out;
- } else if (nir->stage == MESA_SHADER_TESS_EVAL) {
- ctx.tes_primitive_mode = nir->info.tess.primitive_mode;
- }
-
- ac_setup_rings(&ctx);
-
- nir_foreach_variable(variable, &nir->inputs)
- handle_shader_input_decl(&ctx, variable);
-
- if (nir->stage == MESA_SHADER_FRAGMENT)
- handle_fs_inputs_pre(&ctx, nir);
-
- ctx.num_output_clips = nir->info.clip_distance_array_size;
- ctx.num_output_culls = nir->info.cull_distance_array_size;
-
- nir_foreach_variable(variable, &nir->outputs)
- handle_shader_output_decl(&ctx, variable);
-
- ctx.defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
- _mesa_key_pointer_equal);
- ctx.phis = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
- _mesa_key_pointer_equal);
- ctx.vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
- _mesa_key_pointer_equal);
-
- func = (struct nir_function *)exec_list_get_head(&nir->functions);
+ ctx.max_workgroup_size = 0;
+ for (int i = 0; i < shader_count; ++i) {
+ ctx.max_workgroup_size = MAX2(ctx.max_workgroup_size,
+ ac_nir_get_max_workgroup_size(ctx.options->chip_class,
+ shaders[i]));
+ }
+
+ create_function(&ctx, shaders[shader_count - 1]->info.stage, shader_count >= 2,
+ shader_count >= 2 ? shaders[shader_count - 2]->info.stage : MESA_SHADER_VERTEX);
+
+ ctx.abi.inputs = &ctx.inputs[0];
+ ctx.abi.emit_outputs = handle_shader_outputs_post;
+ ctx.abi.load_ssbo = radv_load_ssbo;
+ ctx.abi.load_sampler_desc = radv_get_sampler_desc;
+ ctx.abi.clamp_shadow_reference = false;
+
+ if (shader_count >= 2)
+ ac_init_exec_full_mask(&ctx.ac);
+
+ if (ctx.ac.chip_class == GFX9 &&
+ shaders[shader_count - 1]->info.stage == MESA_SHADER_TESS_CTRL)
+ ac_nir_fixup_ls_hs_input_vgprs(&ctx);
+
+ for(int i = 0; i < shader_count; ++i) {
+ ctx.stage = shaders[i]->info.stage;
+ ctx.output_mask = 0;
+ ctx.tess_outputs_written = 0;
+ ctx.num_output_clips = shaders[i]->info.clip_distance_array_size;
+ ctx.num_output_culls = shaders[i]->info.cull_distance_array_size;
+
+ if (shaders[i]->info.stage == MESA_SHADER_GEOMETRY) {
+ ctx.gs_next_vertex = ac_build_alloca(&ctx.ac, ctx.i32, "gs_next_vertex");
+
+ ctx.gs_max_out_vertices = shaders[i]->info.gs.vertices_out;
+ } else if (shaders[i]->info.stage == MESA_SHADER_TESS_EVAL) {
+ ctx.tes_primitive_mode = shaders[i]->info.tess.primitive_mode;
+ } else if (shaders[i]->info.stage == MESA_SHADER_VERTEX) {
+ if (shader_info->info.vs.needs_instance_id) {
+ ctx.shader_info->vs.vgpr_comp_cnt =
+ MAX2(3, ctx.shader_info->vs.vgpr_comp_cnt);
+ }
+ } else if (shaders[i]->info.stage == MESA_SHADER_FRAGMENT) {
+ shader_info->fs.can_discard = shaders[i]->info.fs.uses_discard;
+ }
- setup_locals(&ctx, func);
+ if (i)
+ emit_barrier(&ctx);
- if (nir->stage == MESA_SHADER_COMPUTE)
- setup_shared(&ctx, nir);
+ ac_setup_rings(&ctx);
- visit_cf_list(&ctx, &func->impl->body);
- phi_post_pass(&ctx);
+ LLVMBasicBlockRef merge_block;
+ if (shader_count >= 2) {
+ LLVMValueRef fn = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx.ac.builder));
+ LLVMBasicBlockRef then_block = LLVMAppendBasicBlockInContext(ctx.ac.context, fn, "");
+ merge_block = LLVMAppendBasicBlockInContext(ctx.ac.context, fn, "");
+
+ LLVMValueRef count = ac_build_bfe(&ctx.ac, ctx.merged_wave_info,
+ LLVMConstInt(ctx.ac.i32, 8 * i, false),
+ LLVMConstInt(ctx.ac.i32, 8, false), false);
+ LLVMValueRef thread_id = ac_get_thread_id(&ctx.ac);
+ LLVMValueRef cond = LLVMBuildICmp(ctx.ac.builder, LLVMIntULT,
+ thread_id, count, "");
+ LLVMBuildCondBr(ctx.ac.builder, cond, then_block, merge_block);
+
+ LLVMPositionBuilderAtEnd(ctx.ac.builder, then_block);
+ }
+
+ if (shaders[i]->info.stage == MESA_SHADER_FRAGMENT)
+ handle_fs_inputs(&ctx, shaders[i]);
+ else if(shaders[i]->info.stage == MESA_SHADER_VERTEX)
+ handle_vs_inputs(&ctx, shaders[i]);
+ else if(shader_count >= 2 && shaders[i]->info.stage == MESA_SHADER_GEOMETRY)
+ prepare_gs_input_vgprs(&ctx);
+
+ nir_foreach_variable(variable, &shaders[i]->outputs)
+ scan_shader_output_decl(&ctx, variable, shaders[i], shaders[i]->info.stage);
+
+ ac_nir_translate(&ctx.ac, &ctx.abi, shaders[i], &ctx);
+
+ if (shader_count >= 2) {
+ LLVMBuildBr(ctx.ac.builder, merge_block);
+ LLVMPositionBuilderAtEnd(ctx.ac.builder, merge_block);
+ }
+
+ if (shaders[i]->info.stage == MESA_SHADER_GEOMETRY) {
+ unsigned addclip = shaders[i]->info.clip_distance_array_size +
+ shaders[i]->info.cull_distance_array_size > 4;
+ shader_info->gs.gsvs_vertex_size = (util_bitcount64(ctx.output_mask) + addclip) * 16;
+ shader_info->gs.max_gsvs_emit_size = shader_info->gs.gsvs_vertex_size *
+ shaders[i]->info.gs.vertices_out;
+ } else if (shaders[i]->info.stage == MESA_SHADER_TESS_CTRL) {
+ shader_info->tcs.outputs_written = ctx.tess_outputs_written;
+ shader_info->tcs.patch_outputs_written = ctx.tess_patch_outputs_written;
+ } else if (shaders[i]->info.stage == MESA_SHADER_VERTEX && ctx.options->key.vs.as_ls) {
+ shader_info->vs.outputs_written = ctx.tess_outputs_written;
+ }
+ }
- handle_shader_outputs_post(&ctx);
LLVMBuildRetVoid(ctx.builder);
ac_llvm_finalize_module(&ctx);
- ac_nir_eliminate_const_vs_outputs(&ctx);
- free(ctx.locals);
- ralloc_free(ctx.defs);
- ralloc_free(ctx.phis);
- ralloc_free(ctx.vars);
-
- if (nir->stage == MESA_SHADER_GEOMETRY) {
- unsigned addclip = ctx.num_output_clips + ctx.num_output_culls > 4;
- shader_info->gs.gsvs_vertex_size = (util_bitcount64(ctx.output_mask) + addclip) * 16;
- shader_info->gs.max_gsvs_emit_size = shader_info->gs.gsvs_vertex_size *
- nir->info.gs.vertices_out;
- } else if (nir->stage == MESA_SHADER_TESS_CTRL) {
- shader_info->tcs.outputs_written = ctx.tess_outputs_written;
- shader_info->tcs.patch_outputs_written = ctx.tess_patch_outputs_written;
- } else if (nir->stage == MESA_SHADER_VERTEX && ctx.options->key.vs.as_ls) {
- shader_info->vs.outputs_written = ctx.tess_outputs_written;
- }
+ if (shader_count == 1)
+ ac_nir_eliminate_const_vs_outputs(&ctx);
return ctx.module;
}
@@ -6387,53 +6814,61 @@
shader_info->num_input_sgprs + 3);
}
+/* Copy stage-specific fields into shader_info, selected by nir->info.stage.
+ * Values come from nir->info (cs/fs/gs/tess) and from options->key (the
+ * as_es / as_ls flags). Stages not listed in the switch leave shader_info
+ * untouched.
+ */
+static void
+ac_fill_shader_info(struct ac_shader_variant_info *shader_info, struct nir_shader *nir, const struct ac_nir_compiler_options *options)
+{
+ switch (nir->info.stage) {
+ case MESA_SHADER_COMPUTE:
+ for (int i = 0; i < 3; ++i)
+ shader_info->cs.block_size[i] = nir->info.cs.local_size[i];
+ break;
+ case MESA_SHADER_FRAGMENT:
+ shader_info->fs.early_fragment_test = nir->info.fs.early_fragment_tests;
+ break;
+ case MESA_SHADER_GEOMETRY:
+ shader_info->gs.vertices_in = nir->info.gs.vertices_in;
+ shader_info->gs.vertices_out = nir->info.gs.vertices_out;
+ shader_info->gs.output_prim = nir->info.gs.output_primitive;
+ shader_info->gs.invocations = nir->info.gs.invocations;
+ break;
+ case MESA_SHADER_TESS_EVAL:
+ shader_info->tes.primitive_mode = nir->info.tess.primitive_mode;
+ shader_info->tes.spacing = nir->info.tess.spacing;
+ shader_info->tes.ccw = nir->info.tess.ccw;
+ shader_info->tes.point_mode = nir->info.tess.point_mode;
+ shader_info->tes.as_es = options->key.tes.as_es;
+ break;
+ case MESA_SHADER_TESS_CTRL:
+ shader_info->tcs.tcs_vertices_out = nir->info.tess.tcs_vertices_out;
+ break;
+ case MESA_SHADER_VERTEX:
+ shader_info->vs.as_es = options->key.vs.as_es;
+ shader_info->vs.as_ls = options->key.vs.as_ls;
+ /* in LS mode we need at least 1, invocation id needs 3, handled elsewhere
+ *
+ * MAX2 only raises the count; a larger value already stored is kept.
+ * NOTE(review): "invocation id" presumably means instance id --
+ * ac_translate_nir_to_llvm raises vgpr_comp_cnt to at least 3 when
+ * info.vs.needs_instance_id is set; confirm. */
+ if (options->key.vs.as_ls)
+ shader_info->vs.vgpr_comp_cnt = MAX2(1, shader_info->vs.vgpr_comp_cnt);
+ break;
+ default:
+ break;
+ }
+}
+
void ac_compile_nir_shader(LLVMTargetMachineRef tm,
struct ac_shader_binary *binary,
struct ac_shader_config *config,
struct ac_shader_variant_info *shader_info,
- struct nir_shader *nir,
+ struct nir_shader *const *nir,
+ int nir_count,
const struct ac_nir_compiler_options *options,
bool dump_shader)
{
- LLVMModuleRef llvm_module = ac_translate_nir_to_llvm(tm, nir, shader_info,
+ LLVMModuleRef llvm_module = ac_translate_nir_to_llvm(tm, nir, nir_count, shader_info,
options);
- ac_compile_llvm_module(tm, llvm_module, binary, config, shader_info, nir->stage, dump_shader, options->supports_spill);
- switch (nir->stage) {
- case MESA_SHADER_COMPUTE:
- for (int i = 0; i < 3; ++i)
- shader_info->cs.block_size[i] = nir->info.cs.local_size[i];
- break;
- case MESA_SHADER_FRAGMENT:
- shader_info->fs.early_fragment_test = nir->info.fs.early_fragment_tests;
- break;
- case MESA_SHADER_GEOMETRY:
- shader_info->gs.vertices_in = nir->info.gs.vertices_in;
- shader_info->gs.vertices_out = nir->info.gs.vertices_out;
- shader_info->gs.output_prim = nir->info.gs.output_primitive;
- shader_info->gs.invocations = nir->info.gs.invocations;
- break;
- case MESA_SHADER_TESS_EVAL:
- shader_info->tes.primitive_mode = nir->info.tess.primitive_mode;
- shader_info->tes.spacing = nir->info.tess.spacing;
- shader_info->tes.ccw = nir->info.tess.ccw;
- shader_info->tes.point_mode = nir->info.tess.point_mode;
- shader_info->tes.as_es = options->key.tes.as_es;
- break;
- case MESA_SHADER_TESS_CTRL:
- shader_info->tcs.tcs_vertices_out = nir->info.tess.tcs_vertices_out;
- break;
- case MESA_SHADER_VERTEX:
- shader_info->vs.as_es = options->key.vs.as_es;
- shader_info->vs.as_ls = options->key.vs.as_ls;
- /* in LS mode we need at least 1, invocation id needs 3, handled elsewhere */
- if (options->key.vs.as_ls)
- shader_info->vs.vgpr_comp_cnt = MAX2(1, shader_info->vs.vgpr_comp_cnt);
- break;
- default:
- break;
- }
+ ac_compile_llvm_module(tm, llvm_module, binary, config, shader_info, nir[0]->info.stage, dump_shader, options->supports_spill);
+ for (int i = 0; i < nir_count; ++i)
+ ac_fill_shader_info(shader_info, nir[i], options);
}
static void
@@ -6441,7 +6876,7 @@
{
LLVMValueRef args[9];
args[0] = ctx->gsvs_ring;
- args[1] = LLVMBuildMul(ctx->builder, ctx->vertex_id, LLVMConstInt(ctx->i32, 4, false), "");
+ args[1] = LLVMBuildMul(ctx->builder, ctx->abi.vertex_id, LLVMConstInt(ctx->i32, 4, false), "");
args[3] = ctx->i32zero;
args[4] = ctx->i32one; /* OFFEN */
args[5] = ctx->i32zero; /* IDXEN */
@@ -6478,7 +6913,7 @@
AC_FUNC_ATTR_LEGACY);
LLVMBuildStore(ctx->builder,
- to_float(&ctx->ac, value), ctx->outputs[radeon_llvm_reg_index_soa(i, j)]);
+ ac_to_float(&ctx->ac, value), ctx->nir->outputs[radeon_llvm_reg_index_soa(i, j)]);
}
idx += slot_inc;
}
@@ -6510,7 +6945,7 @@
ctx.ac.builder = ctx.builder;
ctx.stage = MESA_SHADER_VERTEX;
- create_function(&ctx);
+ create_function(&ctx, MESA_SHADER_VERTEX, false, MESA_SHADER_VERTEX);
ctx.gs_max_out_vertices = geom_shader->info.gs.vertices_out;
ac_setup_rings(&ctx);
@@ -6518,11 +6953,22 @@
ctx.num_output_clips = geom_shader->info.clip_distance_array_size;
ctx.num_output_culls = geom_shader->info.cull_distance_array_size;
- nir_foreach_variable(variable, &geom_shader->outputs)
- handle_shader_output_decl(&ctx, variable);
+ struct ac_nir_context nir_ctx = {};
+ nir_ctx.ac = ctx.ac;
+ nir_ctx.abi = &ctx.abi;
+
+ nir_ctx.nctx = &ctx;
+ ctx.nir = &nir_ctx;
+
+ nir_foreach_variable(variable, &geom_shader->outputs) {
+ scan_shader_output_decl(&ctx, variable, geom_shader, MESA_SHADER_VERTEX);
+ handle_shader_output_decl(&nir_ctx, geom_shader, variable);
+ }
ac_gs_copy_shader_emit(&ctx);
+ ctx.nir = NULL;
+
LLVMBuildRetVoid(ctx.builder);
ac_llvm_finalize_module(&ctx);
diff -Nru mesa-17.2.4/src/amd/common/ac_nir_to_llvm.h mesa-17.3.3/src/amd/common/ac_nir_to_llvm.h
--- mesa-17.2.4/src/amd/common/ac_nir_to_llvm.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/common/ac_nir_to_llvm.h 2018-01-18 21:30:28.000000000 +0000
@@ -30,12 +30,14 @@
#include "amd_family.h"
#include "../vulkan/radv_descriptor_set.h"
#include "ac_shader_info.h"
-#include "shader_enums.h"
+#include "compiler/shader_enums.h"
struct ac_shader_binary;
struct ac_shader_config;
struct nir_shader;
struct radv_pipeline_layout;
+struct ac_llvm_context;
+struct ac_shader_abi;
struct ac_vs_variant_key {
uint32_t instance_rate_inputs;
@@ -50,28 +52,35 @@
};
struct ac_tcs_variant_key {
+ struct ac_vs_variant_key vs_key;
unsigned primitive_mode;
unsigned input_vertices;
+ uint32_t tes_reads_tess_factors:1;
};
struct ac_fs_variant_key {
uint32_t col_format;
uint32_t is_int8;
uint32_t is_int10;
+ uint32_t multisample : 1;
};
-union ac_shader_variant_key {
- struct ac_vs_variant_key vs;
- struct ac_fs_variant_key fs;
- struct ac_tes_variant_key tes;
- struct ac_tcs_variant_key tcs;
+struct ac_shader_variant_key {
+ union {
+ struct ac_vs_variant_key vs;
+ struct ac_fs_variant_key fs;
+ struct ac_tes_variant_key tes;
+ struct ac_tcs_variant_key tcs;
+ };
+ bool has_multiview_view_index;
};
struct ac_nir_compiler_options {
struct radv_pipeline_layout *layout;
- union ac_shader_variant_key key;
+ struct ac_shader_variant_key key;
bool unsafe_math;
bool supports_spill;
+ bool clamp_shadow_reference;
enum radeon_family family;
enum chip_class chip_class;
};
@@ -87,7 +96,8 @@
AC_UD_SCRATCH_RING_OFFSETS = 0,
AC_UD_PUSH_CONSTANTS = 1,
AC_UD_INDIRECT_DESCRIPTOR_SETS = 2,
- AC_UD_SHADER_START = 3,
+ AC_UD_VIEW_INDEX = 3,
+ AC_UD_SHADER_START = 4,
AC_UD_VS_VERTEX_BUFFERS = AC_UD_SHADER_START,
AC_UD_VS_BASE_VERTEX_START_INSTANCE,
AC_UD_VS_LS_TCS_IN_LAYOUT,
@@ -96,13 +106,13 @@
AC_UD_PS_MAX_UD,
AC_UD_CS_GRID_SIZE = AC_UD_SHADER_START,
AC_UD_CS_MAX_UD,
- AC_UD_GS_VS_RING_STRIDE_ENTRIES = AC_UD_SHADER_START,
+ AC_UD_GS_VS_RING_STRIDE_ENTRIES = AC_UD_VS_MAX_UD,
AC_UD_GS_MAX_UD,
- AC_UD_TCS_OFFCHIP_LAYOUT = AC_UD_SHADER_START,
+ AC_UD_TCS_OFFCHIP_LAYOUT = AC_UD_VS_MAX_UD,
AC_UD_TCS_MAX_UD,
AC_UD_TES_OFFCHIP_LAYOUT = AC_UD_SHADER_START,
AC_UD_TES_MAX_UD,
- AC_UD_MAX_UD = AC_UD_VS_MAX_UD,
+ AC_UD_MAX_UD = AC_UD_TCS_MAX_UD,
};
/* descriptor index into scratch ring offsets */
@@ -147,7 +157,7 @@
unsigned num_input_sgprs;
unsigned num_input_vgprs;
bool need_indirect_descriptor_sets;
- union {
+ struct {
struct {
struct ac_vs_output_info outinfo;
struct ac_es_output_info es_info;
@@ -168,7 +178,6 @@
bool writes_sample_mask;
bool early_fragment_test;
bool writes_memory;
- bool force_persample;
bool prim_id_input;
bool layer_input;
} fs;
@@ -210,7 +219,8 @@
struct ac_shader_binary *binary,
struct ac_shader_config *config,
struct ac_shader_variant_info *shader_info,
- struct nir_shader *nir,
+ struct nir_shader *const *nir,
+ int nir_count,
const struct ac_nir_compiler_options *options,
bool dump_shader);
@@ -222,4 +232,8 @@
const struct ac_nir_compiler_options *options,
bool dump_shader);
+struct nir_to_llvm_context;
+void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi,
+ struct nir_shader *nir, struct nir_to_llvm_context *nctx);
+
#endif /* AC_NIR_TO_LLVM_H */
diff -Nru mesa-17.2.4/src/amd/common/ac_shader_abi.h mesa-17.3.3/src/amd/common/ac_shader_abi.h
--- mesa-17.2.4/src/amd/common/ac_shader_abi.h 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/amd/common/ac_shader_abi.h 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,97 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef AC_SHADER_ABI_H
+#define AC_SHADER_ABI_H
+
+#include <llvm-c/Core.h>
+
+/* Kind of descriptor requested through the desc_type parameter of
+ * ac_shader_abi::load_sampler_desc. */
+enum ac_descriptor_type {
+ AC_DESC_IMAGE,
+ AC_DESC_FMASK,
+ AC_DESC_SAMPLER,
+ AC_DESC_BUFFER,
+};
+
+/* Document the shader ABI during compilation. This is what allows radeonsi and
+ * radv to share a compiler backend.
+ */
+struct ac_shader_abi {
+ LLVMValueRef base_vertex;
+ LLVMValueRef start_instance;
+ LLVMValueRef draw_id;
+ LLVMValueRef vertex_id;
+ LLVMValueRef instance_id;
+ LLVMValueRef frag_pos[4];
+ LLVMValueRef front_face;
+ LLVMValueRef ancillary;
+ LLVMValueRef sample_coverage;
+
+ /* For VS and PS: pre-loaded shader inputs.
+ *
+ * Currently only used for NIR shaders; indexed by variables'
+ * driver_location.
+ */
+ LLVMValueRef *inputs;
+
+ void (*emit_outputs)(struct ac_shader_abi *abi,
+ unsigned max_outputs,
+ LLVMValueRef *addrs);
+
+ LLVMValueRef (*load_ubo)(struct ac_shader_abi *abi, LLVMValueRef index);
+
+ /**
+ * Load the descriptor for the given buffer.
+ *
+ * \param buffer the buffer as presented in NIR: this is the descriptor
+ * in Vulkan, and the buffer index in OpenGL/Gallium
+ * \param write whether buffer contents will be written
+ */
+ LLVMValueRef (*load_ssbo)(struct ac_shader_abi *abi,
+ LLVMValueRef buffer, bool write);
+
+ /**
+ * Load a descriptor associated to a sampler.
+ *
+ * \param descriptor_set the descriptor set index (only for Vulkan)
+ * \param base_index the base index of the sampler variable
+ * \param constant_index constant part of an array index (or 0, if the
+ * sampler variable is not an array)
+ * \param index non-constant part of an array index (may be NULL)
+ * \param desc_type the type of descriptor to load
+ * \param image whether the descriptor is loaded for an image operation
+ * \param write presumably whether the descriptor will be used for
+ * writing, by analogy with load_ssbo -- TODO confirm
+ */
+ LLVMValueRef (*load_sampler_desc)(struct ac_shader_abi *abi,
+ unsigned descriptor_set,
+ unsigned base_index,
+ unsigned constant_index,
+ LLVMValueRef index,
+ enum ac_descriptor_type desc_type,
+ bool image, bool write);
+
+ /* Whether to clamp the shadow reference value to [0,1] on VI. Radeonsi currently
+ * uses it due to promoting D16 to D32, but radv needs it off. */
+ bool clamp_shadow_reference;
+};
+
+#endif /* AC_SHADER_ABI_H */
diff -Nru mesa-17.2.4/src/amd/common/ac_shader_info.c mesa-17.3.3/src/amd/common/ac_shader_info.c
--- mesa-17.2.4/src/amd/common/ac_shader_info.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/common/ac_shader_info.c 2018-01-18 21:30:28.000000000 +0000
@@ -39,9 +39,21 @@
case nir_intrinsic_load_draw_id:
info->vs.needs_draw_id = true;
break;
+ case nir_intrinsic_load_instance_id:
+ info->vs.needs_instance_id = true;
+ break;
case nir_intrinsic_load_num_work_groups:
info->cs.grid_components_used = instr->num_components;
break;
+ case nir_intrinsic_load_sample_id:
+ info->ps.force_persample = true;
+ break;
+ case nir_intrinsic_load_sample_pos:
+ info->ps.force_persample = true;
+ break;
+ case nir_intrinsic_load_view_index:
+ info->needs_multiview_view_index = true;
+ break;
case nir_intrinsic_vulkan_resource_index:
info->desc_set_used_mask |= (1 << nir_intrinsic_desc_set(instr));
break;
@@ -55,9 +67,18 @@
case nir_intrinsic_image_atomic_xor:
case nir_intrinsic_image_atomic_exchange:
case nir_intrinsic_image_atomic_comp_swap:
- case nir_intrinsic_image_size:
+ case nir_intrinsic_image_size: {
+ const struct glsl_type *type = instr->variables[0]->var->type;
+ if(instr->variables[0]->deref.child)
+ type = instr->variables[0]->deref.child->type;
+
+ enum glsl_sampler_dim dim = glsl_get_sampler_dim(type);
+ if (dim == GLSL_SAMPLER_DIM_SUBPASS ||
+ dim == GLSL_SAMPLER_DIM_SUBPASS_MS)
+ info->ps.uses_input_attachments = true;
mark_sampler_desc(instr->variables[0]->var, info);
break;
+ }
default:
break;
}
@@ -95,7 +116,7 @@
nir_variable *var,
struct ac_shader_info *info)
{
- switch (nir->stage) {
+ switch (nir->info.stage) {
case MESA_SHADER_VERTEX:
info->vs.has_vertex_buffers = true;
break;
diff -Nru mesa-17.2.4/src/amd/common/ac_shader_info.h mesa-17.3.3/src/amd/common/ac_shader_info.h
--- mesa-17.2.4/src/amd/common/ac_shader_info.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/common/ac_shader_info.h 2018-01-18 21:30:28.000000000 +0000
@@ -30,12 +30,16 @@
struct ac_shader_info {
bool needs_push_constants;
uint32_t desc_set_used_mask;
+ bool needs_multiview_view_index;
struct {
bool has_vertex_buffers; /* needs vertex buffers and base/start */
bool needs_draw_id;
+ bool needs_instance_id;
} vs;
struct {
+ bool force_persample;
bool needs_sample_positions;
+ bool uses_input_attachments;
} ps;
struct {
uint8_t grid_components_used;
diff -Nru mesa-17.2.4/src/amd/common/ac_surface.c mesa-17.3.3/src/amd/common/ac_surface.c
--- mesa-17.2.4/src/amd/common/ac_surface.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/common/ac_surface.c 2018-01-18 21:30:28.000000000 +0000
@@ -30,6 +30,7 @@
#include "amdgpu_id.h"
#include "ac_gpu_info.h"
#include "util/macros.h"
+#include "util/u_atomic.h"
#include "util/u_math.h"
#include
@@ -406,12 +407,16 @@
}
/**
+ * This must be called after the first level is computed.
+ *
* Copy surface-global settings like pipe/bank config from level 0 surface
- * computation.
+ * computation, and compute tile swizzle.
*/
-static void gfx6_surface_settings(const struct radeon_info* info,
- ADDR_COMPUTE_SURFACE_INFO_OUTPUT* csio,
- struct radeon_surf *surf)
+static int gfx6_surface_settings(ADDR_HANDLE addrlib,
+ const struct radeon_info *info,
+ const struct ac_surf_config *config,
+ ADDR_COMPUTE_SURFACE_INFO_OUTPUT* csio,
+ struct radeon_surf *surf)
{
surf->surf_alignment = csio->baseAlign;
surf->u.legacy.pipe_config = csio->pTileInfo->pipeConfig - 1;
@@ -428,6 +433,36 @@
} else {
surf->u.legacy.macro_tile_index = 0;
}
+
+ /* Compute tile swizzle. */
+ /* TODO: fix tile swizzle with mipmapping for SI */
+ if ((info->chip_class >= CIK || config->info.levels == 1) &&
+ config->info.surf_index &&
+ surf->u.legacy.level[0].mode == RADEON_SURF_MODE_2D &&
+ !(surf->flags & (RADEON_SURF_Z_OR_SBUFFER | RADEON_SURF_SHAREABLE)) &&
+ (config->info.samples > 1 || !(surf->flags & RADEON_SURF_SCANOUT))) {
+ ADDR_COMPUTE_BASE_SWIZZLE_INPUT AddrBaseSwizzleIn = {0};
+ ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT AddrBaseSwizzleOut = {0};
+
+ AddrBaseSwizzleIn.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_INPUT);
+ AddrBaseSwizzleOut.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT);
+
+ AddrBaseSwizzleIn.surfIndex = p_atomic_inc_return(config->info.surf_index) - 1;
+ AddrBaseSwizzleIn.tileIndex = csio->tileIndex;
+ AddrBaseSwizzleIn.macroModeIndex = csio->macroModeIndex;
+ AddrBaseSwizzleIn.pTileInfo = csio->pTileInfo;
+ AddrBaseSwizzleIn.tileMode = csio->tileMode;
+
+ int r = AddrComputeBaseSwizzle(addrlib, &AddrBaseSwizzleIn,
+ &AddrBaseSwizzleOut);
+ if (r != ADDR_OK)
+ return r;
+
+ assert(AddrBaseSwizzleOut.tileSwizzle <=
+ u_bit_consecutive(0, sizeof(surf->tile_swizzle) * 8));
+ surf->tile_swizzle = AddrBaseSwizzleOut.tileSwizzle;
+ }
+ return 0;
}
/**
@@ -640,6 +675,7 @@
}
}
+ surf->has_stencil = !!(surf->flags & RADEON_SURF_SBUFFER);
surf->num_dcc_levels = 0;
surf->surf_size = 0;
surf->dcc_size = 0;
@@ -683,7 +719,10 @@
assert(stencil_tile_idx >= 0);
}
- gfx6_surface_settings(info, &AddrSurfInfoOut, surf);
+ r = gfx6_surface_settings(addrlib, info, config,
+ &AddrSurfInfoOut, surf);
+ if (r)
+ return r;
}
}
@@ -716,8 +755,12 @@
}
if (level == 0) {
- if (only_stencil)
- gfx6_surface_settings(info, &AddrSurfInfoOut, surf);
+ if (only_stencil) {
+ r = gfx6_surface_settings(addrlib, info, config,
+ &AddrSurfInfoOut, surf);
+ if (r)
+ return r;
+ }
/* For 2D modes only. */
if (AddrSurfInfoOut.tileMode >= ADDR_TM_2D_TILED_THIN1) {
@@ -733,9 +776,16 @@
* complicated.
*/
if (surf->dcc_size && config->info.levels > 1) {
+ /* The smallest miplevels that are never compressed by DCC
+ * still read the DCC buffer via TC if the base level uses DCC,
+ * and for some reason the DCC buffer needs to be larger if
+ * the miptree uses non-zero tile_swizzle. Otherwise there are
+ * VM faults.
+ *
+ * "dcc_alignment * 4" was determined by trial and error.
+ */
surf->dcc_size = align64(surf->surf_size >> 8,
- info->pipe_interleave_bytes *
- info->num_tile_pipes);
+ surf->dcc_alignment * 4);
}
/* Make sure HTILE covers the whole miptree, because the shader reads
@@ -745,20 +795,9 @@
surf->htile_size *= 2;
surf->is_linear = surf->u.legacy.level[0].mode == RADEON_SURF_MODE_LINEAR_ALIGNED;
-
- /* workout base swizzle */
- if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)) {
- ADDR_COMPUTE_BASE_SWIZZLE_INPUT AddrBaseSwizzleIn = {0};
- ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT AddrBaseSwizzleOut = {0};
-
- AddrBaseSwizzleIn.surfIndex = config->info.surf_index;
- AddrBaseSwizzleIn.tileIndex = AddrSurfInfoIn.tileIndex;
- AddrBaseSwizzleIn.macroModeIndex = AddrSurfInfoOut.macroModeIndex;
- AddrBaseSwizzleIn.pTileInfo = AddrSurfInfoOut.pTileInfo;
- AddrBaseSwizzleIn.tileMode = AddrSurfInfoOut.tileMode;
- AddrComputeBaseSwizzle(addrlib, &AddrBaseSwizzleIn, &AddrBaseSwizzleOut);
- surf->u.legacy.tile_swizzle = AddrBaseSwizzleOut.tileSwizzle;
- }
+ surf->is_displayable = surf->is_linear ||
+ surf->micro_tile_mode == RADEON_MICRO_MODE_DISPLAY ||
+ surf->micro_tile_mode == RADEON_MICRO_MODE_ROTATED;
return 0;
}
@@ -888,9 +927,11 @@
in->numSamples == 1) {
ADDR2_COMPUTE_DCCINFO_INPUT din = {0};
ADDR2_COMPUTE_DCCINFO_OUTPUT dout = {0};
+ ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {};
din.size = sizeof(ADDR2_COMPUTE_DCCINFO_INPUT);
dout.size = sizeof(ADDR2_COMPUTE_DCCINFO_OUTPUT);
+ dout.pMipInfo = meta_mip_info;
din.dccKeyFlags.pipeAligned = 1;
din.dccKeyFlags.rbAligned = 1;
@@ -914,6 +955,39 @@
surf->u.gfx9.dcc_pitch_max = dout.pitch - 1;
surf->dcc_size = dout.dccRamSize;
surf->dcc_alignment = dout.dccRamBaseAlign;
+ surf->num_dcc_levels = in->numMipLevels;
+
+ /* Disable DCC for levels that are in the mip tail.
+ *
+ * There are two issues that this is intended to
+ * address:
+ *
+ * 1. Multiple mip levels may share a cache line. This
+ * can lead to corruption when switching between
+ * rendering to different mip levels because the
+ * RBs don't maintain coherency.
+ *
+ * 2. Texturing with metadata after rendering sometimes
+ * fails with corruption, probably for a similar
+ * reason.
+ *
+ * Working around these issues for all levels in the
+ * mip tail may be overly conservative, but it's what
+ * Vulkan does.
+ *
+ * Alternative solutions that also work but are worse:
+ * - Disable DCC entirely.
+ * - Flush TC L2 after rendering.
+ */
+ for (unsigned i = 0; i < in->numMipLevels; i++) {
+ if (meta_mip_info[i].inMiptail) {
+ surf->num_dcc_levels = i;
+ break;
+ }
+ }
+
+ if (!surf->num_dcc_levels)
+ surf->dcc_size = 0;
}
/* FMASK */
@@ -1066,7 +1140,9 @@
}
surf->u.gfx9.resource_type = AddrSurfInfoIn.resourceType;
+ surf->has_stencil = !!(surf->flags & RADEON_SURF_SBUFFER);
+ surf->num_dcc_levels = 0;
surf->surf_size = 0;
surf->dcc_size = 0;
surf->htile_size = 0;
@@ -1100,7 +1176,14 @@
}
surf->is_linear = surf->u.gfx9.surf.swizzle_mode == ADDR_SW_LINEAR;
- surf->num_dcc_levels = surf->dcc_size ? config->info.levels : 0;
+
+ /* Query whether the surface is displayable. */
+ bool displayable = false;
+ r = Addr2IsValidDisplaySwizzleMode(addrlib, surf->u.gfx9.surf.swizzle_mode,
+ surf->bpe * 8, &displayable);
+ if (r)
+ return r;
+ surf->is_displayable = displayable;
switch (surf->u.gfx9.surf.swizzle_mode) {
/* S = standard. */
diff -Nru mesa-17.2.4/src/amd/common/ac_surface.h mesa-17.3.3/src/amd/common/ac_surface.h
--- mesa-17.2.4/src/amd/common/ac_surface.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/common/ac_surface.h 2018-01-18 21:30:28.000000000 +0000
@@ -62,12 +62,12 @@
#define RADEON_SURF_SBUFFER (1 << 18)
#define RADEON_SURF_Z_OR_SBUFFER (RADEON_SURF_ZBUFFER | RADEON_SURF_SBUFFER)
/* bits 19 and 20 are reserved for libdrm_radeon, don't use them */
-#define RADEON_SURF_HAS_TILE_MODE_INDEX (1 << 20)
#define RADEON_SURF_FMASK (1 << 21)
#define RADEON_SURF_DISABLE_DCC (1 << 22)
#define RADEON_SURF_TC_COMPATIBLE_HTILE (1 << 23)
#define RADEON_SURF_IMPORTED (1 << 24)
#define RADEON_SURF_OPTIMIZE_FOR_SPACE (1 << 25)
+#define RADEON_SURF_SHAREABLE (1 << 26)
struct legacy_surf_level {
uint64_t offset;
@@ -97,7 +97,6 @@
unsigned depth_adjusted:1;
unsigned stencil_adjusted:1;
- uint8_t tile_swizzle;
struct legacy_surf_level level[RADEON_SURF_MAX_LEVELS];
struct legacy_surf_level stencil_level[RADEON_SURF_MAX_LEVELS];
uint8_t tiling_index[RADEON_SURF_MAX_LEVELS];
@@ -131,11 +130,12 @@
struct gfx9_surf_meta_flags cmask; /* metadata of fmask */
enum gfx9_resource_type resource_type; /* 1D, 2D or 3D */
+ uint16_t surf_pitch; /* in blocks */
+ uint16_t surf_height;
+
uint64_t surf_offset; /* 0 unless imported with an offset */
/* The size of the 2D plane containing all mipmap levels. */
uint64_t surf_slice_size;
- uint16_t surf_pitch; /* in blocks */
- uint16_t surf_height;
/* Mipmap level offset within the slice in bytes. Only valid for LINEAR. */
uint32_t offset[RADEON_SURF_MAX_LEVELS];
@@ -160,6 +160,9 @@
*/
unsigned num_dcc_levels:4;
unsigned is_linear:1;
+ unsigned has_stencil:1;
+ /* This might be true even if micro_tile_mode isn't displayable or rotated. */
+ unsigned is_displayable:1;
/* Displayable, thin, depth, rotated. AKA D,S,Z,R swizzle modes. */
unsigned micro_tile_mode:3;
uint32_t flags;
@@ -168,6 +171,21 @@
* they will be treated as hints (e.g. bankw, bankh) and might be
* changed by the calculator.
*/
+
+ /* Tile swizzle can be OR'd with low bits of the BASE_256B address.
+ * The value is the same for all mipmap levels. Supported tile modes:
+ * - GFX6: Only macro tiling.
+ * - GFX9: Only *_X swizzle modes. Level 0 must not be in the mip tail.
+ *
+ * Only these surfaces are allowed to set it:
+ * - color (if it doesn't have to be displayable)
+ * - DCC (same tile swizzle as color)
+ * - FMASK
+ * - CMASK if it's TC-compatible or if the gen is GFX9
+ * - depth/stencil if HTILE is not TC-compatible and if the gen is not GFX9
+ */
+ uint8_t tile_swizzle;
+
uint64_t surf_size;
uint64_t dcc_size;
uint64_t htile_size;
@@ -195,10 +213,10 @@
uint32_t width;
uint32_t height;
uint32_t depth;
- uint32_t surf_index;
uint8_t samples;
uint8_t levels;
uint16_t array_size;
+ uint32_t *surf_index; /* Set a monotonic counter for tile swizzling. */
};
struct ac_surf_config {
diff -Nru mesa-17.2.4/src/amd/common/meson.build mesa-17.3.3/src/amd/common/meson.build
--- mesa-17.2.4/src/amd/common/meson.build 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/amd/common/meson.build 2018-02-01 16:17:32.000000000 +0000
@@ -0,0 +1,63 @@
+# Copyright © 2017 Intel Corporation
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+inc_amd_common = include_directories('.')
+
+sid_tables_h = custom_target(
+ 'sid_tables_h',
+ input : ['sid_tables.py', 'sid.h', 'gfx9d.h'],
+ output : 'sid_tables.h',
+ command : [prog_python2, '@INPUT@'],
+ capture : true,
+)
+
+amd_common_files = files(
+ 'ac_binary.c',
+ 'ac_binary.h',
+ 'ac_exp_param.h',
+ 'ac_llvm_build.c',
+ 'ac_llvm_build.h',
+ 'ac_llvm_helper.cpp',
+ 'ac_llvm_util.c',
+ 'ac_llvm_util.h',
+ 'ac_shader_abi.h',
+ 'ac_shader_info.c',
+ 'ac_shader_info.h',
+ 'ac_nir_to_llvm.c',
+ 'ac_nir_to_llvm.h',
+ 'ac_gpu_info.c',
+ 'ac_gpu_info.h',
+ 'ac_surface.c',
+ 'ac_surface.h',
+ 'ac_debug.c',
+ 'ac_debug.h',
+)
+
+libamd_common = static_library(
+ 'amd_common',
+ [amd_common_files, sid_tables_h, nir_opcodes_h],
+ include_directories : [inc_common, inc_compiler, inc_nir, inc_mesa, inc_mapi,
+ inc_amd],
+ dependencies : [dep_llvm, dep_thread, dep_elf, dep_libdrm_amdgpu,
+ dep_valgrind],
+ c_args : [c_vis_args],
+ cpp_args : [cpp_vis_args],
+ build_by_default : false,
+)
diff -Nru mesa-17.2.4/src/amd/common/r600d_common.h mesa-17.3.3/src/amd/common/r600d_common.h
--- mesa-17.2.4/src/amd/common/r600d_common.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/common/r600d_common.h 1970-01-01 00:00:00.000000000 +0000
@@ -1,298 +0,0 @@
-/*
- * Copyright 2013 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Marek Olšák
- */
-
-#ifndef R600D_COMMON_H
-#define R600D_COMMON_H
-
-#define R600_CONFIG_REG_OFFSET 0x08000
-#define R600_CONTEXT_REG_OFFSET 0x28000
-#define SI_SH_REG_OFFSET 0x0000B000
-#define SI_SH_REG_END 0x0000C000
-#define CIK_UCONFIG_REG_OFFSET 0x00030000
-#define CIK_UCONFIG_REG_END 0x00038000
-
-#define PKT_TYPE_S(x) (((unsigned)(x) & 0x3) << 30)
-#define PKT_COUNT_S(x) (((unsigned)(x) & 0x3FFF) << 16)
-#define PKT3_IT_OPCODE_S(x) (((unsigned)(x) & 0xFF) << 8)
-#define PKT3_PREDICATE(x) (((x) >> 0) & 0x1)
-#define PKT3(op, count, predicate) (PKT_TYPE_S(3) | PKT_COUNT_S(count) | PKT3_IT_OPCODE_S(op) | PKT3_PREDICATE(predicate))
-
-#define RADEON_CP_PACKET3_COMPUTE_MODE 0x00000002
-
-#define PKT3_NOP 0x10
-#define PKT3_SET_PREDICATION 0x20
-#define PKT3_STRMOUT_BUFFER_UPDATE 0x34
-#define STRMOUT_STORE_BUFFER_FILLED_SIZE 1
-#define STRMOUT_OFFSET_SOURCE(x) (((unsigned)(x) & 0x3) << 1)
-#define STRMOUT_OFFSET_FROM_PACKET 0
-#define STRMOUT_OFFSET_FROM_VGT_FILLED_SIZE 1
-#define STRMOUT_OFFSET_FROM_MEM 2
-#define STRMOUT_OFFSET_NONE 3
-#define STRMOUT_SELECT_BUFFER(x) (((unsigned)(x) & 0x3) << 8)
-#define PKT3_WAIT_REG_MEM 0x3C
-#define WAIT_REG_MEM_EQUAL 3
-#define WAIT_REG_MEM_MEM_SPACE(x) (((unsigned)(x) & 0x3) << 4)
-#define PKT3_COPY_DATA 0x40
-#define COPY_DATA_SRC_SEL(x) ((x) & 0xf)
-#define COPY_DATA_REG 0
-#define COPY_DATA_MEM 1
-#define COPY_DATA_PERF 4
-#define COPY_DATA_IMM 5
-#define COPY_DATA_TIMESTAMP 9
-#define COPY_DATA_DST_SEL(x) (((unsigned)(x) & 0xf) << 8)
-#define COPY_DATA_MEM_ASYNC 5
-#define COPY_DATA_COUNT_SEL (1 << 16)
-#define COPY_DATA_WR_CONFIRM (1 << 20)
-#define PKT3_EVENT_WRITE 0x46
-#define PKT3_EVENT_WRITE_EOP 0x47
-#define EOP_DATA_SEL(x) ((x) << 29)
- /* 0 - discard
- * 1 - send low 32bit data
- * 2 - send 64bit data
- * 3 - send 64bit GPU counter value
- * 4 - send 64bit sys counter value
- */
-#define PKT3_RELEASE_MEM 0x49 /* GFX9+ */
-#define PKT3_SET_CONFIG_REG 0x68
-#define PKT3_SET_CONTEXT_REG 0x69
-#define PKT3_STRMOUT_BASE_UPDATE 0x72 /* r700 only */
-#define PKT3_SURFACE_BASE_UPDATE 0x73 /* r600 only */
-#define SURFACE_BASE_UPDATE_DEPTH (1 << 0)
-#define SURFACE_BASE_UPDATE_COLOR(x) (2 << (x))
-#define SURFACE_BASE_UPDATE_COLOR_NUM(x) (((1 << x) - 1) << 1)
-#define SURFACE_BASE_UPDATE_STRMOUT(x) (0x200 << (x))
-#define PKT3_SET_SH_REG 0x76 /* SI and later */
-#define PKT3_SET_UCONFIG_REG 0x79 /* CIK and later */
-
-#define EVENT_TYPE_SAMPLE_STREAMOUTSTATS1 0x1 /* EG and later */
-#define EVENT_TYPE_SAMPLE_STREAMOUTSTATS2 0x2 /* EG and later */
-#define EVENT_TYPE_SAMPLE_STREAMOUTSTATS3 0x3 /* EG and later */
-#define EVENT_TYPE_PS_PARTIAL_FLUSH 0x10
-#define EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT 0x14
-#define EVENT_TYPE_ZPASS_DONE 0x15
-#define EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT 0x16
-#define EVENT_TYPE_PERFCOUNTER_START 0x17
-#define EVENT_TYPE_PERFCOUNTER_STOP 0x18
-#define EVENT_TYPE_PIPELINESTAT_START 25
-#define EVENT_TYPE_PIPELINESTAT_STOP 26
-#define EVENT_TYPE_PERFCOUNTER_SAMPLE 0x1B
-#define EVENT_TYPE_SAMPLE_PIPELINESTAT 30
-#define EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH 0x1f
-#define EVENT_TYPE_SAMPLE_STREAMOUTSTATS 0x20
-#define EVENT_TYPE_BOTTOM_OF_PIPE_TS 40
-#define EVENT_TYPE_FLUSH_AND_INV_DB_META 0x2c /* supported on r700+ */
-#define EVENT_TYPE_FLUSH_AND_INV_CB_META 46 /* supported on r700+ */
-#define EVENT_TYPE(x) ((x) << 0)
-#define EVENT_INDEX(x) ((x) << 8)
- /* 0 - any non-TS event
- * 1 - ZPASS_DONE
- * 2 - SAMPLE_PIPELINESTAT
- * 3 - SAMPLE_STREAMOUTSTAT*
- * 4 - *S_PARTIAL_FLUSH
- * 5 - TS events
- */
-
-#define PREDICATION_OP_CLEAR 0x0
-#define PREDICATION_OP_ZPASS 0x1
-#define PREDICATION_OP_PRIMCOUNT 0x2
-#define PRED_OP(x) ((x) << 16)
-#define PREDICATION_CONTINUE (1 << 31)
-#define PREDICATION_HINT_WAIT (0 << 12)
-#define PREDICATION_HINT_NOWAIT_DRAW (1 << 12)
-#define PREDICATION_DRAW_NOT_VISIBLE (0 << 8)
-#define PREDICATION_DRAW_VISIBLE (1 << 8)
-
-/* R600-R700*/
-#define R_008490_CP_STRMOUT_CNTL 0x008490
-#define S_008490_OFFSET_UPDATE_DONE(x) (((unsigned)(x) & 0x1) << 0)
-#define R_028AB0_VGT_STRMOUT_EN 0x028AB0
-#define S_028AB0_STREAMOUT(x) (((unsigned)(x) & 0x1) << 0)
-#define G_028AB0_STREAMOUT(x) (((x) >> 0) & 0x1)
-#define C_028AB0_STREAMOUT 0xFFFFFFFE
-#define R_028B20_VGT_STRMOUT_BUFFER_EN 0x028B20
-#define S_028B20_BUFFER_0_EN(x) (((unsigned)(x) & 0x1) << 0)
-#define G_028B20_BUFFER_0_EN(x) (((x) >> 0) & 0x1)
-#define C_028B20_BUFFER_0_EN 0xFFFFFFFE
-#define S_028B20_BUFFER_1_EN(x) (((unsigned)(x) & 0x1) << 1)
-#define G_028B20_BUFFER_1_EN(x) (((x) >> 1) & 0x1)
-#define C_028B20_BUFFER_1_EN 0xFFFFFFFD
-#define S_028B20_BUFFER_2_EN(x) (((unsigned)(x) & 0x1) << 2)
-#define G_028B20_BUFFER_2_EN(x) (((x) >> 2) & 0x1)
-#define C_028B20_BUFFER_2_EN 0xFFFFFFFB
-#define S_028B20_BUFFER_3_EN(x) (((unsigned)(x) & 0x1) << 3)
-#define G_028B20_BUFFER_3_EN(x) (((x) >> 3) & 0x1)
-#define C_028B20_BUFFER_3_EN 0xFFFFFFF7
-#define R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 0x028AD0
-
-#define V_0280A0_SWAP_STD 0x00000000
-#define V_0280A0_SWAP_ALT 0x00000001
-#define V_0280A0_SWAP_STD_REV 0x00000002
-#define V_0280A0_SWAP_ALT_REV 0x00000003
-
-/* EG+ */
-#define R_0084FC_CP_STRMOUT_CNTL 0x0084FC
-#define S_0084FC_OFFSET_UPDATE_DONE(x) (((unsigned)(x) & 0x1) << 0)
-#define R_028B94_VGT_STRMOUT_CONFIG 0x028B94
-#define S_028B94_STREAMOUT_0_EN(x) (((unsigned)(x) & 0x1) << 0)
-#define G_028B94_STREAMOUT_0_EN(x) (((x) >> 0) & 0x1)
-#define C_028B94_STREAMOUT_0_EN 0xFFFFFFFE
-#define S_028B94_STREAMOUT_1_EN(x) (((unsigned)(x) & 0x1) << 1)
-#define G_028B94_STREAMOUT_1_EN(x) (((x) >> 1) & 0x1)
-#define C_028B94_STREAMOUT_1_EN 0xFFFFFFFD
-#define S_028B94_STREAMOUT_2_EN(x) (((unsigned)(x) & 0x1) << 2)
-#define G_028B94_STREAMOUT_2_EN(x) (((x) >> 2) & 0x1)
-#define C_028B94_STREAMOUT_2_EN 0xFFFFFFFB
-#define S_028B94_STREAMOUT_3_EN(x) (((unsigned)(x) & 0x1) << 3)
-#define G_028B94_STREAMOUT_3_EN(x) (((x) >> 3) & 0x1)
-#define C_028B94_STREAMOUT_3_EN 0xFFFFFFF7
-#define S_028B94_RAST_STREAM(x) (((unsigned)(x) & 0x07) << 4)
-#define G_028B94_RAST_STREAM(x) (((x) >> 4) & 0x07)
-#define C_028B94_RAST_STREAM 0xFFFFFF8F
-#define S_028B94_RAST_STREAM_MASK(x) (((unsigned)(x) & 0x0F) << 8) /* SI+ */
-#define G_028B94_RAST_STREAM_MASK(x) (((x) >> 8) & 0x0F)
-#define C_028B94_RAST_STREAM_MASK 0xFFFFF0FF
-#define S_028B94_USE_RAST_STREAM_MASK(x) (((unsigned)(x) & 0x1) << 31) /* SI+ */
-#define G_028B94_USE_RAST_STREAM_MASK(x) (((x) >> 31) & 0x1)
-#define C_028B94_USE_RAST_STREAM_MASK 0x7FFFFFFF
-#define R_028B98_VGT_STRMOUT_BUFFER_CONFIG 0x028B98
-#define S_028B98_STREAM_0_BUFFER_EN(x) (((unsigned)(x) & 0x0F) << 0)
-#define G_028B98_STREAM_0_BUFFER_EN(x) (((x) >> 0) & 0x0F)
-#define C_028B98_STREAM_0_BUFFER_EN 0xFFFFFFF0
-#define S_028B98_STREAM_1_BUFFER_EN(x) (((unsigned)(x) & 0x0F) << 4)
-#define G_028B98_STREAM_1_BUFFER_EN(x) (((x) >> 4) & 0x0F)
-#define C_028B98_STREAM_1_BUFFER_EN 0xFFFFFF0F
-#define S_028B98_STREAM_2_BUFFER_EN(x) (((unsigned)(x) & 0x0F) << 8)
-#define G_028B98_STREAM_2_BUFFER_EN(x) (((x) >> 8) & 0x0F)
-#define C_028B98_STREAM_2_BUFFER_EN 0xFFFFF0FF
-#define S_028B98_STREAM_3_BUFFER_EN(x) (((unsigned)(x) & 0x0F) << 12)
-#define G_028B98_STREAM_3_BUFFER_EN(x) (((x) >> 12) & 0x0F)
-#define C_028B98_STREAM_3_BUFFER_EN 0xFFFF0FFF
-
-#define EG_R_028A4C_PA_SC_MODE_CNTL_1 0x028A4C
-#define EG_S_028A4C_PS_ITER_SAMPLE(x) (((unsigned)(x) & 0x1) << 16)
-#define EG_S_028A4C_FORCE_EOV_CNTDWN_ENABLE(x) (((unsigned)(x) & 0x1) << 25)
-#define EG_S_028A4C_FORCE_EOV_REZ_ENABLE(x) (((unsigned)(x) & 0x1) << 26)
-
-#define CM_R_028804_DB_EQAA 0x00028804
-#define S_028804_MAX_ANCHOR_SAMPLES(x) (((unsigned)(x) & 0x07) << 0)
-#define G_028804_MAX_ANCHOR_SAMPLES(x) (((x) >> 0) & 0x07)
-#define C_028804_MAX_ANCHOR_SAMPLES 0xFFFFFFF8
-#define S_028804_PS_ITER_SAMPLES(x) (((unsigned)(x) & 0x07) << 4)
-#define G_028804_PS_ITER_SAMPLES(x) (((x) >> 4) & 0x07)
-#define C_028804_PS_ITER_SAMPLES 0xFFFFFF8F
-#define S_028804_MASK_EXPORT_NUM_SAMPLES(x) (((unsigned)(x) & 0x07) << 8)
-#define G_028804_MASK_EXPORT_NUM_SAMPLES(x) (((x) >> 8) & 0x07)
-#define C_028804_MASK_EXPORT_NUM_SAMPLES 0xFFFFF8FF
-#define S_028804_ALPHA_TO_MASK_NUM_SAMPLES(x) (((unsigned)(x) & 0x07) << 12)
-#define G_028804_ALPHA_TO_MASK_NUM_SAMPLES(x) (((x) >> 12) & 0x07)
-#define C_028804_ALPHA_TO_MASK_NUM_SAMPLES 0xFFFF8FFF
-#define S_028804_HIGH_QUALITY_INTERSECTIONS(x) (((unsigned)(x) & 0x1) << 16)
-#define G_028804_HIGH_QUALITY_INTERSECTIONS(x) (((x) >> 16) & 0x1)
-#define C_028804_HIGH_QUALITY_INTERSECTIONS 0xFFFEFFFF
-#define S_028804_INCOHERENT_EQAA_READS(x) (((unsigned)(x) & 0x1) << 17)
-#define G_028804_INCOHERENT_EQAA_READS(x) (((x) >> 17) & 0x1)
-#define C_028804_INCOHERENT_EQAA_READS 0xFFFDFFFF
-#define S_028804_INTERPOLATE_COMP_Z(x) (((unsigned)(x) & 0x1) << 18)
-#define G_028804_INTERPOLATE_COMP_Z(x) (((x) >> 18) & 0x1)
-#define C_028804_INTERPOLATE_COMP_Z 0xFFFBFFFF
-#define S_028804_INTERPOLATE_SRC_Z(x) (((unsigned)(x) & 0x1) << 19)
-#define G_028804_INTERPOLATE_SRC_Z(x) (((x) >> 19) & 0x1)
-#define C_028804_INTERPOLATE_SRC_Z 0xFFF7FFFF
-#define S_028804_STATIC_ANCHOR_ASSOCIATIONS(x) (((unsigned)(x) & 0x1) << 20)
-#define G_028804_STATIC_ANCHOR_ASSOCIATIONS(x) (((x) >> 20) & 0x1)
-#define C_028804_STATIC_ANCHOR_ASSOCIATIONS 0xFFEFFFFF
-#define S_028804_ALPHA_TO_MASK_EQAA_DISABLE(x) (((unsigned)(x) & 0x1) << 21)
-#define G_028804_ALPHA_TO_MASK_EQAA_DISABLE(x) (((x) >> 21) & 0x1)
-#define C_028804_ALPHA_TO_MASK_EQAA_DISABLE 0xFFDFFFFF
-#define S_028804_OVERRASTERIZATION_AMOUNT(x) (((unsigned)(x) & 0x07) << 24)
-#define G_028804_OVERRASTERIZATION_AMOUNT(x) (((x) >> 24) & 0x07)
-#define C_028804_OVERRASTERIZATION_AMOUNT 0xF8FFFFFF
-#define S_028804_ENABLE_POSTZ_OVERRASTERIZATION(x) (((unsigned)(x) & 0x1) << 27)
-#define G_028804_ENABLE_POSTZ_OVERRASTERIZATION(x) (((x) >> 27) & 0x1)
-#define C_028804_ENABLE_POSTZ_OVERRASTERIZATION 0xF7FFFFFF
-#define CM_R_028BDC_PA_SC_LINE_CNTL 0x28bdc
-#define S_028BDC_EXPAND_LINE_WIDTH(x) (((unsigned)(x) & 0x1) << 9)
-#define G_028BDC_EXPAND_LINE_WIDTH(x) (((x) >> 9) & 0x1)
-#define C_028BDC_EXPAND_LINE_WIDTH 0xFFFFFDFF
-#define S_028BDC_LAST_PIXEL(x) (((unsigned)(x) & 0x1) << 10)
-#define G_028BDC_LAST_PIXEL(x) (((x) >> 10) & 0x1)
-#define C_028BDC_LAST_PIXEL 0xFFFFFBFF
-#define S_028BDC_PERPENDICULAR_ENDCAP_ENA(x) (((unsigned)(x) & 0x1) << 11)
-#define G_028BDC_PERPENDICULAR_ENDCAP_ENA(x) (((x) >> 11) & 0x1)
-#define C_028BDC_PERPENDICULAR_ENDCAP_ENA 0xFFFFF7FF
-#define S_028BDC_DX10_DIAMOND_TEST_ENA(x) (((unsigned)(x) & 0x1) << 12)
-#define G_028BDC_DX10_DIAMOND_TEST_ENA(x) (((x) >> 12) & 0x1)
-#define C_028BDC_DX10_DIAMOND_TEST_ENA 0xFFFFEFFF
-#define CM_R_028BE0_PA_SC_AA_CONFIG 0x28be0
-#define S_028BE0_MSAA_NUM_SAMPLES(x) (((unsigned)(x) & 0x07) << 0)
-#define G_028BE0_MSAA_NUM_SAMPLES(x) (((x) >> 0) & 0x07)
-#define C_028BE0_MSAA_NUM_SAMPLES 0xFFFFFFF8
-#define S_028BE0_AA_MASK_CENTROID_DTMN(x) (((unsigned)(x) & 0x1) << 4)
-#define G_028BE0_AA_MASK_CENTROID_DTMN(x) (((x) >> 4) & 0x1)
-#define C_028BE0_AA_MASK_CENTROID_DTMN 0xFFFFFFEF
-#define S_028BE0_MAX_SAMPLE_DIST(x) (((unsigned)(x) & 0x0F) << 13)
-#define G_028BE0_MAX_SAMPLE_DIST(x) (((x) >> 13) & 0x0F)
-#define C_028BE0_MAX_SAMPLE_DIST 0xFFFE1FFF
-#define S_028BE0_MSAA_EXPOSED_SAMPLES(x) (((unsigned)(x) & 0x07) << 20)
-#define G_028BE0_MSAA_EXPOSED_SAMPLES(x) (((x) >> 20) & 0x07)
-#define C_028BE0_MSAA_EXPOSED_SAMPLES 0xFF8FFFFF
-#define S_028BE0_DETAIL_TO_EXPOSED_MODE(x) (((unsigned)(x) & 0x03) << 24)
-#define G_028BE0_DETAIL_TO_EXPOSED_MODE(x) (((x) >> 24) & 0x03)
-#define C_028BE0_DETAIL_TO_EXPOSED_MODE 0xFCFFFFFF
-#define CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 0x28bf8
-#define CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0 0x28c08
-#define CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0 0x28c18
-#define CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0 0x28c28
-
-#define EG_S_028C70_FAST_CLEAR(x) (((unsigned)(x) & 0x1) << 17)
-#define SI_S_028C70_FAST_CLEAR(x) (((unsigned)(x) & 0x1) << 13)
-
-/*CIK+*/
-#define R_0300FC_CP_STRMOUT_CNTL 0x0300FC
-
-#define R600_R_028C0C_PA_CL_GB_VERT_CLIP_ADJ 0x028C0C
-#define CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ 0x28be8
-#define R_02843C_PA_CL_VPORT_XSCALE 0x02843C
-
-#define R_028250_PA_SC_VPORT_SCISSOR_0_TL 0x028250
-#define S_028250_TL_X(x) (((unsigned)(x) & 0x7FFF) << 0)
-#define G_028250_TL_X(x) (((x) >> 0) & 0x7FFF)
-#define C_028250_TL_X 0xFFFF8000
-#define S_028250_TL_Y(x) (((unsigned)(x) & 0x7FFF) << 16)
-#define G_028250_TL_Y(x) (((x) >> 16) & 0x7FFF)
-#define C_028250_TL_Y 0x8000FFFF
-#define S_028250_WINDOW_OFFSET_DISABLE(x) (((unsigned)(x) & 0x1) << 31)
-#define G_028250_WINDOW_OFFSET_DISABLE(x) (((x) >> 31) & 0x1)
-#define C_028250_WINDOW_OFFSET_DISABLE 0x7FFFFFFF
-#define S_028254_BR_X(x) (((unsigned)(x) & 0x7FFF) << 0)
-#define G_028254_BR_X(x) (((x) >> 0) & 0x7FFF)
-#define C_028254_BR_X 0xFFFF8000
-#define S_028254_BR_Y(x) (((unsigned)(x) & 0x7FFF) << 16)
-#define G_028254_BR_Y(x) (((x) >> 16) & 0x7FFF)
-#define C_028254_BR_Y 0x8000FFFF
-#define R_0282D0_PA_SC_VPORT_ZMIN_0 0x0282D0
-#define R_0282D4_PA_SC_VPORT_ZMAX_0 0x0282D4
-
-#endif
diff -Nru mesa-17.2.4/src/amd/common/sid.h mesa-17.3.3/src/amd/common/sid.h
--- mesa-17.2.4/src/amd/common/sid.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/common/sid.h 2018-01-18 21:30:28.000000000 +0000
@@ -113,6 +113,13 @@
#define PKT3_INDIRECT_BUFFER_SI 0x32 /* not on CIK */
#define PKT3_INDIRECT_BUFFER_CONST 0x33
#define PKT3_STRMOUT_BUFFER_UPDATE 0x34
+#define STRMOUT_STORE_BUFFER_FILLED_SIZE 1
+#define STRMOUT_OFFSET_SOURCE(x) (((unsigned)(x) & 0x3) << 1)
+#define STRMOUT_OFFSET_FROM_PACKET 0
+#define STRMOUT_OFFSET_FROM_VGT_FILLED_SIZE 1
+#define STRMOUT_OFFSET_FROM_MEM 2
+#define STRMOUT_OFFSET_NONE 3
+#define STRMOUT_SELECT_BUFFER(x) (((unsigned)(x) & 0x3) << 8)
#define PKT3_DRAW_INDEX_OFFSET_2 0x35
#define PKT3_WRITE_DATA 0x37
#define R_370_CONTROL 0x370 /* 0x[packet number][word index] */
@@ -137,6 +144,7 @@
#define PKT3_MPEG_INDEX 0x3A /* not on CIK */
#define PKT3_WAIT_REG_MEM 0x3C
#define WAIT_REG_MEM_EQUAL 3
+#define WAIT_REG_MEM_MEM_SPACE(x) (((unsigned)(x) & 0x3) << 4)
#define PKT3_MEM_WRITE 0x3D /* not on CIK */
#define PKT3_INDIRECT_BUFFER_CIK 0x3F /* new on CIK */
#define R_3F0_IB_BASE_LO 0x3F0
@@ -156,6 +164,7 @@
#define COPY_DATA_IMM 5
#define COPY_DATA_TIMESTAMP 9
#define COPY_DATA_DST_SEL(x) (((unsigned)(x) & 0xf) << 8)
+#define COPY_DATA_MEM_ASYNC 5
#define COPY_DATA_COUNT_SEL (1 << 16)
#define COPY_DATA_WR_CONFIRM (1 << 20)
#define PKT3_PFP_SYNC_ME 0x42
@@ -164,6 +173,14 @@
#define PKT3_COND_WRITE 0x45
#define PKT3_EVENT_WRITE 0x46
#define PKT3_EVENT_WRITE_EOP 0x47 /* not on GFX9 */
+#define EOP_INT_SEL(x) ((x) << 24)
+#define EOP_INT_SEL_NONE 0
+#define EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM 3
+#define EOP_DATA_SEL(x) ((x) << 29)
+#define EOP_DATA_SEL_DISCARD 0
+#define EOP_DATA_SEL_VALUE_32BIT 1
+#define EOP_DATA_SEL_VALUE_64BIT 2
+#define EOP_DATA_SEL_TIMESTAMP 3
/* CP DMA bug: Any use of CP_DMA.DST_SEL=TC must be avoided when EOS packets
* are used. Use DST_SEL=MC instead. For prefetch, use SRC_SEL=TC and
* DST_SEL=MC. Only CIK chips are affected.
diff -Nru mesa-17.2.4/src/amd/common/sid_tables.h mesa-17.3.3/src/amd/common/sid_tables.h
--- mesa-17.2.4/src/amd/common/sid_tables.h 2017-10-30 14:50:50.000000000 +0000
+++ mesa-17.3.3/src/amd/common/sid_tables.h 2018-01-18 21:31:07.000000000 +0000
@@ -105,4406 +105,5247 @@
{818, PKT3_DMA_DATA},
};
+static const struct si_reg sid_reg_table[] = {
+ {827, R_2C3_DRAW_INDEX_LOC, 2, 0},
+ {195, R_370_CONTROL, 4, 2},
+ {842, R_371_DST_ADDR_LO},
+ {854, R_372_DST_ADDR_HI},
+ {866, R_3F0_IB_BASE_LO},
+ {877, R_3F1_IB_BASE_HI},
+ {195, R_3F2_CONTROL, 3, 6},
+ {888, R_410_CP_DMA_WORD0, 1, 9},
+ {901, R_411_CP_DMA_WORD1, 5, 10},
+ {914, R_412_CP_DMA_WORD2, 1, 15},
+ {927, R_413_CP_DMA_WORD3, 1, 16},
+ {940, R_414_COMMAND, 11, 17},
+ {948, R_500_DMA_DATA_WORD0, 4, 28},
+ {963, R_501_SRC_ADDR_LO},
+ {975, R_502_SRC_ADDR_HI},
+ {842, R_503_DST_ADDR_LO},
+ {854, R_504_DST_ADDR_HI},
+ {987, R_000E4C_SRBM_STATUS2, 21, 32},
+ {1000, R_000E50_SRBM_STATUS, 20, 53},
+ {1012, R_000E54_SRBM_STATUS3, 16, 73},
+ {1025, R_00D034_SDMA0_STATUS_REG, 29, 89},
+ {1042, R_00D834_SDMA1_STATUS_REG, 29, 89},
+ {1059, R_008008_GRBM_STATUS2, 18, 118},
+ {1072, R_008010_GRBM_STATUS, 24, 136},
+ {1084, R_0084FC_CP_STRMOUT_CNTL, 1, 160},
+ {1100, R_0085F0_CP_COHER_CNTL, 19, 161},
+ {1114, R_0085F4_CP_COHER_SIZE},
+ {1128, R_0085F8_CP_COHER_BASE},
+ {1142, R_008014_GRBM_STATUS_SE0, 11, 180},
+ {1158, R_008018_GRBM_STATUS_SE1, 11, 191},
+ {1174, R_008038_GRBM_STATUS_SE2, 11, 202},
+ {1190, R_00803C_GRBM_STATUS_SE3, 11, 213},
+ {1084, R_0300FC_CP_STRMOUT_CNTL, 1, 224},
+ {1206, R_0301E4_CP_COHER_BASE_HI, 1, 225},
+ {1223, R_0301EC_CP_COHER_START_DELAY, 1, 226},
+ {1100, R_0301F0_CP_COHER_CNTL, 27, 227},
+ {1114, R_0301F4_CP_COHER_SIZE},
+ {1128, R_0301F8_CP_COHER_BASE},
+ {1244, R_0301FC_CP_COHER_STATUS, 4, 254},
+ {1260, R_008210_CP_CPC_STATUS, 15, 258},
+ {1274, R_008214_CP_CPC_BUSY_STAT, 28, 273},
+ {1291, R_008218_CP_CPC_STALLED_STAT1, 14, 301},
+ {1312, R_00821C_CP_CPF_STATUS, 21, 315},
+ {1326, R_008220_CP_CPF_BUSY_STAT, 31, 336},
+ {1343, R_008224_CP_CPF_STALLED_STAT1, 9, 367},
+ {1364, R_030230_CP_COHER_SIZE_HI, 1, 376},
+ {1381, R_0088B0_VGT_VTX_VECT_EJECT_REG, 1, 377},
+ {1404, R_0088C4_VGT_CACHE_INVALIDATION, 3, 378},
+ {1427, R_0088C8_VGT_ESGS_RING_SIZE},
+ {1446, R_0088CC_VGT_GSVS_RING_SIZE},
+ {1465, R_0088D4_VGT_GS_VERTEX_REUSE, 1, 381},
+ {1485, R_008958_VGT_PRIMITIVE_TYPE, 1, 382},
+ {1504, R_00895C_VGT_INDEX_TYPE, 1, 383},
+ {1519, R_008960_VGT_STRMOUT_BUFFER_FILLED_SIZE_0},
+ {1552, R_008964_VGT_STRMOUT_BUFFER_FILLED_SIZE_1},
+ {1585, R_008968_VGT_STRMOUT_BUFFER_FILLED_SIZE_2},
+ {1618, R_00896C_VGT_STRMOUT_BUFFER_FILLED_SIZE_3},
+ {1651, R_008970_VGT_NUM_INDICES},
+ {1667, R_008974_VGT_NUM_INSTANCES},
+ {1685, R_008988_VGT_TF_RING_SIZE, 1, 384},
+ {1702, R_0089B0_VGT_HS_OFFCHIP_PARAM, 1, 385},
+ {1723, R_0089B8_VGT_TF_MEMORY_BASE},
+ {1742, R_008A14_PA_CL_ENHANCE, 4, 386},
+ {1756, R_008A60_PA_SU_LINE_STIPPLE_VALUE, 1, 390},
+ {1781, R_008B10_PA_SC_LINE_STIPPLE_STATE, 2, 391},
+ {1806, R_008670_CP_STALLED_STAT3, 19, 393},
+ {1823, R_008674_CP_STALLED_STAT1, 16, 412},
+ {1840, R_008678_CP_STALLED_STAT2, 29, 428},
+ {1857, R_008680_CP_STAT, 23, 457},
+ {1865, R_030800_GRBM_GFX_INDEX, 6, 480},
+ {1427, R_030900_VGT_ESGS_RING_SIZE},
+ {1446, R_030904_VGT_GSVS_RING_SIZE},
+ {1485, R_030908_VGT_PRIMITIVE_TYPE, 1, 486},
+ {1504, R_03090C_VGT_INDEX_TYPE, 1, 487},
+ {1519, R_030910_VGT_STRMOUT_BUFFER_FILLED_SIZE_0},
+ {1552, R_030914_VGT_STRMOUT_BUFFER_FILLED_SIZE_1},
+ {1585, R_030918_VGT_STRMOUT_BUFFER_FILLED_SIZE_2},
+ {1618, R_03091C_VGT_STRMOUT_BUFFER_FILLED_SIZE_3},
+ {1651, R_030930_VGT_NUM_INDICES},
+ {1667, R_030934_VGT_NUM_INSTANCES},
+ {1685, R_030938_VGT_TF_RING_SIZE, 1, 488},
+ {1702, R_03093C_VGT_HS_OFFCHIP_PARAM, 2, 489},
+ {1723, R_030940_VGT_TF_MEMORY_BASE},
+ {1756, R_030A00_PA_SU_LINE_STIPPLE_VALUE, 1, 491},
+ {1781, R_030A04_PA_SC_LINE_STIPPLE_STATE, 2, 492},
+ {1880, R_030A10_PA_SC_SCREEN_EXTENT_MIN_0, 2, 494},
+ {1906, R_030A14_PA_SC_SCREEN_EXTENT_MAX_0, 2, 496},
+ {1932, R_030A18_PA_SC_SCREEN_EXTENT_MIN_1, 2, 498},
+ {1958, R_030A2C_PA_SC_SCREEN_EXTENT_MAX_1, 2, 500},
+ {1984, R_008BF0_PA_SC_ENHANCE, 9, 502},
+ {1998, R_008C08_SQC_CACHES, 2, 511},
+ {1998, R_030D20_SQC_CACHES, 3, 513},
+ {2009, R_008C0C_SQ_RANDOM_WAVE_PRI, 3, 516},
+ {2028, R_008DFC_SQ_EXP_0, 6, 519},
+ {2037, R_030E00_TA_CS_BC_BASE_ADDR},
+ {2056, R_030E04_TA_CS_BC_BASE_ADDR_HI, 1, 525},
+ {2078, R_030F00_DB_OCCLUSION_COUNT0_LOW},
+ {2102, R_008F00_SQ_BUF_RSRC_WORD0},
+ {2120, R_030F04_DB_OCCLUSION_COUNT0_HI, 1, 526},
+ {2143, R_008F04_SQ_BUF_RSRC_WORD1, 4, 527},
+ {2161, R_030F08_DB_OCCLUSION_COUNT1_LOW},
+ {2185, R_008F08_SQ_BUF_RSRC_WORD2},
+ {2203, R_030F0C_DB_OCCLUSION_COUNT1_HI, 1, 531},
+ {2226, R_008F0C_SQ_BUF_RSRC_WORD3, 14, 532},
+ {2244, R_030F10_DB_OCCLUSION_COUNT2_LOW},
+ {2268, R_008F10_SQ_IMG_RSRC_WORD0},
+ {2286, R_030F14_DB_OCCLUSION_COUNT2_HI, 1, 546},
+ {2309, R_008F14_SQ_IMG_RSRC_WORD1, 5, 547},
+ {2327, R_030F18_DB_OCCLUSION_COUNT3_LOW},
+ {2351, R_008F18_SQ_IMG_RSRC_WORD2, 4, 552},
+ {2369, R_030F1C_DB_OCCLUSION_COUNT3_HI, 1, 556},
+ {2392, R_008F1C_SQ_IMG_RSRC_WORD3, 11, 557},
+ {2410, R_008F20_SQ_IMG_RSRC_WORD4, 2, 568},
+ {2428, R_008F24_SQ_IMG_RSRC_WORD5, 2, 570},
+ {2446, R_008F28_SQ_IMG_RSRC_WORD6, 8, 572},
+ {2464, R_008F2C_SQ_IMG_RSRC_WORD7},
+ {2482, R_008F30_SQ_IMG_SAMP_WORD0, 14, 580},
+ {2500, R_008F34_SQ_IMG_SAMP_WORD1, 4, 594},
+ {2518, R_008F38_SQ_IMG_SAMP_WORD2, 10, 598},
+ {2536, R_008F3C_SQ_IMG_SAMP_WORD3, 3, 608},
+ {2554, R_0090DC_SPI_DYN_GPR_LOCK_EN, 5, 611},
+ {2574, R_0090E0_SPI_STATIC_THREAD_MGMT_1, 2, 616},
+ {2599, R_0090E4_SPI_STATIC_THREAD_MGMT_2, 2, 618},
+ {2624, R_0090E8_SPI_STATIC_THREAD_MGMT_3, 1, 620},
+ {2649, R_0090EC_SPI_PS_MAX_WAVE_ID, 1, 621},
+ {2649, R_0090E8_SPI_PS_MAX_WAVE_ID, 1, 622},
+ {2668, R_0090F0_SPI_ARB_PRIORITY, 3, 623},
+ {2668, R_00C700_SPI_ARB_PRIORITY, 8, 626},
+ {2685, R_0090F4_SPI_ARB_CYCLES_0, 2, 634},
+ {2702, R_0090F8_SPI_ARB_CYCLES_1, 1, 636},
+ {2719, R_008F40_SQ_FLAT_SCRATCH_WORD0, 1, 637},
+ {2741, R_008F44_SQ_FLAT_SCRATCH_WORD1, 1, 638},
+ {2763, R_030FF8_DB_ZPASS_COUNT_LOW},
+ {2782, R_030FFC_DB_ZPASS_COUNT_HI, 1, 639},
+ {2800, R_009100_SPI_CONFIG_CNTL, 5, 640},
+ {2816, R_00913C_SPI_CONFIG_CNTL_1, 5, 645},
+ {2834, R_00936C_SPI_RESOURCE_RESERVE_CU_AB_0, 12, 650},
+ {2037, R_00950C_TA_CS_BC_BASE_ADDR},
+ {2863, R_009858_DB_SUBTILE_CONTROL, 10, 662},
+ {2882, R_0098F8_GB_ADDR_CONFIG, 9, 672},
+ {2897, R_009910_GB_TILE_MODE0, 10, 681},
+ {2911, R_009914_GB_TILE_MODE1, 10, 681},
+ {2925, R_009918_GB_TILE_MODE2, 10, 681},
+ {2939, R_00991C_GB_TILE_MODE3, 10, 681},
+ {2953, R_009920_GB_TILE_MODE4, 10, 681},
+ {2967, R_009924_GB_TILE_MODE5, 10, 681},
+ {2981, R_009928_GB_TILE_MODE6, 10, 681},
+ {2995, R_00992C_GB_TILE_MODE7, 10, 681},
+ {3009, R_009930_GB_TILE_MODE8, 10, 681},
+ {3023, R_009934_GB_TILE_MODE9, 10, 681},
+ {3037, R_009938_GB_TILE_MODE10, 10, 681},
+ {3052, R_00993C_GB_TILE_MODE11, 10, 681},
+ {3067, R_009940_GB_TILE_MODE12, 10, 681},
+ {3082, R_009944_GB_TILE_MODE13, 10, 681},
+ {3097, R_009948_GB_TILE_MODE14, 10, 681},
+ {3112, R_00994C_GB_TILE_MODE15, 10, 681},
+ {3127, R_009950_GB_TILE_MODE16, 10, 681},
+ {3142, R_009954_GB_TILE_MODE17, 10, 681},
+ {3157, R_009958_GB_TILE_MODE18, 10, 681},
+ {3172, R_00995C_GB_TILE_MODE19, 10, 681},
+ {3187, R_009960_GB_TILE_MODE20, 10, 681},
+ {3202, R_009964_GB_TILE_MODE21, 10, 681},
+ {3217, R_009968_GB_TILE_MODE22, 10, 681},
+ {3232, R_00996C_GB_TILE_MODE23, 10, 681},
+ {3247, R_009970_GB_TILE_MODE24, 10, 681},
+ {3262, R_009974_GB_TILE_MODE25, 10, 681},
+ {3277, R_009978_GB_TILE_MODE26, 10, 681},
+ {3292, R_00997C_GB_TILE_MODE27, 10, 681},
+ {3307, R_009980_GB_TILE_MODE28, 10, 681},
+ {3322, R_009984_GB_TILE_MODE29, 10, 681},
+ {3337, R_009988_GB_TILE_MODE30, 10, 681},
+ {3352, R_00998C_GB_TILE_MODE31, 10, 681},
+ {3367, R_009990_GB_MACROTILE_MODE0, 4, 691},
+ {3386, R_009994_GB_MACROTILE_MODE1, 4, 691},
+ {3405, R_009998_GB_MACROTILE_MODE2, 4, 691},
+ {3424, R_00999C_GB_MACROTILE_MODE3, 4, 691},
+ {3443, R_0099A0_GB_MACROTILE_MODE4, 4, 691},
+ {3462, R_0099A4_GB_MACROTILE_MODE5, 4, 691},
+ {3481, R_0099A8_GB_MACROTILE_MODE6, 4, 691},
+ {3500, R_0099AC_GB_MACROTILE_MODE7, 4, 691},
+ {3519, R_0099B0_GB_MACROTILE_MODE8, 4, 691},
+ {3538, R_0099B4_GB_MACROTILE_MODE9, 4, 691},
+ {3557, R_0099B8_GB_MACROTILE_MODE10, 4, 691},
+ {3577, R_0099BC_GB_MACROTILE_MODE11, 4, 691},
+ {3597, R_0099C0_GB_MACROTILE_MODE12, 4, 691},
+ {3617, R_0099C4_GB_MACROTILE_MODE13, 4, 691},
+ {3637, R_0099C8_GB_MACROTILE_MODE14, 4, 691},
+ {3657, R_0099CC_GB_MACROTILE_MODE15, 4, 691},
+ {3677, R_00B000_SPI_SHADER_TBA_LO_PS},
+ {3698, R_00B004_SPI_SHADER_TBA_HI_PS, 1, 695},
+ {3719, R_00B008_SPI_SHADER_TMA_LO_PS},
+ {3740, R_00B00C_SPI_SHADER_TMA_HI_PS, 1, 696},
+ {3761, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, 3, 697},
+ {3785, R_00B020_SPI_SHADER_PGM_LO_PS},
+ {3806, R_00B024_SPI_SHADER_PGM_HI_PS, 1, 700},
+ {3827, R_00B028_SPI_SHADER_PGM_RSRC1_PS, 11, 701},
+ {3851, R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 7, 712},
+ {3875, R_00B030_SPI_SHADER_USER_DATA_PS_0},
+ {3901, R_00B034_SPI_SHADER_USER_DATA_PS_1},
+ {3927, R_00B038_SPI_SHADER_USER_DATA_PS_2},
+ {3953, R_00B03C_SPI_SHADER_USER_DATA_PS_3},
+ {3979, R_00B040_SPI_SHADER_USER_DATA_PS_4},
+ {4005, R_00B044_SPI_SHADER_USER_DATA_PS_5},
+ {4031, R_00B048_SPI_SHADER_USER_DATA_PS_6},
+ {4057, R_00B04C_SPI_SHADER_USER_DATA_PS_7},
+ {4083, R_00B050_SPI_SHADER_USER_DATA_PS_8},
+ {4109, R_00B054_SPI_SHADER_USER_DATA_PS_9},
+ {4135, R_00B058_SPI_SHADER_USER_DATA_PS_10},
+ {4162, R_00B05C_SPI_SHADER_USER_DATA_PS_11},
+ {4189, R_00B060_SPI_SHADER_USER_DATA_PS_12},
+ {4216, R_00B064_SPI_SHADER_USER_DATA_PS_13},
+ {4243, R_00B068_SPI_SHADER_USER_DATA_PS_14},
+ {4270, R_00B06C_SPI_SHADER_USER_DATA_PS_15},
+ {4297, R_00B100_SPI_SHADER_TBA_LO_VS},
+ {4318, R_00B104_SPI_SHADER_TBA_HI_VS, 1, 719},
+ {4339, R_00B108_SPI_SHADER_TMA_LO_VS},
+ {4360, R_00B10C_SPI_SHADER_TMA_HI_VS, 1, 720},
+ {4381, R_00B118_SPI_SHADER_PGM_RSRC3_VS, 3, 721},
+ {4405, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, 1, 724},
+ {4430, R_00B120_SPI_SHADER_PGM_LO_VS},
+ {4451, R_00B124_SPI_SHADER_PGM_HI_VS, 1, 725},
+ {4472, R_00B128_SPI_SHADER_PGM_RSRC1_VS, 12, 726},
+ {4496, R_00B12C_SPI_SHADER_PGM_RSRC2_VS, 12, 738},
+ {4520, R_00B130_SPI_SHADER_USER_DATA_VS_0},
+ {4546, R_00B134_SPI_SHADER_USER_DATA_VS_1},
+ {4572, R_00B138_SPI_SHADER_USER_DATA_VS_2},
+ {4598, R_00B13C_SPI_SHADER_USER_DATA_VS_3},
+ {4624, R_00B140_SPI_SHADER_USER_DATA_VS_4},
+ {4650, R_00B144_SPI_SHADER_USER_DATA_VS_5},
+ {4676, R_00B148_SPI_SHADER_USER_DATA_VS_6},
+ {4702, R_00B14C_SPI_SHADER_USER_DATA_VS_7},
+ {4728, R_00B150_SPI_SHADER_USER_DATA_VS_8},
+ {4754, R_00B154_SPI_SHADER_USER_DATA_VS_9},
+ {4780, R_00B158_SPI_SHADER_USER_DATA_VS_10},
+ {4807, R_00B15C_SPI_SHADER_USER_DATA_VS_11},
+ {4834, R_00B160_SPI_SHADER_USER_DATA_VS_12},
+ {4861, R_00B164_SPI_SHADER_USER_DATA_VS_13},
+ {4888, R_00B168_SPI_SHADER_USER_DATA_VS_14},
+ {4915, R_00B16C_SPI_SHADER_USER_DATA_VS_15},
+ {4942, R_00B200_SPI_SHADER_TBA_LO_GS},
+ {4963, R_00B204_SPI_SHADER_TBA_HI_GS, 1, 750},
+ {4984, R_00B208_SPI_SHADER_TMA_LO_GS},
+ {5005, R_00B20C_SPI_SHADER_TMA_HI_GS, 1, 751},
+ {5026, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, 4, 752},
+ {5050, R_00B220_SPI_SHADER_PGM_LO_GS},
+ {5071, R_00B224_SPI_SHADER_PGM_HI_GS, 1, 756},
+ {5092, R_00B228_SPI_SHADER_PGM_RSRC1_GS, 11, 757},
+ {5116, R_00B22C_SPI_SHADER_PGM_RSRC2_GS, 5, 768},
+ {5140, R_00B230_SPI_SHADER_USER_DATA_GS_0},
+ {5166, R_00B234_SPI_SHADER_USER_DATA_GS_1},
+ {5192, R_00B238_SPI_SHADER_USER_DATA_GS_2},
+ {5218, R_00B23C_SPI_SHADER_USER_DATA_GS_3},
+ {5244, R_00B240_SPI_SHADER_USER_DATA_GS_4},
+ {5270, R_00B244_SPI_SHADER_USER_DATA_GS_5},
+ {5296, R_00B248_SPI_SHADER_USER_DATA_GS_6},
+ {5322, R_00B24C_SPI_SHADER_USER_DATA_GS_7},
+ {5348, R_00B250_SPI_SHADER_USER_DATA_GS_8},
+ {5374, R_00B254_SPI_SHADER_USER_DATA_GS_9},
+ {5400, R_00B258_SPI_SHADER_USER_DATA_GS_10},
+ {5427, R_00B25C_SPI_SHADER_USER_DATA_GS_11},
+ {5454, R_00B260_SPI_SHADER_USER_DATA_GS_12},
+ {5481, R_00B264_SPI_SHADER_USER_DATA_GS_13},
+ {5508, R_00B268_SPI_SHADER_USER_DATA_GS_14},
+ {5535, R_00B26C_SPI_SHADER_USER_DATA_GS_15},
+ {5562, R_00B300_SPI_SHADER_TBA_LO_ES},
+ {5583, R_00B304_SPI_SHADER_TBA_HI_ES, 1, 773},
+ {5604, R_00B308_SPI_SHADER_TMA_LO_ES},
+ {5625, R_00B30C_SPI_SHADER_TMA_HI_ES, 1, 774},
+ {5646, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, 4, 775},
+ {5670, R_00B320_SPI_SHADER_PGM_LO_ES},
+ {5691, R_00B324_SPI_SHADER_PGM_HI_ES, 1, 779},
+ {5712, R_00B328_SPI_SHADER_PGM_RSRC1_ES, 12, 780},
+ {5736, R_00B32C_SPI_SHADER_PGM_RSRC2_ES, 7, 792},
+ {5760, R_00B330_SPI_SHADER_USER_DATA_ES_0},
+ {5786, R_00B334_SPI_SHADER_USER_DATA_ES_1},
+ {5812, R_00B338_SPI_SHADER_USER_DATA_ES_2},
+ {5838, R_00B33C_SPI_SHADER_USER_DATA_ES_3},
+ {5864, R_00B340_SPI_SHADER_USER_DATA_ES_4},
+ {5890, R_00B344_SPI_SHADER_USER_DATA_ES_5},
+ {5916, R_00B348_SPI_SHADER_USER_DATA_ES_6},
+ {5942, R_00B34C_SPI_SHADER_USER_DATA_ES_7},
+ {5968, R_00B350_SPI_SHADER_USER_DATA_ES_8},
+ {5994, R_00B354_SPI_SHADER_USER_DATA_ES_9},
+ {6020, R_00B358_SPI_SHADER_USER_DATA_ES_10},
+ {6047, R_00B35C_SPI_SHADER_USER_DATA_ES_11},
+ {6074, R_00B360_SPI_SHADER_USER_DATA_ES_12},
+ {6101, R_00B364_SPI_SHADER_USER_DATA_ES_13},
+ {6128, R_00B368_SPI_SHADER_USER_DATA_ES_14},
+ {6155, R_00B36C_SPI_SHADER_USER_DATA_ES_15},
+ {6182, R_00B400_SPI_SHADER_TBA_LO_HS},
+ {6203, R_00B404_SPI_SHADER_TBA_HI_HS, 1, 799},
+ {6224, R_00B408_SPI_SHADER_TMA_LO_HS},
+ {6245, R_00B40C_SPI_SHADER_TMA_HI_HS, 1, 800},
+ {6266, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 3, 801},
+ {6290, R_00B420_SPI_SHADER_PGM_LO_HS},
+ {6311, R_00B424_SPI_SHADER_PGM_HI_HS, 1, 804},
+ {6332, R_00B428_SPI_SHADER_PGM_RSRC1_HS, 10, 805},
+ {6356, R_00B42C_SPI_SHADER_PGM_RSRC2_HS, 7, 815},
+ {6380, R_00B430_SPI_SHADER_USER_DATA_HS_0},
+ {6406, R_00B434_SPI_SHADER_USER_DATA_HS_1},
+ {6432, R_00B438_SPI_SHADER_USER_DATA_HS_2},
+ {6458, R_00B43C_SPI_SHADER_USER_DATA_HS_3},
+ {6484, R_00B440_SPI_SHADER_USER_DATA_HS_4},
+ {6510, R_00B444_SPI_SHADER_USER_DATA_HS_5},
+ {6536, R_00B448_SPI_SHADER_USER_DATA_HS_6},
+ {6562, R_00B44C_SPI_SHADER_USER_DATA_HS_7},
+ {6588, R_00B450_SPI_SHADER_USER_DATA_HS_8},
+ {6614, R_00B454_SPI_SHADER_USER_DATA_HS_9},
+ {6640, R_00B458_SPI_SHADER_USER_DATA_HS_10},
+ {6667, R_00B45C_SPI_SHADER_USER_DATA_HS_11},
+ {6694, R_00B460_SPI_SHADER_USER_DATA_HS_12},
+ {6721, R_00B464_SPI_SHADER_USER_DATA_HS_13},
+ {6748, R_00B468_SPI_SHADER_USER_DATA_HS_14},
+ {6775, R_00B46C_SPI_SHADER_USER_DATA_HS_15},
+ {6802, R_00B500_SPI_SHADER_TBA_LO_LS},
+ {6823, R_00B504_SPI_SHADER_TBA_HI_LS, 1, 822},
+ {6844, R_00B508_SPI_SHADER_TMA_LO_LS},
+ {6865, R_00B50C_SPI_SHADER_TMA_HI_LS, 1, 823},
+ {6886, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, 4, 824},
+ {6910, R_00B520_SPI_SHADER_PGM_LO_LS},
+ {6931, R_00B524_SPI_SHADER_PGM_HI_LS, 1, 828},
+ {6952, R_00B528_SPI_SHADER_PGM_RSRC1_LS, 11, 829},
+ {6976, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, 6, 840},
+ {7000, R_00B530_SPI_SHADER_USER_DATA_LS_0},
+ {7026, R_00B534_SPI_SHADER_USER_DATA_LS_1},
+ {7052, R_00B538_SPI_SHADER_USER_DATA_LS_2},
+ {7078, R_00B53C_SPI_SHADER_USER_DATA_LS_3},
+ {7104, R_00B540_SPI_SHADER_USER_DATA_LS_4},
+ {7130, R_00B544_SPI_SHADER_USER_DATA_LS_5},
+ {7156, R_00B548_SPI_SHADER_USER_DATA_LS_6},
+ {7182, R_00B54C_SPI_SHADER_USER_DATA_LS_7},
+ {7208, R_00B550_SPI_SHADER_USER_DATA_LS_8},
+ {7234, R_00B554_SPI_SHADER_USER_DATA_LS_9},
+ {7260, R_00B558_SPI_SHADER_USER_DATA_LS_10},
+ {7287, R_00B55C_SPI_SHADER_USER_DATA_LS_11},
+ {7314, R_00B560_SPI_SHADER_USER_DATA_LS_12},
+ {7341, R_00B564_SPI_SHADER_USER_DATA_LS_13},
+ {7368, R_00B568_SPI_SHADER_USER_DATA_LS_14},
+ {7395, R_00B56C_SPI_SHADER_USER_DATA_LS_15},
+ {7422, R_00B800_COMPUTE_DISPATCH_INITIATOR, 12, 846},
+ {7449, R_00B804_COMPUTE_DIM_X},
+ {7463, R_00B808_COMPUTE_DIM_Y},
+ {7477, R_00B80C_COMPUTE_DIM_Z},
+ {7491, R_00B810_COMPUTE_START_X},
+ {7507, R_00B814_COMPUTE_START_Y},
+ {7523, R_00B818_COMPUTE_START_Z},
+ {7539, R_00B81C_COMPUTE_NUM_THREAD_X, 2, 858},
+ {7560, R_00B820_COMPUTE_NUM_THREAD_Y, 2, 860},
+ {7581, R_00B824_COMPUTE_NUM_THREAD_Z, 2, 862},
+ {7602, R_00B82C_COMPUTE_MAX_WAVE_ID, 1, 864},
+ {7622, R_00B828_COMPUTE_PIPELINESTAT_ENABLE, 1, 865},
+ {7650, R_00B82C_COMPUTE_PERFCOUNT_ENABLE, 1, 866},
+ {7675, R_00B830_COMPUTE_PGM_LO},
+ {7690, R_00B834_COMPUTE_PGM_HI, 2, 867},
+ {7705, R_00B838_COMPUTE_TBA_LO},
+ {7720, R_00B83C_COMPUTE_TBA_HI, 1, 869},
+ {7735, R_00B840_COMPUTE_TMA_LO},
+ {7750, R_00B844_COMPUTE_TMA_HI, 1, 870},
+ {7765, R_00B848_COMPUTE_PGM_RSRC1, 10, 871},
+ {7783, R_00B84C_COMPUTE_PGM_RSRC2, 11, 881},
+ {7801, R_00B850_COMPUTE_VMID, 1, 892},
+ {7814, R_00B854_COMPUTE_RESOURCE_LIMITS, 7, 893},
+ {7838, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0, 2, 900},
+ {7869, R_00B85C_COMPUTE_STATIC_THREAD_MGMT_SE1, 2, 902},
+ {7900, R_00B860_COMPUTE_TMPRING_SIZE, 2, 904},
+ {7921, R_00B864_COMPUTE_STATIC_THREAD_MGMT_SE2, 2, 906},
+ {7952, R_00B868_COMPUTE_STATIC_THREAD_MGMT_SE3, 2, 908},
+ {7983, R_00B86C_COMPUTE_RESTART_X},
+ {8001, R_00B870_COMPUTE_RESTART_Y},
+ {8019, R_00B874_COMPUTE_RESTART_Z},
+ {8037, R_00B87C_COMPUTE_MISC_RESERVED, 5, 910},
+ {8059, R_00B880_COMPUTE_DISPATCH_ID},
+ {8079, R_00B884_COMPUTE_THREADGROUP_ID},
+ {8102, R_00B888_COMPUTE_RELAUNCH, 3, 915},
+ {8119, R_00B88C_COMPUTE_WAVE_RESTORE_ADDR_LO},
+ {8148, R_00B890_COMPUTE_WAVE_RESTORE_ADDR_HI, 1, 918},
+ {8177, R_00B894_COMPUTE_WAVE_RESTORE_CONTROL, 2, 919},
+ {8206, R_00B900_COMPUTE_USER_DATA_0},
+ {8226, R_00B904_COMPUTE_USER_DATA_1},
+ {8246, R_00B908_COMPUTE_USER_DATA_2},
+ {8266, R_00B90C_COMPUTE_USER_DATA_3},
+ {8286, R_00B910_COMPUTE_USER_DATA_4},
+ {8306, R_00B914_COMPUTE_USER_DATA_5},
+ {8326, R_00B918_COMPUTE_USER_DATA_6},
+ {8346, R_00B91C_COMPUTE_USER_DATA_7},
+ {8366, R_00B920_COMPUTE_USER_DATA_8},
+ {8386, R_00B924_COMPUTE_USER_DATA_9},
+ {8406, R_00B928_COMPUTE_USER_DATA_10},
+ {8427, R_00B92C_COMPUTE_USER_DATA_11},
+ {8448, R_00B930_COMPUTE_USER_DATA_12},
+ {8469, R_00B934_COMPUTE_USER_DATA_13},
+ {8490, R_00B938_COMPUTE_USER_DATA_14},
+ {8511, R_00B93C_COMPUTE_USER_DATA_15},
+ {8532, R_00B9FC_COMPUTE_NOWHERE},
+ {8548, R_034000_CPG_PERFCOUNTER1_LO},
+ {8568, R_034004_CPG_PERFCOUNTER1_HI},
+ {8588, R_034008_CPG_PERFCOUNTER0_LO},
+ {8608, R_03400C_CPG_PERFCOUNTER0_HI},
+ {8628, R_034010_CPC_PERFCOUNTER1_LO},
+ {8648, R_034014_CPC_PERFCOUNTER1_HI},
+ {8668, R_034018_CPC_PERFCOUNTER0_LO},
+ {8688, R_03401C_CPC_PERFCOUNTER0_HI},
+ {8708, R_034020_CPF_PERFCOUNTER1_LO},
+ {8728, R_034024_CPF_PERFCOUNTER1_HI},
+ {8748, R_034028_CPF_PERFCOUNTER0_LO},
+ {8768, R_03402C_CPF_PERFCOUNTER0_HI},
+ {8788, R_034100_GRBM_PERFCOUNTER0_LO},
+ {8809, R_034104_GRBM_PERFCOUNTER0_HI},
+ {8830, R_03410C_GRBM_PERFCOUNTER1_LO},
+ {8851, R_034110_GRBM_PERFCOUNTER1_HI},
+ {8872, R_034114_GRBM_SE0_PERFCOUNTER_LO},
+ {8896, R_034118_GRBM_SE0_PERFCOUNTER_HI},
+ {8920, R_03411C_GRBM_SE1_PERFCOUNTER_LO},
+ {8944, R_034120_GRBM_SE1_PERFCOUNTER_HI},
+ {8968, R_034124_GRBM_SE2_PERFCOUNTER_LO},
+ {8992, R_034128_GRBM_SE2_PERFCOUNTER_HI},
+ {9016, R_03412C_GRBM_SE3_PERFCOUNTER_LO},
+ {9040, R_034130_GRBM_SE3_PERFCOUNTER_HI},
+ {9064, R_034200_WD_PERFCOUNTER0_LO},
+ {9083, R_034204_WD_PERFCOUNTER0_HI},
+ {9102, R_034208_WD_PERFCOUNTER1_LO},
+ {9121, R_03420C_WD_PERFCOUNTER1_HI},
+ {9140, R_034210_WD_PERFCOUNTER2_LO},
+ {9159, R_034214_WD_PERFCOUNTER2_HI},
+ {9178, R_034218_WD_PERFCOUNTER3_LO},
+ {9197, R_03421C_WD_PERFCOUNTER3_HI},
+ {9216, R_034220_IA_PERFCOUNTER0_LO},
+ {9235, R_034224_IA_PERFCOUNTER0_HI},
+ {9254, R_034228_IA_PERFCOUNTER1_LO},
+ {9273, R_03422C_IA_PERFCOUNTER1_HI},
+ {9292, R_034230_IA_PERFCOUNTER2_LO},
+ {9311, R_034234_IA_PERFCOUNTER2_HI},
+ {9330, R_034238_IA_PERFCOUNTER3_LO},
+ {9349, R_03423C_IA_PERFCOUNTER3_HI},
+ {9368, R_034240_VGT_PERFCOUNTER0_LO},
+ {9388, R_034244_VGT_PERFCOUNTER0_HI},
+ {9408, R_034248_VGT_PERFCOUNTER1_LO},
+ {9428, R_03424C_VGT_PERFCOUNTER1_HI},
+ {9448, R_034250_VGT_PERFCOUNTER2_LO},
+ {9468, R_034254_VGT_PERFCOUNTER2_HI},
+ {9488, R_034258_VGT_PERFCOUNTER3_LO},
+ {9508, R_03425C_VGT_PERFCOUNTER3_HI},
+ {9528, R_034400_PA_SU_PERFCOUNTER0_LO},
+ {9550, R_034404_PA_SU_PERFCOUNTER0_HI, 1, 921},
+ {9572, R_034408_PA_SU_PERFCOUNTER1_LO},
+ {9594, R_03440C_PA_SU_PERFCOUNTER1_HI, 1, 921},
+ {9616, R_034410_PA_SU_PERFCOUNTER2_LO},
+ {9638, R_034414_PA_SU_PERFCOUNTER2_HI, 1, 921},
+ {9660, R_034418_PA_SU_PERFCOUNTER3_LO},
+ {9682, R_03441C_PA_SU_PERFCOUNTER3_HI, 1, 921},
+ {9704, R_034500_PA_SC_PERFCOUNTER0_LO},
+ {9726, R_034504_PA_SC_PERFCOUNTER0_HI},
+ {9748, R_034508_PA_SC_PERFCOUNTER1_LO},
+ {9770, R_03450C_PA_SC_PERFCOUNTER1_HI},
+ {9792, R_034510_PA_SC_PERFCOUNTER2_LO},
+ {9814, R_034514_PA_SC_PERFCOUNTER2_HI},
+ {9836, R_034518_PA_SC_PERFCOUNTER3_LO},
+ {9858, R_03451C_PA_SC_PERFCOUNTER3_HI},
+ {9880, R_034520_PA_SC_PERFCOUNTER4_LO},
+ {9902, R_034524_PA_SC_PERFCOUNTER4_HI},
+ {9924, R_034528_PA_SC_PERFCOUNTER5_LO},
+ {9946, R_03452C_PA_SC_PERFCOUNTER5_HI},
+ {9968, R_034530_PA_SC_PERFCOUNTER6_LO},
+ {9990, R_034534_PA_SC_PERFCOUNTER6_HI},
+ {10012, R_034538_PA_SC_PERFCOUNTER7_LO},
+ {10034, R_03453C_PA_SC_PERFCOUNTER7_HI},
+ {10056, R_034600_SPI_PERFCOUNTER0_HI},
+ {10076, R_034604_SPI_PERFCOUNTER0_LO},
+ {10096, R_034608_SPI_PERFCOUNTER1_HI},
+ {10116, R_03460C_SPI_PERFCOUNTER1_LO},
+ {10136, R_034610_SPI_PERFCOUNTER2_HI},
+ {10156, R_034614_SPI_PERFCOUNTER2_LO},
+ {10176, R_034618_SPI_PERFCOUNTER3_HI},
+ {10196, R_03461C_SPI_PERFCOUNTER3_LO},
+ {10216, R_034620_SPI_PERFCOUNTER4_HI},
+ {10236, R_034624_SPI_PERFCOUNTER4_LO},
+ {10256, R_034628_SPI_PERFCOUNTER5_HI},
+ {10276, R_03462C_SPI_PERFCOUNTER5_LO},
+ {10296, R_034700_SQ_PERFCOUNTER0_LO},
+ {10315, R_034704_SQ_PERFCOUNTER0_HI},
+ {10334, R_034708_SQ_PERFCOUNTER1_LO},
+ {10353, R_03470C_SQ_PERFCOUNTER1_HI},
+ {10372, R_034710_SQ_PERFCOUNTER2_LO},
+ {10391, R_034714_SQ_PERFCOUNTER2_HI},
+ {10410, R_034718_SQ_PERFCOUNTER3_LO},
+ {10429, R_03471C_SQ_PERFCOUNTER3_HI},
+ {10448, R_034720_SQ_PERFCOUNTER4_LO},
+ {10467, R_034724_SQ_PERFCOUNTER4_HI},
+ {10486, R_034728_SQ_PERFCOUNTER5_LO},
+ {10505, R_03472C_SQ_PERFCOUNTER5_HI},
+ {10524, R_034730_SQ_PERFCOUNTER6_LO},
+ {10543, R_034734_SQ_PERFCOUNTER6_HI},
+ {10562, R_034738_SQ_PERFCOUNTER7_LO},
+ {10581, R_03473C_SQ_PERFCOUNTER7_HI},
+ {10600, R_034740_SQ_PERFCOUNTER8_LO},
+ {10619, R_034744_SQ_PERFCOUNTER8_HI},
+ {10638, R_034748_SQ_PERFCOUNTER9_LO},
+ {10657, R_03474C_SQ_PERFCOUNTER9_HI},
+ {10676, R_034750_SQ_PERFCOUNTER10_LO},
+ {10696, R_034754_SQ_PERFCOUNTER10_HI},
+ {10716, R_034758_SQ_PERFCOUNTER11_LO},
+ {10736, R_03475C_SQ_PERFCOUNTER11_HI},
+ {10756, R_034760_SQ_PERFCOUNTER12_LO},
+ {10776, R_034764_SQ_PERFCOUNTER12_HI},
+ {10796, R_034768_SQ_PERFCOUNTER13_LO},
+ {10816, R_03476C_SQ_PERFCOUNTER13_HI},
+ {10836, R_034770_SQ_PERFCOUNTER14_LO},
+ {10856, R_034774_SQ_PERFCOUNTER14_HI},
+ {10876, R_034778_SQ_PERFCOUNTER15_LO},
+ {10896, R_03477C_SQ_PERFCOUNTER15_HI},
+ {10916, R_034900_SX_PERFCOUNTER0_LO},
+ {10935, R_034904_SX_PERFCOUNTER0_HI},
+ {10954, R_034908_SX_PERFCOUNTER1_LO},
+ {10973, R_03490C_SX_PERFCOUNTER1_HI},
+ {10992, R_034910_SX_PERFCOUNTER2_LO},
+ {11011, R_034914_SX_PERFCOUNTER2_HI},
+ {11030, R_034918_SX_PERFCOUNTER3_LO},
+ {11049, R_03491C_SX_PERFCOUNTER3_HI},
+ {11068, R_034A00_GDS_PERFCOUNTER0_LO},
+ {11088, R_034A04_GDS_PERFCOUNTER0_HI},
+ {11108, R_034A08_GDS_PERFCOUNTER1_LO},
+ {11128, R_034A0C_GDS_PERFCOUNTER1_HI},
+ {11148, R_034A10_GDS_PERFCOUNTER2_LO},
+ {11168, R_034A14_GDS_PERFCOUNTER2_HI},
+ {11188, R_034A18_GDS_PERFCOUNTER3_LO},
+ {11208, R_034A1C_GDS_PERFCOUNTER3_HI},
+ {11228, R_034B00_TA_PERFCOUNTER0_LO},
+ {11247, R_034B04_TA_PERFCOUNTER0_HI},
+ {11266, R_034B08_TA_PERFCOUNTER1_LO},
+ {11285, R_034B0C_TA_PERFCOUNTER1_HI},
+ {11304, R_034C00_TD_PERFCOUNTER0_LO},
+ {11323, R_034C04_TD_PERFCOUNTER0_HI},
+ {11342, R_034C08_TD_PERFCOUNTER1_LO},
+ {11361, R_034C0C_TD_PERFCOUNTER1_HI},
+ {11380, R_034D00_TCP_PERFCOUNTER0_LO},
+ {11400, R_034D04_TCP_PERFCOUNTER0_HI},
+ {11420, R_034D08_TCP_PERFCOUNTER1_LO},
+ {11440, R_034D0C_TCP_PERFCOUNTER1_HI},
+ {11460, R_034D10_TCP_PERFCOUNTER2_LO},
+ {11480, R_034D14_TCP_PERFCOUNTER2_HI},
+ {11500, R_034D18_TCP_PERFCOUNTER3_LO},
+ {11520, R_034D1C_TCP_PERFCOUNTER3_HI},
+ {11540, R_034E00_TCC_PERFCOUNTER0_LO},
+ {11560, R_034E04_TCC_PERFCOUNTER0_HI},
+ {11580, R_034E08_TCC_PERFCOUNTER1_LO},
+ {11600, R_034E0C_TCC_PERFCOUNTER1_HI},
+ {11620, R_034E10_TCC_PERFCOUNTER2_LO},
+ {11640, R_034E14_TCC_PERFCOUNTER2_HI},
+ {11660, R_034E18_TCC_PERFCOUNTER3_LO},
+ {11680, R_034E1C_TCC_PERFCOUNTER3_HI},
+ {11700, R_034E40_TCA_PERFCOUNTER0_LO},
+ {11720, R_034E44_TCA_PERFCOUNTER0_HI},
+ {11740, R_034E48_TCA_PERFCOUNTER1_LO},
+ {11760, R_034E4C_TCA_PERFCOUNTER1_HI},
+ {11780, R_034E50_TCA_PERFCOUNTER2_LO},
+ {11800, R_034E54_TCA_PERFCOUNTER2_HI},
+ {11820, R_034E58_TCA_PERFCOUNTER3_LO},
+ {11840, R_034E5C_TCA_PERFCOUNTER3_HI},
+ {11860, R_035018_CB_PERFCOUNTER0_LO},
+ {11879, R_03501C_CB_PERFCOUNTER0_HI},
+ {11898, R_035020_CB_PERFCOUNTER1_LO},
+ {11917, R_035024_CB_PERFCOUNTER1_HI},
+ {11936, R_035028_CB_PERFCOUNTER2_LO},
+ {11955, R_03502C_CB_PERFCOUNTER2_HI},
+ {11974, R_035030_CB_PERFCOUNTER3_LO},
+ {11993, R_035034_CB_PERFCOUNTER3_HI},
+ {12012, R_035100_DB_PERFCOUNTER0_LO},
+ {12031, R_035104_DB_PERFCOUNTER0_HI},
+ {12050, R_035108_DB_PERFCOUNTER1_LO},
+ {12069, R_03510C_DB_PERFCOUNTER1_HI},
+ {12088, R_035110_DB_PERFCOUNTER2_LO},
+ {12107, R_035114_DB_PERFCOUNTER2_HI},
+ {12126, R_035118_DB_PERFCOUNTER3_LO},
+ {12145, R_03511C_DB_PERFCOUNTER3_HI},
+ {12164, R_035200_RLC_PERFCOUNTER0_LO},
+ {12184, R_035204_RLC_PERFCOUNTER0_HI},
+ {12204, R_035208_RLC_PERFCOUNTER1_LO},
+ {12224, R_03520C_RLC_PERFCOUNTER1_HI},
+ {12244, R_036000_CPG_PERFCOUNTER1_SELECT, 3, 922},
+ {12268, R_036004_CPG_PERFCOUNTER0_SELECT1, 2, 925},
+ {12293, R_036008_CPG_PERFCOUNTER0_SELECT, 3, 922},
+ {12317, R_03600C_CPC_PERFCOUNTER1_SELECT, 3, 927},
+ {12341, R_036010_CPC_PERFCOUNTER0_SELECT1, 2, 930},
+ {12366, R_036014_CPF_PERFCOUNTER1_SELECT, 3, 932},
+ {12390, R_036018_CPF_PERFCOUNTER0_SELECT1, 2, 935},
+ {12415, R_03601C_CPF_PERFCOUNTER0_SELECT, 3, 932},
+ {12439, R_036020_CP_PERFMON_CNTL, 4, 937},
+ {12455, R_036024_CPC_PERFCOUNTER0_SELECT, 3, 927},
+ {12479, R_036100_GRBM_PERFCOUNTER0_SELECT, 19, 941},
+ {12504, R_036104_GRBM_PERFCOUNTER1_SELECT, 19, 941},
+ {12529, R_036108_GRBM_SE0_PERFCOUNTER_SELECT, 12, 960},
+ {12557, R_03610C_GRBM_SE1_PERFCOUNTER_SELECT, 12, 972},
+ {12585, R_036110_GRBM_SE2_PERFCOUNTER_SELECT, 12, 984},
+ {12613, R_036114_GRBM_SE3_PERFCOUNTER_SELECT, 12, 996},
+ {12641, R_036200_WD_PERFCOUNTER0_SELECT, 2, 1008},
+ {12664, R_036204_WD_PERFCOUNTER1_SELECT, 2, 1008},
+ {12687, R_036208_WD_PERFCOUNTER2_SELECT, 2, 1008},
+ {12710, R_03620C_WD_PERFCOUNTER3_SELECT, 2, 1008},
+ {12733, R_036210_IA_PERFCOUNTER0_SELECT, 5, 1010},
+ {12756, R_036214_IA_PERFCOUNTER1_SELECT, 5, 1010},
+ {12779, R_036218_IA_PERFCOUNTER2_SELECT, 5, 1010},
+ {12802, R_03621C_IA_PERFCOUNTER3_SELECT, 5, 1010},
+ {12825, R_036220_IA_PERFCOUNTER0_SELECT1, 4, 1015},
+ {12849, R_036230_VGT_PERFCOUNTER0_SELECT, 5, 1019},
+ {12873, R_036234_VGT_PERFCOUNTER1_SELECT, 5, 1019},
+ {12897, R_036238_VGT_PERFCOUNTER2_SELECT, 5, 1019},
+ {12921, R_03623C_VGT_PERFCOUNTER3_SELECT, 5, 1019},
+ {12945, R_036240_VGT_PERFCOUNTER0_SELECT1, 4, 1024},
+ {12970, R_036244_VGT_PERFCOUNTER1_SELECT1},
+ {12995, R_036250_VGT_PERFCOUNTER_SEID_MASK, 1, 1028},
+ {13021, R_036400_PA_SU_PERFCOUNTER0_SELECT, 3, 1029},
+ {13047, R_036404_PA_SU_PERFCOUNTER0_SELECT1, 2, 1032},
+ {13074, R_036408_PA_SU_PERFCOUNTER1_SELECT, 3, 1029},
+ {13100, R_03640C_PA_SU_PERFCOUNTER1_SELECT1},
+ {13127, R_036410_PA_SU_PERFCOUNTER2_SELECT, 3, 1029},
+ {13153, R_036414_PA_SU_PERFCOUNTER3_SELECT, 3, 1029},
+ {13179, R_036500_PA_SC_PERFCOUNTER0_SELECT, 3, 1034},
+ {13205, R_036504_PA_SC_PERFCOUNTER0_SELECT1, 2, 1037},
+ {13232, R_036508_PA_SC_PERFCOUNTER1_SELECT, 3, 1034},
+ {13258, R_03650C_PA_SC_PERFCOUNTER2_SELECT, 3, 1034},
+ {13284, R_036510_PA_SC_PERFCOUNTER3_SELECT, 3, 1034},
+ {13310, R_036514_PA_SC_PERFCOUNTER4_SELECT, 3, 1034},
+ {13336, R_036518_PA_SC_PERFCOUNTER5_SELECT, 3, 1034},
+ {13362, R_03651C_PA_SC_PERFCOUNTER6_SELECT, 3, 1034},
+ {13388, R_036520_PA_SC_PERFCOUNTER7_SELECT, 3, 1034},
+ {13414, R_036600_SPI_PERFCOUNTER0_SELECT, 3, 1039},
+ {13438, R_036604_SPI_PERFCOUNTER1_SELECT, 3, 1039},
+ {13462, R_036608_SPI_PERFCOUNTER2_SELECT, 3, 1039},
+ {13486, R_03660C_SPI_PERFCOUNTER3_SELECT, 3, 1039},
+ {13510, R_036610_SPI_PERFCOUNTER0_SELECT1, 2, 1042},
+ {13535, R_036614_SPI_PERFCOUNTER1_SELECT1},
+ {13560, R_036618_SPI_PERFCOUNTER2_SELECT1},
+ {13585, R_03661C_SPI_PERFCOUNTER3_SELECT1},
+ {13610, R_036620_SPI_PERFCOUNTER4_SELECT, 3, 1039},
+ {13634, R_036624_SPI_PERFCOUNTER5_SELECT, 3, 1039},
+ {13658, R_036628_SPI_PERFCOUNTER_BINS, 8, 1044},
+ {13679, R_036700_SQ_PERFCOUNTER0_SELECT, 6, 1052},
+ {13702, R_036704_SQ_PERFCOUNTER1_SELECT, 6, 1052},
+ {13725, R_036708_SQ_PERFCOUNTER2_SELECT, 6, 1052},
+ {13748, R_03670C_SQ_PERFCOUNTER3_SELECT, 6, 1052},
+ {13771, R_036710_SQ_PERFCOUNTER4_SELECT, 6, 1052},
+ {13794, R_036714_SQ_PERFCOUNTER5_SELECT, 6, 1052},
+ {13817, R_036718_SQ_PERFCOUNTER6_SELECT, 6, 1052},
+ {13840, R_03671C_SQ_PERFCOUNTER7_SELECT, 6, 1052},
+ {13863, R_036720_SQ_PERFCOUNTER8_SELECT, 6, 1052},
+ {13886, R_036724_SQ_PERFCOUNTER9_SELECT, 6, 1052},
+ {13909, R_036728_SQ_PERFCOUNTER10_SELECT, 6, 1052},
+ {13933, R_03672C_SQ_PERFCOUNTER11_SELECT, 6, 1052},
+ {13957, R_036730_SQ_PERFCOUNTER12_SELECT, 6, 1052},
+ {13981, R_036734_SQ_PERFCOUNTER13_SELECT, 6, 1052},
+ {14005, R_036738_SQ_PERFCOUNTER14_SELECT, 6, 1052},
+ {14029, R_03673C_SQ_PERFCOUNTER15_SELECT, 6, 1052},
+ {14053, R_036780_SQ_PERFCOUNTER_CTRL, 9, 1058},
+ {14073, R_036784_SQ_PERFCOUNTER_MASK, 2, 1067},
+ {14093, R_036788_SQ_PERFCOUNTER_CTRL2, 1, 1069},
+ {14114, R_036900_SX_PERFCOUNTER0_SELECT, 3, 1070},
+ {14137, R_036904_SX_PERFCOUNTER1_SELECT, 3, 1070},
+ {14160, R_036908_SX_PERFCOUNTER2_SELECT, 3, 1070},
+ {14183, R_03690C_SX_PERFCOUNTER3_SELECT, 3, 1070},
+ {14206, R_036910_SX_PERFCOUNTER0_SELECT1, 2, 1073},
+ {14230, R_036914_SX_PERFCOUNTER1_SELECT1},
+ {14254, R_036A00_GDS_PERFCOUNTER0_SELECT, 3, 1075},
+ {14278, R_036A04_GDS_PERFCOUNTER1_SELECT, 3, 1075},
+ {14302, R_036A08_GDS_PERFCOUNTER2_SELECT, 3, 1075},
+ {14326, R_036A0C_GDS_PERFCOUNTER3_SELECT, 3, 1075},
+ {14350, R_036A10_GDS_PERFCOUNTER0_SELECT1, 2, 1078},
+ {14375, R_036B00_TA_PERFCOUNTER0_SELECT, 5, 1080},
+ {14398, R_036B04_TA_PERFCOUNTER0_SELECT1, 4, 1085},
+ {14422, R_036B08_TA_PERFCOUNTER1_SELECT, 5, 1080},
+ {14445, R_036C00_TD_PERFCOUNTER0_SELECT, 5, 1089},
+ {14468, R_036C04_TD_PERFCOUNTER0_SELECT1, 4, 1094},
+ {14492, R_036C08_TD_PERFCOUNTER1_SELECT, 5, 1089},
+ {14515, R_036D00_TCP_PERFCOUNTER0_SELECT, 5, 1098},
+ {14539, R_036D04_TCP_PERFCOUNTER0_SELECT1, 4, 1103},
+ {14564, R_036D08_TCP_PERFCOUNTER1_SELECT, 5, 1098},
+ {14588, R_036D0C_TCP_PERFCOUNTER1_SELECT1},
+ {14613, R_036D10_TCP_PERFCOUNTER2_SELECT, 5, 1098},
+ {14637, R_036D14_TCP_PERFCOUNTER3_SELECT, 5, 1098},
+ {14661, R_036E00_TCC_PERFCOUNTER0_SELECT, 5, 1107},
+ {14685, R_036E04_TCC_PERFCOUNTER0_SELECT1, 4, 1112},
+ {14710, R_036E08_TCC_PERFCOUNTER1_SELECT, 5, 1107},
+ {14734, R_036E0C_TCC_PERFCOUNTER1_SELECT1},
+ {14759, R_036E10_TCC_PERFCOUNTER2_SELECT, 5, 1107},
+ {14783, R_036E14_TCC_PERFCOUNTER3_SELECT, 5, 1107},
+ {14807, R_036E40_TCA_PERFCOUNTER0_SELECT, 5, 1116},
+ {14831, R_036E44_TCA_PERFCOUNTER0_SELECT1, 4, 1121},
+ {14856, R_036E48_TCA_PERFCOUNTER1_SELECT, 5, 1116},
+ {14880, R_036E4C_TCA_PERFCOUNTER1_SELECT1},
+ {14905, R_036E50_TCA_PERFCOUNTER2_SELECT, 5, 1116},
+ {14929, R_036E54_TCA_PERFCOUNTER3_SELECT, 5, 1116},
+ {14953, R_037000_CB_PERFCOUNTER_FILTER, 12, 1125},
+ {14975, R_037004_CB_PERFCOUNTER0_SELECT, 5, 1137},
+ {14998, R_037008_CB_PERFCOUNTER0_SELECT1, 4, 1142},
+ {15022, R_03700C_CB_PERFCOUNTER1_SELECT, 5, 1137},
+ {15045, R_037010_CB_PERFCOUNTER2_SELECT, 5, 1137},
+ {15068, R_037014_CB_PERFCOUNTER3_SELECT, 5, 1137},
+ {15091, R_037100_DB_PERFCOUNTER0_SELECT, 5, 1146},
+ {15114, R_037104_DB_PERFCOUNTER0_SELECT1, 4, 1151},
+ {15138, R_037108_DB_PERFCOUNTER1_SELECT, 5, 1146},
+ {15161, R_03710C_DB_PERFCOUNTER1_SELECT1},
+ {15185, R_037110_DB_PERFCOUNTER2_SELECT, 5, 1146},
+ {15208, R_037118_DB_PERFCOUNTER3_SELECT, 5, 1146},
+ {15231, R_028000_DB_RENDER_CONTROL, 10, 1155},
+ {15249, R_028004_DB_COUNT_CONTROL, 9, 1165},
+ {15266, R_028008_DB_DEPTH_VIEW, 4, 1174},
+ {15280, R_02800C_DB_RENDER_OVERRIDE, 23, 1178},
+ {15299, R_028010_DB_RENDER_OVERRIDE2, 15, 1201},
+ {15319, R_028014_DB_HTILE_DATA_BASE},
+ {15338, R_028020_DB_DEPTH_BOUNDS_MIN},
+ {15358, R_028024_DB_DEPTH_BOUNDS_MAX},
+ {15378, R_028028_DB_STENCIL_CLEAR, 1, 1216},
+ {15395, R_02802C_DB_DEPTH_CLEAR},
+ {15410, R_028030_PA_SC_SCREEN_SCISSOR_TL, 2, 1217},
+ {15434, R_028034_PA_SC_SCREEN_SCISSOR_BR, 2, 1219},
+ {15458, R_02803C_DB_DEPTH_INFO, 7, 1221},
+ {15472, R_028040_DB_Z_INFO, 10, 1228},
+ {15482, R_028044_DB_STENCIL_INFO, 6, 1238},
+ {15498, R_028048_DB_Z_READ_BASE},
+ {15513, R_02804C_DB_STENCIL_READ_BASE},
+ {15534, R_028050_DB_Z_WRITE_BASE},
+ {15550, R_028054_DB_STENCIL_WRITE_BASE},
+ {15572, R_028058_DB_DEPTH_SIZE, 2, 1244},
+ {15586, R_02805C_DB_DEPTH_SLICE, 1, 1246},
+ {15601, R_028080_TA_BC_BASE_ADDR},
+ {15617, R_028084_TA_BC_BASE_ADDR_HI, 1, 1247},
+ {15636, R_0281E8_COHER_DEST_BASE_HI_0},
+ {15657, R_0281EC_COHER_DEST_BASE_HI_1},
+ {15678, R_0281F0_COHER_DEST_BASE_HI_2},
+ {15699, R_0281F4_COHER_DEST_BASE_HI_3},
+ {15720, R_0281F8_COHER_DEST_BASE_2},
+ {15738, R_0281FC_COHER_DEST_BASE_3},
+ {15756, R_028200_PA_SC_WINDOW_OFFSET, 2, 1248},
+ {15776, R_028204_PA_SC_WINDOW_SCISSOR_TL, 3, 1250},
+ {15800, R_028208_PA_SC_WINDOW_SCISSOR_BR, 2, 1253},
+ {15824, R_02820C_PA_SC_CLIPRECT_RULE, 1, 1255},
+ {15844, R_028210_PA_SC_CLIPRECT_0_TL, 2, 1256},
+ {15864, R_028214_PA_SC_CLIPRECT_0_BR, 2, 1258},
+ {15884, R_028218_PA_SC_CLIPRECT_1_TL, 2, 1256},
+ {15904, R_02821C_PA_SC_CLIPRECT_1_BR, 2, 1258},
+ {15924, R_028220_PA_SC_CLIPRECT_2_TL, 2, 1256},
+ {15944, R_028224_PA_SC_CLIPRECT_2_BR, 2, 1258},
+ {15964, R_028228_PA_SC_CLIPRECT_3_TL, 2, 1256},
+ {15984, R_02822C_PA_SC_CLIPRECT_3_BR, 2, 1258},
+ {16004, R_028230_PA_SC_EDGERULE, 7, 1260},
+ {16019, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 2, 1267},
+ {16048, R_028238_CB_TARGET_MASK, 8, 1269},
+ {16063, R_02823C_CB_SHADER_MASK, 8, 1277},
+ {16078, R_028240_PA_SC_GENERIC_SCISSOR_TL, 3, 1285},
+ {16103, R_028244_PA_SC_GENERIC_SCISSOR_BR, 2, 1288},
+ {16128, R_028248_COHER_DEST_BASE_0},
+ {16146, R_02824C_COHER_DEST_BASE_1},
+ {16164, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 3, 1290},
+ {16189, R_028254_PA_SC_VPORT_SCISSOR_0_BR, 2, 1293},
+ {16214, R_028258_PA_SC_VPORT_SCISSOR_1_TL, 3, 1290},
+ {16239, R_02825C_PA_SC_VPORT_SCISSOR_1_BR, 2, 1293},
+ {16264, R_028260_PA_SC_VPORT_SCISSOR_2_TL, 3, 1290},
+ {16289, R_028264_PA_SC_VPORT_SCISSOR_2_BR, 2, 1293},
+ {16314, R_028268_PA_SC_VPORT_SCISSOR_3_TL, 3, 1290},
+ {16339, R_02826C_PA_SC_VPORT_SCISSOR_3_BR, 2, 1293},
+ {16364, R_028270_PA_SC_VPORT_SCISSOR_4_TL, 3, 1290},
+ {16389, R_028274_PA_SC_VPORT_SCISSOR_4_BR, 2, 1293},
+ {16414, R_028278_PA_SC_VPORT_SCISSOR_5_TL, 3, 1290},
+ {16439, R_02827C_PA_SC_VPORT_SCISSOR_5_BR, 2, 1293},
+ {16464, R_028280_PA_SC_VPORT_SCISSOR_6_TL, 3, 1290},
+ {16489, R_028284_PA_SC_VPORT_SCISSOR_6_BR, 2, 1293},
+ {16514, R_028288_PA_SC_VPORT_SCISSOR_7_TL, 3, 1290},
+ {16539, R_02828C_PA_SC_VPORT_SCISSOR_7_BR, 2, 1293},
+ {16564, R_028290_PA_SC_VPORT_SCISSOR_8_TL, 3, 1290},
+ {16589, R_028294_PA_SC_VPORT_SCISSOR_8_BR, 2, 1293},
+ {16614, R_028298_PA_SC_VPORT_SCISSOR_9_TL, 3, 1290},
+ {16639, R_02829C_PA_SC_VPORT_SCISSOR_9_BR, 2, 1293},
+ {16664, R_0282A0_PA_SC_VPORT_SCISSOR_10_TL, 3, 1290},
+ {16690, R_0282A4_PA_SC_VPORT_SCISSOR_10_BR, 2, 1293},
+ {16716, R_0282A8_PA_SC_VPORT_SCISSOR_11_TL, 3, 1290},
+ {16742, R_0282AC_PA_SC_VPORT_SCISSOR_11_BR, 2, 1293},
+ {16768, R_0282B0_PA_SC_VPORT_SCISSOR_12_TL, 3, 1290},
+ {16794, R_0282B4_PA_SC_VPORT_SCISSOR_12_BR, 2, 1293},
+ {16820, R_0282B8_PA_SC_VPORT_SCISSOR_13_TL, 3, 1290},
+ {16846, R_0282BC_PA_SC_VPORT_SCISSOR_13_BR, 2, 1293},
+ {16872, R_0282C0_PA_SC_VPORT_SCISSOR_14_TL, 3, 1290},
+ {16898, R_0282C4_PA_SC_VPORT_SCISSOR_14_BR, 2, 1293},
+ {16924, R_0282C8_PA_SC_VPORT_SCISSOR_15_TL, 3, 1290},
+ {16950, R_0282CC_PA_SC_VPORT_SCISSOR_15_BR, 2, 1293},
+ {16976, R_0282D0_PA_SC_VPORT_ZMIN_0},
+ {16995, R_0282D4_PA_SC_VPORT_ZMAX_0},
+ {17014, R_0282D8_PA_SC_VPORT_ZMIN_1},
+ {17033, R_0282DC_PA_SC_VPORT_ZMAX_1},
+ {17052, R_0282E0_PA_SC_VPORT_ZMIN_2},
+ {17071, R_0282E4_PA_SC_VPORT_ZMAX_2},
+ {17090, R_0282E8_PA_SC_VPORT_ZMIN_3},
+ {17109, R_0282EC_PA_SC_VPORT_ZMAX_3},
+ {17128, R_0282F0_PA_SC_VPORT_ZMIN_4},
+ {17147, R_0282F4_PA_SC_VPORT_ZMAX_4},
+ {17166, R_0282F8_PA_SC_VPORT_ZMIN_5},
+ {17185, R_0282FC_PA_SC_VPORT_ZMAX_5},
+ {17204, R_028300_PA_SC_VPORT_ZMIN_6},
+ {17223, R_028304_PA_SC_VPORT_ZMAX_6},
+ {17242, R_028308_PA_SC_VPORT_ZMIN_7},
+ {17261, R_02830C_PA_SC_VPORT_ZMAX_7},
+ {17280, R_028310_PA_SC_VPORT_ZMIN_8},
+ {17299, R_028314_PA_SC_VPORT_ZMAX_8},
+ {17318, R_028318_PA_SC_VPORT_ZMIN_9},
+ {17337, R_02831C_PA_SC_VPORT_ZMAX_9},
+ {17356, R_028320_PA_SC_VPORT_ZMIN_10},
+ {17376, R_028324_PA_SC_VPORT_ZMAX_10},
+ {17396, R_028328_PA_SC_VPORT_ZMIN_11},
+ {17416, R_02832C_PA_SC_VPORT_ZMAX_11},
+ {17436, R_028330_PA_SC_VPORT_ZMIN_12},
+ {17456, R_028334_PA_SC_VPORT_ZMAX_12},
+ {17476, R_028338_PA_SC_VPORT_ZMIN_13},
+ {17496, R_02833C_PA_SC_VPORT_ZMAX_13},
+ {17516, R_028340_PA_SC_VPORT_ZMIN_14},
+ {17536, R_028344_PA_SC_VPORT_ZMAX_14},
+ {17556, R_028348_PA_SC_VPORT_ZMIN_15},
+ {17576, R_02834C_PA_SC_VPORT_ZMAX_15},
+ {17596, R_028350_PA_SC_RASTER_CONFIG, 15, 1295},
+ {17616, R_028354_PA_SC_RASTER_CONFIG_1, 3, 1310},
+ {17638, R_028358_PA_SC_SCREEN_EXTENT_CONTROL, 2, 1313},
+ {17666, R_028400_VGT_MAX_VTX_INDX},
+ {17683, R_028404_VGT_MIN_VTX_INDX},
+ {17700, R_028408_VGT_INDX_OFFSET},
+ {17716, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX},
+ {17745, R_028414_CB_BLEND_RED},
+ {17758, R_028418_CB_BLEND_GREEN},
+ {17773, R_02841C_CB_BLEND_BLUE},
+ {17787, R_028420_CB_BLEND_ALPHA},
+ {17802, R_028424_CB_DCC_CONTROL, 3, 1315},
+ {17817, R_02842C_DB_STENCIL_CONTROL, 6, 1318},
+ {17836, R_028430_DB_STENCILREFMASK, 4, 1324},
+ {17854, R_028434_DB_STENCILREFMASK_BF, 4, 1328},
+ {17875, R_02843C_PA_CL_VPORT_XSCALE},
+ {17894, R_028440_PA_CL_VPORT_XOFFSET},
+ {17914, R_028444_PA_CL_VPORT_YSCALE},
+ {17933, R_028448_PA_CL_VPORT_YOFFSET},
+ {17953, R_02844C_PA_CL_VPORT_ZSCALE},
+ {17972, R_028450_PA_CL_VPORT_ZOFFSET},
+ {17992, R_028454_PA_CL_VPORT_XSCALE_1},
+ {18013, R_028458_PA_CL_VPORT_XOFFSET_1},
+ {18035, R_02845C_PA_CL_VPORT_YSCALE_1},
+ {18056, R_028460_PA_CL_VPORT_YOFFSET_1},
+ {18078, R_028464_PA_CL_VPORT_ZSCALE_1},
+ {18099, R_028468_PA_CL_VPORT_ZOFFSET_1},
+ {18121, R_02846C_PA_CL_VPORT_XSCALE_2},
+ {18142, R_028470_PA_CL_VPORT_XOFFSET_2},
+ {18164, R_028474_PA_CL_VPORT_YSCALE_2},
+ {18185, R_028478_PA_CL_VPORT_YOFFSET_2},
+ {18207, R_02847C_PA_CL_VPORT_ZSCALE_2},
+ {18228, R_028480_PA_CL_VPORT_ZOFFSET_2},
+ {18250, R_028484_PA_CL_VPORT_XSCALE_3},
+ {18271, R_028488_PA_CL_VPORT_XOFFSET_3},
+ {18293, R_02848C_PA_CL_VPORT_YSCALE_3},
+ {18314, R_028490_PA_CL_VPORT_YOFFSET_3},
+ {18336, R_028494_PA_CL_VPORT_ZSCALE_3},
+ {18357, R_028498_PA_CL_VPORT_ZOFFSET_3},
+ {18379, R_02849C_PA_CL_VPORT_XSCALE_4},
+ {18400, R_0284A0_PA_CL_VPORT_XOFFSET_4},
+ {18422, R_0284A4_PA_CL_VPORT_YSCALE_4},
+ {18443, R_0284A8_PA_CL_VPORT_YOFFSET_4},
+ {18465, R_0284AC_PA_CL_VPORT_ZSCALE_4},
+ {18486, R_0284B0_PA_CL_VPORT_ZOFFSET_4},
+ {18508, R_0284B4_PA_CL_VPORT_XSCALE_5},
+ {18529, R_0284B8_PA_CL_VPORT_XOFFSET_5},
+ {18551, R_0284BC_PA_CL_VPORT_YSCALE_5},
+ {18572, R_0284C0_PA_CL_VPORT_YOFFSET_5},
+ {18594, R_0284C4_PA_CL_VPORT_ZSCALE_5},
+ {18615, R_0284C8_PA_CL_VPORT_ZOFFSET_5},
+ {18637, R_0284CC_PA_CL_VPORT_XSCALE_6},
+ {18658, R_0284D0_PA_CL_VPORT_XOFFSET_6},
+ {18680, R_0284D4_PA_CL_VPORT_YSCALE_6},
+ {18701, R_0284D8_PA_CL_VPORT_YOFFSET_6},
+ {18723, R_0284DC_PA_CL_VPORT_ZSCALE_6},
+ {18744, R_0284E0_PA_CL_VPORT_ZOFFSET_6},
+ {18766, R_0284E4_PA_CL_VPORT_XSCALE_7},
+ {18787, R_0284E8_PA_CL_VPORT_XOFFSET_7},
+ {18809, R_0284EC_PA_CL_VPORT_YSCALE_7},
+ {18830, R_0284F0_PA_CL_VPORT_YOFFSET_7},
+ {18852, R_0284F4_PA_CL_VPORT_ZSCALE_7},
+ {18873, R_0284F8_PA_CL_VPORT_ZOFFSET_7},
+ {18895, R_0284FC_PA_CL_VPORT_XSCALE_8},
+ {18916, R_028500_PA_CL_VPORT_XOFFSET_8},
+ {18938, R_028504_PA_CL_VPORT_YSCALE_8},
+ {18959, R_028508_PA_CL_VPORT_YOFFSET_8},
+ {18981, R_02850C_PA_CL_VPORT_ZSCALE_8},
+ {19002, R_028510_PA_CL_VPORT_ZOFFSET_8},
+ {19024, R_028514_PA_CL_VPORT_XSCALE_9},
+ {19045, R_028518_PA_CL_VPORT_XOFFSET_9},
+ {19067, R_02851C_PA_CL_VPORT_YSCALE_9},
+ {19088, R_028520_PA_CL_VPORT_YOFFSET_9},
+ {19110, R_028524_PA_CL_VPORT_ZSCALE_9},
+ {19131, R_028528_PA_CL_VPORT_ZOFFSET_9},
+ {19153, R_02852C_PA_CL_VPORT_XSCALE_10},
+ {19175, R_028530_PA_CL_VPORT_XOFFSET_10},
+ {19198, R_028534_PA_CL_VPORT_YSCALE_10},
+ {19220, R_028538_PA_CL_VPORT_YOFFSET_10},
+ {19243, R_02853C_PA_CL_VPORT_ZSCALE_10},
+ {19265, R_028540_PA_CL_VPORT_ZOFFSET_10},
+ {19288, R_028544_PA_CL_VPORT_XSCALE_11},
+ {19310, R_028548_PA_CL_VPORT_XOFFSET_11},
+ {19333, R_02854C_PA_CL_VPORT_YSCALE_11},
+ {19355, R_028550_PA_CL_VPORT_YOFFSET_11},
+ {19378, R_028554_PA_CL_VPORT_ZSCALE_11},
+ {19400, R_028558_PA_CL_VPORT_ZOFFSET_11},
+ {19423, R_02855C_PA_CL_VPORT_XSCALE_12},
+ {19445, R_028560_PA_CL_VPORT_XOFFSET_12},
+ {19468, R_028564_PA_CL_VPORT_YSCALE_12},
+ {19490, R_028568_PA_CL_VPORT_YOFFSET_12},
+ {19513, R_02856C_PA_CL_VPORT_ZSCALE_12},
+ {19535, R_028570_PA_CL_VPORT_ZOFFSET_12},
+ {19558, R_028574_PA_CL_VPORT_XSCALE_13},
+ {19580, R_028578_PA_CL_VPORT_XOFFSET_13},
+ {19603, R_02857C_PA_CL_VPORT_YSCALE_13},
+ {19625, R_028580_PA_CL_VPORT_YOFFSET_13},
+ {19648, R_028584_PA_CL_VPORT_ZSCALE_13},
+ {19670, R_028588_PA_CL_VPORT_ZOFFSET_13},
+ {19693, R_02858C_PA_CL_VPORT_XSCALE_14},
+ {19715, R_028590_PA_CL_VPORT_XOFFSET_14},
+ {19738, R_028594_PA_CL_VPORT_YSCALE_14},
+ {19760, R_028598_PA_CL_VPORT_YOFFSET_14},
+ {19783, R_02859C_PA_CL_VPORT_ZSCALE_14},
+ {19805, R_0285A0_PA_CL_VPORT_ZOFFSET_14},
+ {19828, R_0285A4_PA_CL_VPORT_XSCALE_15},
+ {19850, R_0285A8_PA_CL_VPORT_XOFFSET_15},
+ {19873, R_0285AC_PA_CL_VPORT_YSCALE_15},
+ {19895, R_0285B0_PA_CL_VPORT_YOFFSET_15},
+ {19918, R_0285B4_PA_CL_VPORT_ZSCALE_15},
+ {19940, R_0285B8_PA_CL_VPORT_ZOFFSET_15},
+ {19963, R_0285BC_PA_CL_UCP_0_X},
+ {19977, R_0285C0_PA_CL_UCP_0_Y},
+ {19991, R_0285C4_PA_CL_UCP_0_Z},
+ {20005, R_0285C8_PA_CL_UCP_0_W},
+ {20019, R_0285CC_PA_CL_UCP_1_X},
+ {20033, R_0285D0_PA_CL_UCP_1_Y},
+ {20047, R_0285D4_PA_CL_UCP_1_Z},
+ {20061, R_0285D8_PA_CL_UCP_1_W},
+ {20075, R_0285DC_PA_CL_UCP_2_X},
+ {20089, R_0285E0_PA_CL_UCP_2_Y},
+ {20103, R_0285E4_PA_CL_UCP_2_Z},
+ {20117, R_0285E8_PA_CL_UCP_2_W},
+ {20131, R_0285EC_PA_CL_UCP_3_X},
+ {20145, R_0285F0_PA_CL_UCP_3_Y},
+ {20159, R_0285F4_PA_CL_UCP_3_Z},
+ {20173, R_0285F8_PA_CL_UCP_3_W},
+ {20187, R_0285FC_PA_CL_UCP_4_X},
+ {20201, R_028600_PA_CL_UCP_4_Y},
+ {20215, R_028604_PA_CL_UCP_4_Z},
+ {20229, R_028608_PA_CL_UCP_4_W},
+ {20243, R_02860C_PA_CL_UCP_5_X},
+ {20257, R_028610_PA_CL_UCP_5_Y},
+ {20271, R_028614_PA_CL_UCP_5_Z},
+ {20285, R_028618_PA_CL_UCP_5_W},
+ {20299, R_028644_SPI_PS_INPUT_CNTL_0, 12, 1332},
+ {20319, R_028648_SPI_PS_INPUT_CNTL_1, 12, 1332},
+ {20339, R_02864C_SPI_PS_INPUT_CNTL_2, 12, 1332},
+ {20359, R_028650_SPI_PS_INPUT_CNTL_3, 12, 1332},
+ {20379, R_028654_SPI_PS_INPUT_CNTL_4, 12, 1332},
+ {20399, R_028658_SPI_PS_INPUT_CNTL_5, 12, 1332},
+ {20419, R_02865C_SPI_PS_INPUT_CNTL_6, 12, 1332},
+ {20439, R_028660_SPI_PS_INPUT_CNTL_7, 12, 1332},
+ {20459, R_028664_SPI_PS_INPUT_CNTL_8, 12, 1332},
+ {20479, R_028668_SPI_PS_INPUT_CNTL_9, 12, 1332},
+ {20499, R_02866C_SPI_PS_INPUT_CNTL_10, 12, 1332},
+ {20520, R_028670_SPI_PS_INPUT_CNTL_11, 12, 1332},
+ {20541, R_028674_SPI_PS_INPUT_CNTL_12, 12, 1332},
+ {20562, R_028678_SPI_PS_INPUT_CNTL_13, 12, 1332},
+ {20583, R_02867C_SPI_PS_INPUT_CNTL_14, 12, 1332},
+ {20604, R_028680_SPI_PS_INPUT_CNTL_15, 12, 1332},
+ {20625, R_028684_SPI_PS_INPUT_CNTL_16, 12, 1332},
+ {20646, R_028688_SPI_PS_INPUT_CNTL_17, 12, 1332},
+ {20667, R_02868C_SPI_PS_INPUT_CNTL_18, 12, 1332},
+ {20688, R_028690_SPI_PS_INPUT_CNTL_19, 12, 1332},
+ {20709, R_028694_SPI_PS_INPUT_CNTL_20, 12, 1332},
+ {20730, R_028698_SPI_PS_INPUT_CNTL_21, 12, 1332},
+ {20751, R_02869C_SPI_PS_INPUT_CNTL_22, 12, 1332},
+ {20772, R_0286A0_SPI_PS_INPUT_CNTL_23, 12, 1332},
+ {20793, R_0286A4_SPI_PS_INPUT_CNTL_24, 12, 1332},
+ {20814, R_0286A8_SPI_PS_INPUT_CNTL_25, 12, 1332},
+ {20835, R_0286AC_SPI_PS_INPUT_CNTL_26, 12, 1332},
+ {20856, R_0286B0_SPI_PS_INPUT_CNTL_27, 12, 1332},
+ {20877, R_0286B4_SPI_PS_INPUT_CNTL_28, 12, 1332},
+ {20898, R_0286B8_SPI_PS_INPUT_CNTL_29, 12, 1332},
+ {20919, R_0286BC_SPI_PS_INPUT_CNTL_30, 12, 1332},
+ {20940, R_0286C0_SPI_PS_INPUT_CNTL_31, 12, 1332},
+ {20961, R_0286C4_SPI_VS_OUT_CONFIG, 4, 1344},
+ {20979, R_0286CC_SPI_PS_INPUT_ENA, 16, 1348},
+ {20996, R_0286D0_SPI_PS_INPUT_ADDR, 16, 1364},
+ {21014, R_0286D4_SPI_INTERP_CONTROL_0, 7, 1380},
+ {21035, R_0286D8_SPI_PS_IN_CONTROL, 5, 1387},
+ {21053, R_0286E0_SPI_BARYC_CNTL, 7, 1392},
+ {21068, R_0286E8_SPI_TMPRING_SIZE, 2, 1399},
+ {21085, R_028704_SPI_WAVE_MGMT_1, 5, 1401},
+ {21101, R_028708_SPI_WAVE_MGMT_2, 1, 1406},
+ {21117, R_02870C_SPI_SHADER_POS_FORMAT, 4, 1407},
+ {21139, R_028710_SPI_SHADER_Z_FORMAT, 1, 1411},
+ {21159, R_028714_SPI_SHADER_COL_FORMAT, 8, 1412},
+ {21181, R_028754_SX_PS_DOWNCONVERT, 8, 1420},
+ {21199, R_028758_SX_BLEND_OPT_EPSILON, 8, 1428},
+ {21220, R_02875C_SX_BLEND_OPT_CONTROL, 17, 1436},
+ {21241, R_028760_SX_MRT0_BLEND_OPT, 6, 1453},
+ {21259, R_028764_SX_MRT1_BLEND_OPT, 6, 1459},
+ {21277, R_028768_SX_MRT2_BLEND_OPT, 6, 1465},
+ {21295, R_02876C_SX_MRT3_BLEND_OPT, 6, 1471},
+ {21313, R_028770_SX_MRT4_BLEND_OPT, 6, 1477},
+ {21331, R_028774_SX_MRT5_BLEND_OPT, 6, 1483},
+ {21349, R_028778_SX_MRT6_BLEND_OPT, 6, 1489},
+ {21367, R_02877C_SX_MRT7_BLEND_OPT, 6, 1495},
+ {21385, R_028780_CB_BLEND0_CONTROL, 9, 1501},
+ {21403, R_028784_CB_BLEND1_CONTROL, 9, 1501},
+ {21421, R_028788_CB_BLEND2_CONTROL, 9, 1501},
+ {21439, R_02878C_CB_BLEND3_CONTROL, 9, 1501},
+ {21457, R_028790_CB_BLEND4_CONTROL, 9, 1501},
+ {21475, R_028794_CB_BLEND5_CONTROL, 9, 1501},
+ {21493, R_028798_CB_BLEND6_CONTROL, 9, 1501},
+ {21511, R_02879C_CB_BLEND7_CONTROL, 9, 1501},
+ {21529, R_0287CC_CS_COPY_STATE, 1, 1510},
+ {21543, R_0287D4_PA_CL_POINT_X_RAD},
+ {21561, R_0287D8_PA_CL_POINT_Y_RAD},
+ {21579, R_0287DC_PA_CL_POINT_SIZE},
+ {21596, R_0287E0_PA_CL_POINT_CULL_RAD},
+ {21617, R_0287E4_VGT_DMA_BASE_HI, 1, 1511},
+ {21633, R_0287E8_VGT_DMA_BASE},
+ {21646, R_0287F0_VGT_DRAW_INITIATOR, 4, 1512},
+ {21665, R_0287F4_VGT_IMMED_DATA},
+ {21680, R_0287F8_VGT_EVENT_ADDRESS_REG, 1, 1516},
+ {21702, R_028800_DB_DEPTH_CONTROL, 10, 1517},
+ {21719, R_028804_DB_EQAA, 12, 1527},
+ {21727, R_028808_CB_COLOR_CONTROL, 4, 1539},
+ {21744, R_02880C_DB_SHADER_CONTROL, 13, 1543},
+ {21762, R_028810_PA_CL_CLIP_CNTL, 19, 1556},
+ {21778, R_028814_PA_SU_SC_MODE_CNTL, 13, 1575},
+ {21797, R_028818_PA_CL_VTE_CNTL, 9, 1588},
+ {21812, R_02881C_PA_CL_VS_OUT_CNTL, 27, 1597},
+ {21830, R_028820_PA_CL_NANINF_CNTL, 16, 1624},
+ {21848, R_028824_PA_SU_LINE_STIPPLE_CNTL, 4, 1640},
+ {21872, R_028828_PA_SU_LINE_STIPPLE_SCALE},
+ {21897, R_02882C_PA_SU_PRIM_FILTER_CNTL, 11, 1644},
+ {21920, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL, 5, 1655},
+ {21949, R_028A00_PA_SU_POINT_SIZE, 2, 1660},
+ {21966, R_028A04_PA_SU_POINT_MINMAX, 2, 1662},
+ {21985, R_028A08_PA_SU_LINE_CNTL, 1, 1664},
+ {22001, R_028A0C_PA_SC_LINE_STIPPLE, 4, 1665},
+ {22020, R_028A10_VGT_OUTPUT_PATH_CNTL, 1, 1669},
+ {22041, R_028A14_VGT_HOS_CNTL, 1, 1670},
+ {22054, R_028A18_VGT_HOS_MAX_TESS_LEVEL},
+ {22077, R_028A1C_VGT_HOS_MIN_TESS_LEVEL},
+ {22100, R_028A20_VGT_HOS_REUSE_DEPTH, 1, 1671},
+ {22120, R_028A24_VGT_GROUP_PRIM_TYPE, 4, 1672},
+ {22140, R_028A28_VGT_GROUP_FIRST_DECR, 1, 1676},
+ {22161, R_028A2C_VGT_GROUP_DECR, 1, 1677},
+ {22176, R_028A30_VGT_GROUP_VECT_0_CNTL, 6, 1678},
+ {22198, R_028A34_VGT_GROUP_VECT_1_CNTL, 6, 1684},
+ {22220, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 8, 1690},
+ {22246, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 8, 1698},
+ {22272, R_028A40_VGT_GS_MODE, 15, 1706},
+ {22284, R_028A44_VGT_GS_ONCHIP_CNTL, 2, 1721},
+ {22303, R_028A48_PA_SC_MODE_CNTL_0, 4, 1723},
+ {22321, R_028A4C_PA_SC_MODE_CNTL_1, 24, 1727},
+ {22339, R_028A50_VGT_ENHANCE},
+ {22351, R_028A54_VGT_GS_PER_ES, 1, 1751},
+ {22365, R_028A58_VGT_ES_PER_GS, 1, 1752},
+ {22379, R_028A5C_VGT_GS_PER_VS, 1, 1753},
+ {22393, R_028A60_VGT_GSVS_RING_OFFSET_1, 1, 1754},
+ {22416, R_028A64_VGT_GSVS_RING_OFFSET_2, 1, 1755},
+ {22439, R_028A68_VGT_GSVS_RING_OFFSET_3, 1, 1756},
+ {22462, R_028A6C_VGT_GS_OUT_PRIM_TYPE, 5, 1757},
+ {22483, R_028A70_IA_ENHANCE},
+ {22494, R_028A74_VGT_DMA_SIZE},
+ {22507, R_028A78_VGT_DMA_MAX_SIZE},
+ {22524, R_028A7C_VGT_DMA_INDEX_TYPE, 9, 1762},
+ {22543, R_028A80_WD_ENHANCE},
+ {22554, R_028A84_VGT_PRIMITIVEID_EN, 2, 1771},
+ {22573, R_028A88_VGT_DMA_NUM_INSTANCES},
+ {22595, R_028A8C_VGT_PRIMITIVEID_RESET},
+ {22617, R_028A90_VGT_EVENT_INITIATOR, 3, 1773},
+ {22637, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, 1, 1776},
+ {22664, R_028AA0_VGT_INSTANCE_STEP_RATE_0},
+ {22689, R_028AA4_VGT_INSTANCE_STEP_RATE_1},
+ {22714, R_028AA8_IA_MULTI_VGT_PARAM, 7, 1777},
+ {22733, R_028AAC_VGT_ESGS_RING_ITEMSIZE, 1, 1784},
+ {22756, R_028AB0_VGT_GSVS_RING_ITEMSIZE, 1, 1785},
+ {22779, R_028AB4_VGT_REUSE_OFF, 1, 1786},
+ {22793, R_028AB8_VGT_VTX_CNT_EN, 1, 1787},
+ {22808, R_028ABC_DB_HTILE_SURFACE, 8, 1788},
+ {22825, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 4, 1796},
+ {22852, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 4, 1800},
+ {22879, R_028AC8_DB_PRELOAD_CONTROL, 4, 1804},
+ {22898, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0},
+ {22924, R_028AD4_VGT_STRMOUT_VTX_STRIDE_0, 1, 1808},
+ {22949, R_028ADC_VGT_STRMOUT_BUFFER_OFFSET_0},
+ {22977, R_028AE0_VGT_STRMOUT_BUFFER_SIZE_1},
+ {23003, R_028AE4_VGT_STRMOUT_VTX_STRIDE_1, 1, 1809},
+ {23028, R_028AEC_VGT_STRMOUT_BUFFER_OFFSET_1},
+ {23056, R_028AF0_VGT_STRMOUT_BUFFER_SIZE_2},
+ {23082, R_028AF4_VGT_STRMOUT_VTX_STRIDE_2, 1, 1810},
+ {23107, R_028AFC_VGT_STRMOUT_BUFFER_OFFSET_2},
+ {23135, R_028B00_VGT_STRMOUT_BUFFER_SIZE_3},
+ {23161, R_028B04_VGT_STRMOUT_VTX_STRIDE_3, 1, 1811},
+ {23186, R_028B0C_VGT_STRMOUT_BUFFER_OFFSET_3},
+ {23214, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET},
+ {23245, R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE},
+ {23288, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE, 1, 1812},
+ {23326, R_028B38_VGT_GS_MAX_VERT_OUT, 1, 1813},
+ {23346, R_028B50_VGT_TESS_DISTRIBUTION, 5, 1814},
+ {23368, R_028B54_VGT_SHADER_STAGES_EN, 10, 1819},
+ {23389, R_028B58_VGT_LS_HS_CONFIG, 3, 1829},
+ {23406, R_028B5C_VGT_GS_VERT_ITEMSIZE, 1, 1832},
+ {23427, R_028B60_VGT_GS_VERT_ITEMSIZE_1, 1, 1833},
+ {23450, R_028B64_VGT_GS_VERT_ITEMSIZE_2, 1, 1834},
+ {23473, R_028B68_VGT_GS_VERT_ITEMSIZE_3, 1, 1835},
+ {23496, R_028B6C_VGT_TF_PARAM, 11, 1836},
+ {23509, R_028B70_DB_ALPHA_TO_MASK, 6, 1847},
+ {23526, R_028B74_VGT_DISPATCH_DRAW_INDEX},
+ {23550, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, 2, 1853},
+ {23580, R_028B7C_PA_SU_POLY_OFFSET_CLAMP},
+ {23604, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE},
+ {23634, R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET},
+ {23665, R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE},
+ {23694, R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET},
+ {23724, R_028B90_VGT_GS_INSTANCE_CNT, 2, 1855},
+ {23744, R_028B94_VGT_STRMOUT_CONFIG, 7, 1857},
+ {23763, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 4, 1864},
+ {23789, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 8, 1868},
+ {23815, R_028BD8_PA_SC_CENTROID_PRIORITY_1, 8, 1876},
+ {23841, R_028BDC_PA_SC_LINE_CNTL, 4, 1884},
+ {23857, R_028BE0_PA_SC_AA_CONFIG, 5, 1888},
+ {23873, R_028BE4_PA_SU_VTX_CNTL, 3, 1893},
+ {23888, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ},
+ {23911, R_028BEC_PA_CL_GB_VERT_DISC_ADJ},
+ {23934, R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ},
+ {23957, R_028BF4_PA_CL_GB_HORZ_DISC_ADJ},
+ {23980, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 8, 1896},
+ {24014, R_028BFC_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1, 8, 1904},
+ {24048, R_028C00_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2, 8, 1912},
+ {24082, R_028C04_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3, 8, 1920},
+ {24116, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, 8, 1928},
+ {24150, R_028C0C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1, 8, 1936},
+ {24184, R_028C10_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2, 8, 1944},
+ {24218, R_028C14_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3, 8, 1952},
+ {24252, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, 8, 1960},
+ {24286, R_028C1C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1, 8, 1968},
+ {24320, R_028C20_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2, 8, 1976},
+ {24354, R_028C24_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3, 8, 1984},
+ {24388, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, 8, 1992},
+ {24422, R_028C2C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1, 8, 2000},
+ {24456, R_028C30_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2, 8, 2008},
+ {24490, R_028C34_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3, 8, 2016},
+ {24524, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2, 2024},
+ {24548, R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1, 2, 2026},
+ {24572, R_028C40_PA_SC_SHADER_CONTROL, 1, 2028},
+ {24593, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 1, 2029},
+ {24621, R_028C5C_VGT_OUT_DEALLOC_CNTL, 1, 2030},
+ {24642, R_028C60_CB_COLOR0_BASE},
+ {24657, R_028C64_CB_COLOR0_PITCH, 2, 2031},
+ {24673, R_028C68_CB_COLOR0_SLICE, 1, 2033},
+ {24689, R_028C6C_CB_COLOR0_VIEW, 2, 2034},
+ {24704, R_028C70_CB_COLOR0_INFO, 18, 2036},
+ {24719, R_028C74_CB_COLOR0_ATTRIB, 6, 2054},
+ {24736, R_028C78_CB_COLOR0_DCC_CONTROL, 9, 2060},
+ {24758, R_028C7C_CB_COLOR0_CMASK},
+ {24774, R_028C80_CB_COLOR0_CMASK_SLICE, 1, 2069},
+ {24796, R_028C84_CB_COLOR0_FMASK},
+ {24812, R_028C88_CB_COLOR0_FMASK_SLICE, 1, 2070},
+ {24834, R_028C8C_CB_COLOR0_CLEAR_WORD0},
+ {24856, R_028C90_CB_COLOR0_CLEAR_WORD1},
+ {24878, R_028C94_CB_COLOR0_DCC_BASE},
+ {24897, R_028C9C_CB_COLOR1_BASE},
+ {24912, R_028CA0_CB_COLOR1_PITCH, 2, 2031},
+ {24928, R_028CA4_CB_COLOR1_SLICE, 1, 2033},
+ {24944, R_028CA8_CB_COLOR1_VIEW, 2, 2034},
+ {24959, R_028CAC_CB_COLOR1_INFO, 18, 2036},
+ {24974, R_028CB0_CB_COLOR1_ATTRIB, 6, 2054},
+ {24991, R_028CB4_CB_COLOR1_DCC_CONTROL, 9, 2060},
+ {25013, R_028CB8_CB_COLOR1_CMASK},
+ {25029, R_028CBC_CB_COLOR1_CMASK_SLICE, 1, 2069},
+ {25051, R_028CC0_CB_COLOR1_FMASK},
+ {25067, R_028CC4_CB_COLOR1_FMASK_SLICE, 1, 2070},
+ {25089, R_028CC8_CB_COLOR1_CLEAR_WORD0},
+ {25111, R_028CCC_CB_COLOR1_CLEAR_WORD1},
+ {25133, R_028CD0_CB_COLOR1_DCC_BASE},
+ {25152, R_028CD8_CB_COLOR2_BASE},
+ {25167, R_028CDC_CB_COLOR2_PITCH, 2, 2031},
+ {25183, R_028CE0_CB_COLOR2_SLICE, 1, 2033},
+ {25199, R_028CE4_CB_COLOR2_VIEW, 2, 2034},
+ {25214, R_028CE8_CB_COLOR2_INFO, 18, 2036},
+ {25229, R_028CEC_CB_COLOR2_ATTRIB, 6, 2054},
+ {25246, R_028CF0_CB_COLOR2_DCC_CONTROL, 9, 2060},
+ {25268, R_028CF4_CB_COLOR2_CMASK},
+ {25284, R_028CF8_CB_COLOR2_CMASK_SLICE, 1, 2069},
+ {25306, R_028CFC_CB_COLOR2_FMASK},
+ {25322, R_028D00_CB_COLOR2_FMASK_SLICE, 1, 2070},
+ {25344, R_028D04_CB_COLOR2_CLEAR_WORD0},
+ {25366, R_028D08_CB_COLOR2_CLEAR_WORD1},
+ {25388, R_028D0C_CB_COLOR2_DCC_BASE},
+ {25407, R_028D14_CB_COLOR3_BASE},
+ {25422, R_028D18_CB_COLOR3_PITCH, 2, 2031},
+ {25438, R_028D1C_CB_COLOR3_SLICE, 1, 2033},
+ {25454, R_028D20_CB_COLOR3_VIEW, 2, 2034},
+ {25469, R_028D24_CB_COLOR3_INFO, 18, 2036},
+ {25484, R_028D28_CB_COLOR3_ATTRIB, 6, 2054},
+ {25501, R_028D2C_CB_COLOR3_DCC_CONTROL, 9, 2060},
+ {25523, R_028D30_CB_COLOR3_CMASK},
+ {25539, R_028D34_CB_COLOR3_CMASK_SLICE, 1, 2069},
+ {25561, R_028D38_CB_COLOR3_FMASK},
+ {25577, R_028D3C_CB_COLOR3_FMASK_SLICE, 1, 2070},
+ {25599, R_028D40_CB_COLOR3_CLEAR_WORD0},
+ {25621, R_028D44_CB_COLOR3_CLEAR_WORD1},
+ {25643, R_028D48_CB_COLOR3_DCC_BASE},
+ {25662, R_028D50_CB_COLOR4_BASE},
+ {25677, R_028D54_CB_COLOR4_PITCH, 2, 2031},
+ {25693, R_028D58_CB_COLOR4_SLICE, 1, 2033},
+ {25709, R_028D5C_CB_COLOR4_VIEW, 2, 2034},
+ {25724, R_028D60_CB_COLOR4_INFO, 18, 2036},
+ {25739, R_028D64_CB_COLOR4_ATTRIB, 6, 2054},
+ {25756, R_028D68_CB_COLOR4_DCC_CONTROL, 9, 2060},
+ {25778, R_028D6C_CB_COLOR4_CMASK},
+ {25794, R_028D70_CB_COLOR4_CMASK_SLICE, 1, 2069},
+ {25816, R_028D74_CB_COLOR4_FMASK},
+ {25832, R_028D78_CB_COLOR4_FMASK_SLICE, 1, 2070},
+ {25854, R_028D7C_CB_COLOR4_CLEAR_WORD0},
+ {25876, R_028D80_CB_COLOR4_CLEAR_WORD1},
+ {25898, R_028D84_CB_COLOR4_DCC_BASE},
+ {25917, R_028D8C_CB_COLOR5_BASE},
+ {25932, R_028D90_CB_COLOR5_PITCH, 2, 2031},
+ {25948, R_028D94_CB_COLOR5_SLICE, 1, 2033},
+ {25964, R_028D98_CB_COLOR5_VIEW, 2, 2034},
+ {25979, R_028D9C_CB_COLOR5_INFO, 18, 2036},
+ {25994, R_028DA0_CB_COLOR5_ATTRIB, 6, 2054},
+ {26011, R_028DA4_CB_COLOR5_DCC_CONTROL, 9, 2060},
+ {26033, R_028DA8_CB_COLOR5_CMASK},
+ {26049, R_028DAC_CB_COLOR5_CMASK_SLICE, 1, 2069},
+ {26071, R_028DB0_CB_COLOR5_FMASK},
+ {26087, R_028DB4_CB_COLOR5_FMASK_SLICE, 1, 2070},
+ {26109, R_028DB8_CB_COLOR5_CLEAR_WORD0},
+ {26131, R_028DBC_CB_COLOR5_CLEAR_WORD1},
+ {26153, R_028DC0_CB_COLOR5_DCC_BASE},
+ {26172, R_028DC8_CB_COLOR6_BASE},
+ {26187, R_028DCC_CB_COLOR6_PITCH, 2, 2031},
+ {26203, R_028DD0_CB_COLOR6_SLICE, 1, 2033},
+ {26219, R_028DD4_CB_COLOR6_VIEW, 2, 2034},
+ {26234, R_028DD8_CB_COLOR6_INFO, 18, 2036},
+ {26249, R_028DDC_CB_COLOR6_ATTRIB, 6, 2054},
+ {26266, R_028DE0_CB_COLOR6_DCC_CONTROL, 9, 2060},
+ {26288, R_028DE4_CB_COLOR6_CMASK},
+ {26304, R_028DE8_CB_COLOR6_CMASK_SLICE, 1, 2069},
+ {26326, R_028DEC_CB_COLOR6_FMASK},
+ {26342, R_028DF0_CB_COLOR6_FMASK_SLICE, 1, 2070},
+ {26364, R_028DF4_CB_COLOR6_CLEAR_WORD0},
+ {26386, R_028DF8_CB_COLOR6_CLEAR_WORD1},
+ {26408, R_028DFC_CB_COLOR6_DCC_BASE},
+ {26427, R_028E04_CB_COLOR7_BASE},
+ {26442, R_028E08_CB_COLOR7_PITCH, 2, 2031},
+ {26458, R_028E0C_CB_COLOR7_SLICE, 1, 2033},
+ {26474, R_028E10_CB_COLOR7_VIEW, 2, 2034},
+ {26489, R_028E14_CB_COLOR7_INFO, 18, 2036},
+ {26504, R_028E18_CB_COLOR7_ATTRIB, 6, 2054},
+ {26521, R_028E1C_CB_COLOR7_DCC_CONTROL, 9, 2060},
+ {26543, R_028E20_CB_COLOR7_CMASK},
+ {26559, R_028E24_CB_COLOR7_CMASK_SLICE, 1, 2069},
+ {26581, R_028E28_CB_COLOR7_FMASK},
+ {26597, R_028E2C_CB_COLOR7_FMASK_SLICE, 1, 2070},
+ {26619, R_028E30_CB_COLOR7_CLEAR_WORD0},
+ {26641, R_028E34_CB_COLOR7_CLEAR_WORD1},
+ {26663, R_028E38_CB_COLOR7_DCC_BASE},
+};
+
+static const struct si_reg gfx9d_reg_table[] = {
+ {1059, R_008008_GRBM_STATUS2, 25, 2071},
+ {1072, R_008010_GRBM_STATUS, 24, 2096},
+ {1142, R_008014_GRBM_STATUS_SE0, 12, 2120},
+ {1158, R_008018_GRBM_STATUS_SE1, 12, 2132},
+ {1174, R_008038_GRBM_STATUS_SE2, 12, 2144},
+ {1190, R_00803C_GRBM_STATUS_SE3, 12, 2156},
+ {1100, R_0301F0_CP_COHER_CNTL, 13, 2168},
+ {1244, R_0301FC_CP_COHER_STATUS, 2, 2181},
+ {1260, R_008210_CP_CPC_STATUS, 16, 2183},
+ {1291, R_008218_CP_CPC_STALLED_STAT1, 14, 2199},
+ {1312, R_00821C_CP_CPF_STATUS, 21, 2213},
+ {1343, R_008224_CP_CPF_STALLED_STAT1, 11, 2234},
+ {1806, R_008670_CP_STALLED_STAT3, 19, 2245},
+ {1857, R_008680_CP_STAT, 22, 2264},
+ {1485, R_030908_VGT_PRIMITIVE_TYPE, 1, 2286},
+ {1504, R_03090C_VGT_INDEX_TYPE, 2, 2287},
+ {17666, R_030920_VGT_MAX_VTX_INDX},
+ {17683, R_030924_VGT_MIN_VTX_INDX},
+ {17700, R_030928_VGT_INDX_OFFSET},
+ {22637, R_03092C_VGT_MULTI_PRIM_IB_RESET_EN, 2, 2289},
+ {1702, R_03093C_VGT_HS_OFFCHIP_PARAM, 2, 2291},
+ {26682, R_030944_VGT_TF_MEMORY_BASE_HI, 1, 2293},
+ {26704, R_030948_WD_POS_BUF_BASE},
+ {26720, R_03094C_WD_POS_BUF_BASE_HI, 1, 2294},
+ {26739, R_030950_WD_CNTL_SB_BUF_BASE},
+ {26759, R_030954_WD_CNTL_SB_BUF_BASE_HI, 1, 2295},
+ {26782, R_030958_WD_INDEX_BUF_BASE},
+ {26800, R_03095C_WD_INDEX_BUF_BASE_HI, 1, 2296},
+ {22714, R_030960_IA_MULTI_VGT_PARAM, 9, 2297},
+ {26821, R_030964_VGT_OBJECT_ID},
+ {26835, R_030968_VGT_INSTANCE_BASE_ID},
+ {1998, R_030D20_SQC_CACHES, 6, 2306},
+ {26856, R_030D24_SQC_WRITEBACK, 2, 2312},
+ {26870, R_030E08_TA_GRAD_ADJ_UCONFIG, 4, 2314},
+ {2226, R_008F0C_SQ_BUF_RSRC_WORD3, 12, 2318},
+ {2309, R_008F14_SQ_IMG_RSRC_WORD1, 9, 2330},
+ {2351, R_008F18_SQ_IMG_RSRC_WORD2, 3, 552},
+ {2392, R_008F1C_SQ_IMG_RSRC_WORD3, 8, 2339},
+ {2410, R_008F20_SQ_IMG_RSRC_WORD4, 3, 2347},
+ {2428, R_008F24_SQ_IMG_RSRC_WORD5, 7, 2350},
+ {2518, R_008F38_SQ_IMG_SAMP_WORD2, 10, 2357},
+ {2536, R_008F3C_SQ_IMG_SAMP_WORD3, 3, 2367},
+ {2800, R_031100_SPI_CONFIG_CNTL, 9, 2370},
+ {2816, R_031104_SPI_CONFIG_CNTL_1, 11, 2379},
+ {26890, R_031108_SPI_CONFIG_CNTL_2, 2, 2390},
+ {2882, R_0098F8_GB_ADDR_CONFIG, 13, 2392},
+ {2897, R_009910_GB_TILE_MODE0, 5, 2405},
+ {2911, R_009914_GB_TILE_MODE1, 5, 2405},
+ {2925, R_009918_GB_TILE_MODE2, 5, 2405},
+ {2939, R_00991C_GB_TILE_MODE3, 5, 2405},
+ {2953, R_009920_GB_TILE_MODE4, 5, 2405},
+ {2967, R_009924_GB_TILE_MODE5, 5, 2405},
+ {2981, R_009928_GB_TILE_MODE6, 5, 2405},
+ {2995, R_00992C_GB_TILE_MODE7, 5, 2405},
+ {3009, R_009930_GB_TILE_MODE8, 5, 2405},
+ {3023, R_009934_GB_TILE_MODE9, 5, 2405},
+ {3037, R_009938_GB_TILE_MODE10, 5, 2405},
+ {3052, R_00993C_GB_TILE_MODE11, 5, 2405},
+ {3067, R_009940_GB_TILE_MODE12, 5, 2405},
+ {3082, R_009944_GB_TILE_MODE13, 5, 2405},
+ {3097, R_009948_GB_TILE_MODE14, 5, 2405},
+ {3112, R_00994C_GB_TILE_MODE15, 5, 2405},
+ {3127, R_009950_GB_TILE_MODE16, 5, 2405},
+ {3142, R_009954_GB_TILE_MODE17, 5, 2405},
+ {3157, R_009958_GB_TILE_MODE18, 5, 2405},
+ {3172, R_00995C_GB_TILE_MODE19, 5, 2405},
+ {3187, R_009960_GB_TILE_MODE20, 5, 2405},
+ {3202, R_009964_GB_TILE_MODE21, 5, 2405},
+ {3217, R_009968_GB_TILE_MODE22, 5, 2405},
+ {3232, R_00996C_GB_TILE_MODE23, 5, 2405},
+ {3247, R_009970_GB_TILE_MODE24, 5, 2405},
+ {3262, R_009974_GB_TILE_MODE25, 5, 2405},
+ {3277, R_009978_GB_TILE_MODE26, 5, 2405},
+ {3292, R_00997C_GB_TILE_MODE27, 5, 2405},
+ {3307, R_009980_GB_TILE_MODE28, 5, 2405},
+ {3322, R_009984_GB_TILE_MODE29, 5, 2405},
+ {3337, R_009988_GB_TILE_MODE30, 5, 2405},
+ {3352, R_00998C_GB_TILE_MODE31, 5, 2405},
+ {3761, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, 4, 2410},
+ {3827, R_00B028_SPI_SHADER_PGM_RSRC1_PS, 11, 2414},
+ {3851, R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 10, 2425},
+ {26908, R_00B070_SPI_SHADER_USER_DATA_PS_16},
+ {26935, R_00B074_SPI_SHADER_USER_DATA_PS_17},
+ {26962, R_00B078_SPI_SHADER_USER_DATA_PS_18},
+ {26989, R_00B07C_SPI_SHADER_USER_DATA_PS_19},
+ {27016, R_00B080_SPI_SHADER_USER_DATA_PS_20},
+ {27043, R_00B084_SPI_SHADER_USER_DATA_PS_21},
+ {27070, R_00B088_SPI_SHADER_USER_DATA_PS_22},
+ {27097, R_00B08C_SPI_SHADER_USER_DATA_PS_23},
+ {27124, R_00B090_SPI_SHADER_USER_DATA_PS_24},
+ {27151, R_00B094_SPI_SHADER_USER_DATA_PS_25},
+ {27178, R_00B098_SPI_SHADER_USER_DATA_PS_26},
+ {27205, R_00B09C_SPI_SHADER_USER_DATA_PS_27},
+ {27232, R_00B0A0_SPI_SHADER_USER_DATA_PS_28},
+ {27259, R_00B0A4_SPI_SHADER_USER_DATA_PS_29},
+ {27286, R_00B0A8_SPI_SHADER_USER_DATA_PS_30},
+ {27313, R_00B0AC_SPI_SHADER_USER_DATA_PS_31},
+ {4381, R_00B118_SPI_SHADER_PGM_RSRC3_VS, 4, 2435},
+ {4472, R_00B128_SPI_SHADER_PGM_RSRC1_VS, 12, 2439},
+ {4496, R_00B12C_SPI_SHADER_PGM_RSRC2_VS, 14, 2451},
+ {27340, R_00B170_SPI_SHADER_USER_DATA_VS_16},
+ {27367, R_00B174_SPI_SHADER_USER_DATA_VS_17},
+ {27394, R_00B178_SPI_SHADER_USER_DATA_VS_18},
+ {27421, R_00B17C_SPI_SHADER_USER_DATA_VS_19},
+ {27448, R_00B180_SPI_SHADER_USER_DATA_VS_20},
+ {27475, R_00B184_SPI_SHADER_USER_DATA_VS_21},
+ {27502, R_00B188_SPI_SHADER_USER_DATA_VS_22},
+ {27529, R_00B18C_SPI_SHADER_USER_DATA_VS_23},
+ {27556, R_00B190_SPI_SHADER_USER_DATA_VS_24},
+ {27583, R_00B194_SPI_SHADER_USER_DATA_VS_25},
+ {27610, R_00B198_SPI_SHADER_USER_DATA_VS_26},
+ {27637, R_00B19C_SPI_SHADER_USER_DATA_VS_27},
+ {27664, R_00B1A0_SPI_SHADER_USER_DATA_VS_28},
+ {27691, R_00B1A4_SPI_SHADER_USER_DATA_VS_29},
+ {27718, R_00B1A8_SPI_SHADER_USER_DATA_VS_30},
+ {27745, R_00B1AC_SPI_SHADER_USER_DATA_VS_31},
+ {27772, R_00B1F0_SPI_SHADER_PGM_RSRC2_GS_VS, 9, 2465},
+ {27799, R_00B204_SPI_SHADER_PGM_RSRC4_GS, 2, 2474},
+ {27823, R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS},
+ {27855, R_00B20C_SPI_SHADER_USER_DATA_ADDR_HI_GS},
+ {5670, R_00B210_SPI_SHADER_PGM_LO_ES},
+ {5691, R_00B214_SPI_SHADER_PGM_HI_ES, 1, 2476},
+ {5026, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, 4, 2477},
+ {5092, R_00B228_SPI_SHADER_PGM_RSRC1_GS, 12, 2481},
+ {5116, R_00B22C_SPI_SHADER_PGM_RSRC2_GS, 9, 2493},
+ {27887, R_00B370_SPI_SHADER_USER_DATA_ES_16},
+ {27914, R_00B374_SPI_SHADER_USER_DATA_ES_17},
+ {27941, R_00B378_SPI_SHADER_USER_DATA_ES_18},
+ {27968, R_00B37C_SPI_SHADER_USER_DATA_ES_19},
+ {27995, R_00B380_SPI_SHADER_USER_DATA_ES_20},
+ {28022, R_00B384_SPI_SHADER_USER_DATA_ES_21},
+ {28049, R_00B388_SPI_SHADER_USER_DATA_ES_22},
+ {28076, R_00B38C_SPI_SHADER_USER_DATA_ES_23},
+ {28103, R_00B390_SPI_SHADER_USER_DATA_ES_24},
+ {28130, R_00B394_SPI_SHADER_USER_DATA_ES_25},
+ {28157, R_00B398_SPI_SHADER_USER_DATA_ES_26},
+ {28184, R_00B39C_SPI_SHADER_USER_DATA_ES_27},
+ {28211, R_00B3A0_SPI_SHADER_USER_DATA_ES_28},
+ {28238, R_00B3A4_SPI_SHADER_USER_DATA_ES_29},
+ {28265, R_00B3A8_SPI_SHADER_USER_DATA_ES_30},
+ {28292, R_00B3AC_SPI_SHADER_USER_DATA_ES_31},
+ {28319, R_00B404_SPI_SHADER_PGM_RSRC4_HS, 1, 2502},
+ {28343, R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS},
+ {28375, R_00B40C_SPI_SHADER_USER_DATA_ADDR_HI_HS},
+ {6910, R_00B410_SPI_SHADER_PGM_LO_LS},
+ {6931, R_00B414_SPI_SHADER_PGM_HI_LS, 1, 2503},
+ {6266, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 4, 2504},
+ {6332, R_00B428_SPI_SHADER_PGM_RSRC1_HS, 11, 2508},
+ {6356, R_00B42C_SPI_SHADER_PGM_RSRC2_HS, 7, 2519},
+ {7000, R_00B430_SPI_SHADER_USER_DATA_LS_0},
+ {7026, R_00B434_SPI_SHADER_USER_DATA_LS_1},
+ {7052, R_00B438_SPI_SHADER_USER_DATA_LS_2},
+ {7078, R_00B43C_SPI_SHADER_USER_DATA_LS_3},
+ {7104, R_00B440_SPI_SHADER_USER_DATA_LS_4},
+ {7130, R_00B444_SPI_SHADER_USER_DATA_LS_5},
+ {7156, R_00B448_SPI_SHADER_USER_DATA_LS_6},
+ {7182, R_00B44C_SPI_SHADER_USER_DATA_LS_7},
+ {7208, R_00B450_SPI_SHADER_USER_DATA_LS_8},
+ {7234, R_00B454_SPI_SHADER_USER_DATA_LS_9},
+ {7260, R_00B458_SPI_SHADER_USER_DATA_LS_10},
+ {7287, R_00B45C_SPI_SHADER_USER_DATA_LS_11},
+ {7314, R_00B460_SPI_SHADER_USER_DATA_LS_12},
+ {7341, R_00B464_SPI_SHADER_USER_DATA_LS_13},
+ {7368, R_00B468_SPI_SHADER_USER_DATA_LS_14},
+ {7395, R_00B46C_SPI_SHADER_USER_DATA_LS_15},
+ {28407, R_00B470_SPI_SHADER_USER_DATA_LS_16},
+ {28434, R_00B474_SPI_SHADER_USER_DATA_LS_17},
+ {28461, R_00B478_SPI_SHADER_USER_DATA_LS_18},
+ {28488, R_00B47C_SPI_SHADER_USER_DATA_LS_19},
+ {28515, R_00B480_SPI_SHADER_USER_DATA_LS_20},
+ {28542, R_00B484_SPI_SHADER_USER_DATA_LS_21},
+ {28569, R_00B488_SPI_SHADER_USER_DATA_LS_22},
+ {28596, R_00B48C_SPI_SHADER_USER_DATA_LS_23},
+ {28623, R_00B490_SPI_SHADER_USER_DATA_LS_24},
+ {28650, R_00B494_SPI_SHADER_USER_DATA_LS_25},
+ {28677, R_00B498_SPI_SHADER_USER_DATA_LS_26},
+ {28704, R_00B49C_SPI_SHADER_USER_DATA_LS_27},
+ {28731, R_00B4A0_SPI_SHADER_USER_DATA_LS_28},
+ {28758, R_00B4A4_SPI_SHADER_USER_DATA_LS_29},
+ {28785, R_00B4A8_SPI_SHADER_USER_DATA_LS_30},
+ {28812, R_00B4AC_SPI_SHADER_USER_DATA_LS_31},
+ {28839, R_00B530_SPI_SHADER_USER_DATA_COMMON_0},
+ {28869, R_00B534_SPI_SHADER_USER_DATA_COMMON_1},
+ {28899, R_00B538_SPI_SHADER_USER_DATA_COMMON_2},
+ {28929, R_00B53C_SPI_SHADER_USER_DATA_COMMON_3},
+ {28959, R_00B540_SPI_SHADER_USER_DATA_COMMON_4},
+ {28989, R_00B544_SPI_SHADER_USER_DATA_COMMON_5},
+ {29019, R_00B548_SPI_SHADER_USER_DATA_COMMON_6},
+ {29049, R_00B54C_SPI_SHADER_USER_DATA_COMMON_7},
+ {29079, R_00B550_SPI_SHADER_USER_DATA_COMMON_8},
+ {29109, R_00B554_SPI_SHADER_USER_DATA_COMMON_9},
+ {29139, R_00B558_SPI_SHADER_USER_DATA_COMMON_10},
+ {29170, R_00B55C_SPI_SHADER_USER_DATA_COMMON_11},
+ {29201, R_00B560_SPI_SHADER_USER_DATA_COMMON_12},
+ {29232, R_00B564_SPI_SHADER_USER_DATA_COMMON_13},
+ {29263, R_00B568_SPI_SHADER_USER_DATA_COMMON_14},
+ {29294, R_00B56C_SPI_SHADER_USER_DATA_COMMON_15},
+ {29325, R_00B570_SPI_SHADER_USER_DATA_COMMON_16},
+ {29356, R_00B574_SPI_SHADER_USER_DATA_COMMON_17},
+ {29387, R_00B578_SPI_SHADER_USER_DATA_COMMON_18},
+ {29418, R_00B57C_SPI_SHADER_USER_DATA_COMMON_19},
+ {29449, R_00B580_SPI_SHADER_USER_DATA_COMMON_20},
+ {29480, R_00B584_SPI_SHADER_USER_DATA_COMMON_21},
+ {29511, R_00B588_SPI_SHADER_USER_DATA_COMMON_22},
+ {29542, R_00B58C_SPI_SHADER_USER_DATA_COMMON_23},
+ {29573, R_00B590_SPI_SHADER_USER_DATA_COMMON_24},
+ {29604, R_00B594_SPI_SHADER_USER_DATA_COMMON_25},
+ {29635, R_00B598_SPI_SHADER_USER_DATA_COMMON_26},
+ {29666, R_00B59C_SPI_SHADER_USER_DATA_COMMON_27},
+ {29697, R_00B5A0_SPI_SHADER_USER_DATA_COMMON_28},
+ {29728, R_00B5A4_SPI_SHADER_USER_DATA_COMMON_29},
+ {29759, R_00B5A8_SPI_SHADER_USER_DATA_COMMON_30},
+ {29790, R_00B5AC_SPI_SHADER_USER_DATA_COMMON_31},
+ {7422, R_00B800_COMPUTE_DISPATCH_INITIATOR, 11, 2526},
+ {7690, R_00B834_COMPUTE_PGM_HI, 1, 867},
+ {29821, R_00B838_COMPUTE_DISPATCH_PKT_ADDR_LO},
+ {29850, R_00B83C_COMPUTE_DISPATCH_PKT_ADDR_HI, 1, 869},
+ {29879, R_00B840_COMPUTE_DISPATCH_SCRATCH_BASE_LO},
+ {29912, R_00B844_COMPUTE_DISPATCH_SCRATCH_BASE_HI, 1, 870},
+ {7765, R_00B848_COMPUTE_PGM_RSRC1, 11, 2537},
+ {7783, R_00B84C_COMPUTE_PGM_RSRC2, 12, 2548},
+ {7814, R_00B854_COMPUTE_RESOURCE_LIMITS, 7, 2560},
+ {29945, R_034030_CPF_LATENCY_STATS_DATA},
+ {29968, R_034034_CPG_LATENCY_STATS_DATA},
+ {29991, R_034038_CPC_LATENCY_STATS_DATA},
+ {12244, R_036000_CPG_PERFCOUNTER1_SELECT, 5, 2567},
+ {12268, R_036004_CPG_PERFCOUNTER0_SELECT1, 4, 2572},
+ {12293, R_036008_CPG_PERFCOUNTER0_SELECT, 5, 2567},
+ {12317, R_03600C_CPC_PERFCOUNTER1_SELECT, 5, 2576},
+ {12341, R_036010_CPC_PERFCOUNTER0_SELECT1, 4, 2581},
+ {12366, R_036014_CPF_PERFCOUNTER1_SELECT, 5, 2585},
+ {12390, R_036018_CPF_PERFCOUNTER0_SELECT1, 4, 2590},
+ {12415, R_03601C_CPF_PERFCOUNTER0_SELECT, 5, 2585},
+ {12455, R_036024_CPC_PERFCOUNTER0_SELECT, 5, 2576},
+ {30014, R_036028_CPF_TC_PERF_COUNTER_WINDOW_SELECT, 3, 2594},
+ {30048, R_03602C_CPG_TC_PERF_COUNTER_WINDOW_SELECT, 3, 2597},
+ {30082, R_036030_CPF_LATENCY_STATS_SELECT, 3, 2600},
+ {30107, R_036034_CPG_LATENCY_STATS_SELECT, 3, 2603},
+ {30132, R_036038_CPC_LATENCY_STATS_SELECT, 3, 2606},
+ {12479, R_036100_GRBM_PERFCOUNTER0_SELECT, 22, 2609},
+ {12504, R_036104_GRBM_PERFCOUNTER1_SELECT, 22, 2609},
+ {12529, R_036108_GRBM_SE0_PERFCOUNTER_SELECT, 13, 2631},
+ {12557, R_03610C_GRBM_SE1_PERFCOUNTER_SELECT, 13, 2644},
+ {12585, R_036110_GRBM_SE2_PERFCOUNTER_SELECT, 13, 2657},
+ {12613, R_036114_GRBM_SE3_PERFCOUNTER_SELECT, 13, 2670},
+ {13414, R_036600_SPI_PERFCOUNTER0_SELECT, 5, 2683},
+ {13438, R_036604_SPI_PERFCOUNTER1_SELECT, 5, 2683},
+ {13462, R_036608_SPI_PERFCOUNTER2_SELECT, 5, 2683},
+ {13486, R_03660C_SPI_PERFCOUNTER3_SELECT, 5, 2683},
+ {13510, R_036610_SPI_PERFCOUNTER0_SELECT1, 4, 2688},
+ {13610, R_036620_SPI_PERFCOUNTER4_SELECT, 5, 2683},
+ {13634, R_036624_SPI_PERFCOUNTER5_SELECT, 5, 2683},
+ {15266, R_028008_DB_DEPTH_VIEW, 5, 2692},
+ {15299, R_028010_DB_RENDER_OVERRIDE2, 16, 2697},
+ {30157, R_028018_DB_HTILE_DATA_BASE_HI, 1, 2713},
+ {15572, R_02801C_DB_DEPTH_SIZE, 2, 2714},
+ {15472, R_028038_DB_Z_INFO, 13, 2716},
+ {15482, R_02803C_DB_STENCIL_INFO, 8, 2729},
+ {15498, R_028040_DB_Z_READ_BASE},
+ {30179, R_028044_DB_Z_READ_BASE_HI, 1, 2737},
+ {15513, R_028048_DB_STENCIL_READ_BASE},
+ {30197, R_02804C_DB_STENCIL_READ_BASE_HI, 1, 2738},
+ {30221, R_028054_DB_Z_WRITE_BASE_HI, 1, 2739},
+ {15550, R_028058_DB_STENCIL_WRITE_BASE},
+ {30240, R_02805C_DB_STENCIL_WRITE_BASE_HI, 1, 2740},
+ {30265, R_028060_DB_DFSM_CONTROL, 3, 2741},
+ {30281, R_028064_DB_RENDER_FILTER, 1, 2744},
+ {30298, R_028068_DB_Z_INFO2, 1, 2745},
+ {30309, R_02806C_DB_STENCIL_INFO2, 1, 2746},
+ {15636, R_0281E8_COHER_DEST_BASE_HI_0, 1, 2747},
+ {15657, R_0281EC_COHER_DEST_BASE_HI_1, 1, 2748},
+ {15678, R_0281F0_COHER_DEST_BASE_HI_2, 1, 2749},
+ {15699, R_0281F4_COHER_DEST_BASE_HI_3, 1, 2750},
+ {17596, R_028350_PA_SC_RASTER_CONFIG, 15, 2751},
+ {17616, R_028354_PA_SC_RASTER_CONFIG_1, 3, 2766},
+ {30326, R_02835C_PA_SC_TILE_STEERING_OVERRIDE, 4, 2769},
+ {30355, R_028360_CP_PERFMON_CNTX_CNTL, 1, 2773},
+ {30376, R_0283A0_PA_SC_RIGHT_VERT_GRID, 4, 2774},
+ {30398, R_0283A4_PA_SC_LEFT_VERT_GRID, 4, 2778},
+ {30419, R_0283A8_PA_SC_HORIZ_GRID, 4, 2782},
+ {30436, R_0283AC_PA_SC_FOV_WINDOW_LR, 4, 2786},
+ {30456, R_0283B0_PA_SC_FOV_WINDOW_TB, 2, 2790},
+ {20961, R_0286C4_SPI_VS_OUT_CONFIG, 2, 1344},
+ {21035, R_0286D8_SPI_PS_IN_CONTROL, 5, 2792},
+ {30476, R_0287A0_CB_MRT0_EPITCH, 1, 2797},
+ {30491, R_0287A4_CB_MRT1_EPITCH, 1, 2798},
+ {30506, R_0287A8_CB_MRT2_EPITCH, 1, 2799},
+ {30521, R_0287AC_CB_MRT3_EPITCH, 1, 2800},
+ {30536, R_0287B0_CB_MRT4_EPITCH, 1, 2801},
+ {30551, R_0287B4_CB_MRT5_EPITCH, 1, 2802},
+ {30566, R_0287B8_CB_MRT6_EPITCH, 1, 2803},
+ {30581, R_0287BC_CB_MRT7_EPITCH, 1, 2804},
+ {21617, R_0287E4_VGT_DMA_BASE_HI, 1, 2805},
+ {21646, R_0287F0_VGT_DRAW_INITIATOR, 8, 2806},
+ {21744, R_02880C_DB_SHADER_CONTROL, 16, 2814},
+ {21778, R_028814_PA_SU_SC_MODE_CNTL, 15, 2830},
+ {21797, R_028818_PA_CL_VTE_CNTL, 10, 2845},
+ {21812, R_02881C_PA_CL_VS_OUT_CNTL, 28, 2855},
+ {21920, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL, 6, 2883},
+ {30596, R_028834_PA_CL_OBJPRIM_ID_CNTL, 3, 2889},
+ {30618, R_028838_PA_CL_NGG_CNTL, 2, 2892},
+ {30633, R_02883C_PA_SU_OVER_RASTERIZATION_CNTL, 5, 2894},
+ {22272, R_028A40_VGT_GS_MODE, 15, 2899},
+ {22284, R_028A44_VGT_GS_ONCHIP_CNTL, 3, 2914},
+ {22303, R_028A48_PA_SC_MODE_CNTL_0, 7, 2917},
+ {22524, R_028A7C_VGT_DMA_INDEX_TYPE, 7, 2924},
+ {22554, R_028A84_VGT_PRIMITIVEID_EN, 3, 2931},
+ {22617, R_028A90_VGT_EVENT_INITIATOR, 3, 2934},
+ {30663, R_028A94_VGT_GS_MAX_PRIMS_PER_SUBGROUP, 1, 2937},
+ {30693, R_028A98_VGT_DRAW_PAYLOAD_CNTL, 4, 2938},
+ {30715, R_028A9C_VGT_INDEX_PAYLOAD_CNTL, 1, 2942},
+ {22808, R_028ABC_DB_HTILE_SURFACE, 8, 2943},
+ {23368, R_028B54_VGT_SHADER_STAGES_EN, 13, 2951},
+ {23496, R_028B6C_VGT_TF_PARAM, 8, 2964},
+ {23744, R_028B94_VGT_STRMOUT_CONFIG, 8, 2972},
+ {30738, R_028B9C_VGT_DMA_EVENT_INITIATOR, 3, 2980},
+ {23857, R_028BE0_PA_SC_AA_CONFIG, 6, 2983},
+ {24572, R_028C40_PA_SC_SHADER_CONTROL, 3, 2989},
+ {30762, R_028C44_PA_SC_BINNER_CNTL_0, 10, 2992},
+ {30782, R_028C48_PA_SC_BINNER_CNTL_1, 2, 3002},
+ {30802, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL, 18, 3004},
+ {30840, R_028C50_PA_SC_NGG_MODE_CNTL, 1, 3022},
+ {30860, R_028C64_CB_COLOR0_BASE_EXT, 1, 3023},
+ {30879, R_028C68_CB_COLOR0_ATTRIB2, 3, 3024},
+ {24689, R_028C6C_CB_COLOR0_VIEW, 3, 3027},
+ {24704, R_028C70_CB_COLOR0_INFO, 16, 3030},
+ {24719, R_028C74_CB_COLOR0_ATTRIB, 10, 3046},
+ {30897, R_028C80_CB_COLOR0_CMASK_BASE_EXT, 1, 3056},
+ {30922, R_028C88_CB_COLOR0_FMASK_BASE_EXT, 1, 3057},
+ {30947, R_028C98_CB_COLOR0_DCC_BASE_EXT, 1, 3058},
+ {30970, R_028CA0_CB_COLOR1_BASE_EXT, 1, 3023},
+ {30989, R_028CA4_CB_COLOR1_ATTRIB2},
+ {24944, R_028CA8_CB_COLOR1_VIEW, 3, 3027},
+ {24959, R_028CAC_CB_COLOR1_INFO, 16, 3030},
+ {24974, R_028CB0_CB_COLOR1_ATTRIB, 10, 3046},
+ {31007, R_028CBC_CB_COLOR1_CMASK_BASE_EXT, 1, 3056},
+ {31032, R_028CC4_CB_COLOR1_FMASK_BASE_EXT, 1, 3057},
+ {31057, R_028CD4_CB_COLOR1_DCC_BASE_EXT, 1, 3058},
+ {31080, R_028CDC_CB_COLOR2_BASE_EXT, 1, 3023},
+ {31099, R_028CE0_CB_COLOR2_ATTRIB2},
+ {25199, R_028CE4_CB_COLOR2_VIEW, 3, 3027},
+ {25214, R_028CE8_CB_COLOR2_INFO, 16, 3030},
+ {25229, R_028CEC_CB_COLOR2_ATTRIB, 10, 3046},
+ {31117, R_028CF8_CB_COLOR2_CMASK_BASE_EXT, 1, 3056},
+ {31142, R_028D00_CB_COLOR2_FMASK_BASE_EXT, 1, 3057},
+ {31167, R_028D10_CB_COLOR2_DCC_BASE_EXT, 1, 3058},
+ {31190, R_028D18_CB_COLOR3_BASE_EXT, 1, 3023},
+ {31209, R_028D1C_CB_COLOR3_ATTRIB2},
+ {25454, R_028D20_CB_COLOR3_VIEW, 3, 3027},
+ {25469, R_028D24_CB_COLOR3_INFO, 16, 3030},
+ {25484, R_028D28_CB_COLOR3_ATTRIB, 10, 3046},
+ {31227, R_028D34_CB_COLOR3_CMASK_BASE_EXT, 1, 3056},
+ {31252, R_028D3C_CB_COLOR3_FMASK_BASE_EXT, 1, 3057},
+ {31277, R_028D4C_CB_COLOR3_DCC_BASE_EXT, 1, 3058},
+ {31300, R_028D54_CB_COLOR4_BASE_EXT, 1, 3023},
+ {31319, R_028D58_CB_COLOR4_ATTRIB2},
+ {25709, R_028D5C_CB_COLOR4_VIEW, 3, 3027},
+ {25724, R_028D60_CB_COLOR4_INFO, 16, 3030},
+ {25739, R_028D64_CB_COLOR4_ATTRIB, 10, 3046},
+ {31337, R_028D70_CB_COLOR4_CMASK_BASE_EXT, 1, 3056},
+ {31362, R_028D78_CB_COLOR4_FMASK_BASE_EXT, 1, 3057},
+ {31387, R_028D88_CB_COLOR4_DCC_BASE_EXT, 1, 3058},
+ {31410, R_028D90_CB_COLOR5_BASE_EXT, 1, 3023},
+ {31429, R_028D94_CB_COLOR5_ATTRIB2},
+ {25964, R_028D98_CB_COLOR5_VIEW, 3, 3027},
+ {25979, R_028D9C_CB_COLOR5_INFO, 16, 3030},
+ {25994, R_028DA0_CB_COLOR5_ATTRIB, 10, 3046},
+ {31447, R_028DAC_CB_COLOR5_CMASK_BASE_EXT, 1, 3056},
+ {31472, R_028DB4_CB_COLOR5_FMASK_BASE_EXT, 1, 3057},
+ {31497, R_028DC4_CB_COLOR5_DCC_BASE_EXT, 1, 3058},
+ {31520, R_028DCC_CB_COLOR6_BASE_EXT, 1, 3023},
+ {31539, R_028DD0_CB_COLOR6_ATTRIB2},
+ {26219, R_028DD4_CB_COLOR6_VIEW, 3, 3027},
+ {26234, R_028DD8_CB_COLOR6_INFO, 16, 3030},
+ {26249, R_028DDC_CB_COLOR6_ATTRIB, 10, 3046},
+ {31557, R_028DE8_CB_COLOR6_CMASK_BASE_EXT, 1, 3056},
+ {31582, R_028DF0_CB_COLOR6_FMASK_BASE_EXT, 1, 3057},
+ {31607, R_028E00_CB_COLOR6_DCC_BASE_EXT, 1, 3058},
+ {31630, R_028E08_CB_COLOR7_BASE_EXT, 1, 3023},
+ {31649, R_028E0C_CB_COLOR7_ATTRIB2},
+ {26474, R_028E10_CB_COLOR7_VIEW, 3, 3027},
+ {26489, R_028E14_CB_COLOR7_INFO, 16, 3030},
+ {26504, R_028E18_CB_COLOR7_ATTRIB, 10, 3046},
+ {31667, R_028E24_CB_COLOR7_CMASK_BASE_EXT, 1, 3056},
+ {31692, R_028E2C_CB_COLOR7_FMASK_BASE_EXT, 1, 3057},
+ {31717, R_028E3C_CB_COLOR7_DCC_BASE_EXT, 1, 3058},
+};
+
static const struct si_field sid_fields_table[] = {
/* 0 */
- {827, S_2C3_COUNT_INDIRECT_ENABLE(~0u)},
- {849, S_2C3_DRAW_INDEX_ENABLE(~0u)},
+ {31740, S_2C3_COUNT_INDIRECT_ENABLE(~0u)},
+ {31762, S_2C3_DRAW_INDEX_ENABLE(~0u)},
/* 2 */
- {877, S_370_ENGINE_SEL(~0u), 4, 0},
- {888, S_370_WR_CONFIRM(~0u)},
- {899, S_370_WR_ONE_ADDR(~0u)},
- {972, S_370_DST_SEL(~0u), 6, 4},
+ {31784, S_370_ENGINE_SEL(~0u), 4, 0},
+ {31795, S_370_WR_CONFIRM(~0u)},
+ {31806, S_370_WR_ONE_ADDR(~0u)},
+ {31870, S_370_DST_SEL(~0u), 6, 4},
/* 6 */
- {980, S_3F2_IB_SIZE(~0u)},
- {988, S_3F2_CHAIN(~0u)},
- {994, S_3F2_VALID(~0u)},
+ {31878, S_3F2_IB_SIZE(~0u)},
+ {31886, S_3F2_CHAIN(~0u)},
+ {31892, S_3F2_VALID(~0u)},
/* 9 */
- {1000, S_410_SRC_ADDR_LO(~0u)},
+ {963, S_410_SRC_ADDR_LO(~0u)},
/* 10 */
- {1012, S_411_CP_SYNC(~0u)},
- {1044, S_411_SRC_SEL(~0u), 4, 10},
- {1052, S_411_ENGINE(~0u), 2, 0},
- {1091, S_411_DSL_SEL(~0u), 4, 14},
- {1099, S_411_SRC_ADDR_HI(~0u)},
+ {31898, S_411_CP_SYNC(~0u)},
+ {31930, S_411_SRC_SEL(~0u), 4, 10},
+ {31938, S_411_ENGINE(~0u), 2, 0},
+ {31969, S_411_DSL_SEL(~0u), 4, 14},
+ {975, S_411_SRC_ADDR_HI(~0u)},
/* 15 */
- {1111, S_412_DST_ADDR_LO(~0u)},
+ {842, S_412_DST_ADDR_LO(~0u)},
/* 16 */
- {1123, S_413_DST_ADDR_HI(~0u)},
+ {854, S_413_DST_ADDR_HI(~0u)},
/* 17 */
- {1135, S_414_BYTE_COUNT_GFX6(~0u)},
- {1151, S_414_BYTE_COUNT_GFX9(~0u)},
- {1167, S_414_DISABLE_WR_CONFIRM_GFX6(~0u)},
- {1220, S_414_SRC_SWAP(~0u), 4, 18},
- {1229, S_414_DST_SWAP(~0u), 4, 18},
- {1245, S_414_SAS(~0u), 2, 22},
- {1249, S_414_DAS(~0u), 2, 22},
- {1276, S_414_SAIC(~0u), 2, 24},
- {1281, S_414_DAIC(~0u), 2, 24},
- {1286, S_414_RAW_WAIT(~0u)},
- {1295, S_414_DISABLE_WR_CONFIRM_GFX9(~0u)},
+ {31977, S_414_BYTE_COUNT_GFX6(~0u)},
+ {31993, S_414_BYTE_COUNT_GFX9(~0u)},
+ {32009, S_414_DISABLE_WR_CONFIRM_GFX6(~0u)},
+ {32062, S_414_SRC_SWAP(~0u), 4, 18},
+ {32071, S_414_DST_SWAP(~0u), 4, 18},
+ {32087, S_414_SAS(~0u), 2, 22},
+ {32091, S_414_DAS(~0u), 2, 22},
+ {32118, S_414_SAIC(~0u), 2, 24},
+ {32123, S_414_DAIC(~0u), 2, 24},
+ {32128, S_414_RAW_WAIT(~0u)},
+ {32137, S_414_DISABLE_WR_CONFIRM_GFX9(~0u)},
/* 28 */
- {1012, S_500_CP_SYNC(~0u)},
- {1044, S_500_SRC_SEL(~0u), 4, 10},
- {1091, S_500_DSL_SEL(~0u), 4, 14},
- {1052, S_500_ENGINE(~0u), 2, 0},
+ {31898, S_500_CP_SYNC(~0u)},
+ {31930, S_500_SRC_SEL(~0u), 4, 10},
+ {31969, S_500_DSL_SEL(~0u), 4, 14},
+ {31938, S_500_ENGINE(~0u), 2, 0},
/* 32 */
- {1319, S_000E4C_SDMA_RQ_PENDING(~0u)},
- {1335, S_000E4C_TST_RQ_PENDING(~0u)},
- {1350, S_000E4C_SDMA1_RQ_PENDING(~0u)},
- {1367, S_000E4C_VCE0_RQ_PENDING(~0u)},
- {1383, S_000E4C_VP8_BUSY(~0u)},
- {1392, S_000E4C_SDMA_BUSY(~0u)},
- {1402, S_000E4C_SDMA1_BUSY(~0u)},
- {1413, S_000E4C_VCE0_BUSY(~0u)},
- {1423, S_000E4C_XDMA_BUSY(~0u)},
- {1433, S_000E4C_CHUB_BUSY(~0u)},
- {1443, S_000E4C_SDMA2_BUSY(~0u)},
- {1454, S_000E4C_SDMA3_BUSY(~0u)},
- {1465, S_000E4C_SAMSCP_BUSY(~0u)},
- {1477, S_000E4C_ISP_BUSY(~0u)},
- {1486, S_000E4C_VCE1_BUSY(~0u)},
- {1496, S_000E4C_ODE_BUSY(~0u)},
- {1505, S_000E4C_SDMA2_RQ_PENDING(~0u)},
- {1522, S_000E4C_SDMA3_RQ_PENDING(~0u)},
- {1539, S_000E4C_SAMSCP_RQ_PENDING(~0u)},
- {1557, S_000E4C_ISP_RQ_PENDING(~0u)},
- {1572, S_000E4C_VCE1_RQ_PENDING(~0u)},
+ {32161, S_000E4C_SDMA_RQ_PENDING(~0u)},
+ {32177, S_000E4C_TST_RQ_PENDING(~0u)},
+ {32192, S_000E4C_SDMA1_RQ_PENDING(~0u)},
+ {32209, S_000E4C_VCE0_RQ_PENDING(~0u)},
+ {32225, S_000E4C_VP8_BUSY(~0u)},
+ {32234, S_000E4C_SDMA_BUSY(~0u)},
+ {32244, S_000E4C_SDMA1_BUSY(~0u)},
+ {32255, S_000E4C_VCE0_BUSY(~0u)},
+ {32265, S_000E4C_XDMA_BUSY(~0u)},
+ {32275, S_000E4C_CHUB_BUSY(~0u)},
+ {32285, S_000E4C_SDMA2_BUSY(~0u)},
+ {32296, S_000E4C_SDMA3_BUSY(~0u)},
+ {32307, S_000E4C_SAMSCP_BUSY(~0u)},
+ {32319, S_000E4C_ISP_BUSY(~0u)},
+ {32328, S_000E4C_VCE1_BUSY(~0u)},
+ {32338, S_000E4C_ODE_BUSY(~0u)},
+ {32347, S_000E4C_SDMA2_RQ_PENDING(~0u)},
+ {32364, S_000E4C_SDMA3_RQ_PENDING(~0u)},
+ {32381, S_000E4C_SAMSCP_RQ_PENDING(~0u)},
+ {32399, S_000E4C_ISP_RQ_PENDING(~0u)},
+ {32414, S_000E4C_VCE1_RQ_PENDING(~0u)},
/* 53 */
- {1588, S_000E50_UVD_RQ_PENDING(~0u)},
- {1603, S_000E50_SAMMSP_RQ_PENDING(~0u)},
- {1621, S_000E50_ACP_RQ_PENDING(~0u)},
- {1636, S_000E50_SMU_RQ_PENDING(~0u)},
- {1651, S_000E50_GRBM_RQ_PENDING(~0u)},
- {1667, S_000E50_HI_RQ_PENDING(~0u)},
- {1681, S_000E50_VMC_BUSY(~0u)},
- {1690, S_000E50_MCB_BUSY(~0u)},
- {1699, S_000E50_MCB_NON_DISPLAY_BUSY(~0u)},
- {1720, S_000E50_MCC_BUSY(~0u)},
- {1729, S_000E50_MCD_BUSY(~0u)},
- {1738, S_000E50_VMC1_BUSY(~0u)},
- {1748, S_000E50_SEM_BUSY(~0u)},
- {1757, S_000E50_ACP_BUSY(~0u)},
- {1766, S_000E50_IH_BUSY(~0u)},
- {1774, S_000E50_UVD_BUSY(~0u)},
- {1783, S_000E50_SAMMSP_BUSY(~0u)},
- {1795, S_000E50_GCATCL2_BUSY(~0u)},
- {1808, S_000E50_OSATCL2_BUSY(~0u)},
- {1821, S_000E50_BIF_BUSY(~0u)},
+ {32430, S_000E50_UVD_RQ_PENDING(~0u)},
+ {32445, S_000E50_SAMMSP_RQ_PENDING(~0u)},
+ {32463, S_000E50_ACP_RQ_PENDING(~0u)},
+ {32478, S_000E50_SMU_RQ_PENDING(~0u)},
+ {32493, S_000E50_GRBM_RQ_PENDING(~0u)},
+ {32509, S_000E50_HI_RQ_PENDING(~0u)},
+ {32523, S_000E50_VMC_BUSY(~0u)},
+ {32532, S_000E50_MCB_BUSY(~0u)},
+ {32541, S_000E50_MCB_NON_DISPLAY_BUSY(~0u)},
+ {32562, S_000E50_MCC_BUSY(~0u)},
+ {32571, S_000E50_MCD_BUSY(~0u)},
+ {32580, S_000E50_VMC1_BUSY(~0u)},
+ {32590, S_000E50_SEM_BUSY(~0u)},
+ {32599, S_000E50_ACP_BUSY(~0u)},
+ {32608, S_000E50_IH_BUSY(~0u)},
+ {32616, S_000E50_UVD_BUSY(~0u)},
+ {32625, S_000E50_SAMMSP_BUSY(~0u)},
+ {32637, S_000E50_GCATCL2_BUSY(~0u)},
+ {32650, S_000E50_OSATCL2_BUSY(~0u)},
+ {32663, S_000E50_BIF_BUSY(~0u)},
/* 73 */
- {1830, S_000E54_MCC0_BUSY(~0u)},
- {1840, S_000E54_MCC1_BUSY(~0u)},
- {1850, S_000E54_MCC2_BUSY(~0u)},
- {1860, S_000E54_MCC3_BUSY(~0u)},
- {1870, S_000E54_MCC4_BUSY(~0u)},
- {1880, S_000E54_MCC5_BUSY(~0u)},
- {1890, S_000E54_MCC6_BUSY(~0u)},
- {1900, S_000E54_MCC7_BUSY(~0u)},
- {1910, S_000E54_MCD0_BUSY(~0u)},
- {1920, S_000E54_MCD1_BUSY(~0u)},
- {1930, S_000E54_MCD2_BUSY(~0u)},
- {1940, S_000E54_MCD3_BUSY(~0u)},
- {1950, S_000E54_MCD4_BUSY(~0u)},
- {1960, S_000E54_MCD5_BUSY(~0u)},
- {1970, S_000E54_MCD6_BUSY(~0u)},
- {1980, S_000E54_MCD7_BUSY(~0u)},
+ {32672, S_000E54_MCC0_BUSY(~0u)},
+ {32682, S_000E54_MCC1_BUSY(~0u)},
+ {32692, S_000E54_MCC2_BUSY(~0u)},
+ {32702, S_000E54_MCC3_BUSY(~0u)},
+ {32712, S_000E54_MCC4_BUSY(~0u)},
+ {32722, S_000E54_MCC5_BUSY(~0u)},
+ {32732, S_000E54_MCC6_BUSY(~0u)},
+ {32742, S_000E54_MCC7_BUSY(~0u)},
+ {32752, S_000E54_MCD0_BUSY(~0u)},
+ {32762, S_000E54_MCD1_BUSY(~0u)},
+ {32772, S_000E54_MCD2_BUSY(~0u)},
+ {32782, S_000E54_MCD3_BUSY(~0u)},
+ {32792, S_000E54_MCD4_BUSY(~0u)},
+ {32802, S_000E54_MCD5_BUSY(~0u)},
+ {32812, S_000E54_MCD6_BUSY(~0u)},
+ {32822, S_000E54_MCD7_BUSY(~0u)},
/* 89 */
- {1990, S_00D034_IDLE(~0u)},
- {1995, S_00D034_REG_IDLE(~0u)},
- {2004, S_00D034_RB_EMPTY(~0u)},
- {2013, S_00D034_RB_FULL(~0u)},
- {2021, S_00D034_RB_CMD_IDLE(~0u)},
- {2033, S_00D034_RB_CMD_FULL(~0u)},
- {2045, S_00D034_IB_CMD_IDLE(~0u)},
- {2057, S_00D034_IB_CMD_FULL(~0u)},
- {2069, S_00D034_BLOCK_IDLE(~0u)},
- {2080, S_00D034_INSIDE_IB(~0u)},
- {2090, S_00D034_EX_IDLE(~0u)},
- {2098, S_00D034_EX_IDLE_POLL_TIMER_EXPIRE(~0u)},
- {2124, S_00D034_PACKET_READY(~0u)},
- {2137, S_00D034_MC_WR_IDLE(~0u)},
- {2148, S_00D034_SRBM_IDLE(~0u)},
- {2158, S_00D034_CONTEXT_EMPTY(~0u)},
- {2172, S_00D034_DELTA_RPTR_FULL(~0u)},
- {2188, S_00D034_RB_MC_RREQ_IDLE(~0u)},
- {2204, S_00D034_IB_MC_RREQ_IDLE(~0u)},
- {2220, S_00D034_MC_RD_IDLE(~0u)},
- {2231, S_00D034_DELTA_RPTR_EMPTY(~0u)},
- {2248, S_00D034_MC_RD_RET_STALL(~0u)},
- {2264, S_00D034_MC_RD_NO_POLL_IDLE(~0u)},
- {2283, S_00D034_PREV_CMD_IDLE(~0u)},
- {2297, S_00D034_SEM_IDLE(~0u)},
- {2306, S_00D034_SEM_REQ_STALL(~0u)},
- {2320, S_00D034_SEM_RESP_STATE(~0u)},
- {2335, S_00D034_INT_IDLE(~0u)},
- {2344, S_00D034_INT_REQ_STALL(~0u)},
+ {32832, S_00D034_IDLE(~0u)},
+ {32837, S_00D034_REG_IDLE(~0u)},
+ {32846, S_00D034_RB_EMPTY(~0u)},
+ {32855, S_00D034_RB_FULL(~0u)},
+ {32863, S_00D034_RB_CMD_IDLE(~0u)},
+ {32875, S_00D034_RB_CMD_FULL(~0u)},
+ {32887, S_00D034_IB_CMD_IDLE(~0u)},
+ {32899, S_00D034_IB_CMD_FULL(~0u)},
+ {32911, S_00D034_BLOCK_IDLE(~0u)},
+ {32922, S_00D034_INSIDE_IB(~0u)},
+ {32932, S_00D034_EX_IDLE(~0u)},
+ {32940, S_00D034_EX_IDLE_POLL_TIMER_EXPIRE(~0u)},
+ {32966, S_00D034_PACKET_READY(~0u)},
+ {32979, S_00D034_MC_WR_IDLE(~0u)},
+ {32990, S_00D034_SRBM_IDLE(~0u)},
+ {33000, S_00D034_CONTEXT_EMPTY(~0u)},
+ {33014, S_00D034_DELTA_RPTR_FULL(~0u)},
+ {33030, S_00D034_RB_MC_RREQ_IDLE(~0u)},
+ {33046, S_00D034_IB_MC_RREQ_IDLE(~0u)},
+ {33062, S_00D034_MC_RD_IDLE(~0u)},
+ {33073, S_00D034_DELTA_RPTR_EMPTY(~0u)},
+ {33090, S_00D034_MC_RD_RET_STALL(~0u)},
+ {33106, S_00D034_MC_RD_NO_POLL_IDLE(~0u)},
+ {33125, S_00D034_PREV_CMD_IDLE(~0u)},
+ {33139, S_00D034_SEM_IDLE(~0u)},
+ {33148, S_00D034_SEM_REQ_STALL(~0u)},
+ {33162, S_00D034_SEM_RESP_STATE(~0u)},
+ {33177, S_00D034_INT_IDLE(~0u)},
+ {33186, S_00D034_INT_REQ_STALL(~0u)},
/* 118 */
- {2358, S_008008_ME0PIPE1_CMDFIFO_AVAIL(~0u)},
- {2381, S_008008_ME0PIPE1_CF_RQ_PENDING(~0u)},
- {2404, S_008008_ME0PIPE1_PF_RQ_PENDING(~0u)},
- {2427, S_008008_ME1PIPE0_RQ_PENDING(~0u)},
- {2447, S_008008_ME1PIPE1_RQ_PENDING(~0u)},
- {2467, S_008008_ME1PIPE2_RQ_PENDING(~0u)},
- {2487, S_008008_ME1PIPE3_RQ_PENDING(~0u)},
- {2507, S_008008_ME2PIPE0_RQ_PENDING(~0u)},
- {2527, S_008008_ME2PIPE1_RQ_PENDING(~0u)},
- {2547, S_008008_ME2PIPE2_RQ_PENDING(~0u)},
- {2567, S_008008_ME2PIPE3_RQ_PENDING(~0u)},
- {2587, S_008008_RLC_RQ_PENDING(~0u)},
- {2602, S_008008_RLC_BUSY(~0u)},
- {2611, S_008008_TC_BUSY(~0u)},
- {2619, S_008008_TCC_CC_RESIDENT(~0u)},
- {2635, S_008008_CPF_BUSY(~0u)},
- {2644, S_008008_CPC_BUSY(~0u)},
- {2653, S_008008_CPG_BUSY(~0u)},
- {2662, S_008008_UTCL2_BUSY(~0u)},
- {2673, S_008008_EA_BUSY(~0u)},
- {2681, S_008008_RMI_BUSY(~0u)},
- {2690, S_008008_UTCL2_RQ_PENDING(~0u)},
- {2707, S_008008_CPF_RQ_PENDING(~0u)},
- {2722, S_008008_EA_LINK_BUSY(~0u)},
- {2735, S_008008_CPAXI_BUSY(~0u)},
- /* 143 */
- {2746, S_008010_ME0PIPE0_CMDFIFO_AVAIL(~0u)},
- {2769, S_008010_SRBM_RQ_PENDING(~0u)},
- {2785, S_008010_ME0PIPE0_CF_RQ_PENDING(~0u)},
- {2808, S_008010_ME0PIPE0_PF_RQ_PENDING(~0u)},
- {2831, S_008010_GDS_DMA_RQ_PENDING(~0u)},
- {2850, S_008010_DB_CLEAN(~0u)},
- {2859, S_008010_CB_CLEAN(~0u)},
- {2868, S_008010_TA_BUSY(~0u)},
- {2876, S_008010_GDS_BUSY(~0u)},
- {2885, S_008010_WD_BUSY_NO_DMA(~0u)},
- {2900, S_008010_VGT_BUSY(~0u)},
- {2909, S_008010_IA_BUSY_NO_DMA(~0u)},
- {2924, S_008010_IA_BUSY(~0u)},
- {2932, S_008010_SX_BUSY(~0u)},
- {2940, S_008010_WD_BUSY(~0u)},
- {2948, S_008010_SPI_BUSY(~0u)},
- {2957, S_008010_BCI_BUSY(~0u)},
- {2966, S_008010_SC_BUSY(~0u)},
- {2974, S_008010_PA_BUSY(~0u)},
- {2982, S_008010_DB_BUSY(~0u)},
- {2990, S_008010_CP_COHERENCY_BUSY(~0u)},
- {1469, S_008010_CP_BUSY(~0u)},
- {1691, S_008010_CB_BUSY(~0u)},
- {3008, S_008010_GUI_ACTIVE(~0u)},
- {3019, S_008010_RSMU_RQ_PENDING(~0u)},
- /* 168 */
- {3035, S_0084FC_OFFSET_UPDATE_DONE(~0u)},
- /* 169 */
- {3054, S_0085F0_DEST_BASE_0_ENA(~0u)},
- {3070, S_0085F0_DEST_BASE_1_ENA(~0u)},
- {3086, S_0085F0_CB0_DEST_BASE_ENA(~0u)},
- {3104, S_0085F0_CB1_DEST_BASE_ENA(~0u)},
- {3122, S_0085F0_CB2_DEST_BASE_ENA(~0u)},
- {3140, S_0085F0_CB3_DEST_BASE_ENA(~0u)},
- {3158, S_0085F0_CB4_DEST_BASE_ENA(~0u)},
- {3176, S_0085F0_CB5_DEST_BASE_ENA(~0u)},
- {3194, S_0085F0_CB6_DEST_BASE_ENA(~0u)},
- {3212, S_0085F0_CB7_DEST_BASE_ENA(~0u)},
- {3230, S_0085F0_DB_DEST_BASE_ENA(~0u)},
- {3247, S_0085F0_DEST_BASE_2_ENA(~0u)},
- {3263, S_0085F0_DEST_BASE_3_ENA(~0u)},
- {3279, S_0085F0_TCL1_ACTION_ENA(~0u)},
- {3295, S_0085F0_TC_ACTION_ENA(~0u)},
- {3309, S_0085F0_CB_ACTION_ENA(~0u)},
- {3323, S_0085F0_DB_ACTION_ENA(~0u)},
- {3337, S_0085F0_SH_KCACHE_ACTION_ENA(~0u)},
- {3358, S_0085F0_SH_ICACHE_ACTION_ENA(~0u)},
- /* 188 */
- {2850, S_008014_DB_CLEAN(~0u)},
- {2859, S_008014_CB_CLEAN(~0u)},
- {2957, S_008014_BCI_BUSY(~0u)},
- {2900, S_008014_VGT_BUSY(~0u)},
- {2974, S_008014_PA_BUSY(~0u)},
- {2868, S_008014_TA_BUSY(~0u)},
- {2932, S_008014_SX_BUSY(~0u)},
- {2948, S_008014_SPI_BUSY(~0u)},
- {2966, S_008014_SC_BUSY(~0u)},
- {2982, S_008014_DB_BUSY(~0u)},
- {1691, S_008014_CB_BUSY(~0u)},
- {2681, S_008014_RMI_BUSY(~0u)},
- /* 200 */
- {2850, S_008018_DB_CLEAN(~0u)},
- {2859, S_008018_CB_CLEAN(~0u)},
- {2957, S_008018_BCI_BUSY(~0u)},
- {2900, S_008018_VGT_BUSY(~0u)},
- {2974, S_008018_PA_BUSY(~0u)},
- {2868, S_008018_TA_BUSY(~0u)},
- {2932, S_008018_SX_BUSY(~0u)},
- {2948, S_008018_SPI_BUSY(~0u)},
- {2966, S_008018_SC_BUSY(~0u)},
- {2982, S_008018_DB_BUSY(~0u)},
- {1691, S_008018_CB_BUSY(~0u)},
- {2681, S_008018_RMI_BUSY(~0u)},
- /* 212 */
- {2850, S_008038_DB_CLEAN(~0u)},
- {2859, S_008038_CB_CLEAN(~0u)},
- {2957, S_008038_BCI_BUSY(~0u)},
- {2900, S_008038_VGT_BUSY(~0u)},
- {2974, S_008038_PA_BUSY(~0u)},
- {2868, S_008038_TA_BUSY(~0u)},
- {2932, S_008038_SX_BUSY(~0u)},
- {2948, S_008038_SPI_BUSY(~0u)},
- {2966, S_008038_SC_BUSY(~0u)},
- {2982, S_008038_DB_BUSY(~0u)},
- {1691, S_008038_CB_BUSY(~0u)},
- {2681, S_008038_RMI_BUSY(~0u)},
+ {33200, S_008008_ME0PIPE1_CMDFIFO_AVAIL(~0u)},
+ {33223, S_008008_ME0PIPE1_CF_RQ_PENDING(~0u)},
+ {33246, S_008008_ME0PIPE1_PF_RQ_PENDING(~0u)},
+ {33269, S_008008_ME1PIPE0_RQ_PENDING(~0u)},
+ {33289, S_008008_ME1PIPE1_RQ_PENDING(~0u)},
+ {33309, S_008008_ME1PIPE2_RQ_PENDING(~0u)},
+ {33329, S_008008_ME1PIPE3_RQ_PENDING(~0u)},
+ {33349, S_008008_ME2PIPE0_RQ_PENDING(~0u)},
+ {33369, S_008008_ME2PIPE1_RQ_PENDING(~0u)},
+ {33389, S_008008_ME2PIPE2_RQ_PENDING(~0u)},
+ {33409, S_008008_ME2PIPE3_RQ_PENDING(~0u)},
+ {33429, S_008008_RLC_RQ_PENDING(~0u)},
+ {33444, S_008008_RLC_BUSY(~0u)},
+ {33453, S_008008_TC_BUSY(~0u)},
+ {33461, S_008008_TCC_CC_RESIDENT(~0u)},
+ {33477, S_008008_CPF_BUSY(~0u)},
+ {33486, S_008008_CPC_BUSY(~0u)},
+ {33495, S_008008_CPG_BUSY(~0u)},
+ /* 136 */
+ {33504, S_008010_ME0PIPE0_CMDFIFO_AVAIL(~0u)},
+ {33527, S_008010_SRBM_RQ_PENDING(~0u)},
+ {33543, S_008010_ME0PIPE0_CF_RQ_PENDING(~0u)},
+ {33566, S_008010_ME0PIPE0_PF_RQ_PENDING(~0u)},
+ {33589, S_008010_GDS_DMA_RQ_PENDING(~0u)},
+ {33608, S_008010_DB_CLEAN(~0u)},
+ {33617, S_008010_CB_CLEAN(~0u)},
+ {33626, S_008010_TA_BUSY(~0u)},
+ {33634, S_008010_GDS_BUSY(~0u)},
+ {33643, S_008010_WD_BUSY_NO_DMA(~0u)},
+ {33658, S_008010_VGT_BUSY(~0u)},
+ {33667, S_008010_IA_BUSY_NO_DMA(~0u)},
+ {33682, S_008010_IA_BUSY(~0u)},
+ {33690, S_008010_SX_BUSY(~0u)},
+ {33698, S_008010_WD_BUSY(~0u)},
+ {33706, S_008010_SPI_BUSY(~0u)},
+ {33715, S_008010_BCI_BUSY(~0u)},
+ {33724, S_008010_SC_BUSY(~0u)},
+ {33732, S_008010_PA_BUSY(~0u)},
+ {33740, S_008010_DB_BUSY(~0u)},
+ {33748, S_008010_CP_COHERENCY_BUSY(~0u)},
+ {32311, S_008010_CP_BUSY(~0u)},
+ {32533, S_008010_CB_BUSY(~0u)},
+ {33766, S_008010_GUI_ACTIVE(~0u)},
+ /* 160 */
+ {33777, S_0084FC_OFFSET_UPDATE_DONE(~0u)},
+ /* 161 */
+ {33796, S_0085F0_DEST_BASE_0_ENA(~0u)},
+ {33812, S_0085F0_DEST_BASE_1_ENA(~0u)},
+ {33828, S_0085F0_CB0_DEST_BASE_ENA(~0u)},
+ {33846, S_0085F0_CB1_DEST_BASE_ENA(~0u)},
+ {33864, S_0085F0_CB2_DEST_BASE_ENA(~0u)},
+ {33882, S_0085F0_CB3_DEST_BASE_ENA(~0u)},
+ {33900, S_0085F0_CB4_DEST_BASE_ENA(~0u)},
+ {33918, S_0085F0_CB5_DEST_BASE_ENA(~0u)},
+ {33936, S_0085F0_CB6_DEST_BASE_ENA(~0u)},
+ {33954, S_0085F0_CB7_DEST_BASE_ENA(~0u)},
+ {33972, S_0085F0_DB_DEST_BASE_ENA(~0u)},
+ {33989, S_0085F0_DEST_BASE_2_ENA(~0u)},
+ {34005, S_0085F0_DEST_BASE_3_ENA(~0u)},
+ {34021, S_0085F0_TCL1_ACTION_ENA(~0u)},
+ {34037, S_0085F0_TC_ACTION_ENA(~0u)},
+ {34051, S_0085F0_CB_ACTION_ENA(~0u)},
+ {34065, S_0085F0_DB_ACTION_ENA(~0u)},
+ {34079, S_0085F0_SH_KCACHE_ACTION_ENA(~0u)},
+ {34100, S_0085F0_SH_ICACHE_ACTION_ENA(~0u)},
+ /* 180 */
+ {33608, S_008014_DB_CLEAN(~0u)},
+ {33617, S_008014_CB_CLEAN(~0u)},
+ {33715, S_008014_BCI_BUSY(~0u)},
+ {33658, S_008014_VGT_BUSY(~0u)},
+ {33732, S_008014_PA_BUSY(~0u)},
+ {33626, S_008014_TA_BUSY(~0u)},
+ {33690, S_008014_SX_BUSY(~0u)},
+ {33706, S_008014_SPI_BUSY(~0u)},
+ {33724, S_008014_SC_BUSY(~0u)},
+ {33740, S_008014_DB_BUSY(~0u)},
+ {32533, S_008014_CB_BUSY(~0u)},
+ /* 191 */
+ {33608, S_008018_DB_CLEAN(~0u)},
+ {33617, S_008018_CB_CLEAN(~0u)},
+ {33715, S_008018_BCI_BUSY(~0u)},
+ {33658, S_008018_VGT_BUSY(~0u)},
+ {33732, S_008018_PA_BUSY(~0u)},
+ {33626, S_008018_TA_BUSY(~0u)},
+ {33690, S_008018_SX_BUSY(~0u)},
+ {33706, S_008018_SPI_BUSY(~0u)},
+ {33724, S_008018_SC_BUSY(~0u)},
+ {33740, S_008018_DB_BUSY(~0u)},
+ {32533, S_008018_CB_BUSY(~0u)},
+ /* 202 */
+ {33608, S_008038_DB_CLEAN(~0u)},
+ {33617, S_008038_CB_CLEAN(~0u)},
+ {33715, S_008038_BCI_BUSY(~0u)},
+ {33658, S_008038_VGT_BUSY(~0u)},
+ {33732, S_008038_PA_BUSY(~0u)},
+ {33626, S_008038_TA_BUSY(~0u)},
+ {33690, S_008038_SX_BUSY(~0u)},
+ {33706, S_008038_SPI_BUSY(~0u)},
+ {33724, S_008038_SC_BUSY(~0u)},
+ {33740, S_008038_DB_BUSY(~0u)},
+ {32533, S_008038_CB_BUSY(~0u)},
+ /* 213 */
+ {33608, S_00803C_DB_CLEAN(~0u)},
+ {33617, S_00803C_CB_CLEAN(~0u)},
+ {33715, S_00803C_BCI_BUSY(~0u)},
+ {33658, S_00803C_VGT_BUSY(~0u)},
+ {33732, S_00803C_PA_BUSY(~0u)},
+ {33626, S_00803C_TA_BUSY(~0u)},
+ {33690, S_00803C_SX_BUSY(~0u)},
+ {33706, S_00803C_SPI_BUSY(~0u)},
+ {33724, S_00803C_SC_BUSY(~0u)},
+ {33740, S_00803C_DB_BUSY(~0u)},
+ {32533, S_00803C_CB_BUSY(~0u)},
/* 224 */
- {2850, S_00803C_DB_CLEAN(~0u)},
- {2859, S_00803C_CB_CLEAN(~0u)},
- {2957, S_00803C_BCI_BUSY(~0u)},
- {2900, S_00803C_VGT_BUSY(~0u)},
- {2974, S_00803C_PA_BUSY(~0u)},
- {2868, S_00803C_TA_BUSY(~0u)},
- {2932, S_00803C_SX_BUSY(~0u)},
- {2948, S_00803C_SPI_BUSY(~0u)},
- {2966, S_00803C_SC_BUSY(~0u)},
- {2982, S_00803C_DB_BUSY(~0u)},
- {1691, S_00803C_CB_BUSY(~0u)},
- {2681, S_00803C_RMI_BUSY(~0u)},
- /* 236 */
- {3035, S_0300FC_OFFSET_UPDATE_DONE(~0u)},
- /* 237 */
- {3379, S_0301E4_COHER_BASE_HI_256B(~0u)},
- /* 238 */
- {3398, S_0301EC_START_DELAY_COUNT(~0u)},
- /* 239 */
- {3054, S_0301F0_DEST_BASE_0_ENA(~0u)},
- {3070, S_0301F0_DEST_BASE_1_ENA(~0u)},
- {3416, S_0301F0_TC_SD_ACTION_ENA(~0u)},
- {3433, S_0301F0_TC_NC_ACTION_ENA(~0u)},
- {3086, S_0301F0_CB0_DEST_BASE_ENA(~0u)},
- {3104, S_0301F0_CB1_DEST_BASE_ENA(~0u)},
- {3122, S_0301F0_CB2_DEST_BASE_ENA(~0u)},
- {3140, S_0301F0_CB3_DEST_BASE_ENA(~0u)},
- {3158, S_0301F0_CB4_DEST_BASE_ENA(~0u)},
- {3176, S_0301F0_CB5_DEST_BASE_ENA(~0u)},
- {3194, S_0301F0_CB6_DEST_BASE_ENA(~0u)},
- {3212, S_0301F0_CB7_DEST_BASE_ENA(~0u)},
- {3230, S_0301F0_DB_DEST_BASE_ENA(~0u)},
- {3450, S_0301F0_TCL1_VOL_ACTION_ENA(~0u)},
- {3470, S_0301F0_TC_VOL_ACTION_ENA(~0u)},
- {3488, S_0301F0_TC_WB_ACTION_ENA(~0u)},
- {3247, S_0301F0_DEST_BASE_2_ENA(~0u)},
- {3263, S_0301F0_DEST_BASE_3_ENA(~0u)},
- {3279, S_0301F0_TCL1_ACTION_ENA(~0u)},
- {3295, S_0301F0_TC_ACTION_ENA(~0u)},
- {3309, S_0301F0_CB_ACTION_ENA(~0u)},
- {3323, S_0301F0_DB_ACTION_ENA(~0u)},
- {3337, S_0301F0_SH_KCACHE_ACTION_ENA(~0u)},
- {3505, S_0301F0_SH_KCACHE_VOL_ACTION_ENA(~0u)},
- {3358, S_0301F0_SH_ICACHE_ACTION_ENA(~0u)},
- {3530, S_0301F0_SH_KCACHE_WB_ACTION_ENA(~0u)},
- {3554, S_0301F0_SH_SD_ACTION_ENA(~0u)},
- {3571, S_0301F0_TC_WC_ACTION_ENA(~0u)},
- {3588, S_0301F0_TC_INV_METADATA_ACTION_ENA(~0u)},
- /* 268 */
- {3615, S_0301FC_MATCHING_GFX_CNTX(~0u)},
- {3633, S_0301FC_MEID(~0u)},
- {3638, S_0301FC_PHASE1_STATUS(~0u)},
- {3645, S_0301FC_STATUS(~0u)},
- /* 272 */
- {3652, S_008210_MEC1_BUSY(~0u)},
- {3662, S_008210_MEC2_BUSY(~0u)},
- {3672, S_008210_DC0_BUSY(~0u)},
- {3681, S_008210_DC1_BUSY(~0u)},
- {3690, S_008210_RCIU1_BUSY(~0u)},
- {3701, S_008210_RCIU2_BUSY(~0u)},
- {3712, S_008210_ROQ1_BUSY(~0u)},
- {3722, S_008210_ROQ2_BUSY(~0u)},
- {3732, S_008210_TCIU_BUSY(~0u)},
- {3742, S_008210_SCRATCH_RAM_BUSY(~0u)},
- {3759, S_008210_QU_BUSY(~0u)},
- {3767, S_008210_ATCL2IU_BUSY(~0u)},
- {3780, S_008210_CPG_CPC_BUSY(~0u)},
- {3793, S_008210_CPF_CPC_BUSY(~0u)},
- {2644, S_008210_CPC_BUSY(~0u)},
- {3806, S_008210_UTCL2IU_BUSY(~0u)},
- {3819, S_008210_SAVE_RESTORE_BUSY(~0u)},
- /* 289 */
- {3837, S_008214_MEC1_LOAD_BUSY(~0u)},
- {3852, S_008214_MEC1_SEMAPOHRE_BUSY(~0u)},
- {3872, S_008214_MEC1_MUTEX_BUSY(~0u)},
- {3888, S_008214_MEC1_MESSAGE_BUSY(~0u)},
- {3906, S_008214_MEC1_EOP_QUEUE_BUSY(~0u)},
- {3926, S_008214_MEC1_IQ_QUEUE_BUSY(~0u)},
- {3945, S_008214_MEC1_IB_QUEUE_BUSY(~0u)},
- {3964, S_008214_MEC1_TC_BUSY(~0u)},
- {3977, S_008214_MEC1_DMA_BUSY(~0u)},
- {3991, S_008214_MEC1_PARTIAL_FLUSH_BUSY(~0u)},
- {4015, S_008214_MEC1_PIPE0_BUSY(~0u)},
- {4031, S_008214_MEC1_PIPE1_BUSY(~0u)},
- {4047, S_008214_MEC1_PIPE2_BUSY(~0u)},
- {4063, S_008214_MEC1_PIPE3_BUSY(~0u)},
- {4079, S_008214_MEC2_LOAD_BUSY(~0u)},
- {4094, S_008214_MEC2_SEMAPOHRE_BUSY(~0u)},
- {4114, S_008214_MEC2_MUTEX_BUSY(~0u)},
- {4130, S_008214_MEC2_MESSAGE_BUSY(~0u)},
- {4148, S_008214_MEC2_EOP_QUEUE_BUSY(~0u)},
- {4168, S_008214_MEC2_IQ_QUEUE_BUSY(~0u)},
- {4187, S_008214_MEC2_IB_QUEUE_BUSY(~0u)},
- {4206, S_008214_MEC2_TC_BUSY(~0u)},
- {4219, S_008214_MEC2_DMA_BUSY(~0u)},
- {4233, S_008214_MEC2_PARTIAL_FLUSH_BUSY(~0u)},
- {4257, S_008214_MEC2_PIPE0_BUSY(~0u)},
- {4273, S_008214_MEC2_PIPE1_BUSY(~0u)},
- {4289, S_008214_MEC2_PIPE2_BUSY(~0u)},
- {4305, S_008214_MEC2_PIPE3_BUSY(~0u)},
- /* 317 */
- {4321, S_008218_RCIU_TX_FREE_STALL(~0u)},
- {4340, S_008218_RCIU_PRIV_VIOLATION(~0u)},
- {4360, S_008218_TCIU_TX_FREE_STALL(~0u)},
- {4379, S_008218_MEC1_DECODING_PACKET(~0u)},
- {4400, S_008218_MEC1_WAIT_ON_RCIU(~0u)},
- {4418, S_008218_MEC1_WAIT_ON_RCIU_READ(~0u)},
- {4441, S_008218_MEC1_WAIT_ON_ROQ_DATA(~0u)},
- {4463, S_008218_MEC2_DECODING_PACKET(~0u)},
- {4484, S_008218_MEC2_WAIT_ON_RCIU(~0u)},
- {4502, S_008218_MEC2_WAIT_ON_RCIU_READ(~0u)},
- {4525, S_008218_MEC2_WAIT_ON_ROQ_DATA(~0u)},
- {4547, S_008218_ATCL2IU_WAITING_ON_FREE(~0u)},
- {4571, S_008218_ATCL2IU_WAITING_ON_TAGS(~0u)},
- {4595, S_008218_ATCL1_WAITING_ON_TRANS(~0u)},
- {4618, S_008218_UTCL2IU_WAITING_ON_FREE(~0u)},
- {4642, S_008218_UTCL2IU_WAITING_ON_TAGS(~0u)},
- {4666, S_008218_UTCL1_WAITING_ON_TRANS(~0u)},
- /* 334 */
- {4689, S_00821C_POST_WPTR_GFX_BUSY(~0u)},
- {4708, S_00821C_CSF_BUSY(~0u)},
- {4717, S_00821C_ROQ_ALIGN_BUSY(~0u)},
- {4732, S_00821C_ROQ_RING_BUSY(~0u)},
- {4746, S_00821C_ROQ_INDIRECT1_BUSY(~0u)},
- {4765, S_00821C_ROQ_INDIRECT2_BUSY(~0u)},
- {4784, S_00821C_ROQ_STATE_BUSY(~0u)},
- {4799, S_00821C_ROQ_CE_RING_BUSY(~0u)},
- {4816, S_00821C_ROQ_CE_INDIRECT1_BUSY(~0u)},
- {4838, S_00821C_ROQ_CE_INDIRECT2_BUSY(~0u)},
- {4860, S_00821C_SEMAPHORE_BUSY(~0u)},
- {4875, S_00821C_INTERRUPT_BUSY(~0u)},
- {3732, S_00821C_TCIU_BUSY(~0u)},
- {4890, S_00821C_HQD_BUSY(~0u)},
- {4899, S_00821C_PRT_BUSY(~0u)},
- {3767, S_00821C_ATCL2IU_BUSY(~0u)},
- {4908, S_00821C_CPF_GFX_BUSY(~0u)},
- {4921, S_00821C_CPF_CMP_BUSY(~0u)},
- {4934, S_00821C_GRBM_CPF_STAT_BUSY(~0u)},
- {4953, S_00821C_CPC_CPF_BUSY(~0u)},
- {2635, S_00821C_CPF_BUSY(~0u)},
- {3806, S_00821C_UTCL2IU_BUSY(~0u)},
- /* 356 */
- {4966, S_008220_REG_BUS_FIFO_BUSY(~0u)},
- {4984, S_008220_CSF_RING_BUSY(~0u)},
- {4998, S_008220_CSF_INDIRECT1_BUSY(~0u)},
- {5017, S_008220_CSF_INDIRECT2_BUSY(~0u)},
- {5036, S_008220_CSF_STATE_BUSY(~0u)},
- {5051, S_008220_CSF_CE_INDR1_BUSY(~0u)},
- {5069, S_008220_CSF_CE_INDR2_BUSY(~0u)},
- {5087, S_008220_CSF_ARBITER_BUSY(~0u)},
- {5104, S_008220_CSF_INPUT_BUSY(~0u)},
- {5119, S_008220_OUTSTANDING_READ_TAGS(~0u)},
- {5141, S_008220_HPD_PROCESSING_EOP_BUSY(~0u)},
- {5165, S_008220_HQD_DISPATCH_BUSY(~0u)},
- {5183, S_008220_HQD_IQ_TIMER_BUSY(~0u)},
- {5201, S_008220_HQD_DMA_OFFLOAD_BUSY(~0u)},
- {5222, S_008220_HQD_WAIT_SEMAPHORE_BUSY(~0u)},
- {5246, S_008220_HQD_SIGNAL_SEMAPHORE_BUSY(~0u)},
- {5272, S_008220_HQD_MESSAGE_BUSY(~0u)},
- {5289, S_008220_HQD_PQ_FETCHER_BUSY(~0u)},
- {5309, S_008220_HQD_IB_FETCHER_BUSY(~0u)},
- {5329, S_008220_HQD_IQ_FETCHER_BUSY(~0u)},
- {5349, S_008220_HQD_EOP_FETCHER_BUSY(~0u)},
- {5370, S_008220_HQD_CONSUMED_RPTR_BUSY(~0u)},
- {5393, S_008220_HQD_FETCHER_ARB_BUSY(~0u)},
- {5414, S_008220_HQD_ROQ_ALIGN_BUSY(~0u)},
- {5433, S_008220_HQD_ROQ_EOP_BUSY(~0u)},
- {5450, S_008220_HQD_ROQ_IQ_BUSY(~0u)},
- {5466, S_008220_HQD_ROQ_PQ_BUSY(~0u)},
- {5482, S_008220_HQD_ROQ_IB_BUSY(~0u)},
- {5498, S_008220_HQD_WPTR_POLL_BUSY(~0u)},
- {5517, S_008220_HQD_PQ_BUSY(~0u)},
- {5529, S_008220_HQD_IB_BUSY(~0u)},
- /* 387 */
- {5541, S_008224_RING_FETCHING_DATA(~0u)},
- {5560, S_008224_INDR1_FETCHING_DATA(~0u)},
- {5580, S_008224_INDR2_FETCHING_DATA(~0u)},
- {5600, S_008224_STATE_FETCHING_DATA(~0u)},
- {5620, S_008224_TCIU_WAITING_ON_FREE(~0u)},
- {5641, S_008224_TCIU_WAITING_ON_TAGS(~0u)},
- {4547, S_008224_ATCL2IU_WAITING_ON_FREE(~0u)},
- {4571, S_008224_ATCL2IU_WAITING_ON_TAGS(~0u)},
- {4595, S_008224_ATCL1_WAITING_ON_TRANS(~0u)},
- {4618, S_008224_UTCL2IU_WAITING_ON_FREE(~0u)},
- {4642, S_008224_UTCL2IU_WAITING_ON_TAGS(~0u)},
- {5662, S_008224_GFX_UTCL1_WAITING_ON_TRANS(~0u)},
- {5689, S_008224_CMP_UTCL1_WAITING_ON_TRANS(~0u)},
- {5716, S_008224_RCIU_WAITING_ON_FREE(~0u)},
- /* 401 */
- {5737, S_030230_COHER_SIZE_HI_256B(~0u)},
- /* 402 */
- {5756, S_0088B0_PRIM_COUNT(~0u)},
- /* 403 */
- {5767, S_0088C4_VS_NO_EXTRA_BUFFER(~0u)},
- {5786, S_0088C4_STREAMOUT_FULL_FLUSH(~0u)},
- {5807, S_0088C4_ES_LIMIT(~0u)},
- /* 406 */
- {5816, S_0088D4_VERT_REUSE(~0u)},
- /* 407 */
- {6343, S_008958_PRIM_TYPE(~0u), 29, 26},
- /* 408 */
+ {33777, S_0300FC_OFFSET_UPDATE_DONE(~0u)},
+ /* 225 */
+ {34121, S_0301E4_COHER_BASE_HI_256B(~0u)},
+ /* 226 */
+ {34140, S_0301EC_START_DELAY_COUNT(~0u)},
+ /* 227 */
+ {33796, S_0301F0_DEST_BASE_0_ENA(~0u)},
+ {33812, S_0301F0_DEST_BASE_1_ENA(~0u)},
+ {34158, S_0301F0_TC_SD_ACTION_ENA(~0u)},
+ {34175, S_0301F0_TC_NC_ACTION_ENA(~0u)},
+ {33828, S_0301F0_CB0_DEST_BASE_ENA(~0u)},
+ {33846, S_0301F0_CB1_DEST_BASE_ENA(~0u)},
+ {33864, S_0301F0_CB2_DEST_BASE_ENA(~0u)},
+ {33882, S_0301F0_CB3_DEST_BASE_ENA(~0u)},
+ {33900, S_0301F0_CB4_DEST_BASE_ENA(~0u)},
+ {33918, S_0301F0_CB5_DEST_BASE_ENA(~0u)},
+ {33936, S_0301F0_CB6_DEST_BASE_ENA(~0u)},
+ {33954, S_0301F0_CB7_DEST_BASE_ENA(~0u)},
+ {33972, S_0301F0_DB_DEST_BASE_ENA(~0u)},
+ {34192, S_0301F0_TCL1_VOL_ACTION_ENA(~0u)},
+ {34212, S_0301F0_TC_VOL_ACTION_ENA(~0u)},
+ {34230, S_0301F0_TC_WB_ACTION_ENA(~0u)},
+ {33989, S_0301F0_DEST_BASE_2_ENA(~0u)},
+ {34005, S_0301F0_DEST_BASE_3_ENA(~0u)},
+ {34021, S_0301F0_TCL1_ACTION_ENA(~0u)},
+ {34037, S_0301F0_TC_ACTION_ENA(~0u)},
+ {34051, S_0301F0_CB_ACTION_ENA(~0u)},
+ {34065, S_0301F0_DB_ACTION_ENA(~0u)},
+ {34079, S_0301F0_SH_KCACHE_ACTION_ENA(~0u)},
+ {34247, S_0301F0_SH_KCACHE_VOL_ACTION_ENA(~0u)},
+ {34100, S_0301F0_SH_ICACHE_ACTION_ENA(~0u)},
+ {34272, S_0301F0_SH_KCACHE_WB_ACTION_ENA(~0u)},
+ {34296, S_0301F0_SH_SD_ACTION_ENA(~0u)},
+ /* 254 */
+ {34313, S_0301FC_MATCHING_GFX_CNTX(~0u)},
+ {34331, S_0301FC_MEID(~0u)},
+ {34336, S_0301FC_PHASE1_STATUS(~0u)},
+ {1005, S_0301FC_STATUS(~0u)},
+ /* 258 */
+ {34350, S_008210_MEC1_BUSY(~0u)},
+ {34360, S_008210_MEC2_BUSY(~0u)},
+ {34370, S_008210_DC0_BUSY(~0u)},
+ {34379, S_008210_DC1_BUSY(~0u)},
+ {34388, S_008210_RCIU1_BUSY(~0u)},
+ {34399, S_008210_RCIU2_BUSY(~0u)},
+ {34410, S_008210_ROQ1_BUSY(~0u)},
+ {34420, S_008210_ROQ2_BUSY(~0u)},
+ {34430, S_008210_TCIU_BUSY(~0u)},
+ {34440, S_008210_SCRATCH_RAM_BUSY(~0u)},
+ {34457, S_008210_QU_BUSY(~0u)},
+ {34465, S_008210_ATCL2IU_BUSY(~0u)},
+ {34478, S_008210_CPG_CPC_BUSY(~0u)},
+ {34491, S_008210_CPF_CPC_BUSY(~0u)},
+ {33486, S_008210_CPC_BUSY(~0u)},
+ /* 273 */
+ {34504, S_008214_MEC1_LOAD_BUSY(~0u)},
+ {34519, S_008214_MEC1_SEMAPOHRE_BUSY(~0u)},
+ {34539, S_008214_MEC1_MUTEX_BUSY(~0u)},
+ {34555, S_008214_MEC1_MESSAGE_BUSY(~0u)},
+ {34573, S_008214_MEC1_EOP_QUEUE_BUSY(~0u)},
+ {34593, S_008214_MEC1_IQ_QUEUE_BUSY(~0u)},
+ {34612, S_008214_MEC1_IB_QUEUE_BUSY(~0u)},
+ {34631, S_008214_MEC1_TC_BUSY(~0u)},
+ {34644, S_008214_MEC1_DMA_BUSY(~0u)},
+ {34658, S_008214_MEC1_PARTIAL_FLUSH_BUSY(~0u)},
+ {34682, S_008214_MEC1_PIPE0_BUSY(~0u)},
+ {34698, S_008214_MEC1_PIPE1_BUSY(~0u)},
+ {34714, S_008214_MEC1_PIPE2_BUSY(~0u)},
+ {34730, S_008214_MEC1_PIPE3_BUSY(~0u)},
+ {34746, S_008214_MEC2_LOAD_BUSY(~0u)},
+ {34761, S_008214_MEC2_SEMAPOHRE_BUSY(~0u)},
+ {34781, S_008214_MEC2_MUTEX_BUSY(~0u)},
+ {34797, S_008214_MEC2_MESSAGE_BUSY(~0u)},
+ {34815, S_008214_MEC2_EOP_QUEUE_BUSY(~0u)},
+ {34835, S_008214_MEC2_IQ_QUEUE_BUSY(~0u)},
+ {34854, S_008214_MEC2_IB_QUEUE_BUSY(~0u)},
+ {34873, S_008214_MEC2_TC_BUSY(~0u)},
+ {34886, S_008214_MEC2_DMA_BUSY(~0u)},
+ {34900, S_008214_MEC2_PARTIAL_FLUSH_BUSY(~0u)},
+ {34924, S_008214_MEC2_PIPE0_BUSY(~0u)},
+ {34940, S_008214_MEC2_PIPE1_BUSY(~0u)},
+ {34956, S_008214_MEC2_PIPE2_BUSY(~0u)},
+ {34972, S_008214_MEC2_PIPE3_BUSY(~0u)},
+ /* 301 */
+ {34988, S_008218_RCIU_TX_FREE_STALL(~0u)},
+ {35007, S_008218_RCIU_PRIV_VIOLATION(~0u)},
+ {35027, S_008218_TCIU_TX_FREE_STALL(~0u)},
+ {35046, S_008218_MEC1_DECODING_PACKET(~0u)},
+ {35067, S_008218_MEC1_WAIT_ON_RCIU(~0u)},
+ {35085, S_008218_MEC1_WAIT_ON_RCIU_READ(~0u)},
+ {35108, S_008218_MEC1_WAIT_ON_ROQ_DATA(~0u)},
+ {35130, S_008218_MEC2_DECODING_PACKET(~0u)},
+ {35151, S_008218_MEC2_WAIT_ON_RCIU(~0u)},
+ {35169, S_008218_MEC2_WAIT_ON_RCIU_READ(~0u)},
+ {35192, S_008218_MEC2_WAIT_ON_ROQ_DATA(~0u)},
+ {35214, S_008218_ATCL2IU_WAITING_ON_FREE(~0u)},
+ {35238, S_008218_ATCL2IU_WAITING_ON_TAGS(~0u)},
+ {35262, S_008218_ATCL1_WAITING_ON_TRANS(~0u)},
+ /* 315 */
+ {35285, S_00821C_POST_WPTR_GFX_BUSY(~0u)},
+ {35304, S_00821C_CSF_BUSY(~0u)},
+ {35313, S_00821C_ROQ_ALIGN_BUSY(~0u)},
+ {35328, S_00821C_ROQ_RING_BUSY(~0u)},
+ {35342, S_00821C_ROQ_INDIRECT1_BUSY(~0u)},
+ {35361, S_00821C_ROQ_INDIRECT2_BUSY(~0u)},
+ {35380, S_00821C_ROQ_STATE_BUSY(~0u)},
+ {35395, S_00821C_ROQ_CE_RING_BUSY(~0u)},
+ {35412, S_00821C_ROQ_CE_INDIRECT1_BUSY(~0u)},
+ {35434, S_00821C_ROQ_CE_INDIRECT2_BUSY(~0u)},
+ {35456, S_00821C_SEMAPHORE_BUSY(~0u)},
+ {35471, S_00821C_INTERRUPT_BUSY(~0u)},
+ {34430, S_00821C_TCIU_BUSY(~0u)},
+ {35486, S_00821C_HQD_BUSY(~0u)},
+ {35495, S_00821C_PRT_BUSY(~0u)},
+ {34465, S_00821C_ATCL2IU_BUSY(~0u)},
+ {35504, S_00821C_CPF_GFX_BUSY(~0u)},
+ {35517, S_00821C_CPF_CMP_BUSY(~0u)},
+ {35530, S_00821C_GRBM_CPF_STAT_BUSY(~0u)},
+ {35549, S_00821C_CPC_CPF_BUSY(~0u)},
+ {33477, S_00821C_CPF_BUSY(~0u)},
+ /* 336 */
+ {35562, S_008220_REG_BUS_FIFO_BUSY(~0u)},
+ {35580, S_008220_CSF_RING_BUSY(~0u)},
+ {35594, S_008220_CSF_INDIRECT1_BUSY(~0u)},
+ {35613, S_008220_CSF_INDIRECT2_BUSY(~0u)},
+ {35632, S_008220_CSF_STATE_BUSY(~0u)},
+ {35647, S_008220_CSF_CE_INDR1_BUSY(~0u)},
+ {35665, S_008220_CSF_CE_INDR2_BUSY(~0u)},
+ {35683, S_008220_CSF_ARBITER_BUSY(~0u)},
+ {35700, S_008220_CSF_INPUT_BUSY(~0u)},
+ {35715, S_008220_OUTSTANDING_READ_TAGS(~0u)},
+ {35737, S_008220_HPD_PROCESSING_EOP_BUSY(~0u)},
+ {35761, S_008220_HQD_DISPATCH_BUSY(~0u)},
+ {35779, S_008220_HQD_IQ_TIMER_BUSY(~0u)},
+ {35797, S_008220_HQD_DMA_OFFLOAD_BUSY(~0u)},
+ {35818, S_008220_HQD_WAIT_SEMAPHORE_BUSY(~0u)},
+ {35842, S_008220_HQD_SIGNAL_SEMAPHORE_BUSY(~0u)},
+ {35868, S_008220_HQD_MESSAGE_BUSY(~0u)},
+ {35885, S_008220_HQD_PQ_FETCHER_BUSY(~0u)},
+ {35905, S_008220_HQD_IB_FETCHER_BUSY(~0u)},
+ {35925, S_008220_HQD_IQ_FETCHER_BUSY(~0u)},
+ {35945, S_008220_HQD_EOP_FETCHER_BUSY(~0u)},
+ {35966, S_008220_HQD_CONSUMED_RPTR_BUSY(~0u)},
+ {35989, S_008220_HQD_FETCHER_ARB_BUSY(~0u)},
+ {36010, S_008220_HQD_ROQ_ALIGN_BUSY(~0u)},
+ {36029, S_008220_HQD_ROQ_EOP_BUSY(~0u)},
+ {36046, S_008220_HQD_ROQ_IQ_BUSY(~0u)},
+ {36062, S_008220_HQD_ROQ_PQ_BUSY(~0u)},
+ {36078, S_008220_HQD_ROQ_IB_BUSY(~0u)},
+ {36094, S_008220_HQD_WPTR_POLL_BUSY(~0u)},
+ {36113, S_008220_HQD_PQ_BUSY(~0u)},
+ {36125, S_008220_HQD_IB_BUSY(~0u)},
+ /* 367 */
+ {36137, S_008224_RING_FETCHING_DATA(~0u)},
+ {36156, S_008224_INDR1_FETCHING_DATA(~0u)},
+ {36176, S_008224_INDR2_FETCHING_DATA(~0u)},
+ {36196, S_008224_STATE_FETCHING_DATA(~0u)},
+ {36216, S_008224_TCIU_WAITING_ON_FREE(~0u)},
+ {36237, S_008224_TCIU_WAITING_ON_TAGS(~0u)},
+ {35214, S_008224_ATCL2IU_WAITING_ON_FREE(~0u)},
+ {35238, S_008224_ATCL2IU_WAITING_ON_TAGS(~0u)},
+ {35262, S_008224_ATCL1_WAITING_ON_TRANS(~0u)},
+ /* 376 */
+ {36258, S_030230_COHER_SIZE_HI_256B(~0u)},
+ /* 377 */
+ {36277, S_0088B0_PRIM_COUNT(~0u)},
+ /* 378 */
+ {36288, S_0088C4_VS_NO_EXTRA_BUFFER(~0u)},
+ {36307, S_0088C4_STREAMOUT_FULL_FLUSH(~0u)},
+ {36328, S_0088C4_ES_LIMIT(~0u)},
+ /* 381 */
+ {36337, S_0088D4_VERT_REUSE(~0u)},
+ /* 382 */
+ {22130, S_008958_PRIM_TYPE(~0u), 29, 26},
+ /* 383 */
{203, S_00895C_INDEX_TYPE(~0u), 2, 55},
- /* 409 */
+ /* 384 */
{38, S_008988_SIZE(~0u)},
- /* 410 */
- {6395, S_0089B0_OFFCHIP_BUFFERING(~0u)},
- /* 411 */
- {6413, S_008A14_CLIP_VTX_REORDER_ENA(~0u)},
- {6434, S_008A14_NUM_CLIP_SEQ(~0u)},
- {6447, S_008A14_CLIPPED_PRIM_SEQ_STALL(~0u)},
- {6470, S_008A14_VE_NAN_PROC_DISABLE(~0u)},
- /* 415 */
- {6490, S_008A60_LINE_STIPPLE_VALUE(~0u)},
- /* 416 */
- {6509, S_008B10_CURRENT_PTR(~0u)},
- {6521, S_008B10_CURRENT_COUNT(~0u)},
- /* 418 */
- {6535, S_008670_CE_TO_CSF_NOT_RDY_TO_RCV(~0u)},
- {6560, S_008670_CE_TO_RAM_INIT_FETCHER_NOT_RDY_TO_RCV(~0u)},
- {6598, S_008670_CE_WAITING_ON_DATA_FROM_RAM_INIT_FETCHER(~0u)},
- {6639, S_008670_CE_TO_RAM_INIT_NOT_RDY(~0u)},
- {6662, S_008670_CE_TO_RAM_DUMP_NOT_RDY(~0u)},
- {6685, S_008670_CE_TO_RAM_WRITE_NOT_RDY(~0u)},
- {6709, S_008670_CE_TO_INC_FIFO_NOT_RDY_TO_RCV(~0u)},
- {6739, S_008670_CE_TO_WR_FIFO_NOT_RDY_TO_RCV(~0u)},
- {6768, S_008670_CE_WAITING_ON_BUFFER_DATA(~0u)},
- {6794, S_008670_CE_WAITING_ON_CE_BUFFER_FLAG(~0u)},
- {6823, S_008670_CE_WAITING_ON_DE_COUNTER(~0u)},
- {6848, S_008670_CE_WAITING_ON_DE_COUNTER_UNDERFLOW(~0u)},
- {5620, S_008670_TCIU_WAITING_ON_FREE(~0u)},
- {5641, S_008670_TCIU_WAITING_ON_TAGS(~0u)},
- {6883, S_008670_CE_STALLED_ON_TC_WR_CONFIRM(~0u)},
- {6911, S_008670_CE_STALLED_ON_ATOMIC_RTN_DATA(~0u)},
- {4547, S_008670_ATCL2IU_WAITING_ON_FREE(~0u)},
- {4571, S_008670_ATCL2IU_WAITING_ON_TAGS(~0u)},
- {4595, S_008670_ATCL1_WAITING_ON_TRANS(~0u)},
- {4618, S_008670_UTCL2IU_WAITING_ON_FREE(~0u)},
- {4642, S_008670_UTCL2IU_WAITING_ON_TAGS(~0u)},
- {4666, S_008670_UTCL1_WAITING_ON_TRANS(~0u)},
- /* 440 */
- {6941, S_008674_RBIU_TO_DMA_NOT_RDY_TO_RCV(~0u)},
- {6968, S_008674_RBIU_TO_SEM_NOT_RDY_TO_RCV(~0u)},
- {6995, S_008674_RBIU_TO_MEMWR_NOT_RDY_TO_RCV(~0u)},
- {7024, S_008674_ME_HAS_ACTIVE_CE_BUFFER_FLAG(~0u)},
- {7053, S_008674_ME_HAS_ACTIVE_DE_BUFFER_FLAG(~0u)},
- {7082, S_008674_ME_STALLED_ON_TC_WR_CONFIRM(~0u)},
- {7110, S_008674_ME_STALLED_ON_ATOMIC_RTN_DATA(~0u)},
- {7140, S_008674_ME_WAITING_ON_TC_READ_DATA(~0u)},
- {7167, S_008674_ME_WAITING_ON_REG_READ_DATA(~0u)},
- {7195, S_008674_RCIU_WAITING_ON_GDS_FREE(~0u)},
- {7220, S_008674_RCIU_WAITING_ON_GRBM_FREE(~0u)},
- {7246, S_008674_RCIU_WAITING_ON_VGT_FREE(~0u)},
- {7271, S_008674_RCIU_STALLED_ON_ME_READ(~0u)},
- {7295, S_008674_RCIU_STALLED_ON_DMA_READ(~0u)},
- {7320, S_008674_RCIU_STALLED_ON_APPEND_READ(~0u)},
- {7348, S_008674_RCIU_HALTED_BY_REG_VIOLATION(~0u)},
- /* 456 */
- {7377, S_008678_PFP_TO_CSF_NOT_RDY_TO_RCV(~0u)},
- {7403, S_008678_PFP_TO_MEQ_NOT_RDY_TO_RCV(~0u)},
- {7429, S_008678_PFP_TO_RCIU_NOT_RDY_TO_RCV(~0u)},
- {7456, S_008678_PFP_TO_VGT_WRITES_PENDING(~0u)},
- {7482, S_008678_PFP_RCIU_READ_PENDING(~0u)},
- {7504, S_008678_PFP_WAITING_ON_BUFFER_DATA(~0u)},
- {7531, S_008678_ME_WAIT_ON_CE_COUNTER(~0u)},
- {7553, S_008678_ME_WAIT_ON_AVAIL_BUFFER(~0u)},
- {7577, S_008678_GFX_CNTX_NOT_AVAIL_TO_ME(~0u)},
- {7602, S_008678_ME_RCIU_NOT_RDY_TO_RCV(~0u)},
- {7625, S_008678_ME_TO_CONST_NOT_RDY_TO_RCV(~0u)},
- {7652, S_008678_ME_WAITING_DATA_FROM_PFP(~0u)},
- {7677, S_008678_ME_WAITING_ON_PARTIAL_FLUSH(~0u)},
- {7705, S_008678_MEQ_TO_ME_NOT_RDY_TO_RCV(~0u)},
- {7730, S_008678_STQ_TO_ME_NOT_RDY_TO_RCV(~0u)},
- {7755, S_008678_ME_WAITING_DATA_FROM_STQ(~0u)},
- {7780, S_008678_PFP_STALLED_ON_TC_WR_CONFIRM(~0u)},
- {7809, S_008678_PFP_STALLED_ON_ATOMIC_RTN_DATA(~0u)},
- {7840, S_008678_EOPD_FIFO_NEEDS_SC_EOP_DONE(~0u)},
- {7868, S_008678_EOPD_FIFO_NEEDS_WR_CONFIRM(~0u)},
- {7895, S_008678_STRMO_WR_OF_PRIM_DATA_PENDING(~0u)},
- {7925, S_008678_PIPE_STATS_WR_DATA_PENDING(~0u)},
- {7952, S_008678_APPEND_RDY_WAIT_ON_CS_DONE(~0u)},
- {7979, S_008678_APPEND_RDY_WAIT_ON_PS_DONE(~0u)},
- {8006, S_008678_APPEND_WAIT_ON_WR_CONFIRM(~0u)},
- {8032, S_008678_APPEND_ACTIVE_PARTITION(~0u)},
- {8056, S_008678_APPEND_WAITING_TO_SEND_MEMWRITE(~0u)},
- {8088, S_008678_SURF_SYNC_NEEDS_IDLE_CNTXS(~0u)},
- {8115, S_008678_SURF_SYNC_NEEDS_ALL_CLEAN(~0u)},
- /* 485 */
- {4732, S_008680_ROQ_RING_BUSY(~0u)},
- {4746, S_008680_ROQ_INDIRECT1_BUSY(~0u)},
- {4765, S_008680_ROQ_INDIRECT2_BUSY(~0u)},
- {4784, S_008680_ROQ_STATE_BUSY(~0u)},
- {8141, S_008680_DC_BUSY(~0u)},
- {3767, S_008680_ATCL2IU_BUSY(~0u)},
- {8149, S_008680_PFP_BUSY(~0u)},
- {8158, S_008680_MEQ_BUSY(~0u)},
- {8167, S_008680_ME_BUSY(~0u)},
- {8175, S_008680_QUERY_BUSY(~0u)},
- {4860, S_008680_SEMAPHORE_BUSY(~0u)},
- {4875, S_008680_INTERRUPT_BUSY(~0u)},
- {8186, S_008680_SURFACE_SYNC_BUSY(~0u)},
- {1393, S_008680_DMA_BUSY(~0u)},
- {8204, S_008680_RCIU_BUSY(~0u)},
- {3742, S_008680_SCRATCH_RAM_BUSY(~0u)},
- {8214, S_008680_CPC_CPG_BUSY(~0u)},
- {8227, S_008680_CE_BUSY(~0u)},
- {3732, S_008680_TCIU_BUSY(~0u)},
- {4799, S_008680_ROQ_CE_RING_BUSY(~0u)},
- {4816, S_008680_ROQ_CE_INDIRECT1_BUSY(~0u)},
- {4838, S_008680_ROQ_CE_INDIRECT2_BUSY(~0u)},
- {1469, S_008680_CP_BUSY(~0u)},
- {3806, S_008680_UTCL2IU_BUSY(~0u)},
- /* 509 */
- {8235, S_030800_INSTANCE_INDEX(~0u)},
- {8250, S_030800_SH_INDEX(~0u)},
- {8259, S_030800_SE_INDEX(~0u)},
- {8268, S_030800_SH_BROADCAST_WRITES(~0u)},
- {8288, S_030800_INSTANCE_BROADCAST_WRITES(~0u)},
- {8314, S_030800_SE_BROADCAST_WRITES(~0u)},
- /* 515 */
- {6343, S_030908_PRIM_TYPE(~0u), 29, 57},
- /* 516 */
+ /* 385 */
+ {36906, S_0089B0_OFFCHIP_BUFFERING(~0u)},
+ /* 386 */
+ {36924, S_008A14_CLIP_VTX_REORDER_ENA(~0u)},
+ {36945, S_008A14_NUM_CLIP_SEQ(~0u)},
+ {36958, S_008A14_CLIPPED_PRIM_SEQ_STALL(~0u)},
+ {36981, S_008A14_VE_NAN_PROC_DISABLE(~0u)},
+ /* 390 */
+ {1762, S_008A60_LINE_STIPPLE_VALUE(~0u)},
+ /* 391 */
+ {37001, S_008B10_CURRENT_PTR(~0u)},
+ {37013, S_008B10_CURRENT_COUNT(~0u)},
+ /* 393 */
+ {37027, S_008670_CE_TO_CSF_NOT_RDY_TO_RCV(~0u)},
+ {37052, S_008670_CE_TO_RAM_INIT_FETCHER_NOT_RDY_TO_RCV(~0u)},
+ {37090, S_008670_CE_WAITING_ON_DATA_FROM_RAM_INIT_FETCHER(~0u)},
+ {37131, S_008670_CE_TO_RAM_INIT_NOT_RDY(~0u)},
+ {37154, S_008670_CE_TO_RAM_DUMP_NOT_RDY(~0u)},
+ {37177, S_008670_CE_TO_RAM_WRITE_NOT_RDY(~0u)},
+ {37201, S_008670_CE_TO_INC_FIFO_NOT_RDY_TO_RCV(~0u)},
+ {37231, S_008670_CE_TO_WR_FIFO_NOT_RDY_TO_RCV(~0u)},
+ {37260, S_008670_CE_WAITING_ON_BUFFER_DATA(~0u)},
+ {37286, S_008670_CE_WAITING_ON_CE_BUFFER_FLAG(~0u)},
+ {37315, S_008670_CE_WAITING_ON_DE_COUNTER(~0u)},
+ {37340, S_008670_CE_WAITING_ON_DE_COUNTER_UNDERFLOW(~0u)},
+ {36216, S_008670_TCIU_WAITING_ON_FREE(~0u)},
+ {36237, S_008670_TCIU_WAITING_ON_TAGS(~0u)},
+ {37375, S_008670_CE_STALLED_ON_TC_WR_CONFIRM(~0u)},
+ {37403, S_008670_CE_STALLED_ON_ATOMIC_RTN_DATA(~0u)},
+ {35214, S_008670_ATCL2IU_WAITING_ON_FREE(~0u)},
+ {35238, S_008670_ATCL2IU_WAITING_ON_TAGS(~0u)},
+ {35262, S_008670_ATCL1_WAITING_ON_TRANS(~0u)},
+ /* 412 */
+ {37433, S_008674_RBIU_TO_DMA_NOT_RDY_TO_RCV(~0u)},
+ {37460, S_008674_RBIU_TO_SEM_NOT_RDY_TO_RCV(~0u)},
+ {37487, S_008674_RBIU_TO_MEMWR_NOT_RDY_TO_RCV(~0u)},
+ {37516, S_008674_ME_HAS_ACTIVE_CE_BUFFER_FLAG(~0u)},
+ {37545, S_008674_ME_HAS_ACTIVE_DE_BUFFER_FLAG(~0u)},
+ {37574, S_008674_ME_STALLED_ON_TC_WR_CONFIRM(~0u)},
+ {37602, S_008674_ME_STALLED_ON_ATOMIC_RTN_DATA(~0u)},
+ {37632, S_008674_ME_WAITING_ON_TC_READ_DATA(~0u)},
+ {37659, S_008674_ME_WAITING_ON_REG_READ_DATA(~0u)},
+ {37687, S_008674_RCIU_WAITING_ON_GDS_FREE(~0u)},
+ {37712, S_008674_RCIU_WAITING_ON_GRBM_FREE(~0u)},
+ {37738, S_008674_RCIU_WAITING_ON_VGT_FREE(~0u)},
+ {37763, S_008674_RCIU_STALLED_ON_ME_READ(~0u)},
+ {37787, S_008674_RCIU_STALLED_ON_DMA_READ(~0u)},
+ {37812, S_008674_RCIU_STALLED_ON_APPEND_READ(~0u)},
+ {37840, S_008674_RCIU_HALTED_BY_REG_VIOLATION(~0u)},
+ /* 428 */
+ {37869, S_008678_PFP_TO_CSF_NOT_RDY_TO_RCV(~0u)},
+ {37895, S_008678_PFP_TO_MEQ_NOT_RDY_TO_RCV(~0u)},
+ {37921, S_008678_PFP_TO_RCIU_NOT_RDY_TO_RCV(~0u)},
+ {37948, S_008678_PFP_TO_VGT_WRITES_PENDING(~0u)},
+ {37974, S_008678_PFP_RCIU_READ_PENDING(~0u)},
+ {37996, S_008678_PFP_WAITING_ON_BUFFER_DATA(~0u)},
+ {38023, S_008678_ME_WAIT_ON_CE_COUNTER(~0u)},
+ {38045, S_008678_ME_WAIT_ON_AVAIL_BUFFER(~0u)},
+ {38069, S_008678_GFX_CNTX_NOT_AVAIL_TO_ME(~0u)},
+ {38094, S_008678_ME_RCIU_NOT_RDY_TO_RCV(~0u)},
+ {38117, S_008678_ME_TO_CONST_NOT_RDY_TO_RCV(~0u)},
+ {38144, S_008678_ME_WAITING_DATA_FROM_PFP(~0u)},
+ {38169, S_008678_ME_WAITING_ON_PARTIAL_FLUSH(~0u)},
+ {38197, S_008678_MEQ_TO_ME_NOT_RDY_TO_RCV(~0u)},
+ {38222, S_008678_STQ_TO_ME_NOT_RDY_TO_RCV(~0u)},
+ {38247, S_008678_ME_WAITING_DATA_FROM_STQ(~0u)},
+ {38272, S_008678_PFP_STALLED_ON_TC_WR_CONFIRM(~0u)},
+ {38301, S_008678_PFP_STALLED_ON_ATOMIC_RTN_DATA(~0u)},
+ {38332, S_008678_EOPD_FIFO_NEEDS_SC_EOP_DONE(~0u)},
+ {38360, S_008678_EOPD_FIFO_NEEDS_WR_CONFIRM(~0u)},
+ {38387, S_008678_STRMO_WR_OF_PRIM_DATA_PENDING(~0u)},
+ {38417, S_008678_PIPE_STATS_WR_DATA_PENDING(~0u)},
+ {38444, S_008678_APPEND_RDY_WAIT_ON_CS_DONE(~0u)},
+ {38471, S_008678_APPEND_RDY_WAIT_ON_PS_DONE(~0u)},
+ {38498, S_008678_APPEND_WAIT_ON_WR_CONFIRM(~0u)},
+ {38524, S_008678_APPEND_ACTIVE_PARTITION(~0u)},
+ {38548, S_008678_APPEND_WAITING_TO_SEND_MEMWRITE(~0u)},
+ {38580, S_008678_SURF_SYNC_NEEDS_IDLE_CNTXS(~0u)},
+ {38607, S_008678_SURF_SYNC_NEEDS_ALL_CLEAN(~0u)},
+ /* 457 */
+ {35328, S_008680_ROQ_RING_BUSY(~0u)},
+ {35342, S_008680_ROQ_INDIRECT1_BUSY(~0u)},
+ {35361, S_008680_ROQ_INDIRECT2_BUSY(~0u)},
+ {35380, S_008680_ROQ_STATE_BUSY(~0u)},
+ {38633, S_008680_DC_BUSY(~0u)},
+ {34465, S_008680_ATCL2IU_BUSY(~0u)},
+ {38641, S_008680_PFP_BUSY(~0u)},
+ {38650, S_008680_MEQ_BUSY(~0u)},
+ {38659, S_008680_ME_BUSY(~0u)},
+ {38667, S_008680_QUERY_BUSY(~0u)},
+ {35456, S_008680_SEMAPHORE_BUSY(~0u)},
+ {35471, S_008680_INTERRUPT_BUSY(~0u)},
+ {38678, S_008680_SURFACE_SYNC_BUSY(~0u)},
+ {32235, S_008680_DMA_BUSY(~0u)},
+ {38696, S_008680_RCIU_BUSY(~0u)},
+ {34440, S_008680_SCRATCH_RAM_BUSY(~0u)},
+ {38706, S_008680_CPC_CPG_BUSY(~0u)},
+ {38719, S_008680_CE_BUSY(~0u)},
+ {34430, S_008680_TCIU_BUSY(~0u)},
+ {35395, S_008680_ROQ_CE_RING_BUSY(~0u)},
+ {35412, S_008680_ROQ_CE_INDIRECT1_BUSY(~0u)},
+ {35434, S_008680_ROQ_CE_INDIRECT2_BUSY(~0u)},
+ {32311, S_008680_CP_BUSY(~0u)},
+ /* 480 */
+ {38727, S_030800_INSTANCE_INDEX(~0u)},
+ {38742, S_030800_SH_INDEX(~0u)},
+ {38751, S_030800_SE_INDEX(~0u)},
+ {38760, S_030800_SH_BROADCAST_WRITES(~0u)},
+ {38780, S_030800_INSTANCE_BROADCAST_WRITES(~0u)},
+ {38806, S_030800_SE_BROADCAST_WRITES(~0u)},
+ /* 486 */
+ {22130, S_030908_PRIM_TYPE(~0u), 29, 57},
+ /* 487 */
{203, S_03090C_INDEX_TYPE(~0u), 2, 55},
- {8334, S_03090C_PRIMGEN_EN(~0u)},
- /* 518 */
+ /* 488 */
{38, S_030938_SIZE(~0u)},
- /* 519 */
- {6395, S_03093C_OFFCHIP_BUFFERING(~0u)},
- {8393, S_03093C_OFFCHIP_GRANULARITY(~0u), 4, 86},
- /* 521 */
- {6490, S_030A00_LINE_STIPPLE_VALUE(~0u)},
- /* 522 */
- {6509, S_030A04_CURRENT_PTR(~0u)},
- {6521, S_030A04_CURRENT_COUNT(~0u)},
- /* 524 */
+ /* 489 */
+ {36906, S_03093C_OFFCHIP_BUFFERING(~0u)},
+ {38874, S_03093C_OFFCHIP_GRANULARITY(~0u), 4, 86},
+ /* 491 */
+ {1762, S_030A00_LINE_STIPPLE_VALUE(~0u)},
+ /* 492 */
+ {37001, S_030A04_CURRENT_PTR(~0u)},
+ {37013, S_030A04_CURRENT_COUNT(~0u)},
+ /* 494 */
{445, S_030A10_X(~0u)},
{91, S_030A10_Y(~0u)},
- /* 526 */
+ /* 496 */
{445, S_030A14_X(~0u)},
{91, S_030A14_Y(~0u)},
- /* 528 */
+ /* 498 */
{445, S_030A18_X(~0u)},
{91, S_030A18_Y(~0u)},
- /* 530 */
+ /* 500 */
{445, S_030A2C_X(~0u)},
{91, S_030A2C_Y(~0u)},
+ /* 502 */
+ {38894, S_008BF0_ENABLE_PA_SC_OUT_OF_ORDER(~0u)},
+ {38920, S_008BF0_DISABLE_SC_DB_TILE_FIX(~0u)},
+ {38943, S_008BF0_DISABLE_AA_MASK_FULL_FIX(~0u)},
+ {38968, S_008BF0_ENABLE_1XMSAA_SAMPLE_LOCATIONS(~0u)},
+ {38999, S_008BF0_ENABLE_1XMSAA_SAMPLE_LOC_CENTROID(~0u)},
+ {39033, S_008BF0_DISABLE_SCISSOR_FIX(~0u)},
+ {39053, S_008BF0_DISABLE_PW_BUBBLE_COLLAPSE(~0u)},
+ {39080, S_008BF0_SEND_UNLIT_STILES_TO_PACKER(~0u)},
+ {39108, S_008BF0_DISABLE_DUALGRAD_PERF_OPTIMIZATION(~0u)},
+ /* 511 */
+ {39143, S_008C08_INST_INVALIDATE(~0u)},
+ {39159, S_008C08_DATA_INVALIDATE(~0u)},
+ /* 513 */
+ {39143, S_030D20_INST_INVALIDATE(~0u)},
+ {39159, S_030D20_DATA_INVALIDATE(~0u)},
+ {39175, S_030D20_INVALIDATE_VOLATILE(~0u)},
+ /* 516 */
+ {39195, S_008C0C_RET(~0u)},
+ {39199, S_008C0C_RUI(~0u)},
+ {39203, S_008C0C_RNG(~0u)},
+ /* 519 */
+ {2571, S_008DFC_EN(~0u)},
+ {39266, S_008DFC_TGT(~0u), 33, 90},
+ {39270, S_008DFC_COMPR(~0u)},
+ {33791, S_008DFC_DONE(~0u)},
+ {39276, S_008DFC_VM(~0u)},
+ {39296, S_008DFC_ENCODING(~0u), 63, 123},
+ /* 525 */
+ {39305, S_030E04_ADDRESS(~0u)},
+ /* 526 */
+ {2791, S_030F04_COUNT_HI(~0u)},
+ /* 527 */
+ {39313, S_008F04_BASE_ADDRESS_HI(~0u)},
+ {23319, S_008F04_STRIDE(~0u)},
+ {39329, S_008F04_CACHE_SWIZZLE(~0u)},
+ {39343, S_008F04_SWIZZLE_ENABLE(~0u)},
+ /* 531 */
+ {2791, S_030F0C_COUNT_HI(~0u)},
/* 532 */
- {8413, S_008BF0_ENABLE_PA_SC_OUT_OF_ORDER(~0u)},
- {8439, S_008BF0_DISABLE_SC_DB_TILE_FIX(~0u)},
- {8462, S_008BF0_DISABLE_AA_MASK_FULL_FIX(~0u)},
- {8487, S_008BF0_ENABLE_1XMSAA_SAMPLE_LOCATIONS(~0u)},
- {8518, S_008BF0_ENABLE_1XMSAA_SAMPLE_LOC_CENTROID(~0u)},
- {8552, S_008BF0_DISABLE_SCISSOR_FIX(~0u)},
- {8572, S_008BF0_DISABLE_PW_BUBBLE_COLLAPSE(~0u)},
- {8599, S_008BF0_SEND_UNLIT_STILES_TO_PACKER(~0u)},
- {8627, S_008BF0_DISABLE_DUALGRAD_PERF_OPTIMIZATION(~0u)},
- /* 541 */
- {8662, S_008C08_INST_INVALIDATE(~0u)},
- {8678, S_008C08_DATA_INVALIDATE(~0u)},
- /* 543 */
- {8662, S_030D20_INST_INVALIDATE(~0u)},
- {8678, S_030D20_DATA_INVALIDATE(~0u)},
- {8694, S_030D20_INVALIDATE_VOLATILE(~0u)},
- {8714, S_030D20_TARGET_INST(~0u)},
- {8726, S_030D20_TARGET_DATA(~0u)},
- {8667, S_030D20_INVALIDATE(~0u)},
- {8738, S_030D20_WRITEBACK(~0u)},
- {8748, S_030D20_VOL(~0u)},
- {8752, S_030D20_COMPLETE(~0u)},
+ {39448, S_008F0C_DST_SEL_X(~0u), 8, 186},
+ {39458, S_008F0C_DST_SEL_Y(~0u), 8, 186},
+ {39468, S_008F0C_DST_SEL_Z(~0u), 8, 186},
+ {39478, S_008F0C_DST_SEL_W(~0u), 8, 186},
+ {39662, S_008F0C_NUM_FORMAT(~0u), 8, 194},
+ {40054, S_008F0C_DATA_FORMAT(~0u), 16, 202},
+ {40066, S_008F0C_ELEMENT_SIZE(~0u)},
+ {40079, S_008F0C_INDEX_STRIDE(~0u)},
+ {40092, S_008F0C_ADD_TID_ENABLE(~0u)},
+ {40107, S_008F0C_ATC(~0u)},
+ {40111, S_008F0C_HASH_ENABLE(~0u)},
+ {40123, S_008F0C_HEAP(~0u)},
+ {40128, S_008F0C_MTYPE(~0u)},
+ {209, S_008F0C_TYPE(~0u), 4, 218},
+ /* 546 */
+ {2791, S_030F14_COUNT_HI(~0u)},
+ /* 547 */
+ {39313, S_008F14_BASE_ADDRESS_HI(~0u)},
+ {40203, S_008F14_MIN_LOD(~0u)},
+ {41836, S_008F14_DATA_FORMAT_GFX6(~0u), 64, 222},
+ {42220, S_008F14_NUM_FORMAT_GFX6(~0u), 16, 286},
+ {40128, S_008F14_MTYPE(~0u)},
/* 552 */
- {8761, S_008C0C_RET(~0u)},
- {8765, S_008C0C_RUI(~0u)},
- {8769, S_008C0C_RNG(~0u)},
- /* 555 */
- {8342, S_008DFC_EN(~0u)},
- {8832, S_008DFC_TGT(~0u), 33, 90},
- {8836, S_008DFC_COMPR(~0u)},
- {3049, S_008DFC_DONE(~0u)},
- {8842, S_008DFC_VM(~0u)},
- {8862, S_008DFC_ENCODING(~0u), 63, 123},
- /* 561 */
- {8871, S_030E04_ADDRESS(~0u)},
- /* 562 */
- {8879, S_030F04_COUNT_HI(~0u)},
- /* 563 */
- {8888, S_008F04_BASE_ADDRESS_HI(~0u)},
- {8904, S_008F04_STRIDE(~0u)},
- {8911, S_008F04_CACHE_SWIZZLE(~0u)},
- {8925, S_008F04_SWIZZLE_ENABLE(~0u)},
- /* 567 */
- {8879, S_030F0C_COUNT_HI(~0u)},
+ {42236, S_008F18_WIDTH(~0u)},
+ {42242, S_008F18_HEIGHT(~0u)},
+ {42249, S_008F18_PERF_MOD(~0u)},
+ {42258, S_008F18_INTERLACED(~0u)},
+ /* 556 */
+ {2791, S_030F1C_COUNT_HI(~0u)},
+ /* 557 */
+ {39448, S_008F1C_DST_SEL_X(~0u), 8, 186},
+ {39458, S_008F1C_DST_SEL_Y(~0u), 8, 186},
+ {39468, S_008F1C_DST_SEL_Z(~0u), 8, 186},
+ {39478, S_008F1C_DST_SEL_W(~0u), 8, 186},
+ {42269, S_008F1C_BASE_LEVEL(~0u)},
+ {42280, S_008F1C_LAST_LEVEL(~0u)},
+ {42291, S_008F1C_TILING_INDEX(~0u)},
+ {42304, S_008F1C_POW2_PAD(~0u)},
+ {40128, S_008F1C_MTYPE(~0u)},
+ {40107, S_008F1C_ATC(~0u)},
+ {209, S_008F1C_TYPE(~0u), 16, 302},
/* 568 */
- {9030, S_008F0C_DST_SEL_X(~0u), 8, 186},
- {9040, S_008F0C_DST_SEL_Y(~0u), 8, 186},
- {9050, S_008F0C_DST_SEL_Z(~0u), 8, 186},
- {9060, S_008F0C_DST_SEL_W(~0u), 8, 186},
- {9244, S_008F0C_NUM_FORMAT(~0u), 8, 194},
- {9636, S_008F0C_DATA_FORMAT(~0u), 16, 202},
- {9648, S_008F0C_ELEMENT_SIZE(~0u)},
- {9661, S_008F0C_INDEX_STRIDE(~0u)},
- {9674, S_008F0C_ADD_TID_ENABLE(~0u)},
- {9689, S_008F0C_ATC(~0u)},
- {9693, S_008F0C_HASH_ENABLE(~0u)},
- {9705, S_008F0C_HEAP(~0u)},
- {9710, S_008F0C_MTYPE(~0u)},
- {209, S_008F0C_TYPE(~0u), 4, 218},
- {9785, S_008F0C_USER_VM_ENABLE(~0u)},
- {9800, S_008F0C_USER_VM_MODE(~0u)},
- {9813, S_008F0C_NV(~0u)},
- /* 585 */
- {8879, S_030F14_COUNT_HI(~0u)},
- /* 586 */
- {8888, S_008F14_BASE_ADDRESS_HI(~0u)},
- {9816, S_008F14_MIN_LOD(~0u)},
- {11449, S_008F14_DATA_FORMAT_GFX6(~0u), 64, 222},
- {11833, S_008F14_NUM_FORMAT_GFX6(~0u), 16, 286},
- {9710, S_008F14_MTYPE(~0u)},
- {12421, S_008F14_DATA_FORMAT_GFX9(~0u), 64, 302},
- {12514, S_008F14_NUM_FORMAT_GFX9(~0u), 11, 366},
- {12749, S_008F14_NUM_FORMAT_FMASK(~0u), 13, 377},
- {12999, S_008F14_NUM_FORMAT_ASTC_2D(~0u), 14, 390},
- {13198, S_008F14_NUM_FORMAT_ASTC_3D(~0u), 10, 404},
- {9813, S_008F14_NV(~0u)},
- {13217, S_008F14_META_DIRECT(~0u)},
+ {22114, S_008F20_DEPTH(~0u)},
+ {42615, S_008F20_PITCH_GFX6(~0u)},
+ /* 570 */
+ {42626, S_008F24_BASE_ARRAY(~0u)},
+ {42637, S_008F24_LAST_ARRAY(~0u)},
+ /* 572 */
+ {42648, S_008F28_MIN_LOD_WARN(~0u)},
+ {42661, S_008F28_COUNTER_BANK_ID(~0u)},
+ {42677, S_008F28_LOD_HDW_CNT_EN(~0u)},
+ {42692, S_008F28_COMPRESSION_EN(~0u)},
+ {42707, S_008F28_ALPHA_IS_ON_MSB(~0u)},
+ {42723, S_008F28_COLOR_TRANSFORM(~0u)},
+ {42739, S_008F28_LOST_ALPHA_BITS(~0u)},
+ {42755, S_008F28_LOST_COLOR_BITS(~0u)},
+ /* 580 */
+ {42953, S_008F30_CLAMP_X(~0u), 8, 318},
+ {42961, S_008F30_CLAMP_Y(~0u), 8, 318},
+ {42969, S_008F30_CLAMP_Z(~0u), 8, 318},
+ {42977, S_008F30_MAX_ANISO_RATIO(~0u)},
+ {43225, S_008F30_DEPTH_COMPARE_FUNC(~0u), 8, 326},
+ {43244, S_008F30_FORCE_UNNORMALIZED(~0u)},
+ {43263, S_008F30_ANISO_THRESHOLD(~0u)},
+ {43279, S_008F30_MC_COORD_TRUNC(~0u)},
+ {43294, S_008F30_FORCE_DEGAMMA(~0u)},
+ {43308, S_008F30_ANISO_BIAS(~0u)},
+ {43319, S_008F30_TRUNC_COORD(~0u)},
+ {43331, S_008F30_DISABLE_CUBE_WRAP(~0u)},
+ {43349, S_008F30_FILTER_MODE(~0u)},
+ {43361, S_008F30_COMPAT_MODE(~0u)},
+ /* 594 */
+ {40203, S_008F34_MIN_LOD(~0u)},
+ {43373, S_008F34_MAX_LOD(~0u)},
+ {43381, S_008F34_PERF_MIP(~0u)},
+ {43390, S_008F34_PERF_Z(~0u)},
/* 598 */
- {13229, S_008F18_WIDTH(~0u)},
- {13235, S_008F18_HEIGHT(~0u)},
- {13242, S_008F18_PERF_MOD(~0u)},
- {13251, S_008F18_INTERLACED(~0u)},
- /* 602 */
- {8879, S_030F1C_COUNT_HI(~0u)},
- /* 603 */
- {9030, S_008F1C_DST_SEL_X(~0u), 8, 186},
- {9040, S_008F1C_DST_SEL_Y(~0u), 8, 186},
- {9050, S_008F1C_DST_SEL_Z(~0u), 8, 186},
- {9060, S_008F1C_DST_SEL_W(~0u), 8, 186},
- {13262, S_008F1C_BASE_LEVEL(~0u)},
- {13273, S_008F1C_LAST_LEVEL(~0u)},
- {13284, S_008F1C_TILING_INDEX(~0u)},
- {13297, S_008F1C_POW2_PAD(~0u)},
- {9710, S_008F1C_MTYPE(~0u)},
- {9689, S_008F1C_ATC(~0u)},
- {209, S_008F1C_TYPE(~0u), 16, 414},
- {13608, S_008F1C_SW_MODE(~0u)},
- /* 615 */
- {13616, S_008F20_DEPTH(~0u)},
- {13622, S_008F20_PITCH_GFX6(~0u)},
- {13633, S_008F20_PITCH_GFX9(~0u)},
- {13740, S_008F20_BC_SWIZZLE(~0u), 6, 430},
- /* 619 */
- {13751, S_008F24_BASE_ARRAY(~0u)},
- {13762, S_008F24_LAST_ARRAY(~0u)},
- {13773, S_008F24_ARRAY_PITCH(~0u)},
- {13785, S_008F24_META_DATA_ADDRESS(~0u)},
- {13803, S_008F24_META_LINEAR(~0u)},
- {13815, S_008F24_META_PIPE_ALIGNED(~0u)},
- {13833, S_008F24_META_RB_ALIGNED(~0u)},
- {13849, S_008F24_MAX_MIP(~0u)},
- /* 627 */
- {13857, S_008F28_MIN_LOD_WARN(~0u)},
- {13870, S_008F28_COUNTER_BANK_ID(~0u)},
- {13886, S_008F28_LOD_HDW_CNT_EN(~0u)},
- {13901, S_008F28_COMPRESSION_EN(~0u)},
- {13916, S_008F28_ALPHA_IS_ON_MSB(~0u)},
- {13932, S_008F28_COLOR_TRANSFORM(~0u)},
- {13948, S_008F28_LOST_ALPHA_BITS(~0u)},
- {13964, S_008F28_LOST_COLOR_BITS(~0u)},
- /* 635 */
- {14162, S_008F30_CLAMP_X(~0u), 8, 436},
- {14170, S_008F30_CLAMP_Y(~0u), 8, 436},
- {14178, S_008F30_CLAMP_Z(~0u), 8, 436},
- {14186, S_008F30_MAX_ANISO_RATIO(~0u)},
- {14434, S_008F30_DEPTH_COMPARE_FUNC(~0u), 8, 444},
- {14453, S_008F30_FORCE_UNNORMALIZED(~0u)},
- {14472, S_008F30_ANISO_THRESHOLD(~0u)},
- {14488, S_008F30_MC_COORD_TRUNC(~0u)},
- {14503, S_008F30_FORCE_DEGAMMA(~0u)},
- {14517, S_008F30_ANISO_BIAS(~0u)},
- {14528, S_008F30_TRUNC_COORD(~0u)},
- {14540, S_008F30_DISABLE_CUBE_WRAP(~0u)},
- {14558, S_008F30_FILTER_MODE(~0u)},
- {14570, S_008F30_COMPAT_MODE(~0u)},
- /* 649 */
- {9816, S_008F34_MIN_LOD(~0u)},
- {14582, S_008F34_MAX_LOD(~0u)},
- {14590, S_008F34_PERF_MIP(~0u)},
- {14599, S_008F34_PERF_Z(~0u)},
- /* 653 */
- {14606, S_008F38_LOD_BIAS(~0u)},
- {14615, S_008F38_LOD_BIAS_SEC(~0u)},
- {14677, S_008F38_XY_MAG_FILTER(~0u), 2, 452},
- {14752, S_008F38_XY_MIN_FILTER(~0u), 4, 454},
- {14832, S_008F38_Z_FILTER(~0u), 3, 458},
- {14841, S_008F38_MIP_FILTER(~0u), 3, 458},
- {14852, S_008F38_MIP_POINT_PRECLAMP(~0u)},
- {14871, S_008F38_DISABLE_LSB_CEIL(~0u)},
- {14888, S_008F38_FILTER_PREC_FIX(~0u)},
- {14904, S_008F38_ANISO_OVERRIDE(~0u)},
- {14919, S_008F38_BLEND_ZERO_PRT(~0u)},
- /* 664 */
- {14934, S_008F3C_BORDER_COLOR_PTR(~0u)},
- {14951, S_008F3C_UPGRADED_DEPTH(~0u)},
- {15093, S_008F3C_BORDER_COLOR_TYPE(~0u), 4, 461},
- {15111, S_008F3C_SKIP_DEGAMMA(~0u)},
- /* 668 */
- {15124, S_0090DC_VS_LOW_THRESHOLD(~0u)},
- {15141, S_0090DC_GS_LOW_THRESHOLD(~0u)},
- {15158, S_0090DC_ES_LOW_THRESHOLD(~0u)},
- {15175, S_0090DC_HS_LOW_THRESHOLD(~0u)},
- {15192, S_0090DC_LS_LOW_THRESHOLD(~0u)},
- /* 673 */
- {15209, S_0090E0_PS_CU_EN(~0u)},
- {15218, S_0090E0_VS_CU_EN(~0u)},
- /* 675 */
- {15227, S_0090E4_GS_CU_EN(~0u)},
- {15236, S_0090E4_ES_CU_EN(~0u)},
- /* 677 */
- {15245, S_0090E8_LSHS_CU_EN(~0u)},
- /* 678 */
- {15256, S_0090EC_MAX_WAVE_ID(~0u)},
- /* 679 */
- {15256, S_0090E8_MAX_WAVE_ID(~0u)},
- /* 680 */
- {15273, S_0090F0_RING_ORDER_TS0(~0u), 1, 465},
- {15288, S_0090F0_RING_ORDER_TS1(~0u)},
- {15303, S_0090F0_RING_ORDER_TS2(~0u)},
- /* 683 */
- {15318, S_00C700_PIPE_ORDER_TS0(~0u)},
- {15333, S_00C700_PIPE_ORDER_TS1(~0u)},
- {15348, S_00C700_PIPE_ORDER_TS2(~0u)},
- {15363, S_00C700_PIPE_ORDER_TS3(~0u)},
- {15378, S_00C700_TS0_DUR_MULT(~0u)},
- {15391, S_00C700_TS1_DUR_MULT(~0u)},
- {15404, S_00C700_TS2_DUR_MULT(~0u)},
- {15417, S_00C700_TS3_DUR_MULT(~0u)},
- /* 691 */
- {15430, S_0090F4_TS0_DURATION(~0u)},
- {15443, S_0090F4_TS1_DURATION(~0u)},
- /* 693 */
- {15456, S_0090F8_TS2_DURATION(~0u)},
- /* 694 */
+ {43397, S_008F38_LOD_BIAS(~0u)},
+ {43406, S_008F38_LOD_BIAS_SEC(~0u)},
+ {43468, S_008F38_XY_MAG_FILTER(~0u), 2, 334},
+ {43543, S_008F38_XY_MIN_FILTER(~0u), 4, 336},
+ {43623, S_008F38_Z_FILTER(~0u), 3, 340},
+ {43632, S_008F38_MIP_FILTER(~0u), 3, 340},
+ {43643, S_008F38_MIP_POINT_PRECLAMP(~0u)},
+ {43662, S_008F38_DISABLE_LSB_CEIL(~0u)},
+ {43679, S_008F38_FILTER_PREC_FIX(~0u)},
+ {43695, S_008F38_ANISO_OVERRIDE(~0u)},
+ /* 608 */
+ {43710, S_008F3C_BORDER_COLOR_PTR(~0u)},
+ {43727, S_008F3C_UPGRADED_DEPTH(~0u)},
+ {43869, S_008F3C_BORDER_COLOR_TYPE(~0u), 4, 343},
+ /* 611 */
+ {43887, S_0090DC_VS_LOW_THRESHOLD(~0u)},
+ {43904, S_0090DC_GS_LOW_THRESHOLD(~0u)},
+ {43921, S_0090DC_ES_LOW_THRESHOLD(~0u)},
+ {43938, S_0090DC_HS_LOW_THRESHOLD(~0u)},
+ {43955, S_0090DC_LS_LOW_THRESHOLD(~0u)},
+ /* 616 */
+ {43972, S_0090E0_PS_CU_EN(~0u)},
+ {43981, S_0090E0_VS_CU_EN(~0u)},
+ /* 618 */
+ {43990, S_0090E4_GS_CU_EN(~0u)},
+ {43999, S_0090E4_ES_CU_EN(~0u)},
+ /* 620 */
+ {44008, S_0090E8_LSHS_CU_EN(~0u)},
+ /* 621 */
+ {2656, S_0090EC_MAX_WAVE_ID(~0u)},
+ /* 622 */
+ {2656, S_0090E8_MAX_WAVE_ID(~0u)},
+ /* 623 */
+ {44024, S_0090F0_RING_ORDER_TS0(~0u), 1, 347},
+ {44039, S_0090F0_RING_ORDER_TS1(~0u)},
+ {44054, S_0090F0_RING_ORDER_TS2(~0u)},
+ /* 626 */
+ {44069, S_00C700_PIPE_ORDER_TS0(~0u)},
+ {44084, S_00C700_PIPE_ORDER_TS1(~0u)},
+ {44099, S_00C700_PIPE_ORDER_TS2(~0u)},
+ {44114, S_00C700_PIPE_ORDER_TS3(~0u)},
+ {44129, S_00C700_TS0_DUR_MULT(~0u)},
+ {44142, S_00C700_TS1_DUR_MULT(~0u)},
+ {44155, S_00C700_TS2_DUR_MULT(~0u)},
+ {44168, S_00C700_TS3_DUR_MULT(~0u)},
+ /* 634 */
+ {44181, S_0090F4_TS0_DURATION(~0u)},
+ {44194, S_0090F4_TS1_DURATION(~0u)},
+ /* 636 */
+ {44207, S_0090F8_TS2_DURATION(~0u)},
+ /* 637 */
{38, S_008F40_SIZE(~0u)},
- /* 695 */
+ /* 638 */
{669, S_008F44_OFFSET(~0u)},
+ /* 639 */
+ {2791, S_030FFC_COUNT_HI(~0u)},
+ /* 640 */
+ {44220, S_009100_GPR_WRITE_PRIORITY(~0u)},
+ {44239, S_009100_EXP_PRIORITY_ORDER(~0u)},
+ {44258, S_009100_ENABLE_SQG_TOP_EVENTS(~0u)},
+ {44280, S_009100_ENABLE_SQG_BOP_EVENTS(~0u)},
+ {44302, S_009100_RSRC_MGMT_RESET(~0u)},
+ /* 645 */
+ {44571, S_00913C_VTX_DONE_DELAY(~0u), 16, 348},
+ {44586, S_00913C_INTERP_ONE_PRIM_PER_ROW(~0u)},
+ {44610, S_00913C_PC_LIMIT_ENABLE(~0u)},
+ {44626, S_00913C_PC_LIMIT_STRICT(~0u)},
+ {44642, S_00913C_PC_LIMIT_SIZE(~0u)},
+ /* 650 */
+ {44656, S_00936C_TYPE_A(~0u)},
+ {44663, S_00936C_VGPR_A(~0u)},
+ {44670, S_00936C_SGPR_A(~0u)},
+ {44677, S_00936C_LDS_A(~0u)},
+ {44683, S_00936C_WAVES_A(~0u)},
+ {44691, S_00936C_EN_A(~0u)},
+ {44696, S_00936C_TYPE_B(~0u)},
+ {44703, S_00936C_VGPR_B(~0u)},
+ {44710, S_00936C_SGPR_B(~0u)},
+ {44717, S_00936C_LDS_B(~0u)},
+ {44723, S_00936C_WAVES_B(~0u)},
+ {44731, S_00936C_EN_B(~0u)},
+ /* 662 */
+ {44736, S_009858_MSAA1_X(~0u)},
+ {44744, S_009858_MSAA1_Y(~0u)},
+ {44752, S_009858_MSAA2_X(~0u)},
+ {44760, S_009858_MSAA2_Y(~0u)},
+ {44768, S_009858_MSAA4_X(~0u)},
+ {44776, S_009858_MSAA4_Y(~0u)},
+ {44784, S_009858_MSAA8_X(~0u)},
+ {44792, S_009858_MSAA8_Y(~0u)},
+ {44800, S_009858_MSAA16_X(~0u)},
+ {44809, S_009858_MSAA16_Y(~0u)},
+ /* 672 */
+ {44818, S_0098F8_NUM_PIPES(~0u)},
+ {44828, S_0098F8_PIPE_INTERLEAVE_SIZE_GFX6(~0u)},
+ {44854, S_0098F8_BANK_INTERLEAVE_SIZE(~0u)},
+ {44875, S_0098F8_NUM_SHADER_ENGINES_GFX6(~0u)},
+ {44899, S_0098F8_SHADER_ENGINE_TILE_SIZE(~0u)},
+ {44923, S_0098F8_NUM_GPUS_GFX6(~0u)},
+ {44937, S_0098F8_MULTI_GPU_TILE_SIZE(~0u)},
+ {44957, S_0098F8_ROW_SIZE(~0u)},
+ {44966, S_0098F8_NUM_LOWER_PIPES(~0u)},
+ /* 681 */
+ {45099, S_009910_MICRO_TILE_MODE(~0u), 4, 364},
+ {45344, S_009910_ARRAY_MODE(~0u), 16, 368},
+ {45706, S_009910_PIPE_CONFIG(~0u), 15, 384},
+ {45896, S_009910_TILE_SPLIT(~0u), 7, 399},
+ {45999, S_009910_BANK_WIDTH(~0u), 4, 406},
+ {46106, S_009910_BANK_HEIGHT(~0u), 4, 410},
+ {46218, S_009910_MACRO_TILE_ASPECT(~0u), 4, 414},
+ {46305, S_009910_NUM_BANKS(~0u), 4, 418},
+ {46346, S_009910_MICRO_TILE_MODE_NEW(~0u), 4, 422},
+ {46366, S_009910_SAMPLE_SPLIT(~0u)},
+ /* 691 */
+ {45999, S_009990_BANK_WIDTH(~0u)},
+ {46106, S_009990_BANK_HEIGHT(~0u)},
+ {46218, S_009990_MACRO_TILE_ASPECT(~0u)},
+ {46305, S_009990_NUM_BANKS(~0u)},
+ /* 695 */
+ {46379, S_00B004_MEM_BASE(~0u)},
/* 696 */
- {8879, S_030FFC_COUNT_HI(~0u)},
+ {46379, S_00B00C_MEM_BASE(~0u)},
/* 697 */
- {15469, S_009100_GPR_WRITE_PRIORITY(~0u)},
- {15488, S_009100_EXP_PRIORITY_ORDER(~0u)},
- {15507, S_009100_ENABLE_SQG_TOP_EVENTS(~0u)},
- {15529, S_009100_ENABLE_SQG_BOP_EVENTS(~0u)},
- {15551, S_009100_RSRC_MGMT_RESET(~0u)},
- /* 702 */
- {15820, S_00913C_VTX_DONE_DELAY(~0u), 16, 466},
- {15835, S_00913C_INTERP_ONE_PRIM_PER_ROW(~0u)},
- {15859, S_00913C_PC_LIMIT_ENABLE(~0u)},
- {15875, S_00913C_PC_LIMIT_STRICT(~0u)},
- {15891, S_00913C_PC_LIMIT_SIZE(~0u)},
- /* 707 */
- {15905, S_00936C_TYPE_A(~0u)},
- {15912, S_00936C_VGPR_A(~0u)},
- {15919, S_00936C_SGPR_A(~0u)},
- {15926, S_00936C_LDS_A(~0u)},
- {15932, S_00936C_WAVES_A(~0u)},
- {15940, S_00936C_EN_A(~0u)},
- {15945, S_00936C_TYPE_B(~0u)},
- {15952, S_00936C_VGPR_B(~0u)},
- {15959, S_00936C_SGPR_B(~0u)},
- {15966, S_00936C_LDS_B(~0u)},
- {15972, S_00936C_WAVES_B(~0u)},
- {15980, S_00936C_EN_B(~0u)},
+ {43975, S_00B01C_CU_EN(~0u)},
+ {46388, S_00B01C_WAVE_LIMIT(~0u)},
+ {46399, S_00B01C_LOCK_LOW_THRESHOLD(~0u)},
+ /* 700 */
+ {46379, S_00B024_MEM_BASE(~0u)},
+ /* 701 */
+ {46418, S_00B028_VGPRS(~0u)},
+ {46424, S_00B028_SGPRS(~0u)},
+ {2676, S_00B028_PRIORITY(~0u)},
+ {46473, S_00B028_FLOAT_MODE(~0u), 241, 426},
+ {46484, S_00B028_PRIV(~0u)},
+ {46489, S_00B028_DX10_CLAMP(~0u)},
+ {46500, S_00B028_DEBUG_MODE(~0u)},
+ {46511, S_00B028_IEEE_MODE(~0u)},
+ {46521, S_00B028_CU_GROUP_DISABLE(~0u)},
+ {46538, S_00B028_CACHE_CTL(~0u)},
+ {46548, S_00B028_CDBG_USER(~0u)},
+ /* 712 */
+ {46558, S_00B02C_SCRATCH_EN(~0u)},
+ {46569, S_00B02C_USER_SGPR(~0u)},
+ {46579, S_00B02C_TRAP_PRESENT(~0u)},
+ {46592, S_00B02C_WAVE_CNT_EN(~0u)},
+ {46604, S_00B02C_EXTRA_LDS_SIZE(~0u)},
+ {46619, S_00B02C_EXCP_EN_SI(~0u)},
+ {46630, S_00B02C_EXCP_EN(~0u)},
/* 719 */
- {15985, S_009858_MSAA1_X(~0u)},
- {15993, S_009858_MSAA1_Y(~0u)},
- {16001, S_009858_MSAA2_X(~0u)},
- {16009, S_009858_MSAA2_Y(~0u)},
- {16017, S_009858_MSAA4_X(~0u)},
- {16025, S_009858_MSAA4_Y(~0u)},
- {16033, S_009858_MSAA8_X(~0u)},
- {16041, S_009858_MSAA8_Y(~0u)},
- {16049, S_009858_MSAA16_X(~0u)},
- {16058, S_009858_MSAA16_Y(~0u)},
- /* 729 */
- {16067, S_0098F8_NUM_PIPES(~0u)},
- {16077, S_0098F8_PIPE_INTERLEAVE_SIZE_GFX6(~0u)},
- {16103, S_0098F8_BANK_INTERLEAVE_SIZE(~0u)},
- {16124, S_0098F8_NUM_SHADER_ENGINES_GFX6(~0u)},
- {16148, S_0098F8_SHADER_ENGINE_TILE_SIZE(~0u)},
- {16172, S_0098F8_NUM_GPUS_GFX6(~0u)},
- {16186, S_0098F8_MULTI_GPU_TILE_SIZE(~0u)},
- {16206, S_0098F8_ROW_SIZE(~0u)},
- {16215, S_0098F8_NUM_LOWER_PIPES(~0u)},
- {16231, S_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(~0u)},
- {16257, S_0098F8_MAX_COMPRESSED_FRAGS(~0u)},
- {16278, S_0098F8_NUM_BANKS(~0u)},
- {16288, S_0098F8_NUM_SHADER_ENGINES_GFX9(~0u)},
- {16312, S_0098F8_NUM_GPUS_GFX9(~0u)},
- {16326, S_0098F8_NUM_RB_PER_SE(~0u)},
- {16340, S_0098F8_SE_ENABLE(~0u)},
- /* 745 */
- {16467, S_009910_MICRO_TILE_MODE(~0u), 4, 482},
- {16712, S_009910_ARRAY_MODE(~0u), 16, 486},
- {17074, S_009910_PIPE_CONFIG(~0u), 15, 502},
- {17264, S_009910_TILE_SPLIT(~0u), 7, 517},
- {17367, S_009910_BANK_WIDTH(~0u), 4, 524},
- {17474, S_009910_BANK_HEIGHT(~0u), 4, 528},
- {17586, S_009910_MACRO_TILE_ASPECT(~0u), 4, 532},
- {16278, S_009910_NUM_BANKS(~0u), 4, 536},
- {17704, S_009910_MICRO_TILE_MODE_NEW(~0u), 4, 540},
- {17724, S_009910_SAMPLE_SPLIT(~0u)},
- /* 755 */
- {17367, S_009990_BANK_WIDTH(~0u)},
- {17474, S_009990_BANK_HEIGHT(~0u)},
- {17586, S_009990_MACRO_TILE_ASPECT(~0u)},
- {16278, S_009990_NUM_BANKS(~0u)},
- /* 759 */
- {17737, S_00B004_MEM_BASE(~0u)},
- /* 760 */
- {17737, S_00B00C_MEM_BASE(~0u)},
- /* 761 */
- {15212, S_00B01C_CU_EN(~0u)},
- {17746, S_00B01C_WAVE_LIMIT(~0u)},
- {17757, S_00B01C_LOCK_LOW_THRESHOLD(~0u)},
- {17776, S_00B01C_SIMD_DISABLE(~0u)},
- /* 765 */
- {17737, S_00B024_MEM_BASE(~0u)},
- /* 766 */
- {17789, S_00B028_VGPRS(~0u)},
- {17795, S_00B028_SGPRS(~0u)},
- {15479, S_00B028_PRIORITY(~0u)},
- {17844, S_00B028_FLOAT_MODE(~0u), 241, 544},
- {17855, S_00B028_PRIV(~0u)},
- {17860, S_00B028_DX10_CLAMP(~0u)},
- {17871, S_00B028_DEBUG_MODE(~0u)},
- {17882, S_00B028_IEEE_MODE(~0u)},
- {17892, S_00B028_CU_GROUP_DISABLE(~0u)},
- {17909, S_00B028_CACHE_CTL(~0u)},
- {17919, S_00B028_CDBG_USER(~0u)},
- {17929, S_00B028_FP16_OVFL(~0u)},
- /* 778 */
- {17939, S_00B02C_SCRATCH_EN(~0u)},
- {17950, S_00B02C_USER_SGPR(~0u)},
- {17960, S_00B02C_TRAP_PRESENT(~0u)},
- {17973, S_00B02C_WAVE_CNT_EN(~0u)},
- {17985, S_00B02C_EXTRA_LDS_SIZE(~0u)},
- {18000, S_00B02C_EXCP_EN_SI(~0u)},
- {18011, S_00B02C_EXCP_EN(~0u)},
- {18019, S_00B02C_LOAD_COLLISION_WAVEID(~0u)},
- {18041, S_00B02C_LOAD_INTRAWAVE_COLLISION(~0u)},
- {18066, S_00B02C_SKIP_USGPR0(~0u)},
- {18078, S_00B02C_USER_SGPR_MSB(~0u)},
- /* 789 */
- {17737, S_00B104_MEM_BASE(~0u)},
- /* 790 */
- {17737, S_00B10C_MEM_BASE(~0u)},
- /* 791 */
- {15212, S_00B118_CU_EN(~0u)},
- {17746, S_00B118_WAVE_LIMIT(~0u)},
- {17757, S_00B118_LOCK_LOW_THRESHOLD(~0u)},
- {17776, S_00B118_SIMD_DISABLE(~0u)},
- /* 795 */
- {5810, S_00B11C_LIMIT(~0u)},
- /* 796 */
- {17737, S_00B124_MEM_BASE(~0u)},
- /* 797 */
- {17789, S_00B128_VGPRS(~0u)},
- {17795, S_00B128_SGPRS(~0u)},
- {15479, S_00B128_PRIORITY(~0u)},
- {17844, S_00B128_FLOAT_MODE(~0u)},
- {17855, S_00B128_PRIV(~0u)},
- {17860, S_00B128_DX10_CLAMP(~0u)},
- {17871, S_00B128_DEBUG_MODE(~0u)},
- {17882, S_00B128_IEEE_MODE(~0u)},
- {18092, S_00B128_VGPR_COMP_CNT(~0u)},
- {18106, S_00B128_CU_GROUP_ENABLE(~0u)},
- {17909, S_00B128_CACHE_CTL(~0u)},
- {17919, S_00B128_CDBG_USER(~0u)},
- {17929, S_00B128_FP16_OVFL(~0u)},
- /* 810 */
- {17939, S_00B12C_SCRATCH_EN(~0u)},
- {17950, S_00B12C_USER_SGPR(~0u)},
- {17960, S_00B12C_TRAP_PRESENT(~0u)},
- {18122, S_00B12C_OC_LDS_EN(~0u)},
- {18132, S_00B12C_SO_BASE0_EN(~0u)},
- {18144, S_00B12C_SO_BASE1_EN(~0u)},
- {18156, S_00B12C_SO_BASE2_EN(~0u)},
- {18168, S_00B12C_SO_BASE3_EN(~0u)},
- {18180, S_00B12C_SO_EN(~0u)},
- {18000, S_00B12C_EXCP_EN_SI(~0u)},
- {18011, S_00B12C_EXCP_EN(~0u)},
- {18186, S_00B12C_DISPATCH_DRAW_EN(~0u)},
- {18203, S_00B12C_PC_BASE_EN(~0u)},
- {18066, S_00B12C_SKIP_USGPR0(~0u)},
- {18078, S_00B12C_USER_SGPR_MSB(~0u)},
- /* 825 */
- {17737, S_00B204_MEM_BASE(~0u)},
- /* 826 */
- {17737, S_00B20C_MEM_BASE(~0u)},
- /* 827 */
- {15212, S_00B21C_CU_EN(~0u)},
- {17746, S_00B21C_WAVE_LIMIT(~0u)},
- {17757, S_00B21C_LOCK_LOW_THRESHOLD(~0u)},
- {18214, S_00B21C_GROUP_FIFO_DEPTH(~0u)},
- {17776, S_00B21C_SIMD_DISABLE(~0u)},
- /* 832 */
- {17737, S_00B224_MEM_BASE(~0u)},
- /* 833 */
- {17789, S_00B228_VGPRS(~0u)},
- {17795, S_00B228_SGPRS(~0u)},
- {15479, S_00B228_PRIORITY(~0u)},
- {17844, S_00B228_FLOAT_MODE(~0u)},
- {17855, S_00B228_PRIV(~0u)},
- {17860, S_00B228_DX10_CLAMP(~0u)},
- {17871, S_00B228_DEBUG_MODE(~0u)},
- {17882, S_00B228_IEEE_MODE(~0u)},
- {18106, S_00B228_CU_GROUP_ENABLE(~0u)},
- {17909, S_00B228_CACHE_CTL(~0u)},
- {17919, S_00B228_CDBG_USER(~0u)},
- {18231, S_00B228_GS_VGPR_COMP_CNT(~0u)},
- {17929, S_00B228_FP16_OVFL(~0u)},
+ {46379, S_00B104_MEM_BASE(~0u)},
+ /* 720 */
+ {46379, S_00B10C_MEM_BASE(~0u)},
+ /* 721 */
+ {43975, S_00B118_CU_EN(~0u)},
+ {46388, S_00B118_WAVE_LIMIT(~0u)},
+ {46399, S_00B118_LOCK_LOW_THRESHOLD(~0u)},
+ /* 724 */
+ {36331, S_00B11C_LIMIT(~0u)},
+ /* 725 */
+ {46379, S_00B124_MEM_BASE(~0u)},
+ /* 726 */
+ {46418, S_00B128_VGPRS(~0u)},
+ {46424, S_00B128_SGPRS(~0u)},
+ {2676, S_00B128_PRIORITY(~0u)},
+ {46473, S_00B128_FLOAT_MODE(~0u)},
+ {46484, S_00B128_PRIV(~0u)},
+ {46489, S_00B128_DX10_CLAMP(~0u)},
+ {46500, S_00B128_DEBUG_MODE(~0u)},
+ {46511, S_00B128_IEEE_MODE(~0u)},
+ {46638, S_00B128_VGPR_COMP_CNT(~0u)},
+ {46652, S_00B128_CU_GROUP_ENABLE(~0u)},
+ {46538, S_00B128_CACHE_CTL(~0u)},
+ {46548, S_00B128_CDBG_USER(~0u)},
+ /* 738 */
+ {46558, S_00B12C_SCRATCH_EN(~0u)},
+ {46569, S_00B12C_USER_SGPR(~0u)},
+ {46579, S_00B12C_TRAP_PRESENT(~0u)},
+ {46668, S_00B12C_OC_LDS_EN(~0u)},
+ {46678, S_00B12C_SO_BASE0_EN(~0u)},
+ {46690, S_00B12C_SO_BASE1_EN(~0u)},
+ {46702, S_00B12C_SO_BASE2_EN(~0u)},
+ {46714, S_00B12C_SO_BASE3_EN(~0u)},
+ {46726, S_00B12C_SO_EN(~0u)},
+ {46619, S_00B12C_EXCP_EN_SI(~0u)},
+ {46630, S_00B12C_EXCP_EN(~0u)},
+ {46732, S_00B12C_DISPATCH_DRAW_EN(~0u)},
+ /* 750 */
+ {46379, S_00B204_MEM_BASE(~0u)},
+ /* 751 */
+ {46379, S_00B20C_MEM_BASE(~0u)},
+ /* 752 */
+ {43975, S_00B21C_CU_EN(~0u)},
+ {46388, S_00B21C_WAVE_LIMIT(~0u)},
+ {46399, S_00B21C_LOCK_LOW_THRESHOLD(~0u)},
+ {46749, S_00B21C_GROUP_FIFO_DEPTH(~0u)},
+ /* 756 */
+ {46379, S_00B224_MEM_BASE(~0u)},
+ /* 757 */
+ {46418, S_00B228_VGPRS(~0u)},
+ {46424, S_00B228_SGPRS(~0u)},
+ {2676, S_00B228_PRIORITY(~0u)},
+ {46473, S_00B228_FLOAT_MODE(~0u)},
+ {46484, S_00B228_PRIV(~0u)},
+ {46489, S_00B228_DX10_CLAMP(~0u)},
+ {46500, S_00B228_DEBUG_MODE(~0u)},
+ {46511, S_00B228_IEEE_MODE(~0u)},
+ {46652, S_00B228_CU_GROUP_ENABLE(~0u)},
+ {46538, S_00B228_CACHE_CTL(~0u)},
+ {46548, S_00B228_CDBG_USER(~0u)},
+ /* 768 */
+ {46558, S_00B22C_SCRATCH_EN(~0u)},
+ {46569, S_00B22C_USER_SGPR(~0u)},
+ {46579, S_00B22C_TRAP_PRESENT(~0u)},
+ {46619, S_00B22C_EXCP_EN_SI(~0u)},
+ {46630, S_00B22C_EXCP_EN(~0u)},
+ /* 773 */
+ {46379, S_00B304_MEM_BASE(~0u)},
+ /* 774 */
+ {46379, S_00B30C_MEM_BASE(~0u)},
+ /* 775 */
+ {43975, S_00B31C_CU_EN(~0u)},
+ {46388, S_00B31C_WAVE_LIMIT(~0u)},
+ {46399, S_00B31C_LOCK_LOW_THRESHOLD(~0u)},
+ {46749, S_00B31C_GROUP_FIFO_DEPTH(~0u)},
+ /* 779 */
+ {46379, S_00B324_MEM_BASE(~0u)},
+ /* 780 */
+ {46418, S_00B328_VGPRS(~0u)},
+ {46424, S_00B328_SGPRS(~0u)},
+ {2676, S_00B328_PRIORITY(~0u)},
+ {46473, S_00B328_FLOAT_MODE(~0u)},
+ {46484, S_00B328_PRIV(~0u)},
+ {46489, S_00B328_DX10_CLAMP(~0u)},
+ {46500, S_00B328_DEBUG_MODE(~0u)},
+ {46511, S_00B328_IEEE_MODE(~0u)},
+ {46638, S_00B328_VGPR_COMP_CNT(~0u)},
+ {46652, S_00B328_CU_GROUP_ENABLE(~0u)},
+ {46538, S_00B328_CACHE_CTL(~0u)},
+ {46548, S_00B328_CDBG_USER(~0u)},
+ /* 792 */
+ {46558, S_00B32C_SCRATCH_EN(~0u)},
+ {46569, S_00B32C_USER_SGPR(~0u)},
+ {46579, S_00B32C_TRAP_PRESENT(~0u)},
+ {46668, S_00B32C_OC_LDS_EN(~0u)},
+ {46619, S_00B32C_EXCP_EN_SI(~0u)},
+ {46630, S_00B32C_EXCP_EN(~0u)},
+ {46610, S_00B32C_LDS_SIZE(~0u)},
+ /* 799 */
+ {46379, S_00B404_MEM_BASE(~0u)},
+ /* 800 */
+ {46379, S_00B40C_MEM_BASE(~0u)},
+ /* 801 */
+ {46388, S_00B41C_WAVE_LIMIT(~0u)},
+ {46399, S_00B41C_LOCK_LOW_THRESHOLD(~0u)},
+ {46749, S_00B41C_GROUP_FIFO_DEPTH(~0u)},
+ /* 804 */
+ {46379, S_00B424_MEM_BASE(~0u)},
+ /* 805 */
+ {46418, S_00B428_VGPRS(~0u)},
+ {46424, S_00B428_SGPRS(~0u)},
+ {2676, S_00B428_PRIORITY(~0u)},
+ {46473, S_00B428_FLOAT_MODE(~0u)},
+ {46484, S_00B428_PRIV(~0u)},
+ {46489, S_00B428_DX10_CLAMP(~0u)},
+ {46500, S_00B428_DEBUG_MODE(~0u)},
+ {46511, S_00B428_IEEE_MODE(~0u)},
+ {46538, S_00B428_CACHE_CTL(~0u)},
+ {46548, S_00B428_CDBG_USER(~0u)},
+ /* 815 */
+ {46558, S_00B42C_SCRATCH_EN(~0u)},
+ {46569, S_00B42C_USER_SGPR(~0u)},
+ {46579, S_00B42C_TRAP_PRESENT(~0u)},
+ {46668, S_00B42C_OC_LDS_EN(~0u)},
+ {46766, S_00B42C_TG_SIZE_EN(~0u)},
+ {46619, S_00B42C_EXCP_EN_SI(~0u)},
+ {46777, S_00B42C_EXCP_EN_CIK_VI(~0u)},
+ /* 822 */
+ {46379, S_00B504_MEM_BASE(~0u)},
+ /* 823 */
+ {46379, S_00B50C_MEM_BASE(~0u)},
+ /* 824 */
+ {43975, S_00B51C_CU_EN(~0u)},
+ {46388, S_00B51C_WAVE_LIMIT(~0u)},
+ {46399, S_00B51C_LOCK_LOW_THRESHOLD(~0u)},
+ {46749, S_00B51C_GROUP_FIFO_DEPTH(~0u)},
+ /* 828 */
+ {46379, S_00B524_MEM_BASE(~0u)},
+ /* 829 */
+ {46418, S_00B528_VGPRS(~0u)},
+ {46424, S_00B528_SGPRS(~0u)},
+ {2676, S_00B528_PRIORITY(~0u)},
+ {46473, S_00B528_FLOAT_MODE(~0u)},
+ {46484, S_00B528_PRIV(~0u)},
+ {46489, S_00B528_DX10_CLAMP(~0u)},
+ {46500, S_00B528_DEBUG_MODE(~0u)},
+ {46511, S_00B528_IEEE_MODE(~0u)},
+ {46638, S_00B528_VGPR_COMP_CNT(~0u)},
+ {46538, S_00B528_CACHE_CTL(~0u)},
+ {46548, S_00B528_CDBG_USER(~0u)},
+ /* 840 */
+ {46558, S_00B52C_SCRATCH_EN(~0u)},
+ {46569, S_00B52C_USER_SGPR(~0u)},
+ {46579, S_00B52C_TRAP_PRESENT(~0u)},
+ {46610, S_00B52C_LDS_SIZE(~0u)},
+ {46619, S_00B52C_EXCP_EN_SI(~0u)},
+ {46630, S_00B52C_EXCP_EN(~0u)},
/* 846 */
- {17939, S_00B22C_SCRATCH_EN(~0u)},
- {17950, S_00B22C_USER_SGPR(~0u)},
- {17960, S_00B22C_TRAP_PRESENT(~0u)},
- {18000, S_00B22C_EXCP_EN_SI(~0u)},
- {18011, S_00B22C_EXCP_EN(~0u)},
- {18248, S_00B22C_ES_VGPR_COMP_CNT(~0u)},
- {18122, S_00B22C_OC_LDS_EN(~0u)},
- {17991, S_00B22C_LDS_SIZE(~0u)},
- {18066, S_00B22C_SKIP_USGPR0(~0u)},
- {18078, S_00B22C_USER_SGPR_MSB(~0u)},
- /* 856 */
- {17737, S_00B304_MEM_BASE(~0u)},
- /* 857 */
- {17737, S_00B30C_MEM_BASE(~0u)},
+ {46792, S_00B800_COMPUTE_SHADER_EN(~0u)},
+ {46810, S_00B800_PARTIAL_TG_EN(~0u)},
+ {46824, S_00B800_FORCE_START_AT_000(~0u)},
+ {46843, S_00B800_ORDERED_APPEND_ENBL(~0u)},
+ {46863, S_00B800_ORDERED_APPEND_MODE(~0u)},
+ {46883, S_00B800_USE_THREAD_DIMENSIONS(~0u)},
+ {46905, S_00B800_ORDER_MODE(~0u)},
+ {46916, S_00B800_DISPATCH_CACHE_CNTL(~0u)},
+ {46936, S_00B800_SCALAR_L1_INV_VOL(~0u)},
+ {46954, S_00B800_VECTOR_L1_INV_VOL(~0u)},
+ {46972, S_00B800_DATA_ATC(~0u)},
+ {46981, S_00B800_RESTORE(~0u)},
/* 858 */
- {15212, S_00B31C_CU_EN(~0u)},
- {17746, S_00B31C_WAVE_LIMIT(~0u)},
- {17757, S_00B31C_LOCK_LOW_THRESHOLD(~0u)},
- {18214, S_00B31C_GROUP_FIFO_DEPTH(~0u)},
+ {46989, S_00B81C_NUM_THREAD_FULL(~0u)},
+ {47005, S_00B81C_NUM_THREAD_PARTIAL(~0u)},
+ /* 860 */
+ {46989, S_00B820_NUM_THREAD_FULL(~0u)},
+ {47005, S_00B820_NUM_THREAD_PARTIAL(~0u)},
/* 862 */
- {17737, S_00B324_MEM_BASE(~0u)},
- /* 863 */
- {17789, S_00B328_VGPRS(~0u)},
- {17795, S_00B328_SGPRS(~0u)},
- {15479, S_00B328_PRIORITY(~0u)},
- {17844, S_00B328_FLOAT_MODE(~0u)},
- {17855, S_00B328_PRIV(~0u)},
- {17860, S_00B328_DX10_CLAMP(~0u)},
- {17871, S_00B328_DEBUG_MODE(~0u)},
- {17882, S_00B328_IEEE_MODE(~0u)},
- {18092, S_00B328_VGPR_COMP_CNT(~0u)},
- {18106, S_00B328_CU_GROUP_ENABLE(~0u)},
- {17909, S_00B328_CACHE_CTL(~0u)},
- {17919, S_00B328_CDBG_USER(~0u)},
- /* 875 */
- {17939, S_00B32C_SCRATCH_EN(~0u)},
- {17950, S_00B32C_USER_SGPR(~0u)},
- {17960, S_00B32C_TRAP_PRESENT(~0u)},
- {18122, S_00B32C_OC_LDS_EN(~0u)},
- {18000, S_00B32C_EXCP_EN_SI(~0u)},
- {18011, S_00B32C_EXCP_EN(~0u)},
- {17991, S_00B32C_LDS_SIZE(~0u)},
- /* 882 */
- {17737, S_00B404_MEM_BASE(~0u)},
- /* 883 */
- {17737, S_00B40C_MEM_BASE(~0u)},
- /* 884 */
- {17746, S_00B41C_WAVE_LIMIT(~0u)},
- {17757, S_00B41C_LOCK_LOW_THRESHOLD(~0u)},
- {18214, S_00B41C_GROUP_FIFO_DEPTH(~0u)},
- {17776, S_00B41C_SIMD_DISABLE(~0u)},
- {15212, S_00B41C_CU_EN(~0u)},
- /* 889 */
- {17737, S_00B424_MEM_BASE(~0u)},
- /* 890 */
- {17789, S_00B428_VGPRS(~0u)},
- {17795, S_00B428_SGPRS(~0u)},
- {15479, S_00B428_PRIORITY(~0u)},
- {17844, S_00B428_FLOAT_MODE(~0u)},
- {17855, S_00B428_PRIV(~0u)},
- {17860, S_00B428_DX10_CLAMP(~0u)},
- {17871, S_00B428_DEBUG_MODE(~0u)},
- {17882, S_00B428_IEEE_MODE(~0u)},
- {17909, S_00B428_CACHE_CTL(~0u)},
- {17919, S_00B428_CDBG_USER(~0u)},
- {18265, S_00B428_LS_VGPR_COMP_CNT(~0u)},
- {17929, S_00B428_FP16_OVFL(~0u)},
- /* 902 */
- {17939, S_00B42C_SCRATCH_EN(~0u)},
- {17950, S_00B42C_USER_SGPR(~0u)},
- {17960, S_00B42C_TRAP_PRESENT(~0u)},
- {18122, S_00B42C_OC_LDS_EN(~0u)},
- {18282, S_00B42C_TG_SIZE_EN(~0u)},
- {18000, S_00B42C_EXCP_EN_SI(~0u)},
- {18293, S_00B42C_EXCP_EN_CIK_VI(~0u)},
- {18011, S_00B42C_EXCP_EN(~0u)},
- {17991, S_00B42C_LDS_SIZE(~0u)},
- {18066, S_00B42C_SKIP_USGPR0(~0u)},
- {18078, S_00B42C_USER_SGPR_MSB(~0u)},
- /* 913 */
- {17737, S_00B504_MEM_BASE(~0u)},
- /* 914 */
- {17737, S_00B50C_MEM_BASE(~0u)},
- /* 915 */
- {15212, S_00B51C_CU_EN(~0u)},
- {17746, S_00B51C_WAVE_LIMIT(~0u)},
- {17757, S_00B51C_LOCK_LOW_THRESHOLD(~0u)},
- {18214, S_00B51C_GROUP_FIFO_DEPTH(~0u)},
- /* 919 */
- {17737, S_00B524_MEM_BASE(~0u)},
- /* 920 */
- {17789, S_00B528_VGPRS(~0u)},
- {17795, S_00B528_SGPRS(~0u)},
- {15479, S_00B528_PRIORITY(~0u)},
- {17844, S_00B528_FLOAT_MODE(~0u)},
- {17855, S_00B528_PRIV(~0u)},
- {17860, S_00B528_DX10_CLAMP(~0u)},
- {17871, S_00B528_DEBUG_MODE(~0u)},
- {17882, S_00B528_IEEE_MODE(~0u)},
- {18092, S_00B528_VGPR_COMP_CNT(~0u)},
- {17909, S_00B528_CACHE_CTL(~0u)},
- {17919, S_00B528_CDBG_USER(~0u)},
- /* 931 */
- {17939, S_00B52C_SCRATCH_EN(~0u)},
- {17950, S_00B52C_USER_SGPR(~0u)},
- {17960, S_00B52C_TRAP_PRESENT(~0u)},
- {17991, S_00B52C_LDS_SIZE(~0u)},
- {18000, S_00B52C_EXCP_EN_SI(~0u)},
- {18011, S_00B52C_EXCP_EN(~0u)},
- /* 937 */
- {18308, S_00B800_COMPUTE_SHADER_EN(~0u)},
- {18326, S_00B800_PARTIAL_TG_EN(~0u)},
- {18340, S_00B800_FORCE_START_AT_000(~0u)},
- {18359, S_00B800_ORDERED_APPEND_ENBL(~0u)},
- {18379, S_00B800_ORDERED_APPEND_MODE(~0u)},
- {18399, S_00B800_USE_THREAD_DIMENSIONS(~0u)},
- {18421, S_00B800_ORDER_MODE(~0u)},
- {18432, S_00B800_DISPATCH_CACHE_CNTL(~0u)},
- {18452, S_00B800_SCALAR_L1_INV_VOL(~0u)},
- {18470, S_00B800_VECTOR_L1_INV_VOL(~0u)},
- {18488, S_00B800_DATA_ATC(~0u)},
- {18497, S_00B800_RESTORE(~0u)},
- {953, S_00B800_RESERVED(~0u)},
- /* 950 */
- {18505, S_00B81C_NUM_THREAD_FULL(~0u)},
- {18521, S_00B81C_NUM_THREAD_PARTIAL(~0u)},
- /* 952 */
- {18505, S_00B820_NUM_THREAD_FULL(~0u)},
- {18521, S_00B820_NUM_THREAD_PARTIAL(~0u)},
- /* 954 */
- {18505, S_00B824_NUM_THREAD_FULL(~0u)},
- {18521, S_00B824_NUM_THREAD_PARTIAL(~0u)},
- /* 956 */
- {15256, S_00B82C_MAX_WAVE_ID(~0u)},
- /* 957 */
- {18540, S_00B828_PIPELINESTAT_ENABLE(~0u)},
- /* 958 */
- {18560, S_00B82C_PERFCOUNT_ENABLE(~0u)},
- /* 959 */
+ {46989, S_00B824_NUM_THREAD_FULL(~0u)},
+ {47005, S_00B824_NUM_THREAD_PARTIAL(~0u)},
+ /* 864 */
+ {2656, S_00B82C_MAX_WAVE_ID(~0u)},
+ /* 865 */
+ {7630, S_00B828_PIPELINESTAT_ENABLE(~0u)},
+ /* 866 */
+ {7658, S_00B82C_PERFCOUNT_ENABLE(~0u)},
+ /* 867 */
{391, S_00B834_DATA(~0u)},
- {18577, S_00B834_INST_ATC(~0u)},
- /* 961 */
+ {47024, S_00B834_INST_ATC(~0u)},
+ /* 869 */
{391, S_00B83C_DATA(~0u)},
- /* 962 */
+ /* 870 */
{391, S_00B844_DATA(~0u)},
- /* 963 */
- {17789, S_00B848_VGPRS(~0u)},
- {17795, S_00B848_SGPRS(~0u)},
- {15479, S_00B848_PRIORITY(~0u)},
- {17844, S_00B848_FLOAT_MODE(~0u)},
- {17855, S_00B848_PRIV(~0u)},
- {17860, S_00B848_DX10_CLAMP(~0u)},
- {17871, S_00B848_DEBUG_MODE(~0u)},
- {17882, S_00B848_IEEE_MODE(~0u)},
- {18586, S_00B848_BULKY(~0u)},
- {17919, S_00B848_CDBG_USER(~0u)},
- {17929, S_00B848_FP16_OVFL(~0u)},
- /* 974 */
- {17939, S_00B84C_SCRATCH_EN(~0u)},
- {17950, S_00B84C_USER_SGPR(~0u)},
- {17960, S_00B84C_TRAP_PRESENT(~0u)},
- {18592, S_00B84C_TGID_X_EN(~0u)},
- {18602, S_00B84C_TGID_Y_EN(~0u)},
- {18612, S_00B84C_TGID_Z_EN(~0u)},
- {18282, S_00B84C_TG_SIZE_EN(~0u)},
- {18622, S_00B84C_TIDIG_COMP_CNT(~0u)},
- {18637, S_00B84C_EXCP_EN_MSB(~0u)},
- {17991, S_00B84C_LDS_SIZE(~0u)},
- {18011, S_00B84C_EXCP_EN(~0u)},
- {18066, S_00B84C_SKIP_USGPR0(~0u)},
- /* 986 */
+ /* 871 */
+ {46418, S_00B848_VGPRS(~0u)},
+ {46424, S_00B848_SGPRS(~0u)},
+ {2676, S_00B848_PRIORITY(~0u)},
+ {46473, S_00B848_FLOAT_MODE(~0u)},
+ {46484, S_00B848_PRIV(~0u)},
+ {46489, S_00B848_DX10_CLAMP(~0u)},
+ {46500, S_00B848_DEBUG_MODE(~0u)},
+ {46511, S_00B848_IEEE_MODE(~0u)},
+ {47033, S_00B848_BULKY(~0u)},
+ {46548, S_00B848_CDBG_USER(~0u)},
+ /* 881 */
+ {46558, S_00B84C_SCRATCH_EN(~0u)},
+ {46569, S_00B84C_USER_SGPR(~0u)},
+ {46579, S_00B84C_TRAP_PRESENT(~0u)},
+ {47039, S_00B84C_TGID_X_EN(~0u)},
+ {47049, S_00B84C_TGID_Y_EN(~0u)},
+ {47059, S_00B84C_TGID_Z_EN(~0u)},
+ {46766, S_00B84C_TG_SIZE_EN(~0u)},
+ {47069, S_00B84C_TIDIG_COMP_CNT(~0u)},
+ {47084, S_00B84C_EXCP_EN_MSB(~0u)},
+ {46610, S_00B84C_LDS_SIZE(~0u)},
+ {46630, S_00B84C_EXCP_EN(~0u)},
+ /* 892 */
{391, S_00B850_DATA(~0u)},
- /* 987 */
- {18649, S_00B854_WAVES_PER_SH_SI(~0u)},
- {18665, S_00B854_WAVES_PER_SH(~0u)},
- {18678, S_00B854_TG_PER_CU(~0u)},
- {18688, S_00B854_LOCK_THRESHOLD(~0u)},
- {18703, S_00B854_SIMD_DEST_CNTL(~0u)},
- {18718, S_00B854_FORCE_SIMD_DIST(~0u)},
- {18734, S_00B854_CU_GROUP_COUNT(~0u)},
- {17776, S_00B854_SIMD_DISABLE(~0u)},
- /* 995 */
- {18749, S_00B858_SH0_CU_EN(~0u)},
- {18759, S_00B858_SH1_CU_EN(~0u)},
- /* 997 */
- {18749, S_00B85C_SH0_CU_EN(~0u)},
- {18759, S_00B85C_SH1_CU_EN(~0u)},
- /* 999 */
- {18769, S_00B860_WAVES(~0u)},
- {18775, S_00B860_WAVESIZE(~0u)},
- /* 1001 */
- {18749, S_00B864_SH0_CU_EN(~0u)},
- {18759, S_00B864_SH1_CU_EN(~0u)},
- /* 1003 */
- {18749, S_00B868_SH0_CU_EN(~0u)},
- {18759, S_00B868_SH1_CU_EN(~0u)},
- /* 1005 */
- {18784, S_00B87C_SEND_SEID(~0u)},
- {16795, S_00B87C_RESERVED2(~0u)},
- {18794, S_00B87C_RESERVED3(~0u)},
- {18804, S_00B87C_RESERVED4(~0u)},
- {18814, S_00B87C_WAVE_ID_BASE(~0u)},
+ /* 893 */
+ {47096, S_00B854_WAVES_PER_SH_SI(~0u)},
+ {47112, S_00B854_WAVES_PER_SH(~0u)},
+ {47125, S_00B854_TG_PER_CU(~0u)},
+ {47135, S_00B854_LOCK_THRESHOLD(~0u)},
+ {47150, S_00B854_SIMD_DEST_CNTL(~0u)},
+ {47165, S_00B854_FORCE_SIMD_DIST(~0u)},
+ {47181, S_00B854_CU_GROUP_COUNT(~0u)},
+ /* 900 */
+ {47196, S_00B858_SH0_CU_EN(~0u)},
+ {47206, S_00B858_SH1_CU_EN(~0u)},
+ /* 902 */
+ {47196, S_00B85C_SH0_CU_EN(~0u)},
+ {47206, S_00B85C_SH1_CU_EN(~0u)},
+ /* 904 */
+ {47216, S_00B860_WAVES(~0u)},
+ {47222, S_00B860_WAVESIZE(~0u)},
+ /* 906 */
+ {47196, S_00B864_SH0_CU_EN(~0u)},
+ {47206, S_00B864_SH1_CU_EN(~0u)},
+ /* 908 */
+ {47196, S_00B868_SH0_CU_EN(~0u)},
+ {47206, S_00B868_SH1_CU_EN(~0u)},
+ /* 910 */
+ {47231, S_00B87C_SEND_SEID(~0u)},
+ {45427, S_00B87C_RESERVED2(~0u)},
+ {47241, S_00B87C_RESERVED3(~0u)},
+ {47251, S_00B87C_RESERVED4(~0u)},
+ {47261, S_00B87C_WAVE_ID_BASE(~0u)},
+ /* 915 */
+ {47274, S_00B888_PAYLOAD(~0u)},
+ {47282, S_00B888_IS_EVENT(~0u)},
+ {47291, S_00B888_IS_STATE(~0u)},
+ /* 918 */
+ {2051, S_00B890_ADDR(~0u)},
+ /* 919 */
+ {40107, S_00B894_ATC(~0u)},
+ {40128, S_00B894_MTYPE(~0u)},
+ /* 921 */
+ {8905, S_034404_PERFCOUNTER_HI(~0u)},
+ /* 922 */
+ {47300, S_036008_PERF_SEL(~0u)},
+ {47309, S_036008_PERF_SEL1(~0u)},
+ {47319, S_036008_CNTR_MODE(~0u)},
+ /* 925 */
+ {47329, S_036004_PERF_SEL2(~0u)},
+ {47339, S_036004_PERF_SEL3(~0u)},
+ /* 927 */
+ {47300, S_036024_PERF_SEL(~0u)},
+ {47309, S_036024_PERF_SEL1(~0u)},
+ {47319, S_036024_CNTR_MODE(~0u)},
+ /* 930 */
+ {47329, S_036010_PERF_SEL2(~0u)},
+ {47339, S_036010_PERF_SEL3(~0u)},
+ /* 932 */
+ {47300, S_03601C_PERF_SEL(~0u)},
+ {47309, S_03601C_PERF_SEL1(~0u)},
+ {47319, S_03601C_CNTR_MODE(~0u)},
+ /* 935 */
+ {47329, S_036018_PERF_SEL2(~0u)},
+ {47339, S_036018_PERF_SEL3(~0u)},
+ /* 937 */
+ {47396, S_036020_PERFMON_STATE(~0u), 3, 667},
+ {47410, S_036020_SPM_PERFMON_STATE(~0u)},
+ {47428, S_036020_PERFMON_ENABLE_MODE(~0u)},
+ {47448, S_036020_PERFMON_SAMPLE_ENABLE(~0u)},
+ /* 941 */
+ {47300, S_036100_PERF_SEL(~0u)},
+ {47470, S_036100_DB_CLEAN_USER_DEFINED_MASK(~0u)},
+ {47497, S_036100_CB_CLEAN_USER_DEFINED_MASK(~0u)},
+ {47524, S_036100_VGT_BUSY_USER_DEFINED_MASK(~0u)},
+ {47551, S_036100_TA_BUSY_USER_DEFINED_MASK(~0u)},
+ {47577, S_036100_SX_BUSY_USER_DEFINED_MASK(~0u)},
+ {47603, S_036100_SPI_BUSY_USER_DEFINED_MASK(~0u)},
+ {47630, S_036100_SC_BUSY_USER_DEFINED_MASK(~0u)},
+ {47656, S_036100_PA_BUSY_USER_DEFINED_MASK(~0u)},
+ {47682, S_036100_GRBM_BUSY_USER_DEFINED_MASK(~0u)},
+ {47710, S_036100_DB_BUSY_USER_DEFINED_MASK(~0u)},
+ {47736, S_036100_CB_BUSY_USER_DEFINED_MASK(~0u)},
+ {47762, S_036100_CP_BUSY_USER_DEFINED_MASK(~0u)},
+ {47788, S_036100_IA_BUSY_USER_DEFINED_MASK(~0u)},
+ {47814, S_036100_GDS_BUSY_USER_DEFINED_MASK(~0u)},
+ {47841, S_036100_BCI_BUSY_USER_DEFINED_MASK(~0u)},
+ {47868, S_036100_RLC_BUSY_USER_DEFINED_MASK(~0u)},
+ {47895, S_036100_TC_BUSY_USER_DEFINED_MASK(~0u)},
+ {47921, S_036100_WD_BUSY_USER_DEFINED_MASK(~0u)},
+ /* 960 */
+ {47300, S_036108_PERF_SEL(~0u)},
+ {47470, S_036108_DB_CLEAN_USER_DEFINED_MASK(~0u)},
+ {47497, S_036108_CB_CLEAN_USER_DEFINED_MASK(~0u)},
+ {47551, S_036108_TA_BUSY_USER_DEFINED_MASK(~0u)},
+ {47577, S_036108_SX_BUSY_USER_DEFINED_MASK(~0u)},
+ {47603, S_036108_SPI_BUSY_USER_DEFINED_MASK(~0u)},
+ {47630, S_036108_SC_BUSY_USER_DEFINED_MASK(~0u)},
+ {47710, S_036108_DB_BUSY_USER_DEFINED_MASK(~0u)},
+ {47736, S_036108_CB_BUSY_USER_DEFINED_MASK(~0u)},
+ {47524, S_036108_VGT_BUSY_USER_DEFINED_MASK(~0u)},
+ {47656, S_036108_PA_BUSY_USER_DEFINED_MASK(~0u)},
+ {47841, S_036108_BCI_BUSY_USER_DEFINED_MASK(~0u)},
+ /* 972 */
+ {47300, S_03610C_PERF_SEL(~0u)},
+ {47470, S_03610C_DB_CLEAN_USER_DEFINED_MASK(~0u)},
+ {47497, S_03610C_CB_CLEAN_USER_DEFINED_MASK(~0u)},
+ {47551, S_03610C_TA_BUSY_USER_DEFINED_MASK(~0u)},
+ {47577, S_03610C_SX_BUSY_USER_DEFINED_MASK(~0u)},
+ {47603, S_03610C_SPI_BUSY_USER_DEFINED_MASK(~0u)},
+ {47630, S_03610C_SC_BUSY_USER_DEFINED_MASK(~0u)},
+ {47710, S_03610C_DB_BUSY_USER_DEFINED_MASK(~0u)},
+ {47736, S_03610C_CB_BUSY_USER_DEFINED_MASK(~0u)},
+ {47524, S_03610C_VGT_BUSY_USER_DEFINED_MASK(~0u)},
+ {47656, S_03610C_PA_BUSY_USER_DEFINED_MASK(~0u)},
+ {47841, S_03610C_BCI_BUSY_USER_DEFINED_MASK(~0u)},
+ /* 984 */
+ {47300, S_036110_PERF_SEL(~0u)},
+ {47470, S_036110_DB_CLEAN_USER_DEFINED_MASK(~0u)},
+ {47497, S_036110_CB_CLEAN_USER_DEFINED_MASK(~0u)},
+ {47551, S_036110_TA_BUSY_USER_DEFINED_MASK(~0u)},
+ {47577, S_036110_SX_BUSY_USER_DEFINED_MASK(~0u)},
+ {47603, S_036110_SPI_BUSY_USER_DEFINED_MASK(~0u)},
+ {47630, S_036110_SC_BUSY_USER_DEFINED_MASK(~0u)},
+ {47710, S_036110_DB_BUSY_USER_DEFINED_MASK(~0u)},
+ {47736, S_036110_CB_BUSY_USER_DEFINED_MASK(~0u)},
+ {47524, S_036110_VGT_BUSY_USER_DEFINED_MASK(~0u)},
+ {47656, S_036110_PA_BUSY_USER_DEFINED_MASK(~0u)},
+ {47841, S_036110_BCI_BUSY_USER_DEFINED_MASK(~0u)},
+ /* 996 */
+ {47300, S_036114_PERF_SEL(~0u)},
+ {47470, S_036114_DB_CLEAN_USER_DEFINED_MASK(~0u)},
+ {47497, S_036114_CB_CLEAN_USER_DEFINED_MASK(~0u)},
+ {47551, S_036114_TA_BUSY_USER_DEFINED_MASK(~0u)},
+ {47577, S_036114_SX_BUSY_USER_DEFINED_MASK(~0u)},
+ {47603, S_036114_SPI_BUSY_USER_DEFINED_MASK(~0u)},
+ {47630, S_036114_SC_BUSY_USER_DEFINED_MASK(~0u)},
+ {47710, S_036114_DB_BUSY_USER_DEFINED_MASK(~0u)},
+ {47736, S_036114_CB_BUSY_USER_DEFINED_MASK(~0u)},
+ {47524, S_036114_VGT_BUSY_USER_DEFINED_MASK(~0u)},
+ {47656, S_036114_PA_BUSY_USER_DEFINED_MASK(~0u)},
+ {47841, S_036114_BCI_BUSY_USER_DEFINED_MASK(~0u)},
+ /* 1008 */
+ {47300, S_036200_PERF_SEL(~0u)},
+ {47947, S_036200_PERF_MODE(~0u)},
/* 1010 */
- {18827, S_00B888_PAYLOAD(~0u)},
- {18835, S_00B888_IS_EVENT(~0u)},
- {18844, S_00B888_IS_STATE(~0u)},
- /* 1013 */
- {906, S_00B890_ADDR(~0u)},
- /* 1014 */
- {9689, S_00B894_ATC(~0u)},
- {9710, S_00B894_MTYPE(~0u)},
- /* 1016 */
- {18853, S_034404_PERFCOUNTER_HI(~0u)},
- /* 1017 */
- {18868, S_036004_PERF_SEL2(~0u)},
- {18878, S_036004_PERF_SEL3(~0u)},
- {18888, S_036004_CNTR_SEL2(~0u)},
- {18898, S_036004_CNTR_SEL3(~0u)},
- {18908, S_036004_CNTR_MODE3(~0u)},
- {18919, S_036004_CNTR_MODE2(~0u)},
- /* 1023 */
- {18930, S_036008_PERF_SEL(~0u)},
- {18939, S_036008_PERF_SEL1(~0u)},
- {18949, S_036008_CNTR_MODE(~0u)},
- {18959, S_036008_CNTR_SEL0(~0u)},
- {18969, S_036008_CNTR_SEL1(~0u)},
- {18979, S_036008_SPM_MODE(~0u)},
- {18988, S_036008_CNTR_MODE1(~0u)},
- {18999, S_036008_CNTR_MODE0(~0u)},
- /* 1031 */
- {18868, S_036010_PERF_SEL2(~0u)},
- {18878, S_036010_PERF_SEL3(~0u)},
- {18888, S_036010_CNTR_SEL2(~0u)},
- {18898, S_036010_CNTR_SEL3(~0u)},
- {18908, S_036010_CNTR_MODE3(~0u)},
- {18919, S_036010_CNTR_MODE2(~0u)},
+ {47300, S_036210_PERF_SEL(~0u)},
+ {47309, S_036210_PERF_SEL1(~0u)},
+ {47319, S_036210_CNTR_MODE(~0u)},
+ {47957, S_036210_PERF_MODE1(~0u)},
+ {47947, S_036210_PERF_MODE(~0u)},
+ /* 1015 */
+ {47329, S_036220_PERF_SEL2(~0u)},
+ {47339, S_036220_PERF_SEL3(~0u)},
+ {47968, S_036220_PERF_MODE3(~0u)},
+ {47979, S_036220_PERF_MODE2(~0u)},
+ /* 1019 */
+ {47300, S_036230_PERF_SEL(~0u)},
+ {47309, S_036230_PERF_SEL1(~0u)},
+ {47319, S_036230_CNTR_MODE(~0u)},
+ {47957, S_036230_PERF_MODE1(~0u)},
+ {47947, S_036230_PERF_MODE(~0u)},
+ /* 1024 */
+ {47329, S_036240_PERF_SEL2(~0u)},
+ {47339, S_036240_PERF_SEL3(~0u)},
+ {47968, S_036240_PERF_MODE3(~0u)},
+ {47979, S_036240_PERF_MODE2(~0u)},
+ /* 1028 */
+ {47990, S_036250_PERF_SEID_IGNORE_MASK(~0u)},
+ /* 1029 */
+ {47300, S_036400_PERF_SEL(~0u)},
+ {47309, S_036400_PERF_SEL1(~0u)},
+ {47319, S_036400_CNTR_MODE(~0u)},
+ /* 1032 */
+ {47329, S_036404_PERF_SEL2(~0u)},
+ {47339, S_036404_PERF_SEL3(~0u)},
+ /* 1034 */
+ {47300, S_036500_PERF_SEL(~0u)},
+ {47309, S_036500_PERF_SEL1(~0u)},
+ {47319, S_036500_CNTR_MODE(~0u)},
/* 1037 */
- {18868, S_036018_PERF_SEL2(~0u)},
- {18878, S_036018_PERF_SEL3(~0u)},
- {18888, S_036018_CNTR_SEL2(~0u)},
- {18898, S_036018_CNTR_SEL3(~0u)},
- {18908, S_036018_CNTR_MODE3(~0u)},
- {18919, S_036018_CNTR_MODE2(~0u)},
- /* 1043 */
- {18930, S_03601C_PERF_SEL(~0u)},
- {18939, S_03601C_PERF_SEL1(~0u)},
- {18949, S_03601C_CNTR_MODE(~0u)},
- {18959, S_03601C_CNTR_SEL0(~0u)},
- {18969, S_03601C_CNTR_SEL1(~0u)},
- {18979, S_03601C_SPM_MODE(~0u)},
- {18988, S_03601C_CNTR_MODE1(~0u)},
- {18999, S_03601C_CNTR_MODE0(~0u)},
- /* 1051 */
- {19057, S_036020_PERFMON_STATE(~0u), 3, 785},
- {19071, S_036020_SPM_PERFMON_STATE(~0u)},
- {19089, S_036020_PERFMON_ENABLE_MODE(~0u)},
- {19109, S_036020_PERFMON_SAMPLE_ENABLE(~0u)},
- /* 1055 */
- {18930, S_036024_PERF_SEL(~0u)},
- {18939, S_036024_PERF_SEL1(~0u)},
- {18949, S_036024_CNTR_MODE(~0u)},
- {18959, S_036024_CNTR_SEL0(~0u)},
- {18969, S_036024_CNTR_SEL1(~0u)},
- {18979, S_036024_SPM_MODE(~0u)},
- {18988, S_036024_CNTR_MODE1(~0u)},
- {18999, S_036024_CNTR_MODE0(~0u)},
- /* 1063 */
- {18930, S_036100_PERF_SEL(~0u)},
- {19131, S_036100_DB_CLEAN_USER_DEFINED_MASK(~0u)},
- {19158, S_036100_CB_CLEAN_USER_DEFINED_MASK(~0u)},
- {19185, S_036100_VGT_BUSY_USER_DEFINED_MASK(~0u)},
- {19212, S_036100_TA_BUSY_USER_DEFINED_MASK(~0u)},
- {19238, S_036100_SX_BUSY_USER_DEFINED_MASK(~0u)},
- {19264, S_036100_SPI_BUSY_USER_DEFINED_MASK(~0u)},
- {19291, S_036100_SC_BUSY_USER_DEFINED_MASK(~0u)},
- {19317, S_036100_PA_BUSY_USER_DEFINED_MASK(~0u)},
- {19343, S_036100_GRBM_BUSY_USER_DEFINED_MASK(~0u)},
- {19371, S_036100_DB_BUSY_USER_DEFINED_MASK(~0u)},
- {19397, S_036100_CB_BUSY_USER_DEFINED_MASK(~0u)},
- {19423, S_036100_CP_BUSY_USER_DEFINED_MASK(~0u)},
- {19449, S_036100_IA_BUSY_USER_DEFINED_MASK(~0u)},
- {19475, S_036100_GDS_BUSY_USER_DEFINED_MASK(~0u)},
- {19502, S_036100_BCI_BUSY_USER_DEFINED_MASK(~0u)},
- {19529, S_036100_RLC_BUSY_USER_DEFINED_MASK(~0u)},
- {19556, S_036100_TC_BUSY_USER_DEFINED_MASK(~0u)},
- {19582, S_036100_WD_BUSY_USER_DEFINED_MASK(~0u)},
- {19608, S_036100_UTCL2_BUSY_USER_DEFINED_MASK(~0u)},
- {19637, S_036100_EA_BUSY_USER_DEFINED_MASK(~0u)},
- {19663, S_036100_RMI_BUSY_USER_DEFINED_MASK(~0u)},
+ {47329, S_036504_PERF_SEL2(~0u)},
+ {47339, S_036504_PERF_SEL3(~0u)},
+ /* 1039 */
+ {47300, S_036600_PERF_SEL(~0u)},
+ {47309, S_036600_PERF_SEL1(~0u)},
+ {47319, S_036600_CNTR_MODE(~0u)},
+ /* 1042 */
+ {47329, S_036610_PERF_SEL2(~0u)},
+ {47339, S_036610_PERF_SEL3(~0u)},
+ /* 1044 */
+ {48012, S_036628_BIN0_MIN(~0u)},
+ {48021, S_036628_BIN0_MAX(~0u)},
+ {48030, S_036628_BIN1_MIN(~0u)},
+ {48039, S_036628_BIN1_MAX(~0u)},
+ {48048, S_036628_BIN2_MIN(~0u)},
+ {48057, S_036628_BIN2_MAX(~0u)},
+ {48066, S_036628_BIN3_MIN(~0u)},
+ {48075, S_036628_BIN3_MAX(~0u)},
+ /* 1052 */
+ {47300, S_036700_PERF_SEL(~0u)},
+ {48084, S_036700_SQC_BANK_MASK(~0u)},
+ {48098, S_036700_SQC_CLIENT_MASK(~0u)},
+ {48114, S_036700_SPM_MODE(~0u)},
+ {48123, S_036700_SIMD_MASK(~0u)},
+ {47947, S_036700_PERF_MODE(~0u)},
+ /* 1058 */
+ {48133, S_036780_PS_EN(~0u)},
+ {48139, S_036780_VS_EN(~0u)},
+ {48145, S_036780_GS_EN(~0u)},
+ {23383, S_036780_ES_EN(~0u)},
+ {48151, S_036780_HS_EN(~0u)},
+ {48157, S_036780_LS_EN(~0u)},
+ {48163, S_036780_CS_EN(~0u)},
+ {48169, S_036780_CNTR_RATE(~0u)},
+ {48179, S_036780_DISABLE_FLUSH(~0u)},
+ /* 1067 */
+ {48193, S_036784_SH0_MASK(~0u)},
+ {48202, S_036784_SH1_MASK(~0u)},
+ /* 1069 */
+ {48211, S_036788_FORCE_EN(~0u)},
+ /* 1070 */
+ {12538, S_036900_PERFCOUNTER_SELECT(~0u)},
+ {48220, S_036900_PERFCOUNTER_SELECT1(~0u)},
+ {47319, S_036900_CNTR_MODE(~0u)},
+ /* 1073 */
+ {48240, S_036910_PERFCOUNTER_SELECT2(~0u)},
+ {48260, S_036910_PERFCOUNTER_SELECT3(~0u)},
+ /* 1075 */
+ {12538, S_036A00_PERFCOUNTER_SELECT(~0u)},
+ {48220, S_036A00_PERFCOUNTER_SELECT1(~0u)},
+ {47319, S_036A00_CNTR_MODE(~0u)},
+ /* 1078 */
+ {48240, S_036A10_PERFCOUNTER_SELECT2(~0u)},
+ {48260, S_036A10_PERFCOUNTER_SELECT3(~0u)},
+ /* 1080 */
+ {47300, S_036B00_PERF_SEL(~0u)},
+ {47309, S_036B00_PERF_SEL1(~0u)},
+ {47319, S_036B00_CNTR_MODE(~0u)},
+ {47957, S_036B00_PERF_MODE1(~0u)},
+ {47947, S_036B00_PERF_MODE(~0u)},
/* 1085 */
- {18930, S_036108_PERF_SEL(~0u)},
- {19131, S_036108_DB_CLEAN_USER_DEFINED_MASK(~0u)},
- {19158, S_036108_CB_CLEAN_USER_DEFINED_MASK(~0u)},
- {19212, S_036108_TA_BUSY_USER_DEFINED_MASK(~0u)},
- {19238, S_036108_SX_BUSY_USER_DEFINED_MASK(~0u)},
- {19264, S_036108_SPI_BUSY_USER_DEFINED_MASK(~0u)},
- {19291, S_036108_SC_BUSY_USER_DEFINED_MASK(~0u)},
- {19371, S_036108_DB_BUSY_USER_DEFINED_MASK(~0u)},
- {19397, S_036108_CB_BUSY_USER_DEFINED_MASK(~0u)},
- {19185, S_036108_VGT_BUSY_USER_DEFINED_MASK(~0u)},
- {19317, S_036108_PA_BUSY_USER_DEFINED_MASK(~0u)},
- {19502, S_036108_BCI_BUSY_USER_DEFINED_MASK(~0u)},
- {19663, S_036108_RMI_BUSY_USER_DEFINED_MASK(~0u)},
+ {47329, S_036B04_PERF_SEL2(~0u)},
+ {47339, S_036B04_PERF_SEL3(~0u)},
+ {47968, S_036B04_PERF_MODE3(~0u)},
+ {47979, S_036B04_PERF_MODE2(~0u)},
+ /* 1089 */
+ {47300, S_036C00_PERF_SEL(~0u)},
+ {47309, S_036C00_PERF_SEL1(~0u)},
+ {47319, S_036C00_CNTR_MODE(~0u)},
+ {47957, S_036C00_PERF_MODE1(~0u)},
+ {47947, S_036C00_PERF_MODE(~0u)},
+ /* 1094 */
+ {47329, S_036C04_PERF_SEL2(~0u)},
+ {47339, S_036C04_PERF_SEL3(~0u)},
+ {47968, S_036C04_PERF_MODE3(~0u)},
+ {47979, S_036C04_PERF_MODE2(~0u)},
/* 1098 */
- {18930, S_03610C_PERF_SEL(~0u)},
- {19131, S_03610C_DB_CLEAN_USER_DEFINED_MASK(~0u)},
- {19158, S_03610C_CB_CLEAN_USER_DEFINED_MASK(~0u)},
- {19212, S_03610C_TA_BUSY_USER_DEFINED_MASK(~0u)},
- {19238, S_03610C_SX_BUSY_USER_DEFINED_MASK(~0u)},
- {19264, S_03610C_SPI_BUSY_USER_DEFINED_MASK(~0u)},
- {19291, S_03610C_SC_BUSY_USER_DEFINED_MASK(~0u)},
- {19371, S_03610C_DB_BUSY_USER_DEFINED_MASK(~0u)},
- {19397, S_03610C_CB_BUSY_USER_DEFINED_MASK(~0u)},
- {19185, S_03610C_VGT_BUSY_USER_DEFINED_MASK(~0u)},
- {19317, S_03610C_PA_BUSY_USER_DEFINED_MASK(~0u)},
- {19502, S_03610C_BCI_BUSY_USER_DEFINED_MASK(~0u)},
- {19663, S_03610C_RMI_BUSY_USER_DEFINED_MASK(~0u)},
- /* 1111 */
- {18930, S_036110_PERF_SEL(~0u)},
- {19131, S_036110_DB_CLEAN_USER_DEFINED_MASK(~0u)},
- {19158, S_036110_CB_CLEAN_USER_DEFINED_MASK(~0u)},
- {19212, S_036110_TA_BUSY_USER_DEFINED_MASK(~0u)},
- {19238, S_036110_SX_BUSY_USER_DEFINED_MASK(~0u)},
- {19264, S_036110_SPI_BUSY_USER_DEFINED_MASK(~0u)},
- {19291, S_036110_SC_BUSY_USER_DEFINED_MASK(~0u)},
- {19371, S_036110_DB_BUSY_USER_DEFINED_MASK(~0u)},
- {19397, S_036110_CB_BUSY_USER_DEFINED_MASK(~0u)},
- {19185, S_036110_VGT_BUSY_USER_DEFINED_MASK(~0u)},
- {19317, S_036110_PA_BUSY_USER_DEFINED_MASK(~0u)},
- {19502, S_036110_BCI_BUSY_USER_DEFINED_MASK(~0u)},
- {19663, S_036110_RMI_BUSY_USER_DEFINED_MASK(~0u)},
- /* 1124 */
- {18930, S_036114_PERF_SEL(~0u)},
- {19131, S_036114_DB_CLEAN_USER_DEFINED_MASK(~0u)},
- {19158, S_036114_CB_CLEAN_USER_DEFINED_MASK(~0u)},
- {19212, S_036114_TA_BUSY_USER_DEFINED_MASK(~0u)},
- {19238, S_036114_SX_BUSY_USER_DEFINED_MASK(~0u)},
- {19264, S_036114_SPI_BUSY_USER_DEFINED_MASK(~0u)},
- {19291, S_036114_SC_BUSY_USER_DEFINED_MASK(~0u)},
- {19371, S_036114_DB_BUSY_USER_DEFINED_MASK(~0u)},
- {19397, S_036114_CB_BUSY_USER_DEFINED_MASK(~0u)},
- {19185, S_036114_VGT_BUSY_USER_DEFINED_MASK(~0u)},
- {19317, S_036114_PA_BUSY_USER_DEFINED_MASK(~0u)},
- {19502, S_036114_BCI_BUSY_USER_DEFINED_MASK(~0u)},
- {19663, S_036114_RMI_BUSY_USER_DEFINED_MASK(~0u)},
+ {47300, S_036D00_PERF_SEL(~0u)},
+ {47309, S_036D00_PERF_SEL1(~0u)},
+ {47319, S_036D00_CNTR_MODE(~0u)},
+ {47957, S_036D00_PERF_MODE1(~0u)},
+ {47947, S_036D00_PERF_MODE(~0u)},
+ /* 1103 */
+ {47329, S_036D04_PERF_SEL2(~0u)},
+ {47339, S_036D04_PERF_SEL3(~0u)},
+ {47968, S_036D04_PERF_MODE3(~0u)},
+ {47979, S_036D04_PERF_MODE2(~0u)},
+ /* 1107 */
+ {47300, S_036E00_PERF_SEL(~0u)},
+ {47309, S_036E00_PERF_SEL1(~0u)},
+ {47319, S_036E00_CNTR_MODE(~0u)},
+ {47957, S_036E00_PERF_MODE1(~0u)},
+ {47947, S_036E00_PERF_MODE(~0u)},
+ /* 1112 */
+ {47329, S_036E04_PERF_SEL2(~0u)},
+ {47339, S_036E04_PERF_SEL3(~0u)},
+ {47979, S_036E04_PERF_MODE2(~0u)},
+ {47968, S_036E04_PERF_MODE3(~0u)},
+ /* 1116 */
+ {47300, S_036E40_PERF_SEL(~0u)},
+ {47309, S_036E40_PERF_SEL1(~0u)},
+ {47319, S_036E40_CNTR_MODE(~0u)},
+ {47957, S_036E40_PERF_MODE1(~0u)},
+ {47947, S_036E40_PERF_MODE(~0u)},
+ /* 1121 */
+ {47329, S_036E44_PERF_SEL2(~0u)},
+ {47339, S_036E44_PERF_SEL3(~0u)},
+ {47979, S_036E44_PERF_MODE2(~0u)},
+ {47968, S_036E44_PERF_MODE3(~0u)},
+ /* 1125 */
+ {48280, S_037000_OP_FILTER_ENABLE(~0u)},
+ {48297, S_037000_OP_FILTER_SEL(~0u)},
+ {48311, S_037000_FORMAT_FILTER_ENABLE(~0u)},
+ {48332, S_037000_FORMAT_FILTER_SEL(~0u)},
+ {48350, S_037000_CLEAR_FILTER_ENABLE(~0u)},
+ {48370, S_037000_CLEAR_FILTER_SEL(~0u)},
+ {48387, S_037000_MRT_FILTER_ENABLE(~0u)},
+ {48405, S_037000_MRT_FILTER_SEL(~0u)},
+ {48420, S_037000_NUM_SAMPLES_FILTER_ENABLE(~0u)},
+ {48446, S_037000_NUM_SAMPLES_FILTER_SEL(~0u)},
+ {48469, S_037000_NUM_FRAGMENTS_FILTER_ENABLE(~0u)},
+ {48497, S_037000_NUM_FRAGMENTS_FILTER_SEL(~0u)},
/* 1137 */
- {18930, S_036200_PERF_SEL(~0u)},
- {19690, S_036200_PERF_MODE(~0u)},
- /* 1139 */
- {18930, S_036210_PERF_SEL(~0u)},
- {18939, S_036210_PERF_SEL1(~0u)},
- {18949, S_036210_CNTR_MODE(~0u)},
- {19700, S_036210_PERF_MODE1(~0u)},
- {19690, S_036210_PERF_MODE(~0u)},
- /* 1144 */
- {18868, S_036220_PERF_SEL2(~0u)},
- {18878, S_036220_PERF_SEL3(~0u)},
- {19711, S_036220_PERF_MODE3(~0u)},
- {19722, S_036220_PERF_MODE2(~0u)},
- /* 1148 */
- {18930, S_036230_PERF_SEL(~0u)},
- {18939, S_036230_PERF_SEL1(~0u)},
- {18949, S_036230_CNTR_MODE(~0u)},
- {19700, S_036230_PERF_MODE1(~0u)},
- {19690, S_036230_PERF_MODE(~0u)},
- /* 1153 */
- {18868, S_036240_PERF_SEL2(~0u)},
- {18878, S_036240_PERF_SEL3(~0u)},
- {19711, S_036240_PERF_MODE3(~0u)},
- {19722, S_036240_PERF_MODE2(~0u)},
- /* 1157 */
- {19733, S_036250_PERF_SEID_IGNORE_MASK(~0u)},
- /* 1158 */
- {18930, S_036400_PERF_SEL(~0u)},
- {18939, S_036400_PERF_SEL1(~0u)},
- {18949, S_036400_CNTR_MODE(~0u)},
- /* 1161 */
- {18868, S_036404_PERF_SEL2(~0u)},
- {18878, S_036404_PERF_SEL3(~0u)},
- /* 1163 */
- {18930, S_036500_PERF_SEL(~0u)},
- {18939, S_036500_PERF_SEL1(~0u)},
- {18949, S_036500_CNTR_MODE(~0u)},
- /* 1166 */
- {18868, S_036504_PERF_SEL2(~0u)},
- {18878, S_036504_PERF_SEL3(~0u)},
- /* 1168 */
- {18930, S_036600_PERF_SEL(~0u)},
- {18939, S_036600_PERF_SEL1(~0u)},
- {18949, S_036600_CNTR_MODE(~0u)},
- {19700, S_036600_PERF_MODE1(~0u)},
- {19690, S_036600_PERF_MODE(~0u)},
- /* 1173 */
- {18868, S_036610_PERF_SEL2(~0u)},
- {18878, S_036610_PERF_SEL3(~0u)},
- {19711, S_036610_PERF_MODE3(~0u)},
- {19722, S_036610_PERF_MODE2(~0u)},
- /* 1177 */
- {19755, S_036628_BIN0_MIN(~0u)},
- {19764, S_036628_BIN0_MAX(~0u)},
- {19773, S_036628_BIN1_MIN(~0u)},
- {19782, S_036628_BIN1_MAX(~0u)},
- {19791, S_036628_BIN2_MIN(~0u)},
- {19800, S_036628_BIN2_MAX(~0u)},
- {19809, S_036628_BIN3_MIN(~0u)},
- {19818, S_036628_BIN3_MAX(~0u)},
- /* 1185 */
- {18930, S_036700_PERF_SEL(~0u)},
- {19827, S_036700_SQC_BANK_MASK(~0u)},
- {19841, S_036700_SQC_CLIENT_MASK(~0u)},
- {18979, S_036700_SPM_MODE(~0u)},
- {19857, S_036700_SIMD_MASK(~0u)},
- {19690, S_036700_PERF_MODE(~0u)},
- /* 1191 */
- {19867, S_036780_PS_EN(~0u)},
- {19873, S_036780_VS_EN(~0u)},
- {19879, S_036780_GS_EN(~0u)},
- {19885, S_036780_ES_EN(~0u)},
- {19891, S_036780_HS_EN(~0u)},
- {19897, S_036780_LS_EN(~0u)},
- {19903, S_036780_CS_EN(~0u)},
- {19909, S_036780_CNTR_RATE(~0u)},
- {19919, S_036780_DISABLE_FLUSH(~0u)},
- /* 1200 */
- {19933, S_036784_SH0_MASK(~0u)},
- {19942, S_036784_SH1_MASK(~0u)},
- /* 1202 */
- {19951, S_036788_FORCE_EN(~0u)},
- /* 1203 */
- {19960, S_036900_PERFCOUNTER_SELECT(~0u)},
- {19979, S_036900_PERFCOUNTER_SELECT1(~0u)},
- {18949, S_036900_CNTR_MODE(~0u)},
- /* 1206 */
- {19999, S_036910_PERFCOUNTER_SELECT2(~0u)},
- {20019, S_036910_PERFCOUNTER_SELECT3(~0u)},
- /* 1208 */
- {19960, S_036A00_PERFCOUNTER_SELECT(~0u)},
- {19979, S_036A00_PERFCOUNTER_SELECT1(~0u)},
- {18949, S_036A00_CNTR_MODE(~0u)},
- /* 1211 */
- {19999, S_036A10_PERFCOUNTER_SELECT2(~0u)},
- {20019, S_036A10_PERFCOUNTER_SELECT3(~0u)},
- /* 1213 */
- {18930, S_036B00_PERF_SEL(~0u)},
- {18939, S_036B00_PERF_SEL1(~0u)},
- {18949, S_036B00_CNTR_MODE(~0u)},
- {19700, S_036B00_PERF_MODE1(~0u)},
- {19690, S_036B00_PERF_MODE(~0u)},
- /* 1218 */
- {18868, S_036B04_PERF_SEL2(~0u)},
- {18878, S_036B04_PERF_SEL3(~0u)},
- {19711, S_036B04_PERF_MODE3(~0u)},
- {19722, S_036B04_PERF_MODE2(~0u)},
- /* 1222 */
- {18930, S_036C00_PERF_SEL(~0u)},
- {18939, S_036C00_PERF_SEL1(~0u)},
- {18949, S_036C00_CNTR_MODE(~0u)},
- {19700, S_036C00_PERF_MODE1(~0u)},
- {19690, S_036C00_PERF_MODE(~0u)},
- /* 1227 */
- {18868, S_036C04_PERF_SEL2(~0u)},
- {18878, S_036C04_PERF_SEL3(~0u)},
- {19711, S_036C04_PERF_MODE3(~0u)},
- {19722, S_036C04_PERF_MODE2(~0u)},
- /* 1231 */
- {18930, S_036D00_PERF_SEL(~0u)},
- {18939, S_036D00_PERF_SEL1(~0u)},
- {18949, S_036D00_CNTR_MODE(~0u)},
- {19700, S_036D00_PERF_MODE1(~0u)},
- {19690, S_036D00_PERF_MODE(~0u)},
- /* 1236 */
- {18868, S_036D04_PERF_SEL2(~0u)},
- {18878, S_036D04_PERF_SEL3(~0u)},
- {19711, S_036D04_PERF_MODE3(~0u)},
- {19722, S_036D04_PERF_MODE2(~0u)},
- /* 1240 */
- {18930, S_036E00_PERF_SEL(~0u)},
- {18939, S_036E00_PERF_SEL1(~0u)},
- {18949, S_036E00_CNTR_MODE(~0u)},
- {19700, S_036E00_PERF_MODE1(~0u)},
- {19690, S_036E00_PERF_MODE(~0u)},
- /* 1245 */
- {18868, S_036E04_PERF_SEL2(~0u)},
- {18878, S_036E04_PERF_SEL3(~0u)},
- {19722, S_036E04_PERF_MODE2(~0u)},
- {19711, S_036E04_PERF_MODE3(~0u)},
- /* 1249 */
- {18930, S_036E40_PERF_SEL(~0u)},
- {18939, S_036E40_PERF_SEL1(~0u)},
- {18949, S_036E40_CNTR_MODE(~0u)},
- {19700, S_036E40_PERF_MODE1(~0u)},
- {19690, S_036E40_PERF_MODE(~0u)},
- /* 1254 */
- {18868, S_036E44_PERF_SEL2(~0u)},
- {18878, S_036E44_PERF_SEL3(~0u)},
- {19722, S_036E44_PERF_MODE2(~0u)},
- {19711, S_036E44_PERF_MODE3(~0u)},
+ {47300, S_037004_PERF_SEL(~0u)},
+ {47309, S_037004_PERF_SEL1(~0u)},
+ {47319, S_037004_CNTR_MODE(~0u)},
+ {47957, S_037004_PERF_MODE1(~0u)},
+ {47947, S_037004_PERF_MODE(~0u)},
+ /* 1142 */
+ {47329, S_037008_PERF_SEL2(~0u)},
+ {47339, S_037008_PERF_SEL3(~0u)},
+ {47968, S_037008_PERF_MODE3(~0u)},
+ {47979, S_037008_PERF_MODE2(~0u)},
+ /* 1146 */
+ {47300, S_037100_PERF_SEL(~0u)},
+ {47309, S_037100_PERF_SEL1(~0u)},
+ {47319, S_037100_CNTR_MODE(~0u)},
+ {47957, S_037100_PERF_MODE1(~0u)},
+ {47947, S_037100_PERF_MODE(~0u)},
+ /* 1151 */
+ {47329, S_037104_PERF_SEL2(~0u)},
+ {47339, S_037104_PERF_SEL3(~0u)},
+ {47968, S_037104_PERF_MODE3(~0u)},
+ {47979, S_037104_PERF_MODE2(~0u)},
+ /* 1155 */
+ {48522, S_028000_DEPTH_CLEAR_ENABLE(~0u)},
+ {48541, S_028000_STENCIL_CLEAR_ENABLE(~0u)},
+ {48562, S_028000_DEPTH_COPY(~0u)},
+ {48573, S_028000_STENCIL_COPY(~0u)},
+ {48586, S_028000_RESUMMARIZE_ENABLE(~0u)},
+ {48605, S_028000_STENCIL_COMPRESS_DISABLE(~0u)},
+ {48630, S_028000_DEPTH_COMPRESS_DISABLE(~0u)},
+ {48653, S_028000_COPY_CENTROID(~0u)},
+ {48667, S_028000_COPY_SAMPLE(~0u)},
+ {48679, S_028000_DECOMPRESS_ENABLE(~0u)},
+ /* 1165 */
+ {48697, S_028004_ZPASS_INCREMENT_DISABLE(~0u)},
+ {48721, S_028004_PERFECT_ZPASS_COUNTS(~0u)},
+ {48742, S_028004_SAMPLE_RATE(~0u)},
+ {48754, S_028004_ZPASS_ENABLE(~0u)},
+ {48767, S_028004_ZFAIL_ENABLE(~0u)},
+ {48780, S_028004_SFAIL_ENABLE(~0u)},
+ {48793, S_028004_DBFAIL_ENABLE(~0u)},
+ {48807, S_028004_SLICE_EVEN_ENABLE(~0u)},
+ {48825, S_028004_SLICE_ODD_ENABLE(~0u)},
+ /* 1174 */
+ {48842, S_028008_SLICE_START(~0u)},
+ {48854, S_028008_SLICE_MAX(~0u)},
+ {48864, S_028008_Z_READ_ONLY(~0u)},
+ {48876, S_028008_STENCIL_READ_ONLY(~0u)},
+ /* 1178 */
+ {48946, S_02800C_FORCE_HIZ_ENABLE(~0u), 4, 670},
+ {48963, S_02800C_FORCE_HIS_ENABLE0(~0u), 4, 670},
+ {48981, S_02800C_FORCE_HIS_ENABLE1(~0u), 4, 670},
+ {48999, S_02800C_FORCE_SHADER_Z_ORDER(~0u)},
+ {49020, S_02800C_FAST_Z_DISABLE(~0u)},
+ {49035, S_02800C_FAST_STENCIL_DISABLE(~0u)},
+ {49056, S_02800C_NOOP_CULL_DISABLE(~0u)},
+ {49074, S_02800C_FORCE_COLOR_KILL(~0u)},
+ {49091, S_02800C_FORCE_Z_READ(~0u)},
+ {49104, S_02800C_FORCE_STENCIL_READ(~0u)},
+ {49123, S_02800C_FORCE_FULL_Z_RANGE(~0u), 4, 670},
+ {49142, S_02800C_FORCE_QC_SMASK_CONFLICT(~0u)},
+ {49166, S_02800C_DISABLE_VIEWPORT_CLAMP(~0u)},
+ {49189, S_02800C_IGNORE_SC_ZRANGE(~0u)},
+ {49206, S_02800C_DISABLE_FULLY_COVERED(~0u)},
+ {49291, S_02800C_FORCE_Z_LIMIT_SUMM(~0u), 4, 674},
+ {49310, S_02800C_MAX_TILES_IN_DTT(~0u)},
+ {49327, S_02800C_DISABLE_TILE_RATE_TILES(~0u)},
+ {49351, S_02800C_FORCE_Z_DIRTY(~0u)},
+ {49365, S_02800C_FORCE_STENCIL_DIRTY(~0u)},
+ {49385, S_02800C_FORCE_Z_VALID(~0u)},
+ {49399, S_02800C_FORCE_STENCIL_VALID(~0u)},
+ {49419, S_02800C_PRESERVE_COMPRESSION(~0u)},
+ /* 1201 */
+ {49493, S_028010_PARTIAL_SQUAD_LAUNCH_CONTROL(~0u), 4, 678},
+ {49522, S_028010_PARTIAL_SQUAD_LAUNCH_COUNTDOWN(~0u)},
+ {49553, S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(~0u)},
+ {49589, S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(~0u)},
+ {49624, S_028010_DISABLE_COLOR_ON_VALIDATION(~0u)},
+ {49652, S_028010_DECOMPRESS_Z_ON_FLUSH(~0u)},
+ {49674, S_028010_DISABLE_REG_SNOOP(~0u)},
+ {49692, S_028010_DEPTH_BOUNDS_HIER_DEPTH_DISABLE(~0u)},
+ {49724, S_028010_SEPARATE_HIZS_FUNC_ENABLE(~0u)},
+ {49750, S_028010_HIZ_ZFUNC(~0u)},
+ {49760, S_028010_HIS_SFUNC_FF(~0u)},
+ {49773, S_028010_HIS_SFUNC_BF(~0u)},
+ {49786, S_028010_PRESERVE_ZRANGE(~0u)},
+ {49802, S_028010_PRESERVE_SRESULTS(~0u)},
+ {49820, S_028010_DISABLE_FAST_PASS(~0u)},
+ /* 1216 */
+ {15389, S_028028_CLEAR(~0u)},
+ /* 1217 */
+ {49838, S_028030_TL_X(~0u)},
+ {49843, S_028030_TL_Y(~0u)},
+ /* 1219 */
+ {49848, S_028034_BR_X(~0u)},
+ {49853, S_028034_BR_Y(~0u)},
+ /* 1221 */
+ {49858, S_02803C_ADDR5_SWIZZLE_MASK(~0u)},
+ {45344, S_02803C_ARRAY_MODE(~0u), 7, 682},
+ {45706, S_02803C_PIPE_CONFIG(~0u), 18, 689},
+ {45999, S_02803C_BANK_WIDTH(~0u), 4, 406},
+ {46106, S_02803C_BANK_HEIGHT(~0u), 4, 410},
+ {46218, S_02803C_MACRO_TILE_ASPECT(~0u), 4, 414},
+ {46305, S_02803C_NUM_BANKS(~0u), 4, 418},
+ /* 1228 */
+ {21132, S_028040_FORMAT(~0u), 4, 707},
+ {50010, S_028040_NUM_SAMPLES(~0u)},
+ {45896, S_028040_TILE_SPLIT(~0u), 7, 399},
+ {50022, S_028040_TILE_MODE_INDEX(~0u)},
+ {50038, S_028040_DECOMPRESS_ON_N_ZPLANES(~0u)},
+ {50062, S_028040_ALLOW_EXPCLEAR(~0u)},
+ {50077, S_028040_READ_SIZE(~0u)},
+ {50087, S_028040_TILE_SURFACE_ENABLE(~0u)},
+ {50107, S_028040_CLEAR_DISALLOWED(~0u)},
+ {50124, S_028040_ZRANGE_PRECISION(~0u)},
+ /* 1238 */
+ {21132, S_028044_FORMAT(~0u), 2, 711},
+ {45896, S_028044_TILE_SPLIT(~0u), 7, 399},
+ {50022, S_028044_TILE_MODE_INDEX(~0u)},
+ {50062, S_028044_ALLOW_EXPCLEAR(~0u)},
+ {50167, S_028044_TILE_STENCIL_DISABLE(~0u)},
+ {50107, S_028044_CLEAR_DISALLOWED(~0u)},
+ /* 1244 */
+ {50188, S_028058_PITCH_TILE_MAX(~0u)},
+ {50203, S_028058_HEIGHT_TILE_MAX(~0u)},
+ /* 1246 */
+ {50219, S_02805C_SLICE_TILE_MAX(~0u)},
+ /* 1247 */
+ {39305, S_028084_ADDRESS(~0u)},
+ /* 1248 */
+ {50234, S_028200_WINDOW_X_OFFSET(~0u)},
+ {50250, S_028200_WINDOW_Y_OFFSET(~0u)},
+ /* 1250 */
+ {49838, S_028204_TL_X(~0u)},
+ {49843, S_028204_TL_Y(~0u)},
+ {50266, S_028204_WINDOW_OFFSET_DISABLE(~0u)},
+ /* 1253 */
+ {49848, S_028208_BR_X(~0u)},
+ {49853, S_028208_BR_Y(~0u)},
+ /* 1255 */
+ {50288, S_02820C_CLIP_RULE(~0u)},
+ /* 1256 */
+ {49838, S_028210_TL_X(~0u)},
+ {49843, S_028210_TL_Y(~0u)},
/* 1258 */
- {20039, S_037000_OP_FILTER_ENABLE(~0u)},
- {20056, S_037000_OP_FILTER_SEL(~0u)},
- {20070, S_037000_FORMAT_FILTER_ENABLE(~0u)},
- {20091, S_037000_FORMAT_FILTER_SEL(~0u)},
- {20109, S_037000_CLEAR_FILTER_ENABLE(~0u)},
- {20129, S_037000_CLEAR_FILTER_SEL(~0u)},
- {20146, S_037000_MRT_FILTER_ENABLE(~0u)},
- {20164, S_037000_MRT_FILTER_SEL(~0u)},
- {20179, S_037000_NUM_SAMPLES_FILTER_ENABLE(~0u)},
- {20205, S_037000_NUM_SAMPLES_FILTER_SEL(~0u)},
- {20228, S_037000_NUM_FRAGMENTS_FILTER_ENABLE(~0u)},
- {20256, S_037000_NUM_FRAGMENTS_FILTER_SEL(~0u)},
- /* 1270 */
- {18930, S_037004_PERF_SEL(~0u)},
- {18939, S_037004_PERF_SEL1(~0u)},
- {18949, S_037004_CNTR_MODE(~0u)},
- {19700, S_037004_PERF_MODE1(~0u)},
- {19690, S_037004_PERF_MODE(~0u)},
- /* 1275 */
- {18868, S_037008_PERF_SEL2(~0u)},
- {18878, S_037008_PERF_SEL3(~0u)},
- {19711, S_037008_PERF_MODE3(~0u)},
- {19722, S_037008_PERF_MODE2(~0u)},
- /* 1279 */
- {18930, S_037100_PERF_SEL(~0u)},
- {18939, S_037100_PERF_SEL1(~0u)},
- {18949, S_037100_CNTR_MODE(~0u)},
- {19700, S_037100_PERF_MODE1(~0u)},
- {19690, S_037100_PERF_MODE(~0u)},
- /* 1284 */
- {18868, S_037104_PERF_SEL2(~0u)},
- {18878, S_037104_PERF_SEL3(~0u)},
- {19711, S_037104_PERF_MODE3(~0u)},
- {19722, S_037104_PERF_MODE2(~0u)},
+ {49848, S_028214_BR_X(~0u)},
+ {49853, S_028214_BR_Y(~0u)},
+ /* 1260 */
+ {50298, S_028230_ER_TRI(~0u)},
+ {43433, S_028230_ER_POINT(~0u)},
+ {50305, S_028230_ER_RECT(~0u)},
+ {50313, S_028230_ER_LINE_LR(~0u)},
+ {50324, S_028230_ER_LINE_RL(~0u)},
+ {50335, S_028230_ER_LINE_TB(~0u)},
+ {50346, S_028230_ER_LINE_BT(~0u)},
+ /* 1267 */
+ {50357, S_028234_HW_SCREEN_OFFSET_X(~0u)},
+ {50376, S_028234_HW_SCREEN_OFFSET_Y(~0u)},
+ /* 1269 */
+ {50395, S_028238_TARGET0_ENABLE(~0u)},
+ {50410, S_028238_TARGET1_ENABLE(~0u)},
+ {50425, S_028238_TARGET2_ENABLE(~0u)},
+ {50440, S_028238_TARGET3_ENABLE(~0u)},
+ {50455, S_028238_TARGET4_ENABLE(~0u)},
+ {50470, S_028238_TARGET5_ENABLE(~0u)},
+ {50485, S_028238_TARGET6_ENABLE(~0u)},
+ {50500, S_028238_TARGET7_ENABLE(~0u)},
+ /* 1277 */
+ {50515, S_02823C_OUTPUT0_ENABLE(~0u)},
+ {50530, S_02823C_OUTPUT1_ENABLE(~0u)},
+ {50545, S_02823C_OUTPUT2_ENABLE(~0u)},
+ {50560, S_02823C_OUTPUT3_ENABLE(~0u)},
+ {50575, S_02823C_OUTPUT4_ENABLE(~0u)},
+ {50590, S_02823C_OUTPUT5_ENABLE(~0u)},
+ {50605, S_02823C_OUTPUT6_ENABLE(~0u)},
+ {50620, S_02823C_OUTPUT7_ENABLE(~0u)},
+ /* 1285 */
+ {49838, S_028240_TL_X(~0u)},
+ {49843, S_028240_TL_Y(~0u)},
+ {50266, S_028240_WINDOW_OFFSET_DISABLE(~0u)},
/* 1288 */
- {20281, S_028000_DEPTH_CLEAR_ENABLE(~0u)},
- {20300, S_028000_STENCIL_CLEAR_ENABLE(~0u)},
- {20321, S_028000_DEPTH_COPY(~0u)},
- {20332, S_028000_STENCIL_COPY(~0u)},
- {20345, S_028000_RESUMMARIZE_ENABLE(~0u)},
- {20364, S_028000_STENCIL_COMPRESS_DISABLE(~0u)},
- {20389, S_028000_DEPTH_COMPRESS_DISABLE(~0u)},
- {20412, S_028000_COPY_CENTROID(~0u)},
- {20426, S_028000_COPY_SAMPLE(~0u)},
- {20438, S_028000_DECOMPRESS_ENABLE(~0u)},
- /* 1298 */
- {20456, S_028004_ZPASS_INCREMENT_DISABLE(~0u)},
- {20480, S_028004_PERFECT_ZPASS_COUNTS(~0u)},
- {20501, S_028004_SAMPLE_RATE(~0u)},
- {20513, S_028004_ZPASS_ENABLE(~0u)},
- {20526, S_028004_ZFAIL_ENABLE(~0u)},
- {20539, S_028004_SFAIL_ENABLE(~0u)},
- {20552, S_028004_DBFAIL_ENABLE(~0u)},
- {20566, S_028004_SLICE_EVEN_ENABLE(~0u)},
- {20584, S_028004_SLICE_ODD_ENABLE(~0u)},
- /* 1307 */
- {20601, S_028008_SLICE_START(~0u)},
- {20613, S_028008_SLICE_MAX(~0u)},
- {20623, S_028008_Z_READ_ONLY(~0u)},
- {20635, S_028008_STENCIL_READ_ONLY(~0u)},
- {20653, S_028008_MIPID(~0u)},
- /* 1312 */
- {20711, S_02800C_FORCE_HIZ_ENABLE(~0u), 4, 788},
- {20728, S_02800C_FORCE_HIS_ENABLE0(~0u), 4, 788},
- {20746, S_02800C_FORCE_HIS_ENABLE1(~0u), 4, 788},
- {20764, S_02800C_FORCE_SHADER_Z_ORDER(~0u)},
- {20785, S_02800C_FAST_Z_DISABLE(~0u)},
- {20800, S_02800C_FAST_STENCIL_DISABLE(~0u)},
- {20821, S_02800C_NOOP_CULL_DISABLE(~0u)},
- {20839, S_02800C_FORCE_COLOR_KILL(~0u)},
- {20856, S_02800C_FORCE_Z_READ(~0u)},
- {20869, S_02800C_FORCE_STENCIL_READ(~0u)},
- {20888, S_02800C_FORCE_FULL_Z_RANGE(~0u), 4, 788},
- {20907, S_02800C_FORCE_QC_SMASK_CONFLICT(~0u)},
- {20931, S_02800C_DISABLE_VIEWPORT_CLAMP(~0u)},
- {20954, S_02800C_IGNORE_SC_ZRANGE(~0u)},
- {20971, S_02800C_DISABLE_FULLY_COVERED(~0u)},
- {21056, S_02800C_FORCE_Z_LIMIT_SUMM(~0u), 4, 792},
- {21075, S_02800C_MAX_TILES_IN_DTT(~0u)},
- {21092, S_02800C_DISABLE_TILE_RATE_TILES(~0u)},
- {21116, S_02800C_FORCE_Z_DIRTY(~0u)},
- {21130, S_02800C_FORCE_STENCIL_DIRTY(~0u)},
- {21150, S_02800C_FORCE_Z_VALID(~0u)},
- {21164, S_02800C_FORCE_STENCIL_VALID(~0u)},
- {21184, S_02800C_PRESERVE_COMPRESSION(~0u)},
- /* 1335 */
- {21258, S_028010_PARTIAL_SQUAD_LAUNCH_CONTROL(~0u), 4, 796},
- {21287, S_028010_PARTIAL_SQUAD_LAUNCH_COUNTDOWN(~0u)},
- {21318, S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(~0u)},
- {21354, S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(~0u)},
- {21389, S_028010_DISABLE_COLOR_ON_VALIDATION(~0u)},
- {21417, S_028010_DECOMPRESS_Z_ON_FLUSH(~0u)},
- {21439, S_028010_DISABLE_REG_SNOOP(~0u)},
- {21457, S_028010_DEPTH_BOUNDS_HIER_DEPTH_DISABLE(~0u)},
- {21489, S_028010_SEPARATE_HIZS_FUNC_ENABLE(~0u)},
- {21515, S_028010_HIZ_ZFUNC(~0u)},
- {21525, S_028010_HIS_SFUNC_FF(~0u)},
- {21538, S_028010_HIS_SFUNC_BF(~0u)},
- {21551, S_028010_PRESERVE_ZRANGE(~0u)},
- {21567, S_028010_PRESERVE_SRESULTS(~0u)},
- {21585, S_028010_DISABLE_FAST_PASS(~0u)},
- {21603, S_028010_ALLOW_PARTIAL_RES_HIER_KILL(~0u)},
- /* 1351 */
- {21631, S_028028_CLEAR(~0u)},
- /* 1352 */
- {21637, S_028030_TL_X(~0u)},
- {21642, S_028030_TL_Y(~0u)},
- /* 1354 */
- {21647, S_028034_BR_X(~0u)},
- {21652, S_028034_BR_Y(~0u)},
- /* 1356 */
- {21657, S_02803C_ADDR5_SWIZZLE_MASK(~0u)},
- {16712, S_02803C_ARRAY_MODE(~0u), 7, 800},
- {17074, S_02803C_PIPE_CONFIG(~0u), 18, 807},
- {17367, S_02803C_BANK_WIDTH(~0u), 4, 524},
- {17474, S_02803C_BANK_HEIGHT(~0u), 4, 528},
- {17586, S_02803C_MACRO_TILE_ASPECT(~0u), 4, 532},
- {16278, S_02803C_NUM_BANKS(~0u), 4, 536},
- /* 1363 */
- {9248, S_028040_FORMAT(~0u), 4, 825},
- {21809, S_028040_NUM_SAMPLES(~0u)},
- {17264, S_028040_TILE_SPLIT(~0u), 7, 517},
- {21821, S_028040_TILE_MODE_INDEX(~0u)},
- {21837, S_028040_DECOMPRESS_ON_N_ZPLANES(~0u)},
- {21861, S_028040_ALLOW_EXPCLEAR(~0u)},
- {21876, S_028040_READ_SIZE(~0u)},
- {21886, S_028040_TILE_SURFACE_ENABLE(~0u)},
- {21906, S_028040_CLEAR_DISALLOWED(~0u)},
- {21923, S_028040_ZRANGE_PRECISION(~0u)},
- /* 1373 */
- {9248, S_028044_FORMAT(~0u), 2, 829},
- {17264, S_028044_TILE_SPLIT(~0u), 7, 517},
- {21821, S_028044_TILE_MODE_INDEX(~0u)},
- {21861, S_028044_ALLOW_EXPCLEAR(~0u)},
- {21966, S_028044_TILE_STENCIL_DISABLE(~0u)},
- {21906, S_028044_CLEAR_DISALLOWED(~0u)},
- /* 1379 */
- {21987, S_028058_PITCH_TILE_MAX(~0u)},
- {22002, S_028058_HEIGHT_TILE_MAX(~0u)},
- /* 1381 */
- {22018, S_02805C_SLICE_TILE_MAX(~0u)},
- /* 1382 */
- {8871, S_028084_ADDRESS(~0u)},
- /* 1383 */
- {22033, S_0281E8_DEST_BASE_HI_256B(~0u)},
- /* 1384 */
- {22033, S_0281EC_DEST_BASE_HI_256B(~0u)},
- /* 1385 */
- {22033, S_0281F0_DEST_BASE_HI_256B(~0u)},
- /* 1386 */
- {22033, S_0281F4_DEST_BASE_HI_256B(~0u)},
+ {49848, S_028244_BR_X(~0u)},
+ {49853, S_028244_BR_Y(~0u)},
+ /* 1290 */
+ {49838, S_028250_TL_X(~0u)},
+ {49843, S_028250_TL_Y(~0u)},
+ {50266, S_028250_WINDOW_OFFSET_DISABLE(~0u)},
+ /* 1293 */
+ {49848, S_028254_BR_X(~0u)},
+ {49853, S_028254_BR_Y(~0u)},
+ /* 1295 */
+ {50727, S_028350_RB_MAP_PKR0(~0u), 4, 713},
+ {50739, S_028350_RB_MAP_PKR1(~0u), 4, 713},
+ {50851, S_028350_RB_XSEL2(~0u), 4, 717},
+ {50860, S_028350_RB_XSEL(~0u)},
+ {50868, S_028350_RB_YSEL(~0u)},
+ {50972, S_028350_PKR_MAP(~0u), 4, 721},
+ {51080, S_028350_PKR_XSEL(~0u), 4, 725},
+ {51189, S_028350_PKR_YSEL(~0u), 4, 729},
+ {51302, S_028350_PKR_XSEL2(~0u), 4, 733},
+ {51404, S_028350_SC_MAP(~0u), 4, 737},
+ {51550, S_028350_SC_XSEL(~0u), 4, 741},
+ {51697, S_028350_SC_YSEL(~0u), 4, 745},
+ {51797, S_028350_SE_MAP(~0u), 4, 749},
+ {51943, S_028350_SE_XSEL_GFX6(~0u), 4, 753},
+ {52095, S_028350_SE_YSEL_GFX6(~0u), 4, 757},
+ /* 1310 */
+ {52220, S_028354_SE_PAIR_MAP(~0u), 4, 761},
+ {52391, S_028354_SE_PAIR_XSEL_GFX6(~0u), 4, 765},
+ {52568, S_028354_SE_PAIR_YSEL_GFX6(~0u), 4, 769},
+ /* 1313 */
+ {48807, S_028358_SLICE_EVEN_ENABLE(~0u)},
+ {48825, S_028358_SLICE_ODD_ENABLE(~0u)},
+ /* 1315 */
+ {52586, S_028424_OVERWRITE_COMBINER_DISABLE(~0u)},
+ {52613, S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(~0u)},
+ {52652, S_028424_OVERWRITE_COMBINER_WATERMARK(~0u)},
+ /* 1318 */
+ {52918, S_02842C_STENCILFAIL(~0u), 16, 773},
+ {52930, S_02842C_STENCILZPASS(~0u), 16, 773},
+ {52943, S_02842C_STENCILZFAIL(~0u), 16, 773},
+ {52956, S_02842C_STENCILFAIL_BF(~0u), 16, 773},
+ {52971, S_02842C_STENCILZPASS_BF(~0u), 16, 773},
+ {52987, S_02842C_STENCILZFAIL_BF(~0u), 16, 773},
+ /* 1324 */
+ {53003, S_028430_STENCILTESTVAL(~0u)},
+ {53018, S_028430_STENCILMASK(~0u)},
+ {53030, S_028430_STENCILWRITEMASK(~0u)},
+ {53047, S_028430_STENCILOPVAL(~0u)},
+ /* 1328 */
+ {53060, S_028434_STENCILTESTVAL_BF(~0u)},
+ {53078, S_028434_STENCILMASK_BF(~0u)},
+ {53093, S_028434_STENCILWRITEMASK_BF(~0u)},
+ {53113, S_028434_STENCILOPVAL_BF(~0u)},
+ /* 1332 */
+ {669, S_028644_OFFSET(~0u)},
+ {53136, S_028644_DEFAULT_VAL(~0u), 1, 789},
+ {53148, S_028644_FLAT_SHADE(~0u)},
+ {53159, S_028644_CYL_WRAP(~0u)},
+ {53168, S_028644_PT_SPRITE_TEX(~0u)},
+ {53182, S_028644_DUP(~0u)},
+ {53186, S_028644_FP16_INTERP_MODE(~0u)},
+ {53203, S_028644_USE_DEFAULT_ATTR1(~0u)},
+ {53221, S_028644_DEFAULT_VAL_ATTR1(~0u)},
+ {53239, S_028644_PT_SPRITE_TEX_ATTR1(~0u)},
+ {53259, S_028644_ATTR0_VALID(~0u)},
+ {53271, S_028644_ATTR1_VALID(~0u)},
+ /* 1344 */
+ {53283, S_0286C4_VS_EXPORT_COUNT(~0u)},
+ {53299, S_0286C4_VS_HALF_PACK(~0u)},
+ {53312, S_0286C4_VS_EXPORTS_FOG(~0u)},
+ {53327, S_0286C4_VS_OUT_FOG_VEC_ADDR(~0u)},
+ /* 1348 */
+ {53347, S_0286CC_PERSP_SAMPLE_ENA(~0u)},
+ {53364, S_0286CC_PERSP_CENTER_ENA(~0u)},
+ {53381, S_0286CC_PERSP_CENTROID_ENA(~0u)},
+ {53400, S_0286CC_PERSP_PULL_MODEL_ENA(~0u)},
+ {53421, S_0286CC_LINEAR_SAMPLE_ENA(~0u)},
+ {53439, S_0286CC_LINEAR_CENTER_ENA(~0u)},
+ {53457, S_0286CC_LINEAR_CENTROID_ENA(~0u)},
+ {53477, S_0286CC_LINE_STIPPLE_TEX_ENA(~0u)},
+ {53498, S_0286CC_POS_X_FLOAT_ENA(~0u)},
+ {53514, S_0286CC_POS_Y_FLOAT_ENA(~0u)},
+ {53530, S_0286CC_POS_Z_FLOAT_ENA(~0u)},
+ {53546, S_0286CC_POS_W_FLOAT_ENA(~0u)},
+ {53562, S_0286CC_FRONT_FACE_ENA(~0u)},
+ {53577, S_0286CC_ANCILLARY_ENA(~0u)},
+ {53591, S_0286CC_SAMPLE_COVERAGE_ENA(~0u)},
+ {53611, S_0286CC_POS_FIXED_PT_ENA(~0u)},
+ /* 1364 */
+ {53347, S_0286D0_PERSP_SAMPLE_ENA(~0u)},
+ {53364, S_0286D0_PERSP_CENTER_ENA(~0u)},
+ {53381, S_0286D0_PERSP_CENTROID_ENA(~0u)},
+ {53400, S_0286D0_PERSP_PULL_MODEL_ENA(~0u)},
+ {53421, S_0286D0_LINEAR_SAMPLE_ENA(~0u)},
+ {53439, S_0286D0_LINEAR_CENTER_ENA(~0u)},
+ {53457, S_0286D0_LINEAR_CENTROID_ENA(~0u)},
+ {53477, S_0286D0_LINE_STIPPLE_TEX_ENA(~0u)},
+ {53498, S_0286D0_POS_X_FLOAT_ENA(~0u)},
+ {53514, S_0286D0_POS_Y_FLOAT_ENA(~0u)},
+ {53530, S_0286D0_POS_Z_FLOAT_ENA(~0u)},
+ {53546, S_0286D0_POS_W_FLOAT_ENA(~0u)},
+ {53562, S_0286D0_FRONT_FACE_ENA(~0u)},
+ {53577, S_0286D0_ANCILLARY_ENA(~0u)},
+ {53591, S_0286D0_SAMPLE_COVERAGE_ENA(~0u)},
+ {53611, S_0286D0_POS_FIXED_PT_ENA(~0u)},
+ /* 1380 */
+ {53628, S_0286D4_FLAT_SHADE_ENA(~0u)},
+ {53643, S_0286D4_PNT_SPRITE_ENA(~0u)},
+ {53766, S_0286D4_PNT_SPRITE_OVRD_X(~0u), 5, 790},
+ {53784, S_0286D4_PNT_SPRITE_OVRD_Y(~0u), 5, 790},
+ {53802, S_0286D4_PNT_SPRITE_OVRD_Z(~0u), 5, 790},
+ {53820, S_0286D4_PNT_SPRITE_OVRD_W(~0u), 5, 790},
+ {53838, S_0286D4_PNT_SPRITE_TOP_1(~0u)},
/* 1387 */
- {22051, S_028200_WINDOW_X_OFFSET(~0u)},
- {22067, S_028200_WINDOW_Y_OFFSET(~0u)},
- /* 1389 */
- {21637, S_028204_TL_X(~0u)},
- {21642, S_028204_TL_Y(~0u)},
- {22083, S_028204_WINDOW_OFFSET_DISABLE(~0u)},
+ {53855, S_0286D8_NUM_INTERP(~0u)},
+ {53866, S_0286D8_PARAM_GEN(~0u)},
+ {53876, S_0286D8_FOG_ADDR(~0u)},
+ {53885, S_0286D8_BC_OPTIMIZE_DISABLE(~0u)},
+ {53905, S_0286D8_PASS_FOG_THROUGH_PS(~0u)},
/* 1392 */
- {21647, S_028208_BR_X(~0u)},
- {21652, S_028208_BR_Y(~0u)},
- /* 1394 */
- {22105, S_02820C_CLIP_RULE(~0u)},
- /* 1395 */
- {21637, S_028210_TL_X(~0u)},
- {21642, S_028210_TL_Y(~0u)},
- /* 1397 */
- {21647, S_028214_BR_X(~0u)},
- {21652, S_028214_BR_Y(~0u)},
+ {53925, S_0286E0_PERSP_CENTER_CNTL(~0u)},
+ {53943, S_0286E0_PERSP_CENTROID_CNTL(~0u)},
+ {53963, S_0286E0_LINEAR_CENTER_CNTL(~0u)},
+ {53982, S_0286E0_LINEAR_CENTROID_CNTL(~0u)},
+ {54052, S_0286E0_POS_FLOAT_LOCATION(~0u), 1, 795},
+ {54071, S_0286E0_POS_FLOAT_ULC(~0u)},
+ {54085, S_0286E0_FRONT_FACE_ALL_BITS(~0u)},
/* 1399 */
- {22115, S_028230_ER_TRI(~0u)},
- {14642, S_028230_ER_POINT(~0u)},
- {22122, S_028230_ER_RECT(~0u)},
- {22130, S_028230_ER_LINE_LR(~0u)},
- {22141, S_028230_ER_LINE_RL(~0u)},
- {22152, S_028230_ER_LINE_TB(~0u)},
- {22163, S_028230_ER_LINE_BT(~0u)},
+ {47216, S_0286E8_WAVES(~0u)},
+ {47222, S_0286E8_WAVESIZE(~0u)},
+ /* 1401 */
+ {54105, S_028704_NUM_PS_WAVES(~0u)},
+ {54118, S_028704_NUM_VS_WAVES(~0u)},
+ {54131, S_028704_NUM_GS_WAVES(~0u)},
+ {54144, S_028704_NUM_ES_WAVES(~0u)},
+ {54157, S_028704_NUM_HS_WAVES(~0u)},
/* 1406 */
- {22174, S_028234_HW_SCREEN_OFFSET_X(~0u)},
- {22193, S_028234_HW_SCREEN_OFFSET_Y(~0u)},
- /* 1408 */
- {22212, S_028238_TARGET0_ENABLE(~0u)},
- {22227, S_028238_TARGET1_ENABLE(~0u)},
- {22242, S_028238_TARGET2_ENABLE(~0u)},
- {22257, S_028238_TARGET3_ENABLE(~0u)},
- {22272, S_028238_TARGET4_ENABLE(~0u)},
- {22287, S_028238_TARGET5_ENABLE(~0u)},
- {22302, S_028238_TARGET6_ENABLE(~0u)},
- {22317, S_028238_TARGET7_ENABLE(~0u)},
- /* 1416 */
- {22332, S_02823C_OUTPUT0_ENABLE(~0u)},
- {22347, S_02823C_OUTPUT1_ENABLE(~0u)},
- {22362, S_02823C_OUTPUT2_ENABLE(~0u)},
- {22377, S_02823C_OUTPUT3_ENABLE(~0u)},
- {22392, S_02823C_OUTPUT4_ENABLE(~0u)},
- {22407, S_02823C_OUTPUT5_ENABLE(~0u)},
- {22422, S_02823C_OUTPUT6_ENABLE(~0u)},
- {22437, S_02823C_OUTPUT7_ENABLE(~0u)},
- /* 1424 */
- {21637, S_028240_TL_X(~0u)},
- {21642, S_028240_TL_Y(~0u)},
- {22083, S_028240_WINDOW_OFFSET_DISABLE(~0u)},
- /* 1427 */
- {21647, S_028244_BR_X(~0u)},
- {21652, S_028244_BR_Y(~0u)},
- /* 1429 */
- {21637, S_028250_TL_X(~0u)},
- {21642, S_028250_TL_Y(~0u)},
- {22083, S_028250_WINDOW_OFFSET_DISABLE(~0u)},
- /* 1432 */
- {21647, S_028254_BR_X(~0u)},
- {21652, S_028254_BR_Y(~0u)},
- /* 1434 */
- {22544, S_028350_RB_MAP_PKR0(~0u), 4, 831},
- {22556, S_028350_RB_MAP_PKR1(~0u), 4, 831},
- {22668, S_028350_RB_XSEL2(~0u), 4, 835},
- {22677, S_028350_RB_XSEL(~0u)},
- {22685, S_028350_RB_YSEL(~0u)},
- {22789, S_028350_PKR_MAP(~0u), 4, 839},
- {22897, S_028350_PKR_XSEL(~0u), 4, 843},
- {23006, S_028350_PKR_YSEL(~0u), 4, 847},
- {23119, S_028350_PKR_XSEL2(~0u), 4, 851},
- {23221, S_028350_SC_MAP(~0u), 4, 855},
- {23367, S_028350_SC_XSEL(~0u), 4, 859},
- {23514, S_028350_SC_YSEL(~0u), 4, 863},
- {23614, S_028350_SE_MAP(~0u), 4, 867},
- {23760, S_028350_SE_XSEL_GFX6(~0u), 4, 871},
- {23912, S_028350_SE_YSEL_GFX6(~0u), 4, 875},
- {23925, S_028350_SE_XSEL_GFX9(~0u)},
- {23938, S_028350_SE_YSEL_GFX9(~0u)},
- /* 1451 */
- {24063, S_028354_SE_PAIR_MAP(~0u), 4, 879},
- {24234, S_028354_SE_PAIR_XSEL_GFX6(~0u), 4, 883},
- {24411, S_028354_SE_PAIR_YSEL_GFX6(~0u), 4, 887},
- {24429, S_028354_SE_PAIR_XSEL_GFX9(~0u)},
- {24447, S_028354_SE_PAIR_YSEL_GFX9(~0u)},
- /* 1456 */
- {20566, S_028358_SLICE_EVEN_ENABLE(~0u)},
- {20584, S_028358_SLICE_ODD_ENABLE(~0u)},
- /* 1458 */
- {24465, S_028424_OVERWRITE_COMBINER_DISABLE(~0u)},
- {24492, S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(~0u)},
- {24531, S_028424_OVERWRITE_COMBINER_WATERMARK(~0u)},
- /* 1461 */
- {24797, S_02842C_STENCILFAIL(~0u), 16, 891},
- {24809, S_02842C_STENCILZPASS(~0u), 16, 891},
- {24822, S_02842C_STENCILZFAIL(~0u), 16, 891},
- {24835, S_02842C_STENCILFAIL_BF(~0u), 16, 891},
- {24850, S_02842C_STENCILZPASS_BF(~0u), 16, 891},
- {24866, S_02842C_STENCILZFAIL_BF(~0u), 16, 891},
- /* 1467 */
- {24882, S_028430_STENCILTESTVAL(~0u)},
- {24897, S_028430_STENCILMASK(~0u)},
- {24909, S_028430_STENCILWRITEMASK(~0u)},
- {24926, S_028430_STENCILOPVAL(~0u)},
+ {54170, S_028708_NUM_LS_WAVES(~0u)},
+ /* 1407 */
+ {54271, S_02870C_POS0_EXPORT_FORMAT(~0u), 5, 796},
+ {54290, S_02870C_POS1_EXPORT_FORMAT(~0u), 5, 796},
+ {54309, S_02870C_POS2_EXPORT_FORMAT(~0u), 5, 796},
+ {54328, S_02870C_POS3_EXPORT_FORMAT(~0u), 5, 796},
+ /* 1411 */
+ {54547, S_028710_Z_EXPORT_FORMAT(~0u), 10, 801},
+ /* 1412 */
+ {54563, S_028714_COL0_EXPORT_FORMAT(~0u), 10, 801},
+ {54582, S_028714_COL1_EXPORT_FORMAT(~0u), 10, 801},
+ {54601, S_028714_COL2_EXPORT_FORMAT(~0u), 10, 801},
+ {54620, S_028714_COL3_EXPORT_FORMAT(~0u), 10, 801},
+ {54639, S_028714_COL4_EXPORT_FORMAT(~0u), 10, 801},
+ {54658, S_028714_COL5_EXPORT_FORMAT(~0u), 10, 801},
+ {54677, S_028714_COL6_EXPORT_FORMAT(~0u), 10, 801},
+ {54696, S_028714_COL7_EXPORT_FORMAT(~0u), 10, 801},
+ /* 1420 */
+ {54950, S_028754_MRT0(~0u), 11, 811},
+ {54955, S_028754_MRT1(~0u)},
+ {54960, S_028754_MRT2(~0u)},
+ {54965, S_028754_MRT3(~0u)},
+ {54970, S_028754_MRT4(~0u)},
+ {54975, S_028754_MRT5(~0u)},
+ {54980, S_028754_MRT6(~0u)},
+ {54985, S_028754_MRT7(~0u)},
+ /* 1428 */
+ {55070, S_028758_MRT0_EPSILON(~0u), 16, 822},
+ {55083, S_028758_MRT1_EPSILON(~0u)},
+ {55096, S_028758_MRT2_EPSILON(~0u)},
+ {55109, S_028758_MRT3_EPSILON(~0u)},
+ {55122, S_028758_MRT4_EPSILON(~0u)},
+ {55135, S_028758_MRT5_EPSILON(~0u)},
+ {55148, S_028758_MRT6_EPSILON(~0u)},
+ {55161, S_028758_MRT7_EPSILON(~0u)},
+ /* 1436 */
+ {55174, S_02875C_MRT0_COLOR_OPT_DISABLE(~0u)},
+ {55197, S_02875C_MRT0_ALPHA_OPT_DISABLE(~0u)},
+ {55220, S_02875C_MRT1_COLOR_OPT_DISABLE(~0u)},
+ {55243, S_02875C_MRT1_ALPHA_OPT_DISABLE(~0u)},
+ {55266, S_02875C_MRT2_COLOR_OPT_DISABLE(~0u)},
+ {55289, S_02875C_MRT2_ALPHA_OPT_DISABLE(~0u)},
+ {55312, S_02875C_MRT3_COLOR_OPT_DISABLE(~0u)},
+ {55335, S_02875C_MRT3_ALPHA_OPT_DISABLE(~0u)},
+ {55358, S_02875C_MRT4_COLOR_OPT_DISABLE(~0u)},
+ {55381, S_02875C_MRT4_ALPHA_OPT_DISABLE(~0u)},
+ {55404, S_02875C_MRT5_COLOR_OPT_DISABLE(~0u)},
+ {55427, S_02875C_MRT5_ALPHA_OPT_DISABLE(~0u)},
+ {55450, S_02875C_MRT6_COLOR_OPT_DISABLE(~0u)},
+ {55473, S_02875C_MRT6_ALPHA_OPT_DISABLE(~0u)},
+ {55496, S_02875C_MRT7_COLOR_OPT_DISABLE(~0u)},
+ {55519, S_02875C_MRT7_ALPHA_OPT_DISABLE(~0u)},
+ {55542, S_02875C_PIXEN_ZERO_OPT_DISABLE(~0u)},
+ /* 1453 */
+ {55833, S_028760_COLOR_SRC_OPT(~0u), 8, 838},
+ {55847, S_028760_COLOR_DST_OPT(~0u)},
+ {55995, S_028760_COLOR_COMB_FCN(~0u), 8, 846},
+ {56010, S_028760_ALPHA_SRC_OPT(~0u)},
+ {56024, S_028760_ALPHA_DST_OPT(~0u)},
+ {56038, S_028760_ALPHA_COMB_FCN(~0u)},
+ /* 1459 */
+ {55833, S_028764_COLOR_SRC_OPT(~0u)},
+ {55847, S_028764_COLOR_DST_OPT(~0u)},
+ {55995, S_028764_COLOR_COMB_FCN(~0u)},
+ {56010, S_028764_ALPHA_SRC_OPT(~0u)},
+ {56024, S_028764_ALPHA_DST_OPT(~0u)},
+ {56038, S_028764_ALPHA_COMB_FCN(~0u)},
+ /* 1465 */
+ {55833, S_028768_COLOR_SRC_OPT(~0u)},
+ {55847, S_028768_COLOR_DST_OPT(~0u)},
+ {55995, S_028768_COLOR_COMB_FCN(~0u)},
+ {56010, S_028768_ALPHA_SRC_OPT(~0u)},
+ {56024, S_028768_ALPHA_DST_OPT(~0u)},
+ {56038, S_028768_ALPHA_COMB_FCN(~0u)},
/* 1471 */
- {24939, S_028434_STENCILTESTVAL_BF(~0u)},
- {24957, S_028434_STENCILMASK_BF(~0u)},
- {24972, S_028434_STENCILWRITEMASK_BF(~0u)},
- {24992, S_028434_STENCILOPVAL_BF(~0u)},
- /* 1475 */
- {669, S_028644_OFFSET(~0u)},
- {25015, S_028644_DEFAULT_VAL(~0u), 1, 907},
- {25027, S_028644_FLAT_SHADE(~0u)},
- {25038, S_028644_CYL_WRAP(~0u)},
- {25047, S_028644_PT_SPRITE_TEX(~0u)},
- {25061, S_028644_DUP(~0u)},
- {25065, S_028644_FP16_INTERP_MODE(~0u)},
- {25082, S_028644_USE_DEFAULT_ATTR1(~0u)},
- {25100, S_028644_DEFAULT_VAL_ATTR1(~0u)},
- {25118, S_028644_PT_SPRITE_TEX_ATTR1(~0u)},
- {25138, S_028644_ATTR0_VALID(~0u)},
- {25150, S_028644_ATTR1_VALID(~0u)},
- /* 1487 */
- {25162, S_0286C4_VS_EXPORT_COUNT(~0u)},
- {25178, S_0286C4_VS_HALF_PACK(~0u)},
- {25191, S_0286C4_VS_EXPORTS_FOG(~0u)},
- {25206, S_0286C4_VS_OUT_FOG_VEC_ADDR(~0u)},
- /* 1491 */
- {25226, S_0286CC_PERSP_SAMPLE_ENA(~0u)},
- {25243, S_0286CC_PERSP_CENTER_ENA(~0u)},
- {25260, S_0286CC_PERSP_CENTROID_ENA(~0u)},
- {25279, S_0286CC_PERSP_PULL_MODEL_ENA(~0u)},
- {25300, S_0286CC_LINEAR_SAMPLE_ENA(~0u)},
- {25318, S_0286CC_LINEAR_CENTER_ENA(~0u)},
- {25336, S_0286CC_LINEAR_CENTROID_ENA(~0u)},
- {25356, S_0286CC_LINE_STIPPLE_TEX_ENA(~0u)},
- {25377, S_0286CC_POS_X_FLOAT_ENA(~0u)},
- {25393, S_0286CC_POS_Y_FLOAT_ENA(~0u)},
- {25409, S_0286CC_POS_Z_FLOAT_ENA(~0u)},
- {25425, S_0286CC_POS_W_FLOAT_ENA(~0u)},
- {25441, S_0286CC_FRONT_FACE_ENA(~0u)},
- {25456, S_0286CC_ANCILLARY_ENA(~0u)},
- {25470, S_0286CC_SAMPLE_COVERAGE_ENA(~0u)},
- {25490, S_0286CC_POS_FIXED_PT_ENA(~0u)},
- /* 1507 */
- {25226, S_0286D0_PERSP_SAMPLE_ENA(~0u)},
- {25243, S_0286D0_PERSP_CENTER_ENA(~0u)},
- {25260, S_0286D0_PERSP_CENTROID_ENA(~0u)},
- {25279, S_0286D0_PERSP_PULL_MODEL_ENA(~0u)},
- {25300, S_0286D0_LINEAR_SAMPLE_ENA(~0u)},
- {25318, S_0286D0_LINEAR_CENTER_ENA(~0u)},
- {25336, S_0286D0_LINEAR_CENTROID_ENA(~0u)},
- {25356, S_0286D0_LINE_STIPPLE_TEX_ENA(~0u)},
- {25377, S_0286D0_POS_X_FLOAT_ENA(~0u)},
- {25393, S_0286D0_POS_Y_FLOAT_ENA(~0u)},
- {25409, S_0286D0_POS_Z_FLOAT_ENA(~0u)},
- {25425, S_0286D0_POS_W_FLOAT_ENA(~0u)},
- {25441, S_0286D0_FRONT_FACE_ENA(~0u)},
- {25456, S_0286D0_ANCILLARY_ENA(~0u)},
- {25470, S_0286D0_SAMPLE_COVERAGE_ENA(~0u)},
- {25490, S_0286D0_POS_FIXED_PT_ENA(~0u)},
- /* 1523 */
- {25507, S_0286D4_FLAT_SHADE_ENA(~0u)},
- {25522, S_0286D4_PNT_SPRITE_ENA(~0u)},
- {25645, S_0286D4_PNT_SPRITE_OVRD_X(~0u), 5, 908},
- {25663, S_0286D4_PNT_SPRITE_OVRD_Y(~0u), 5, 908},
- {25681, S_0286D4_PNT_SPRITE_OVRD_Z(~0u), 5, 908},
- {25699, S_0286D4_PNT_SPRITE_OVRD_W(~0u), 5, 908},
- {25717, S_0286D4_PNT_SPRITE_TOP_1(~0u)},
- /* 1530 */
- {25734, S_0286D8_NUM_INTERP(~0u)},
- {25745, S_0286D8_PARAM_GEN(~0u)},
- {25755, S_0286D8_FOG_ADDR(~0u)},
- {25764, S_0286D8_BC_OPTIMIZE_DISABLE(~0u)},
- {25784, S_0286D8_PASS_FOG_THROUGH_PS(~0u)},
- {25804, S_0286D8_OFFCHIP_PARAM_EN(~0u)},
- {25821, S_0286D8_LATE_PC_DEALLOC(~0u)},
- /* 1537 */
- {25837, S_0286E0_PERSP_CENTER_CNTL(~0u)},
- {25855, S_0286E0_PERSP_CENTROID_CNTL(~0u)},
- {25875, S_0286E0_LINEAR_CENTER_CNTL(~0u)},
- {25894, S_0286E0_LINEAR_CENTROID_CNTL(~0u)},
- {25964, S_0286E0_POS_FLOAT_LOCATION(~0u), 1, 913},
- {25983, S_0286E0_POS_FLOAT_ULC(~0u)},
- {25997, S_0286E0_FRONT_FACE_ALL_BITS(~0u)},
- /* 1544 */
- {18769, S_0286E8_WAVES(~0u)},
- {18775, S_0286E8_WAVESIZE(~0u)},
- /* 1546 */
- {26017, S_028704_NUM_PS_WAVES(~0u)},
- {26030, S_028704_NUM_VS_WAVES(~0u)},
- {26043, S_028704_NUM_GS_WAVES(~0u)},
- {26056, S_028704_NUM_ES_WAVES(~0u)},
- {26069, S_028704_NUM_HS_WAVES(~0u)},
- /* 1551 */
- {26082, S_028708_NUM_LS_WAVES(~0u)},
- /* 1552 */
- {26183, S_02870C_POS0_EXPORT_FORMAT(~0u), 5, 914},
- {26202, S_02870C_POS1_EXPORT_FORMAT(~0u), 5, 914},
- {26221, S_02870C_POS2_EXPORT_FORMAT(~0u), 5, 914},
- {26240, S_02870C_POS3_EXPORT_FORMAT(~0u), 5, 914},
+ {55833, S_02876C_COLOR_SRC_OPT(~0u)},
+ {55847, S_02876C_COLOR_DST_OPT(~0u)},
+ {55995, S_02876C_COLOR_COMB_FCN(~0u)},
+ {56010, S_02876C_ALPHA_SRC_OPT(~0u)},
+ {56024, S_02876C_ALPHA_DST_OPT(~0u)},
+ {56038, S_02876C_ALPHA_COMB_FCN(~0u)},
+ /* 1477 */
+ {55833, S_028770_COLOR_SRC_OPT(~0u)},
+ {55847, S_028770_COLOR_DST_OPT(~0u)},
+ {55995, S_028770_COLOR_COMB_FCN(~0u)},
+ {56010, S_028770_ALPHA_SRC_OPT(~0u)},
+ {56024, S_028770_ALPHA_DST_OPT(~0u)},
+ {56038, S_028770_ALPHA_COMB_FCN(~0u)},
+ /* 1483 */
+ {55833, S_028774_COLOR_SRC_OPT(~0u)},
+ {55847, S_028774_COLOR_DST_OPT(~0u)},
+ {55995, S_028774_COLOR_COMB_FCN(~0u)},
+ {56010, S_028774_ALPHA_SRC_OPT(~0u)},
+ {56024, S_028774_ALPHA_DST_OPT(~0u)},
+ {56038, S_028774_ALPHA_COMB_FCN(~0u)},
+ /* 1489 */
+ {55833, S_028778_COLOR_SRC_OPT(~0u)},
+ {55847, S_028778_COLOR_DST_OPT(~0u)},
+ {55995, S_028778_COLOR_COMB_FCN(~0u)},
+ {56010, S_028778_ALPHA_SRC_OPT(~0u)},
+ {56024, S_028778_ALPHA_DST_OPT(~0u)},
+ {56038, S_028778_ALPHA_COMB_FCN(~0u)},
+ /* 1495 */
+ {55833, S_02877C_COLOR_SRC_OPT(~0u)},
+ {55847, S_02877C_COLOR_DST_OPT(~0u)},
+ {55995, S_02877C_COLOR_COMB_FCN(~0u)},
+ {56010, S_02877C_ALPHA_SRC_OPT(~0u)},
+ {56024, S_02877C_ALPHA_DST_OPT(~0u)},
+ {56038, S_02877C_ALPHA_COMB_FCN(~0u)},
+ /* 1501 */
+ {56447, S_028780_COLOR_SRCBLEND(~0u), 21, 854},
+ {55995, S_028780_COLOR_COMB_FCN(~0u), 5, 875},
+ {56552, S_028780_COLOR_DESTBLEND(~0u), 21, 854},
+ {56568, S_028780_ALPHA_SRCBLEND(~0u), 21, 854},
+ {56038, S_028780_ALPHA_COMB_FCN(~0u), 5, 875},
+ {56583, S_028780_ALPHA_DESTBLEND(~0u), 21, 854},
+ {56599, S_028780_SEPARATE_ALPHA_BLEND(~0u)},
+ {7643, S_028780_ENABLE(~0u)},
+ {56620, S_028780_DISABLE_ROP3(~0u)},
+ /* 1510 */
+ {56633, S_0287CC_SRC_STATE_ID(~0u)},
+ /* 1511 */
+ {56646, S_0287E4_BASE_ADDR_GFX6(~0u)},
+ /* 1512 */
+ {56739, S_0287F0_SOURCE_SELECT(~0u), 4, 880},
+ {56785, S_0287F0_MAJOR_MODE(~0u), 2, 884},
+ {56796, S_0287F0_NOT_EOP(~0u)},
+ {56804, S_0287F0_USE_OPAQUE(~0u)},
+ /* 1516 */
+ {56815, S_0287F8_ADDRESS_LOW(~0u)},
+ /* 1517 */
+ {56827, S_028800_STENCIL_ENABLE(~0u)},
+ {48954, S_028800_Z_ENABLE(~0u)},
+ {56842, S_028800_Z_WRITE_ENABLE(~0u)},
+ {56857, S_028800_DEPTH_BOUNDS_ENABLE(~0u)},
+ {49754, S_028800_ZFUNC(~0u), 8, 886},
+ {56972, S_028800_BACKFACE_ENABLE(~0u)},
+ {57075, S_028800_STENCILFUNC(~0u), 8, 894},
+ {57087, S_028800_STENCILFUNC_BF(~0u), 8, 894},
+ {57102, S_028800_ENABLE_COLOR_WRITES_ON_DEPTH_FAIL(~0u)},
+ {57136, S_028800_DISABLE_COLOR_WRITES_ON_DEPTH_PASS(~0u)},
+ /* 1527 */
+ {57171, S_028804_MAX_ANCHOR_SAMPLES(~0u)},
+ {57190, S_028804_PS_ITER_SAMPLES(~0u)},
+ {57206, S_028804_MASK_EXPORT_NUM_SAMPLES(~0u)},
+ {57230, S_028804_ALPHA_TO_MASK_NUM_SAMPLES(~0u)},
+ {57256, S_028804_HIGH_QUALITY_INTERSECTIONS(~0u)},
+ {57283, S_028804_INCOHERENT_EQAA_READS(~0u)},
+ {57305, S_028804_INTERPOLATE_COMP_Z(~0u)},
+ {57324, S_028804_INTERPOLATE_SRC_Z(~0u)},
+ {57342, S_028804_STATIC_ANCHOR_ASSOCIATIONS(~0u)},
+ {57369, S_028804_ALPHA_TO_MASK_EQAA_DISABLE(~0u)},
+ {57396, S_028804_OVERRASTERIZATION_AMOUNT(~0u)},
+ {57421, S_028804_ENABLE_POSTZ_OVERRASTERIZATION(~0u)},
+ /* 1539 */
+ {57452, S_028808_DISABLE_DUAL_QUAD(~0u)},
+ {57470, S_028808_DEGAMMA_ENABLE(~0u)},
+ {22279, S_028808_MODE(~0u), 7, 902},
+ {56628, S_028808_ROP3(~0u), 256, 909},
+ /* 1543 */
+ {57775, S_02880C_Z_EXPORT_ENABLE(~0u)},
+ {57791, S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(~0u)},
+ {57822, S_02880C_STENCIL_OP_VAL_EXPORT_ENABLE(~0u)},
+ {49012, S_02880C_Z_ORDER(~0u), 4, 1165},
+ {57901, S_02880C_KILL_ENABLE(~0u)},
+ {57913, S_02880C_COVERAGE_TO_MASK_ENABLE(~0u)},
+ {57937, S_02880C_MASK_EXPORT_ENABLE(~0u)},
+ {57956, S_02880C_EXEC_ON_HIER_FAIL(~0u)},
+ {57974, S_02880C_EXEC_ON_NOOP(~0u)},
+ {57987, S_02880C_ALPHA_TO_MASK_DISABLE(~0u)},
+ {58009, S_02880C_DEPTH_BEFORE_SHADER(~0u)},
+ {58099, S_02880C_CONSERVATIVE_Z_EXPORT(~0u), 4, 1169},
+ {58121, S_02880C_DUAL_QUAD_DISABLE(~0u)},
/* 1556 */
- {26459, S_028710_Z_EXPORT_FORMAT(~0u), 10, 919},
- /* 1557 */
- {26475, S_028714_COL0_EXPORT_FORMAT(~0u), 10, 919},
- {26494, S_028714_COL1_EXPORT_FORMAT(~0u), 10, 919},
- {26513, S_028714_COL2_EXPORT_FORMAT(~0u), 10, 919},
- {26532, S_028714_COL3_EXPORT_FORMAT(~0u), 10, 919},
- {26551, S_028714_COL4_EXPORT_FORMAT(~0u), 10, 919},
- {26570, S_028714_COL5_EXPORT_FORMAT(~0u), 10, 919},
- {26589, S_028714_COL6_EXPORT_FORMAT(~0u), 10, 919},
- {26608, S_028714_COL7_EXPORT_FORMAT(~0u), 10, 919},
- /* 1565 */
- {26862, S_028754_MRT0(~0u), 11, 929},
- {26867, S_028754_MRT1(~0u)},
- {26872, S_028754_MRT2(~0u)},
- {26877, S_028754_MRT3(~0u)},
- {26882, S_028754_MRT4(~0u)},
- {26887, S_028754_MRT5(~0u)},
- {26892, S_028754_MRT6(~0u)},
- {26897, S_028754_MRT7(~0u)},
- /* 1573 */
- {26982, S_028758_MRT0_EPSILON(~0u), 16, 940},
- {26995, S_028758_MRT1_EPSILON(~0u)},
- {27008, S_028758_MRT2_EPSILON(~0u)},
- {27021, S_028758_MRT3_EPSILON(~0u)},
- {27034, S_028758_MRT4_EPSILON(~0u)},
- {27047, S_028758_MRT5_EPSILON(~0u)},
- {27060, S_028758_MRT6_EPSILON(~0u)},
- {27073, S_028758_MRT7_EPSILON(~0u)},
- /* 1581 */
- {27086, S_02875C_MRT0_COLOR_OPT_DISABLE(~0u)},
- {27109, S_02875C_MRT0_ALPHA_OPT_DISABLE(~0u)},
- {27132, S_02875C_MRT1_COLOR_OPT_DISABLE(~0u)},
- {27155, S_02875C_MRT1_ALPHA_OPT_DISABLE(~0u)},
- {27178, S_02875C_MRT2_COLOR_OPT_DISABLE(~0u)},
- {27201, S_02875C_MRT2_ALPHA_OPT_DISABLE(~0u)},
- {27224, S_02875C_MRT3_COLOR_OPT_DISABLE(~0u)},
- {27247, S_02875C_MRT3_ALPHA_OPT_DISABLE(~0u)},
- {27270, S_02875C_MRT4_COLOR_OPT_DISABLE(~0u)},
- {27293, S_02875C_MRT4_ALPHA_OPT_DISABLE(~0u)},
- {27316, S_02875C_MRT5_COLOR_OPT_DISABLE(~0u)},
- {27339, S_02875C_MRT5_ALPHA_OPT_DISABLE(~0u)},
- {27362, S_02875C_MRT6_COLOR_OPT_DISABLE(~0u)},
- {27385, S_02875C_MRT6_ALPHA_OPT_DISABLE(~0u)},
- {27408, S_02875C_MRT7_COLOR_OPT_DISABLE(~0u)},
- {27431, S_02875C_MRT7_ALPHA_OPT_DISABLE(~0u)},
- {27454, S_02875C_PIXEN_ZERO_OPT_DISABLE(~0u)},
- /* 1598 */
- {27745, S_028760_COLOR_SRC_OPT(~0u), 8, 956},
- {27759, S_028760_COLOR_DST_OPT(~0u)},
- {27907, S_028760_COLOR_COMB_FCN(~0u), 8, 964},
- {27922, S_028760_ALPHA_SRC_OPT(~0u)},
- {27936, S_028760_ALPHA_DST_OPT(~0u)},
- {27950, S_028760_ALPHA_COMB_FCN(~0u)},
- /* 1604 */
- {27745, S_028764_COLOR_SRC_OPT(~0u)},
- {27759, S_028764_COLOR_DST_OPT(~0u)},
- {27907, S_028764_COLOR_COMB_FCN(~0u)},
- {27922, S_028764_ALPHA_SRC_OPT(~0u)},
- {27936, S_028764_ALPHA_DST_OPT(~0u)},
- {27950, S_028764_ALPHA_COMB_FCN(~0u)},
- /* 1610 */
- {27745, S_028768_COLOR_SRC_OPT(~0u)},
- {27759, S_028768_COLOR_DST_OPT(~0u)},
- {27907, S_028768_COLOR_COMB_FCN(~0u)},
- {27922, S_028768_ALPHA_SRC_OPT(~0u)},
- {27936, S_028768_ALPHA_DST_OPT(~0u)},
- {27950, S_028768_ALPHA_COMB_FCN(~0u)},
- /* 1616 */
- {27745, S_02876C_COLOR_SRC_OPT(~0u)},
- {27759, S_02876C_COLOR_DST_OPT(~0u)},
- {27907, S_02876C_COLOR_COMB_FCN(~0u)},
- {27922, S_02876C_ALPHA_SRC_OPT(~0u)},
- {27936, S_02876C_ALPHA_DST_OPT(~0u)},
- {27950, S_02876C_ALPHA_COMB_FCN(~0u)},
- /* 1622 */
- {27745, S_028770_COLOR_SRC_OPT(~0u)},
- {27759, S_028770_COLOR_DST_OPT(~0u)},
- {27907, S_028770_COLOR_COMB_FCN(~0u)},
- {27922, S_028770_ALPHA_SRC_OPT(~0u)},
- {27936, S_028770_ALPHA_DST_OPT(~0u)},
- {27950, S_028770_ALPHA_COMB_FCN(~0u)},
- /* 1628 */
- {27745, S_028774_COLOR_SRC_OPT(~0u)},
- {27759, S_028774_COLOR_DST_OPT(~0u)},
- {27907, S_028774_COLOR_COMB_FCN(~0u)},
- {27922, S_028774_ALPHA_SRC_OPT(~0u)},
- {27936, S_028774_ALPHA_DST_OPT(~0u)},
- {27950, S_028774_ALPHA_COMB_FCN(~0u)},
- /* 1634 */
- {27745, S_028778_COLOR_SRC_OPT(~0u)},
- {27759, S_028778_COLOR_DST_OPT(~0u)},
- {27907, S_028778_COLOR_COMB_FCN(~0u)},
- {27922, S_028778_ALPHA_SRC_OPT(~0u)},
- {27936, S_028778_ALPHA_DST_OPT(~0u)},
- {27950, S_028778_ALPHA_COMB_FCN(~0u)},
+ {58139, S_028810_UCP_ENA_0(~0u)},
+ {58149, S_028810_UCP_ENA_1(~0u)},
+ {58159, S_028810_UCP_ENA_2(~0u)},
+ {58169, S_028810_UCP_ENA_3(~0u)},
+ {58179, S_028810_UCP_ENA_4(~0u)},
+ {58189, S_028810_UCP_ENA_5(~0u)},
+ {58199, S_028810_PS_UCP_Y_SCALE_NEG(~0u)},
+ {58218, S_028810_PS_UCP_MODE(~0u)},
+ {58230, S_028810_CLIP_DISABLE(~0u)},
+ {58243, S_028810_UCP_CULL_ONLY_ENA(~0u)},
+ {58261, S_028810_BOUNDARY_EDGE_FLAG_ENA(~0u)},
+ {58284, S_028810_DX_CLIP_SPACE_DEF(~0u)},
+ {58302, S_028810_DIS_CLIP_ERR_DETECT(~0u)},
+ {58322, S_028810_VTX_KILL_OR(~0u)},
+ {58334, S_028810_DX_RASTERIZATION_KILL(~0u)},
+ {58356, S_028810_DX_LINEAR_ATTR_CLIP_ENA(~0u)},
+ {58380, S_028810_VTE_VPORT_PROVOKE_DISABLE(~0u)},
+ {58406, S_028810_ZCLIP_NEAR_DISABLE(~0u)},
+ {58425, S_028810_ZCLIP_FAR_DISABLE(~0u)},
+ /* 1575 */
+ {58443, S_028814_CULL_FRONT(~0u)},
+ {58454, S_028814_CULL_BACK(~0u)},
+ {22820, S_028814_FACE(~0u)},
+ {58474, S_028814_POLY_MODE(~0u), 2, 1173},
+ {58540, S_028814_POLYMODE_FRONT_PTYPE(~0u), 3, 1175},
+ {58561, S_028814_POLYMODE_BACK_PTYPE(~0u), 3, 1175},
+ {58581, S_028814_POLY_OFFSET_FRONT_ENABLE(~0u)},
+ {58606, S_028814_POLY_OFFSET_BACK_ENABLE(~0u)},
+ {58630, S_028814_POLY_OFFSET_PARA_ENABLE(~0u)},
+ {58654, S_028814_VTX_WINDOW_OFFSET_ENABLE(~0u)},
+ {58679, S_028814_PROVOKING_VTX_LAST(~0u)},
+ {58698, S_028814_PERSP_CORR_DIS(~0u)},
+ {58713, S_028814_MULTI_PRIM_IB_ENA(~0u)},
+ /* 1588 */
+ {58731, S_028818_VPORT_X_SCALE_ENA(~0u)},
+ {58749, S_028818_VPORT_X_OFFSET_ENA(~0u)},
+ {58768, S_028818_VPORT_Y_SCALE_ENA(~0u)},
+ {58786, S_028818_VPORT_Y_OFFSET_ENA(~0u)},
+ {58805, S_028818_VPORT_Z_SCALE_ENA(~0u)},
+ {58823, S_028818_VPORT_Z_OFFSET_ENA(~0u)},
+ {58842, S_028818_VTX_XY_FMT(~0u)},
+ {58853, S_028818_VTX_Z_FMT(~0u)},
+ {58863, S_028818_VTX_W0_FMT(~0u)},
+ /* 1597 */
+ {58874, S_02881C_CLIP_DIST_ENA_0(~0u)},
+ {58890, S_02881C_CLIP_DIST_ENA_1(~0u)},
+ {58906, S_02881C_CLIP_DIST_ENA_2(~0u)},
+ {58922, S_02881C_CLIP_DIST_ENA_3(~0u)},
+ {58938, S_02881C_CLIP_DIST_ENA_4(~0u)},
+ {58954, S_02881C_CLIP_DIST_ENA_5(~0u)},
+ {58970, S_02881C_CLIP_DIST_ENA_6(~0u)},
+ {58986, S_02881C_CLIP_DIST_ENA_7(~0u)},
+ {59002, S_02881C_CULL_DIST_ENA_0(~0u)},
+ {59018, S_02881C_CULL_DIST_ENA_1(~0u)},
+ {59034, S_02881C_CULL_DIST_ENA_2(~0u)},
+ {59050, S_02881C_CULL_DIST_ENA_3(~0u)},
+ {59066, S_02881C_CULL_DIST_ENA_4(~0u)},
+ {59082, S_02881C_CULL_DIST_ENA_5(~0u)},
+ {59098, S_02881C_CULL_DIST_ENA_6(~0u)},
+ {59114, S_02881C_CULL_DIST_ENA_7(~0u)},
+ {59130, S_02881C_USE_VTX_POINT_SIZE(~0u)},
+ {59149, S_02881C_USE_VTX_EDGE_FLAG(~0u)},
+ {59167, S_02881C_USE_VTX_RENDER_TARGET_INDX(~0u)},
+ {59194, S_02881C_USE_VTX_VIEWPORT_INDX(~0u)},
+ {59216, S_02881C_USE_VTX_KILL_FLAG(~0u)},
+ {59234, S_02881C_VS_OUT_MISC_VEC_ENA(~0u)},
+ {59254, S_02881C_VS_OUT_CCDIST0_VEC_ENA(~0u)},
+ {59277, S_02881C_VS_OUT_CCDIST1_VEC_ENA(~0u)},
+ {59300, S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(~0u)},
+ {59325, S_02881C_USE_VTX_GS_CUT_FLAG(~0u)},
+ {59345, S_02881C_USE_VTX_LINE_WIDTH(~0u)},
+ /* 1624 */
+ {59364, S_028820_VTE_XY_INF_DISCARD(~0u)},
+ {59383, S_028820_VTE_Z_INF_DISCARD(~0u)},
+ {59401, S_028820_VTE_W_INF_DISCARD(~0u)},
+ {59419, S_028820_VTE_0XNANINF_IS_0(~0u)},
+ {59437, S_028820_VTE_XY_NAN_RETAIN(~0u)},
+ {59455, S_028820_VTE_Z_NAN_RETAIN(~0u)},
+ {59472, S_028820_VTE_W_NAN_RETAIN(~0u)},
+ {59489, S_028820_VTE_W_RECIP_NAN_IS_0(~0u)},
+ {59510, S_028820_VS_XY_NAN_TO_INF(~0u)},
+ {59527, S_028820_VS_XY_INF_RETAIN(~0u)},
+ {59544, S_028820_VS_Z_NAN_TO_INF(~0u)},
+ {59560, S_028820_VS_Z_INF_RETAIN(~0u)},
+ {59576, S_028820_VS_W_NAN_TO_INF(~0u)},
+ {59592, S_028820_VS_W_INF_RETAIN(~0u)},
+ {59608, S_028820_VS_CLIP_DIST_INF_DISCARD(~0u)},
+ {59633, S_028820_VTE_NO_OUTPUT_NEG_0(~0u)},
/* 1640 */
- {27745, S_02877C_COLOR_SRC_OPT(~0u)},
- {27759, S_02877C_COLOR_DST_OPT(~0u)},
- {27907, S_02877C_COLOR_COMB_FCN(~0u)},
- {27922, S_02877C_ALPHA_SRC_OPT(~0u)},
- {27936, S_02877C_ALPHA_DST_OPT(~0u)},
- {27950, S_02877C_ALPHA_COMB_FCN(~0u)},
- /* 1646 */
- {28359, S_028780_COLOR_SRCBLEND(~0u), 21, 972},
- {27907, S_028780_COLOR_COMB_FCN(~0u), 5, 993},
- {28464, S_028780_COLOR_DESTBLEND(~0u), 21, 972},
- {28480, S_028780_ALPHA_SRCBLEND(~0u), 21, 972},
- {27950, S_028780_ALPHA_COMB_FCN(~0u), 5, 993},
- {28495, S_028780_ALPHA_DESTBLEND(~0u), 21, 972},
- {28511, S_028780_SEPARATE_ALPHA_BLEND(~0u)},
- {842, S_028780_ENABLE(~0u)},
- {28532, S_028780_DISABLE_ROP3(~0u)},
+ {59653, S_028824_LINE_STIPPLE_RESET(~0u)},
+ {59672, S_028824_EXPAND_FULL_LENGTH(~0u)},
+ {59691, S_028824_FRACTIONAL_ACCUM(~0u)},
+ {59708, S_028824_DIAMOND_ADJUST(~0u)},
+ /* 1644 */
+ {59723, S_02882C_TRIANGLE_FILTER_DISABLE(~0u)},
+ {59747, S_02882C_LINE_FILTER_DISABLE(~0u)},
+ {59767, S_02882C_POINT_FILTER_DISABLE(~0u)},
+ {59788, S_02882C_RECTANGLE_FILTER_DISABLE(~0u)},
+ {59813, S_02882C_TRIANGLE_EXPAND_ENA(~0u)},
+ {59833, S_02882C_LINE_EXPAND_ENA(~0u)},
+ {59849, S_02882C_POINT_EXPAND_ENA(~0u)},
+ {59866, S_02882C_RECTANGLE_EXPAND_ENA(~0u)},
+ {59887, S_02882C_PRIM_EXPAND_CONSTANT(~0u)},
+ {59908, S_02882C_XMAX_RIGHT_EXCLUSION(~0u)},
+ {59929, S_02882C_YMAX_BOTTOM_EXCLUSION(~0u)},
/* 1655 */
- {28545, S_0287CC_SRC_STATE_ID(~0u)},
- /* 1656 */
- {28558, S_0287E4_BASE_ADDR_GFX6(~0u)},
- {28573, S_0287E4_BASE_ADDR_GFX9(~0u)},
- /* 1658 */
- {28666, S_0287F0_SOURCE_SELECT(~0u), 4, 998},
- {28712, S_0287F0_MAJOR_MODE(~0u), 2, 1002},
- {28723, S_0287F0_NOT_EOP(~0u)},
- {28731, S_0287F0_USE_OPAQUE(~0u)},
- {28742, S_0287F0_SPRITE_EN_R6XX(~0u)},
- {28757, S_0287F0_UNROLLED_INST(~0u)},
- {28771, S_0287F0_GRBM_SKEW_NO_DEC(~0u)},
- {28788, S_0287F0_REG_RT_INDEX(~0u)},
- /* 1666 */
- {28801, S_0287F8_ADDRESS_LOW(~0u)},
- /* 1667 */
- {28813, S_028800_STENCIL_ENABLE(~0u)},
- {20719, S_028800_Z_ENABLE(~0u)},
- {28828, S_028800_Z_WRITE_ENABLE(~0u)},
- {28843, S_028800_DEPTH_BOUNDS_ENABLE(~0u)},
- {21519, S_028800_ZFUNC(~0u), 8, 1004},
- {28958, S_028800_BACKFACE_ENABLE(~0u)},
- {29061, S_028800_STENCILFUNC(~0u), 8, 1012},
- {29073, S_028800_STENCILFUNC_BF(~0u), 8, 1012},
- {29088, S_028800_ENABLE_COLOR_WRITES_ON_DEPTH_FAIL(~0u)},
- {29122, S_028800_DISABLE_COLOR_WRITES_ON_DEPTH_PASS(~0u)},
+ {59951, S_028830_SMALL_PRIM_FILTER_ENABLE(~0u)},
+ {59723, S_028830_TRIANGLE_FILTER_DISABLE(~0u)},
+ {59747, S_028830_LINE_FILTER_DISABLE(~0u)},
+ {59767, S_028830_POINT_FILTER_DISABLE(~0u)},
+ {59788, S_028830_RECTANGLE_FILTER_DISABLE(~0u)},
+ /* 1660 */
+ {42242, S_028A00_HEIGHT(~0u)},
+ {42236, S_028A00_WIDTH(~0u)},
+ /* 1662 */
+ {59976, S_028A04_MIN_SIZE(~0u)},
+ {22515, S_028A04_MAX_SIZE(~0u)},
+ /* 1664 */
+ {42236, S_028A08_WIDTH(~0u)},
+ /* 1665 */
+ {59985, S_028A0C_LINE_PATTERN(~0u)},
+ {59998, S_028A0C_REPEAT_COUNT(~0u)},
+ {60011, S_028A0C_PATTERN_BIT_ORDER(~0u)},
+ {60029, S_028A0C_AUTO_RESET_CNTL(~0u)},
+ /* 1669 */
+ {60150, S_028A10_PATH_SELECT(~0u), 5, 1178},
+ /* 1670 */
+ {60162, S_028A14_TESS_MODE(~0u)},
+ /* 1671 */
+ {22108, S_028A20_REUSE_DEPTH(~0u)},
+ /* 1672 */
+ {22130, S_028A24_PRIM_TYPE(~0u), 18, 1183},
+ {60527, S_028A24_RETAIN_ORDER(~0u)},
+ {60540, S_028A24_RETAIN_QUADS(~0u)},
+ {60621, S_028A24_PRIM_ORDER(~0u), 5, 1201},
+ /* 1676 */
+ {22150, S_028A28_FIRST_DECR(~0u)},
/* 1677 */
- {29157, S_028804_MAX_ANCHOR_SAMPLES(~0u)},
- {29176, S_028804_PS_ITER_SAMPLES(~0u)},
- {29192, S_028804_MASK_EXPORT_NUM_SAMPLES(~0u)},
- {29216, S_028804_ALPHA_TO_MASK_NUM_SAMPLES(~0u)},
- {29242, S_028804_HIGH_QUALITY_INTERSECTIONS(~0u)},
- {29269, S_028804_INCOHERENT_EQAA_READS(~0u)},
- {29291, S_028804_INTERPOLATE_COMP_Z(~0u)},
- {29310, S_028804_INTERPOLATE_SRC_Z(~0u)},
- {29328, S_028804_STATIC_ANCHOR_ASSOCIATIONS(~0u)},
- {29355, S_028804_ALPHA_TO_MASK_EQAA_DISABLE(~0u)},
- {29382, S_028804_OVERRASTERIZATION_AMOUNT(~0u)},
- {29407, S_028804_ENABLE_POSTZ_OVERRASTERIZATION(~0u)},
- /* 1689 */
- {29438, S_028808_DISABLE_DUAL_QUAD(~0u)},
- {29456, S_028808_DEGAMMA_ENABLE(~0u)},
- {9808, S_028808_MODE(~0u), 7, 1020},
- {28540, S_028808_ROP3(~0u), 256, 1027},
- /* 1693 */
- {29761, S_02880C_Z_EXPORT_ENABLE(~0u)},
- {29777, S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(~0u)},
- {29808, S_02880C_STENCIL_OP_VAL_EXPORT_ENABLE(~0u)},
- {20777, S_02880C_Z_ORDER(~0u), 4, 1283},
- {29887, S_02880C_KILL_ENABLE(~0u)},
- {29899, S_02880C_COVERAGE_TO_MASK_ENABLE(~0u)},
- {29923, S_02880C_MASK_EXPORT_ENABLE(~0u)},
- {29942, S_02880C_EXEC_ON_HIER_FAIL(~0u)},
- {29960, S_02880C_EXEC_ON_NOOP(~0u)},
- {29973, S_02880C_ALPHA_TO_MASK_DISABLE(~0u)},
- {29995, S_02880C_DEPTH_BEFORE_SHADER(~0u)},
- {30085, S_02880C_CONSERVATIVE_Z_EXPORT(~0u), 4, 1287},
- {30107, S_02880C_DUAL_QUAD_DISABLE(~0u)},
- {30125, S_02880C_PRIMITIVE_ORDERED_PIXEL_SHADER(~0u)},
- {30156, S_02880C_EXEC_IF_OVERLAPPED(~0u)},
- {30175, S_02880C_POPS_OVERLAP_NUM_SAMPLES(~0u)},
- /* 1709 */
- {30200, S_028810_UCP_ENA_0(~0u)},
- {30210, S_028810_UCP_ENA_1(~0u)},
- {30220, S_028810_UCP_ENA_2(~0u)},
- {30230, S_028810_UCP_ENA_3(~0u)},
- {30240, S_028810_UCP_ENA_4(~0u)},
- {30250, S_028810_UCP_ENA_5(~0u)},
- {30260, S_028810_PS_UCP_Y_SCALE_NEG(~0u)},
- {30279, S_028810_PS_UCP_MODE(~0u)},
- {30291, S_028810_CLIP_DISABLE(~0u)},
- {30304, S_028810_UCP_CULL_ONLY_ENA(~0u)},
- {30322, S_028810_BOUNDARY_EDGE_FLAG_ENA(~0u)},
- {30345, S_028810_DX_CLIP_SPACE_DEF(~0u)},
- {30363, S_028810_DIS_CLIP_ERR_DETECT(~0u)},
- {30383, S_028810_VTX_KILL_OR(~0u)},
- {30395, S_028810_DX_RASTERIZATION_KILL(~0u)},
- {30417, S_028810_DX_LINEAR_ATTR_CLIP_ENA(~0u)},
- {30441, S_028810_VTE_VPORT_PROVOKE_DISABLE(~0u)},
- {30467, S_028810_ZCLIP_NEAR_DISABLE(~0u)},
- {30486, S_028810_ZCLIP_FAR_DISABLE(~0u)},
- /* 1728 */
- {30504, S_028814_CULL_FRONT(~0u)},
- {30515, S_028814_CULL_BACK(~0u)},
- {30525, S_028814_FACE(~0u)},
- {30540, S_028814_POLY_MODE(~0u), 2, 1291},
- {30606, S_028814_POLYMODE_FRONT_PTYPE(~0u), 3, 1293},
- {30627, S_028814_POLYMODE_BACK_PTYPE(~0u), 3, 1293},
- {30647, S_028814_POLY_OFFSET_FRONT_ENABLE(~0u)},
- {30672, S_028814_POLY_OFFSET_BACK_ENABLE(~0u)},
- {30696, S_028814_POLY_OFFSET_PARA_ENABLE(~0u)},
- {30720, S_028814_VTX_WINDOW_OFFSET_ENABLE(~0u)},
- {30745, S_028814_PROVOKING_VTX_LAST(~0u)},
- {30764, S_028814_PERSP_CORR_DIS(~0u)},
- {30779, S_028814_MULTI_PRIM_IB_ENA(~0u)},
- {30797, S_028814_RIGHT_TRIANGLE_ALTERNATE_GRADIENT_REF(~0u)},
- {30835, S_028814_NEW_QUAD_DECOMPOSITION(~0u)},
- /* 1743 */
- {30858, S_028818_VPORT_X_SCALE_ENA(~0u)},
- {30876, S_028818_VPORT_X_OFFSET_ENA(~0u)},
- {30895, S_028818_VPORT_Y_SCALE_ENA(~0u)},
- {30913, S_028818_VPORT_Y_OFFSET_ENA(~0u)},
- {30932, S_028818_VPORT_Z_SCALE_ENA(~0u)},
- {30950, S_028818_VPORT_Z_OFFSET_ENA(~0u)},
- {30969, S_028818_VTX_XY_FMT(~0u)},
- {30980, S_028818_VTX_Z_FMT(~0u)},
- {30990, S_028818_VTX_W0_FMT(~0u)},
- {31001, S_028818_PERFCOUNTER_REF(~0u)},
+ {22156, S_028A2C_DECR(~0u)},
+ /* 1678 */
+ {60632, S_028A30_COMP_X_EN(~0u)},
+ {60642, S_028A30_COMP_Y_EN(~0u)},
+ {60652, S_028A30_COMP_Z_EN(~0u)},
+ {60662, S_028A30_COMP_W_EN(~0u)},
+ {23319, S_028A30_STRIDE(~0u)},
+ {60672, S_028A30_SHIFT(~0u)},
+ /* 1684 */
+ {60632, S_028A34_COMP_X_EN(~0u)},
+ {60642, S_028A34_COMP_Y_EN(~0u)},
+ {60652, S_028A34_COMP_Z_EN(~0u)},
+ {60662, S_028A34_COMP_W_EN(~0u)},
+ {23319, S_028A34_STRIDE(~0u)},
+ {60672, S_028A34_SHIFT(~0u)},
+ /* 1690 */
+ {60837, S_028A38_X_CONV(~0u), 9, 1206},
+ {17707, S_028A38_X_OFFSET(~0u)},
+ {60844, S_028A38_Y_CONV(~0u), 9, 1206},
+ {50257, S_028A38_Y_OFFSET(~0u)},
+ {60851, S_028A38_Z_CONV(~0u), 9, 1206},
+ {60858, S_028A38_Z_OFFSET(~0u)},
+ {60867, S_028A38_W_CONV(~0u), 9, 1206},
+ {15767, S_028A38_W_OFFSET(~0u)},
+ /* 1698 */
+ {60837, S_028A3C_X_CONV(~0u), 9, 1206},
+ {17707, S_028A3C_X_OFFSET(~0u)},
+ {60844, S_028A3C_Y_CONV(~0u), 9, 1206},
+ {50257, S_028A3C_Y_OFFSET(~0u)},
+ {60851, S_028A3C_Z_CONV(~0u), 9, 1206},
+ {60858, S_028A3C_Z_OFFSET(~0u)},
+ {60867, S_028A3C_W_CONV(~0u), 9, 1206},
+ {15767, S_028A3C_W_OFFSET(~0u)},
+ /* 1706 */
+ {22279, S_028A40_MODE(~0u), 6, 1215},
+ {39383, S_028A40_RESERVED_0(~0u)},
+ {60992, S_028A40_CUT_MODE(~0u), 4, 1221},
+ {39401, S_028A40_RESERVED_1(~0u)},
+ {61001, S_028A40_GS_C_PACK_EN(~0u)},
+ {61014, S_028A40_RESERVED_2(~0u)},
+ {61025, S_028A40_ES_PASSTHRU(~0u)},
+ {61037, S_028A40_COMPUTE_MODE(~0u)},
+ {61050, S_028A40_FAST_COMPUTE_MODE(~0u)},
+ {61068, S_028A40_ELEMENT_INFO_EN(~0u)},
+ {61084, S_028A40_PARTIAL_THD_AT_EOI(~0u)},
+ {61103, S_028A40_SUPPRESS_CUTS(~0u)},
+ {61117, S_028A40_ES_WRITE_OPTIMIZE(~0u)},
+ {61135, S_028A40_GS_WRITE_OPTIMIZE(~0u)},
+ {61186, S_028A40_ONCHIP(~0u), 4, 1225},
+ /* 1721 */
+ {61193, S_028A44_ES_VERTS_PER_SUBGRP(~0u)},
+ {61213, S_028A44_GS_PRIMS_PER_SUBGRP(~0u)},
+ /* 1723 */
+ {61233, S_028A48_MSAA_ENABLE(~0u)},
+ {61245, S_028A48_VPORT_SCISSOR_ENABLE(~0u)},
+ {61266, S_028A48_LINE_STIPPLE_ENABLE(~0u)},
+ {61286, S_028A48_SEND_UNLIT_STILES_TO_PKR(~0u)},
+ /* 1727 */
+ {61311, S_028A4C_WALK_SIZE(~0u)},
+ {61321, S_028A4C_WALK_ALIGNMENT(~0u)},
+ {61336, S_028A4C_WALK_ALIGN8_PRIM_FITS_ST(~0u)},
+ {61361, S_028A4C_WALK_FENCE_ENABLE(~0u)},
+ {61379, S_028A4C_WALK_FENCE_SIZE(~0u)},
+ {61395, S_028A4C_SUPERTILE_WALK_ORDER_ENABLE(~0u)},
+ {61400, S_028A4C_TILE_WALK_ORDER_ENABLE(~0u)},
+ {61423, S_028A4C_TILE_COVER_DISABLE(~0u)},
+ {61442, S_028A4C_TILE_COVER_NO_SCISSOR(~0u)},
+ {61464, S_028A4C_ZMM_LINE_EXTENT(~0u)},
+ {61480, S_028A4C_ZMM_LINE_OFFSET(~0u)},
+ {61496, S_028A4C_ZMM_RECT_EXTENT(~0u)},
+ {61512, S_028A4C_KILL_PIX_POST_HI_Z(~0u)},
+ {61531, S_028A4C_KILL_PIX_POST_DETAIL_MASK(~0u)},
+ {61557, S_028A4C_PS_ITER_SAMPLE(~0u)},
+ {61572, S_028A4C_MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE(~0u)},
+ {61612, S_028A4C_MULTI_GPU_SUPERTILE_ENABLE(~0u)},
+ {61639, S_028A4C_GPU_ID_OVERRIDE_ENABLE(~0u)},
+ {61662, S_028A4C_GPU_ID_OVERRIDE(~0u)},
+ {61678, S_028A4C_MULTI_GPU_PRIM_DISCARD_ENABLE(~0u)},
+ {61708, S_028A4C_FORCE_EOV_CNTDWN_ENABLE(~0u)},
+ {61732, S_028A4C_FORCE_EOV_REZ_ENABLE(~0u)},
+ {61753, S_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE(~0u)},
+ {61783, S_028A4C_OUT_OF_ORDER_WATER_MARK(~0u)},
+ /* 1751 */
+ {22355, S_028A54_GS_PER_ES(~0u)},
+ /* 1752 */
+ {22369, S_028A58_ES_PER_GS(~0u)},
/* 1753 */
- {31017, S_02881C_CLIP_DIST_ENA_0(~0u)},
- {31033, S_02881C_CLIP_DIST_ENA_1(~0u)},
- {31049, S_02881C_CLIP_DIST_ENA_2(~0u)},
- {31065, S_02881C_CLIP_DIST_ENA_3(~0u)},
- {31081, S_02881C_CLIP_DIST_ENA_4(~0u)},
- {31097, S_02881C_CLIP_DIST_ENA_5(~0u)},
- {31113, S_02881C_CLIP_DIST_ENA_6(~0u)},
- {31129, S_02881C_CLIP_DIST_ENA_7(~0u)},
- {31145, S_02881C_CULL_DIST_ENA_0(~0u)},
- {31161, S_02881C_CULL_DIST_ENA_1(~0u)},
- {31177, S_02881C_CULL_DIST_ENA_2(~0u)},
- {31193, S_02881C_CULL_DIST_ENA_3(~0u)},
- {31209, S_02881C_CULL_DIST_ENA_4(~0u)},
- {31225, S_02881C_CULL_DIST_ENA_5(~0u)},
- {31241, S_02881C_CULL_DIST_ENA_6(~0u)},
- {31257, S_02881C_CULL_DIST_ENA_7(~0u)},
- {31273, S_02881C_USE_VTX_POINT_SIZE(~0u)},
- {31292, S_02881C_USE_VTX_EDGE_FLAG(~0u)},
- {31310, S_02881C_USE_VTX_RENDER_TARGET_INDX(~0u)},
- {31337, S_02881C_USE_VTX_VIEWPORT_INDX(~0u)},
- {31359, S_02881C_USE_VTX_KILL_FLAG(~0u)},
- {31377, S_02881C_VS_OUT_MISC_VEC_ENA(~0u)},
- {31397, S_02881C_VS_OUT_CCDIST0_VEC_ENA(~0u)},
- {31420, S_02881C_VS_OUT_CCDIST1_VEC_ENA(~0u)},
- {31443, S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(~0u)},
- {31468, S_02881C_USE_VTX_GS_CUT_FLAG(~0u)},
- {31488, S_02881C_USE_VTX_LINE_WIDTH(~0u)},
- {31507, S_02881C_USE_VTX_SHD_OBJPRIM_ID(~0u)},
- /* 1781 */
- {31530, S_028820_VTE_XY_INF_DISCARD(~0u)},
- {31549, S_028820_VTE_Z_INF_DISCARD(~0u)},
- {31567, S_028820_VTE_W_INF_DISCARD(~0u)},
- {31585, S_028820_VTE_0XNANINF_IS_0(~0u)},
- {31603, S_028820_VTE_XY_NAN_RETAIN(~0u)},
- {31621, S_028820_VTE_Z_NAN_RETAIN(~0u)},
- {31638, S_028820_VTE_W_NAN_RETAIN(~0u)},
- {31655, S_028820_VTE_W_RECIP_NAN_IS_0(~0u)},
- {31676, S_028820_VS_XY_NAN_TO_INF(~0u)},
- {31693, S_028820_VS_XY_INF_RETAIN(~0u)},
- {31710, S_028820_VS_Z_NAN_TO_INF(~0u)},
- {31726, S_028820_VS_Z_INF_RETAIN(~0u)},
- {31742, S_028820_VS_W_NAN_TO_INF(~0u)},
- {31758, S_028820_VS_W_INF_RETAIN(~0u)},
- {31774, S_028820_VS_CLIP_DIST_INF_DISCARD(~0u)},
- {31799, S_028820_VTE_NO_OUTPUT_NEG_0(~0u)},
- /* 1797 */
- {31819, S_028824_LINE_STIPPLE_RESET(~0u)},
- {31838, S_028824_EXPAND_FULL_LENGTH(~0u)},
- {31857, S_028824_FRACTIONAL_ACCUM(~0u)},
- {31874, S_028824_DIAMOND_ADJUST(~0u)},
- /* 1801 */
- {31889, S_02882C_TRIANGLE_FILTER_DISABLE(~0u)},
- {31913, S_02882C_LINE_FILTER_DISABLE(~0u)},
- {31933, S_02882C_POINT_FILTER_DISABLE(~0u)},
- {31954, S_02882C_RECTANGLE_FILTER_DISABLE(~0u)},
- {31979, S_02882C_TRIANGLE_EXPAND_ENA(~0u)},
- {31999, S_02882C_LINE_EXPAND_ENA(~0u)},
- {32015, S_02882C_POINT_EXPAND_ENA(~0u)},
- {32032, S_02882C_RECTANGLE_EXPAND_ENA(~0u)},
- {32053, S_02882C_PRIM_EXPAND_CONSTANT(~0u)},
- {32074, S_02882C_XMAX_RIGHT_EXCLUSION(~0u)},
- {32095, S_02882C_YMAX_BOTTOM_EXCLUSION(~0u)},
+ {22383, S_028A5C_GS_PER_VS(~0u)},
+ /* 1754 */
+ {669, S_028A60_OFFSET(~0u)},
+ /* 1755 */
+ {669, S_028A64_OFFSET(~0u)},
+ /* 1756 */
+ {669, S_028A68_OFFSET(~0u)},
+ /* 1757 */
+ {61875, S_028A6C_OUTPRIM_TYPE(~0u), 3, 1229},
+ {61888, S_028A6C_OUTPRIM_TYPE_1(~0u)},
+ {61903, S_028A6C_OUTPRIM_TYPE_2(~0u)},
+ {61918, S_028A6C_OUTPRIM_TYPE_3(~0u)},
+ {61933, S_028A6C_UNIQUE_TYPE_PER_STREAM(~0u)},
+ /* 1762 */
+ {203, S_028A7C_INDEX_TYPE(~0u), 3, 1232},
+ {62070, S_028A7C_SWAP_MODE(~0u), 4, 1235},
+ {62131, S_028A7C_BUF_TYPE(~0u), 3, 1239},
+ {62173, S_028A7C_RDREQ_POLICY_CIK(~0u), 2, 1242},
+ {62190, S_028A7C_RDREQ_POLICY(~0u)},
+ {40107, S_028A7C_ATC(~0u)},
+ {56796, S_028A7C_NOT_EOP(~0u)},
+ {62203, S_028A7C_REQ_PATH(~0u)},
+ {40128, S_028A7C_MTYPE(~0u)},
+ /* 1771 */
+ {22558, S_028A84_PRIMITIVEID_EN(~0u)},
+ {62212, S_028A84_DISABLE_RESET_ON_EOI(~0u)},
+ /* 1773 */
+ {63168, S_028A90_EVENT_TYPE(~0u), 59, 1244},
+ {63179, S_028A90_ADDRESS_HI_GFX6(~0u)},
+ {63195, S_028A90_EXTENDED_EVENT(~0u)},
+ /* 1776 */
+ {22655, S_028A94_RESET_EN(~0u)},
+ /* 1777 */
+ {63210, S_028AA8_PRIMGROUP_SIZE(~0u)},
+ {63225, S_028AA8_PARTIAL_VS_WAVE_ON(~0u)},
+ {63244, S_028AA8_SWITCH_ON_EOP(~0u)},
+ {63258, S_028AA8_PARTIAL_ES_WAVE_ON(~0u)},
+ {63277, S_028AA8_SWITCH_ON_EOI(~0u)},
+ {63291, S_028AA8_WD_SWITCH_ON_EOP(~0u)},
+ {63308, S_028AA8_MAX_PRIMGRP_IN_WAVE(~0u)},
+ /* 1784 */
+ {22747, S_028AAC_ITEMSIZE(~0u)},
+ /* 1785 */
+ {22747, S_028AB0_ITEMSIZE(~0u)},
+ /* 1786 */
+ {22783, S_028AB4_REUSE_OFF(~0u)},
+ /* 1787 */
+ {22797, S_028AB8_VTX_CNT_EN(~0u)},
+ /* 1788 */
+ {43461, S_028ABC_LINEAR(~0u)},
+ {63328, S_028ABC_FULL_CACHE(~0u)},
+ {63339, S_028ABC_HTILE_USES_PRELOAD_WIN(~0u)},
+ {63362, S_028ABC_PRELOAD(~0u)},
+ {63370, S_028ABC_PREFETCH_WIDTH(~0u)},
+ {63385, S_028ABC_PREFETCH_HEIGHT(~0u)},
+ {63401, S_028ABC_DST_OUTSIDE_ZERO_TO_ONE(~0u)},
+ {63425, S_028ABC_TC_COMPATIBLE(~0u)},
+ /* 1796 */
+ {63439, S_028AC0_COMPAREFUNC0(~0u), 8, 894},
+ {63452, S_028AC0_COMPAREVALUE0(~0u)},
+ {63466, S_028AC0_COMPAREMASK0(~0u)},
+ {48973, S_028AC0_ENABLE0(~0u)},
+ /* 1800 */
+ {63479, S_028AC4_COMPAREFUNC1(~0u), 8, 894},
+ {63492, S_028AC4_COMPAREVALUE1(~0u)},
+ {63506, S_028AC4_COMPAREMASK1(~0u)},
+ {48991, S_028AC4_ENABLE1(~0u)},
+ /* 1804 */
+ {7499, S_028AC8_START_X(~0u)},
+ {7515, S_028AC8_START_Y(~0u)},
+ {63519, S_028AC8_MAX_X(~0u)},
+ {63525, S_028AC8_MAX_Y(~0u)},
+ /* 1808 */
+ {23319, S_028AD4_STRIDE(~0u)},
+ /* 1809 */
+ {23319, S_028AE4_STRIDE(~0u)},
+ /* 1810 */
+ {23319, S_028AF4_STRIDE(~0u)},
+ /* 1811 */
+ {23319, S_028B04_STRIDE(~0u)},
/* 1812 */
- {32117, S_028830_SMALL_PRIM_FILTER_ENABLE(~0u)},
- {31889, S_028830_TRIANGLE_FILTER_DISABLE(~0u)},
- {31913, S_028830_LINE_FILTER_DISABLE(~0u)},
- {31933, S_028830_POINT_FILTER_DISABLE(~0u)},
- {31954, S_028830_RECTANGLE_FILTER_DISABLE(~0u)},
- {32142, S_028830_SRBSL_ENABLE(~0u)},
- /* 1818 */
- {13235, S_028A00_HEIGHT(~0u)},
- {13229, S_028A00_WIDTH(~0u)},
- /* 1820 */
- {32155, S_028A04_MIN_SIZE(~0u)},
- {32164, S_028A04_MAX_SIZE(~0u)},
- /* 1822 */
- {13229, S_028A08_WIDTH(~0u)},
- /* 1823 */
- {32173, S_028A0C_LINE_PATTERN(~0u)},
- {32186, S_028A0C_REPEAT_COUNT(~0u)},
- {32199, S_028A0C_PATTERN_BIT_ORDER(~0u)},
- {32217, S_028A0C_AUTO_RESET_CNTL(~0u)},
- /* 1827 */
- {32338, S_028A10_PATH_SELECT(~0u), 5, 1296},
- /* 1828 */
- {32350, S_028A14_TESS_MODE(~0u)},
+ {23312, S_028B30_VERTEX_STRIDE(~0u)},
+ /* 1813 */
+ {23333, S_028B38_MAX_VERT_OUT(~0u)},
+ /* 1814 */
+ {63531, S_028B50_ACCUM_ISOLINE(~0u)},
+ {63545, S_028B50_ACCUM_TRI(~0u)},
+ {63555, S_028B50_ACCUM_QUAD(~0u)},
+ {63566, S_028B50_DONUT_SPLIT(~0u)},
+ {63578, S_028B50_TRAP_SPLIT(~0u)},
+ /* 1819 */
+ {48157, S_028B54_LS_EN(~0u), 3, 1303},
+ {48151, S_028B54_HS_EN(~0u)},
+ {23383, S_028B54_ES_EN(~0u), 3, 1306},
+ {48145, S_028B54_GS_EN(~0u)},
+ {48139, S_028B54_VS_EN(~0u), 3, 1309},
+ {63712, S_028B54_DYNAMIC_HS(~0u)},
+ {46732, S_028B54_DISPATCH_DRAW_EN(~0u)},
+ {63723, S_028B54_DIS_DEALLOC_ACCUM_0(~0u)},
+ {63743, S_028B54_DIS_DEALLOC_ACCUM_1(~0u)},
+ {63763, S_028B54_VS_WAVE_ID_EN(~0u)},
/* 1829 */
- {32360, S_028A20_REUSE_DEPTH(~0u)},
- /* 1830 */
- {6343, S_028A24_PRIM_TYPE(~0u), 18, 1301},
- {32727, S_028A24_RETAIN_ORDER(~0u)},
- {32740, S_028A24_RETAIN_QUADS(~0u)},
- {32821, S_028A24_PRIM_ORDER(~0u), 5, 1319},
+ {63777, S_028B58_NUM_PATCHES(~0u)},
+ {63789, S_028B58_HS_NUM_INPUT_CP(~0u)},
+ {63805, S_028B58_HS_NUM_OUTPUT_CP(~0u)},
+ /* 1832 */
+ {22747, S_028B5C_ITEMSIZE(~0u)},
+ /* 1833 */
+ {22747, S_028B60_ITEMSIZE(~0u)},
/* 1834 */
- {32832, S_028A28_FIRST_DECR(~0u)},
+ {22747, S_028B64_ITEMSIZE(~0u)},
/* 1835 */
- {32838, S_028A2C_DECR(~0u)},
+ {22747, S_028B68_ITEMSIZE(~0u)},
/* 1836 */
- {32843, S_028A30_COMP_X_EN(~0u)},
- {32853, S_028A30_COMP_Y_EN(~0u)},
- {32863, S_028A30_COMP_Z_EN(~0u)},
- {32873, S_028A30_COMP_W_EN(~0u)},
- {8904, S_028A30_STRIDE(~0u)},
- {32883, S_028A30_SHIFT(~0u)},
- /* 1842 */
- {32843, S_028A34_COMP_X_EN(~0u)},
- {32853, S_028A34_COMP_Y_EN(~0u)},
- {32863, S_028A34_COMP_Z_EN(~0u)},
- {32873, S_028A34_COMP_W_EN(~0u)},
- {8904, S_028A34_STRIDE(~0u)},
- {32883, S_028A34_SHIFT(~0u)},
- /* 1848 */
- {33048, S_028A38_X_CONV(~0u), 9, 1324},
- {22058, S_028A38_X_OFFSET(~0u)},
- {33055, S_028A38_Y_CONV(~0u), 9, 1324},
- {22074, S_028A38_Y_OFFSET(~0u)},
- {33062, S_028A38_Z_CONV(~0u), 9, 1324},
- {33069, S_028A38_Z_OFFSET(~0u)},
- {33078, S_028A38_W_CONV(~0u), 9, 1324},
- {33085, S_028A38_W_OFFSET(~0u)},
- /* 1856 */
- {33048, S_028A3C_X_CONV(~0u), 9, 1324},
- {22058, S_028A3C_X_OFFSET(~0u)},
- {33055, S_028A3C_Y_CONV(~0u), 9, 1324},
- {22074, S_028A3C_Y_OFFSET(~0u)},
- {33062, S_028A3C_Z_CONV(~0u), 9, 1324},
- {33069, S_028A3C_Z_OFFSET(~0u)},
- {33078, S_028A3C_W_CONV(~0u), 9, 1324},
- {33085, S_028A3C_W_OFFSET(~0u)},
+ {209, S_028B6C_TYPE(~0u), 3, 1312},
+ {63911, S_028B6C_PARTITIONING(~0u), 4, 1315},
+ {63988, S_028B6C_TOPOLOGY(~0u), 4, 1319},
+ {63997, S_028B6C_RESERVED_REDUC_AXIS(~0u)},
+ {64017, S_028B6C_DEPRECATED(~0u)},
+ {64028, S_028B6C_NUM_DS_WAVES_PER_SIMD(~0u)},
+ {64050, S_028B6C_DISABLE_DONUTS(~0u)},
+ {62173, S_028B6C_RDREQ_POLICY_CIK(~0u), 3, 1323},
+ {62190, S_028B6C_RDREQ_POLICY(~0u)},
+ {64189, S_028B6C_DISTRIBUTION_MODE(~0u), 4, 1326},
+ {40128, S_028B6C_MTYPE(~0u)},
+ /* 1847 */
+ {64207, S_028B70_ALPHA_TO_MASK_ENABLE(~0u)},
+ {64228, S_028B70_ALPHA_TO_MASK_OFFSET0(~0u)},
+ {64250, S_028B70_ALPHA_TO_MASK_OFFSET1(~0u)},
+ {64272, S_028B70_ALPHA_TO_MASK_OFFSET2(~0u)},
+ {64294, S_028B70_ALPHA_TO_MASK_OFFSET3(~0u)},
+ {64316, S_028B70_OFFSET_ROUND(~0u)},
+ /* 1853 */
+ {64329, S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(~0u)},
+ {64357, S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(~0u)},
+ /* 1855 */
+ {7643, S_028B90_ENABLE(~0u)},
+ {23740, S_028B90_CNT(~0u)},
+ /* 1857 */
+ {64385, S_028B94_STREAMOUT_0_EN(~0u)},
+ {64400, S_028B94_STREAMOUT_1_EN(~0u)},
+ {64415, S_028B94_STREAMOUT_2_EN(~0u)},
+ {64430, S_028B94_STREAMOUT_3_EN(~0u)},
+ {64445, S_028B94_RAST_STREAM(~0u)},
+ {64457, S_028B94_RAST_STREAM_MASK(~0u)},
+ {64474, S_028B94_USE_RAST_STREAM_MASK(~0u)},
/* 1864 */
- {9808, S_028A40_MODE(~0u), 6, 1333},
- {8965, S_028A40_RESERVED_0(~0u)},
- {33212, S_028A40_CUT_MODE(~0u), 4, 1339},
- {8983, S_028A40_RESERVED_1(~0u)},
- {33221, S_028A40_GS_C_PACK_EN(~0u)},
- {33234, S_028A40_RESERVED_2(~0u)},
- {33245, S_028A40_ES_PASSTHRU(~0u)},
- {33257, S_028A40_COMPUTE_MODE(~0u)},
- {33270, S_028A40_FAST_COMPUTE_MODE(~0u)},
- {33288, S_028A40_ELEMENT_INFO_EN(~0u)},
- {33304, S_028A40_PARTIAL_THD_AT_EOI(~0u)},
- {33323, S_028A40_SUPPRESS_CUTS(~0u)},
- {33337, S_028A40_ES_WRITE_OPTIMIZE(~0u)},
- {33355, S_028A40_GS_WRITE_OPTIMIZE(~0u)},
- {33406, S_028A40_ONCHIP(~0u), 4, 1343},
- {33413, S_028A40_RESERVED_3(~0u)},
- {33424, S_028A40_RESERVED_4(~0u)},
- {33435, S_028A40_RESERVED_5(~0u)},
- /* 1882 */
- {33446, S_028A44_ES_VERTS_PER_SUBGRP(~0u)},
- {33466, S_028A44_GS_PRIMS_PER_SUBGRP(~0u)},
- {33486, S_028A44_GS_INST_PRIMS_IN_SUBGRP(~0u)},
- /* 1885 */
- {33510, S_028A48_MSAA_ENABLE(~0u)},
- {33522, S_028A48_VPORT_SCISSOR_ENABLE(~0u)},
- {33543, S_028A48_LINE_STIPPLE_ENABLE(~0u)},
- {33563, S_028A48_SEND_UNLIT_STILES_TO_PKR(~0u)},
- {33588, S_028A48_SCALE_LINE_WIDTH_PAD(~0u)},
- {33609, S_028A48_ALTERNATE_RBS_PER_TILE(~0u)},
- {33632, S_028A48_COARSE_TILE_STARTS_ON_EVEN_RB(~0u)},
- /* 1892 */
- {33662, S_028A4C_WALK_SIZE(~0u)},
- {33672, S_028A4C_WALK_ALIGNMENT(~0u)},
- {33687, S_028A4C_WALK_ALIGN8_PRIM_FITS_ST(~0u)},
- {33712, S_028A4C_WALK_FENCE_ENABLE(~0u)},
- {33730, S_028A4C_WALK_FENCE_SIZE(~0u)},
- {33746, S_028A4C_SUPERTILE_WALK_ORDER_ENABLE(~0u)},
- {33751, S_028A4C_TILE_WALK_ORDER_ENABLE(~0u)},
- {33774, S_028A4C_TILE_COVER_DISABLE(~0u)},
- {33793, S_028A4C_TILE_COVER_NO_SCISSOR(~0u)},
- {33815, S_028A4C_ZMM_LINE_EXTENT(~0u)},
- {33831, S_028A4C_ZMM_LINE_OFFSET(~0u)},
- {33847, S_028A4C_ZMM_RECT_EXTENT(~0u)},
- {33863, S_028A4C_KILL_PIX_POST_HI_Z(~0u)},
- {33882, S_028A4C_KILL_PIX_POST_DETAIL_MASK(~0u)},
- {33908, S_028A4C_PS_ITER_SAMPLE(~0u)},
- {33923, S_028A4C_MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE(~0u)},
- {33963, S_028A4C_MULTI_GPU_SUPERTILE_ENABLE(~0u)},
- {33990, S_028A4C_GPU_ID_OVERRIDE_ENABLE(~0u)},
- {34013, S_028A4C_GPU_ID_OVERRIDE(~0u)},
- {34029, S_028A4C_MULTI_GPU_PRIM_DISCARD_ENABLE(~0u)},
- {34059, S_028A4C_FORCE_EOV_CNTDWN_ENABLE(~0u)},
- {34083, S_028A4C_FORCE_EOV_REZ_ENABLE(~0u)},
- {34104, S_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE(~0u)},
- {34134, S_028A4C_OUT_OF_ORDER_WATER_MARK(~0u)},
- /* 1916 */
- {34158, S_028A54_GS_PER_ES(~0u)},
- /* 1917 */
- {34168, S_028A58_ES_PER_GS(~0u)},
- /* 1918 */
- {34178, S_028A5C_GS_PER_VS(~0u)},
- /* 1919 */
- {669, S_028A60_OFFSET(~0u)},
+ {64495, S_028B98_STREAM_0_BUFFER_EN(~0u)},
+ {64514, S_028B98_STREAM_1_BUFFER_EN(~0u)},
+ {64533, S_028B98_STREAM_2_BUFFER_EN(~0u)},
+ {64552, S_028B98_STREAM_3_BUFFER_EN(~0u)},
+ /* 1868 */
+ {64571, S_028BD4_DISTANCE_0(~0u)},
+ {64582, S_028BD4_DISTANCE_1(~0u)},
+ {64593, S_028BD4_DISTANCE_2(~0u)},
+ {64604, S_028BD4_DISTANCE_3(~0u)},
+ {64615, S_028BD4_DISTANCE_4(~0u)},
+ {64626, S_028BD4_DISTANCE_5(~0u)},
+ {64637, S_028BD4_DISTANCE_6(~0u)},
+ {64648, S_028BD4_DISTANCE_7(~0u)},
+ /* 1876 */
+ {64659, S_028BD8_DISTANCE_8(~0u)},
+ {64670, S_028BD8_DISTANCE_9(~0u)},
+ {64681, S_028BD8_DISTANCE_10(~0u)},
+ {64693, S_028BD8_DISTANCE_11(~0u)},
+ {64705, S_028BD8_DISTANCE_12(~0u)},
+ {64717, S_028BD8_DISTANCE_13(~0u)},
+ {64729, S_028BD8_DISTANCE_14(~0u)},
+ {64741, S_028BD8_DISTANCE_15(~0u)},
+ /* 1884 */
+ {64753, S_028BDC_EXPAND_LINE_WIDTH(~0u)},
+ {64771, S_028BDC_LAST_PIXEL(~0u)},
+ {64782, S_028BDC_PERPENDICULAR_ENDCAP_ENA(~0u)},
+ {64807, S_028BDC_DX10_DIAMOND_TEST_ENA(~0u)},
+ /* 1888 */
+ {64829, S_028BE0_MSAA_NUM_SAMPLES(~0u)},
+ {64846, S_028BE0_AA_MASK_CENTROID_DTMN(~0u)},
+ {64868, S_028BE0_MAX_SAMPLE_DIST(~0u)},
+ {64884, S_028BE0_MSAA_EXPOSED_SAMPLES(~0u)},
+ {64905, S_028BE0_DETAIL_TO_EXPOSED_MODE(~0u)},
+ /* 1893 */
+ {64928, S_028BE4_PIX_CENTER(~0u)},
+ {64989, S_028BE4_ROUND_MODE(~0u), 4, 1330},
+ {65205, S_028BE4_QUANT_MODE(~0u), 8, 1334},
+ /* 1896 */
+ {65216, S_028BF8_S0_X(~0u)},
+ {65221, S_028BF8_S0_Y(~0u)},
+ {65226, S_028BF8_S1_X(~0u)},
+ {65231, S_028BF8_S1_Y(~0u)},
+ {65236, S_028BF8_S2_X(~0u)},
+ {65241, S_028BF8_S2_Y(~0u)},
+ {65246, S_028BF8_S3_X(~0u)},
+ {65251, S_028BF8_S3_Y(~0u)},
+ /* 1904 */
+ {65256, S_028BFC_S4_X(~0u)},
+ {65261, S_028BFC_S4_Y(~0u)},
+ {65266, S_028BFC_S5_X(~0u)},
+ {65271, S_028BFC_S5_Y(~0u)},
+ {65276, S_028BFC_S6_X(~0u)},
+ {65281, S_028BFC_S6_Y(~0u)},
+ {65286, S_028BFC_S7_X(~0u)},
+ {65291, S_028BFC_S7_Y(~0u)},
+ /* 1912 */
+ {65296, S_028C00_S8_X(~0u)},
+ {65301, S_028C00_S8_Y(~0u)},
+ {65306, S_028C00_S9_X(~0u)},
+ {65311, S_028C00_S9_Y(~0u)},
+ {65316, S_028C00_S10_X(~0u)},
+ {65322, S_028C00_S10_Y(~0u)},
+ {65328, S_028C00_S11_X(~0u)},
+ {65334, S_028C00_S11_Y(~0u)},
/* 1920 */
- {669, S_028A64_OFFSET(~0u)},
- /* 1921 */
- {669, S_028A68_OFFSET(~0u)},
- /* 1922 */
- {34256, S_028A6C_OUTPRIM_TYPE(~0u), 3, 1347},
- {34269, S_028A6C_OUTPRIM_TYPE_1(~0u)},
- {34284, S_028A6C_OUTPRIM_TYPE_2(~0u)},
- {34299, S_028A6C_OUTPRIM_TYPE_3(~0u)},
- {34314, S_028A6C_UNIQUE_TYPE_PER_STREAM(~0u)},
- /* 1927 */
- {203, S_028A7C_INDEX_TYPE(~0u), 3, 1350},
- {34451, S_028A7C_SWAP_MODE(~0u), 4, 1353},
- {34512, S_028A7C_BUF_TYPE(~0u), 3, 1357},
- {34554, S_028A7C_RDREQ_POLICY_CIK(~0u), 2, 1360},
- {34571, S_028A7C_RDREQ_POLICY(~0u)},
- {9689, S_028A7C_ATC(~0u)},
- {28723, S_028A7C_NOT_EOP(~0u)},
- {34584, S_028A7C_REQ_PATH(~0u)},
- {9710, S_028A7C_MTYPE(~0u)},
- {8334, S_028A7C_PRIMGEN_EN(~0u)},
- /* 1937 */
- {34593, S_028A84_PRIMITIVEID_EN(~0u)},
- {34608, S_028A84_DISABLE_RESET_ON_EOI(~0u)},
- {34629, S_028A84_NGG_DISABLE_PROVOK_REUSE(~0u)},
- /* 1940 */
- {35689, S_028A90_EVENT_TYPE(~0u), 63, 1362},
- {35700, S_028A90_ADDRESS_HI_GFX6(~0u)},
- {35716, S_028A90_EXTENDED_EVENT(~0u)},
- {35731, S_028A90_ADDRESS_HI_GFX9(~0u)},
+ {65340, S_028C04_S12_X(~0u)},
+ {65346, S_028C04_S12_Y(~0u)},
+ {65352, S_028C04_S13_X(~0u)},
+ {65358, S_028C04_S13_Y(~0u)},
+ {65364, S_028C04_S14_X(~0u)},
+ {65370, S_028C04_S14_Y(~0u)},
+ {65376, S_028C04_S15_X(~0u)},
+ {65382, S_028C04_S15_Y(~0u)},
+ /* 1928 */
+ {65216, S_028C08_S0_X(~0u)},
+ {65221, S_028C08_S0_Y(~0u)},
+ {65226, S_028C08_S1_X(~0u)},
+ {65231, S_028C08_S1_Y(~0u)},
+ {65236, S_028C08_S2_X(~0u)},
+ {65241, S_028C08_S2_Y(~0u)},
+ {65246, S_028C08_S3_X(~0u)},
+ {65251, S_028C08_S3_Y(~0u)},
+ /* 1936 */
+ {65256, S_028C0C_S4_X(~0u)},
+ {65261, S_028C0C_S4_Y(~0u)},
+ {65266, S_028C0C_S5_X(~0u)},
+ {65271, S_028C0C_S5_Y(~0u)},
+ {65276, S_028C0C_S6_X(~0u)},
+ {65281, S_028C0C_S6_Y(~0u)},
+ {65286, S_028C0C_S7_X(~0u)},
+ {65291, S_028C0C_S7_Y(~0u)},
/* 1944 */
- {35747, S_028A94_RESET_EN(~0u)},
- /* 1945 */
- {35756, S_028AA8_PRIMGROUP_SIZE(~0u)},
- {35771, S_028AA8_PARTIAL_VS_WAVE_ON(~0u)},
- {35790, S_028AA8_SWITCH_ON_EOP(~0u)},
- {35804, S_028AA8_PARTIAL_ES_WAVE_ON(~0u)},
- {35823, S_028AA8_SWITCH_ON_EOI(~0u)},
- {35837, S_028AA8_WD_SWITCH_ON_EOP(~0u)},
- {35854, S_028AA8_MAX_PRIMGRP_IN_WAVE(~0u)},
+ {65296, S_028C10_S8_X(~0u)},
+ {65301, S_028C10_S8_Y(~0u)},
+ {65306, S_028C10_S9_X(~0u)},
+ {65311, S_028C10_S9_Y(~0u)},
+ {65316, S_028C10_S10_X(~0u)},
+ {65322, S_028C10_S10_Y(~0u)},
+ {65328, S_028C10_S11_X(~0u)},
+ {65334, S_028C10_S11_Y(~0u)},
/* 1952 */
- {35874, S_028AAC_ITEMSIZE(~0u)},
- /* 1953 */
- {35874, S_028AB0_ITEMSIZE(~0u)},
- /* 1954 */
- {35883, S_028AB4_REUSE_OFF(~0u)},
- /* 1955 */
- {35893, S_028AB8_VTX_CNT_EN(~0u)},
- /* 1956 */
- {13808, S_028ABC_LINEAR(~0u)},
- {35904, S_028ABC_FULL_CACHE(~0u)},
- {35915, S_028ABC_HTILE_USES_PRELOAD_WIN(~0u)},
- {35938, S_028ABC_PRELOAD(~0u)},
- {35946, S_028ABC_PREFETCH_WIDTH(~0u)},
- {35961, S_028ABC_PREFETCH_HEIGHT(~0u)},
- {35977, S_028ABC_DST_OUTSIDE_ZERO_TO_ONE(~0u)},
- {36001, S_028ABC_TC_COMPATIBLE(~0u)},
- {13820, S_028ABC_PIPE_ALIGNED(~0u)},
- {13838, S_028ABC_RB_ALIGNED(~0u)},
- /* 1966 */
- {36015, S_028AC0_COMPAREFUNC0(~0u), 8, 1012},
- {36028, S_028AC0_COMPAREVALUE0(~0u)},
- {36042, S_028AC0_COMPAREMASK0(~0u)},
- {20738, S_028AC0_ENABLE0(~0u)},
- /* 1970 */
- {36055, S_028AC4_COMPAREFUNC1(~0u), 8, 1012},
- {36068, S_028AC4_COMPAREVALUE1(~0u)},
- {36082, S_028AC4_COMPAREMASK1(~0u)},
- {20756, S_028AC4_ENABLE1(~0u)},
- /* 1974 */
- {36095, S_028AC8_START_X(~0u)},
- {36103, S_028AC8_START_Y(~0u)},
- {36111, S_028AC8_MAX_X(~0u)},
- {36117, S_028AC8_MAX_Y(~0u)},
- /* 1978 */
- {8904, S_028AD4_STRIDE(~0u)},
- /* 1979 */
- {8904, S_028AE4_STRIDE(~0u)},
- /* 1980 */
- {8904, S_028AF4_STRIDE(~0u)},
- /* 1981 */
- {8904, S_028B04_STRIDE(~0u)},
- /* 1982 */
- {36123, S_028B30_VERTEX_STRIDE(~0u)},
- /* 1983 */
- {36137, S_028B38_MAX_VERT_OUT(~0u)},
+ {65340, S_028C14_S12_X(~0u)},
+ {65346, S_028C14_S12_Y(~0u)},
+ {65352, S_028C14_S13_X(~0u)},
+ {65358, S_028C14_S13_Y(~0u)},
+ {65364, S_028C14_S14_X(~0u)},
+ {65370, S_028C14_S14_Y(~0u)},
+ {65376, S_028C14_S15_X(~0u)},
+ {65382, S_028C14_S15_Y(~0u)},
+ /* 1960 */
+ {65216, S_028C18_S0_X(~0u)},
+ {65221, S_028C18_S0_Y(~0u)},
+ {65226, S_028C18_S1_X(~0u)},
+ {65231, S_028C18_S1_Y(~0u)},
+ {65236, S_028C18_S2_X(~0u)},
+ {65241, S_028C18_S2_Y(~0u)},
+ {65246, S_028C18_S3_X(~0u)},
+ {65251, S_028C18_S3_Y(~0u)},
+ /* 1968 */
+ {65256, S_028C1C_S4_X(~0u)},
+ {65261, S_028C1C_S4_Y(~0u)},
+ {65266, S_028C1C_S5_X(~0u)},
+ {65271, S_028C1C_S5_Y(~0u)},
+ {65276, S_028C1C_S6_X(~0u)},
+ {65281, S_028C1C_S6_Y(~0u)},
+ {65286, S_028C1C_S7_X(~0u)},
+ {65291, S_028C1C_S7_Y(~0u)},
+ /* 1976 */
+ {65296, S_028C20_S8_X(~0u)},
+ {65301, S_028C20_S8_Y(~0u)},
+ {65306, S_028C20_S9_X(~0u)},
+ {65311, S_028C20_S9_Y(~0u)},
+ {65316, S_028C20_S10_X(~0u)},
+ {65322, S_028C20_S10_Y(~0u)},
+ {65328, S_028C20_S11_X(~0u)},
+ {65334, S_028C20_S11_Y(~0u)},
/* 1984 */
- {36150, S_028B50_ACCUM_ISOLINE(~0u)},
- {36164, S_028B50_ACCUM_TRI(~0u)},
- {36174, S_028B50_ACCUM_QUAD(~0u)},
- {36185, S_028B50_DONUT_SPLIT(~0u)},
- {36197, S_028B50_TRAP_SPLIT(~0u)},
- /* 1989 */
- {19897, S_028B54_LS_EN(~0u), 3, 1425},
- {19891, S_028B54_HS_EN(~0u)},
- {19885, S_028B54_ES_EN(~0u), 3, 1428},
- {19879, S_028B54_GS_EN(~0u)},
- {19873, S_028B54_VS_EN(~0u), 3, 1431},
- {36331, S_028B54_DYNAMIC_HS(~0u)},
- {18186, S_028B54_DISPATCH_DRAW_EN(~0u)},
- {36342, S_028B54_DIS_DEALLOC_ACCUM_0(~0u)},
- {36362, S_028B54_DIS_DEALLOC_ACCUM_1(~0u)},
- {36382, S_028B54_VS_WAVE_ID_EN(~0u)},
- {8334, S_028B54_PRIMGEN_EN(~0u)},
- {36396, S_028B54_ORDERED_ID_MODE(~0u)},
- {35854, S_028B54_MAX_PRIMGRP_IN_WAVE(~0u)},
- {36412, S_028B54_GS_FAST_LAUNCH(~0u)},
- /* 2003 */
- {36427, S_028B58_NUM_PATCHES(~0u)},
- {36439, S_028B58_HS_NUM_INPUT_CP(~0u)},
- {36455, S_028B58_HS_NUM_OUTPUT_CP(~0u)},
- /* 2006 */
- {35874, S_028B5C_ITEMSIZE(~0u)},
- /* 2007 */
- {35874, S_028B60_ITEMSIZE(~0u)},
+ {65340, S_028C24_S12_X(~0u)},
+ {65346, S_028C24_S12_Y(~0u)},
+ {65352, S_028C24_S13_X(~0u)},
+ {65358, S_028C24_S13_Y(~0u)},
+ {65364, S_028C24_S14_X(~0u)},
+ {65370, S_028C24_S14_Y(~0u)},
+ {65376, S_028C24_S15_X(~0u)},
+ {65382, S_028C24_S15_Y(~0u)},
+ /* 1992 */
+ {65216, S_028C28_S0_X(~0u)},
+ {65221, S_028C28_S0_Y(~0u)},
+ {65226, S_028C28_S1_X(~0u)},
+ {65231, S_028C28_S1_Y(~0u)},
+ {65236, S_028C28_S2_X(~0u)},
+ {65241, S_028C28_S2_Y(~0u)},
+ {65246, S_028C28_S3_X(~0u)},
+ {65251, S_028C28_S3_Y(~0u)},
+ /* 2000 */
+ {65256, S_028C2C_S4_X(~0u)},
+ {65261, S_028C2C_S4_Y(~0u)},
+ {65266, S_028C2C_S5_X(~0u)},
+ {65271, S_028C2C_S5_Y(~0u)},
+ {65276, S_028C2C_S6_X(~0u)},
+ {65281, S_028C2C_S6_Y(~0u)},
+ {65286, S_028C2C_S7_X(~0u)},
+ {65291, S_028C2C_S7_Y(~0u)},
/* 2008 */
- {35874, S_028B64_ITEMSIZE(~0u)},
- /* 2009 */
- {35874, S_028B68_ITEMSIZE(~0u)},
- /* 2010 */
- {209, S_028B6C_TYPE(~0u), 3, 1434},
- {36561, S_028B6C_PARTITIONING(~0u), 4, 1437},
- {36638, S_028B6C_TOPOLOGY(~0u), 4, 1441},
- {36647, S_028B6C_RESERVED_REDUC_AXIS(~0u)},
- {36667, S_028B6C_DEPRECATED(~0u)},
- {36678, S_028B6C_NUM_DS_WAVES_PER_SIMD(~0u)},
- {36700, S_028B6C_DISABLE_DONUTS(~0u)},
- {34554, S_028B6C_RDREQ_POLICY_CIK(~0u), 3, 1445},
- {34571, S_028B6C_RDREQ_POLICY(~0u)},
- {36839, S_028B6C_DISTRIBUTION_MODE(~0u), 4, 1448},
- {9710, S_028B6C_MTYPE(~0u)},
- /* 2021 */
- {36857, S_028B70_ALPHA_TO_MASK_ENABLE(~0u)},
- {36878, S_028B70_ALPHA_TO_MASK_OFFSET0(~0u)},
- {36900, S_028B70_ALPHA_TO_MASK_OFFSET1(~0u)},
- {36922, S_028B70_ALPHA_TO_MASK_OFFSET2(~0u)},
- {36944, S_028B70_ALPHA_TO_MASK_OFFSET3(~0u)},
- {36966, S_028B70_OFFSET_ROUND(~0u)},
- /* 2027 */
- {36979, S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(~0u)},
- {37007, S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(~0u)},
+ {65296, S_028C30_S8_X(~0u)},
+ {65301, S_028C30_S8_Y(~0u)},
+ {65306, S_028C30_S9_X(~0u)},
+ {65311, S_028C30_S9_Y(~0u)},
+ {65316, S_028C30_S10_X(~0u)},
+ {65322, S_028C30_S10_Y(~0u)},
+ {65328, S_028C30_S11_X(~0u)},
+ {65334, S_028C30_S11_Y(~0u)},
+ /* 2016 */
+ {65340, S_028C34_S12_X(~0u)},
+ {65346, S_028C34_S12_Y(~0u)},
+ {65352, S_028C34_S13_X(~0u)},
+ {65358, S_028C34_S13_Y(~0u)},
+ {65364, S_028C34_S14_X(~0u)},
+ {65370, S_028C34_S14_Y(~0u)},
+ {65376, S_028C34_S15_X(~0u)},
+ {65382, S_028C34_S15_Y(~0u)},
+ /* 2024 */
+ {65388, S_028C38_AA_MASK_X0Y0(~0u)},
+ {65401, S_028C38_AA_MASK_X1Y0(~0u)},
+ /* 2026 */
+ {65414, S_028C3C_AA_MASK_X0Y1(~0u)},
+ {65427, S_028C3C_AA_MASK_X1Y1(~0u)},
+ /* 2028 */
+ {65440, S_028C40_REALIGN_DQUADS_AFTER_N_WAVES(~0u)},
/* 2029 */
- {842, S_028B90_ENABLE(~0u)},
- {18102, S_028B90_CNT(~0u)},
+ {65469, S_028C58_VTX_REUSE_DEPTH(~0u)},
+ /* 2030 */
+ {65485, S_028C5C_DEALLOC_DIST(~0u)},
/* 2031 */
- {37035, S_028B94_STREAMOUT_0_EN(~0u)},
- {37050, S_028B94_STREAMOUT_1_EN(~0u)},
- {37065, S_028B94_STREAMOUT_2_EN(~0u)},
- {37080, S_028B94_STREAMOUT_3_EN(~0u)},
- {37095, S_028B94_RAST_STREAM(~0u)},
- {37107, S_028B94_RAST_STREAM_MASK(~0u)},
- {37124, S_028B94_USE_RAST_STREAM_MASK(~0u)},
- {37145, S_028B94_EN_PRIMS_NEEDED_CNT(~0u)},
- /* 2039 */
- {37165, S_028B98_STREAM_0_BUFFER_EN(~0u)},
- {37184, S_028B98_STREAM_1_BUFFER_EN(~0u)},
- {37203, S_028B98_STREAM_2_BUFFER_EN(~0u)},
- {37222, S_028B98_STREAM_3_BUFFER_EN(~0u)},
- /* 2043 */
- {37241, S_028BD4_DISTANCE_0(~0u)},
- {37252, S_028BD4_DISTANCE_1(~0u)},
- {37263, S_028BD4_DISTANCE_2(~0u)},
- {37274, S_028BD4_DISTANCE_3(~0u)},
- {37285, S_028BD4_DISTANCE_4(~0u)},
- {37296, S_028BD4_DISTANCE_5(~0u)},
- {37307, S_028BD4_DISTANCE_6(~0u)},
- {37318, S_028BD4_DISTANCE_7(~0u)},
- /* 2051 */
- {37329, S_028BD8_DISTANCE_8(~0u)},
- {37340, S_028BD8_DISTANCE_9(~0u)},
- {37351, S_028BD8_DISTANCE_10(~0u)},
- {37363, S_028BD8_DISTANCE_11(~0u)},
- {37375, S_028BD8_DISTANCE_12(~0u)},
- {37387, S_028BD8_DISTANCE_13(~0u)},
- {37399, S_028BD8_DISTANCE_14(~0u)},
- {37411, S_028BD8_DISTANCE_15(~0u)},
- /* 2059 */
- {37423, S_028BDC_EXPAND_LINE_WIDTH(~0u)},
- {37441, S_028BDC_LAST_PIXEL(~0u)},
- {37452, S_028BDC_PERPENDICULAR_ENDCAP_ENA(~0u)},
- {37477, S_028BDC_DX10_DIAMOND_TEST_ENA(~0u)},
- /* 2063 */
- {37499, S_028BE0_MSAA_NUM_SAMPLES(~0u)},
- {37516, S_028BE0_AA_MASK_CENTROID_DTMN(~0u)},
- {37538, S_028BE0_MAX_SAMPLE_DIST(~0u)},
- {37554, S_028BE0_MSAA_EXPOSED_SAMPLES(~0u)},
- {37575, S_028BE0_DETAIL_TO_EXPOSED_MODE(~0u)},
- {37598, S_028BE0_COVERAGE_TO_SHADER_SELECT(~0u)},
+ {50194, S_028C64_TILE_MAX(~0u)},
+ {65498, S_028C64_FMASK_TILE_MAX(~0u)},
+ /* 2033 */
+ {50194, S_028C68_TILE_MAX(~0u)},
+ /* 2034 */
+ {48842, S_028C6C_SLICE_START(~0u)},
+ {48854, S_028C6C_SLICE_MAX(~0u)},
+ /* 2036 */
+ {65564, S_028C70_ENDIAN(~0u), 4, 1342},
+ {21132, S_028C70_FORMAT(~0u), 23, 1346},
+ {45121, S_028C70_LINEAR_GENERAL(~0u)},
+ {65931, S_028C70_NUMBER_TYPE(~0u), 8, 1369},
+ {65987, S_028C70_COMP_SWAP(~0u), 4, 1377},
+ {57519, S_028C70_FAST_CLEAR(~0u)},
+ {49428, S_028C70_COMPRESSION(~0u)},
+ {65997, S_028C70_BLEND_CLAMP(~0u)},
+ {66009, S_028C70_BLEND_BYPASS(~0u)},
+ {66022, S_028C70_SIMPLE_FLOAT(~0u)},
+ {64989, S_028C70_ROUND_MODE(~0u)},
+ {66035, S_028C70_CMASK_IS_LINEAR(~0u)},
+ {66262, S_028C70_BLEND_OPT_DONT_RD_DST(~0u), 8, 1381},
+ {66284, S_028C70_BLEND_OPT_DISCARD_PIXEL(~0u), 8, 1381},
+ {66308, S_028C70_FMASK_COMPRESSION_DISABLE(~0u)},
+ {66334, S_028C70_FMASK_COMPRESS_1FRAG_ONLY(~0u)},
+ {66360, S_028C70_DCC_ENABLE(~0u)},
+ {66371, S_028C70_CMASK_ADDR_TYPE(~0u)},
+ /* 2054 */
+ {50022, S_028C74_TILE_MODE_INDEX(~0u)},
+ {66387, S_028C74_FMASK_TILE_MODE_INDEX(~0u)},
+ {66409, S_028C74_FMASK_BANK_HEIGHT(~0u)},
+ {50010, S_028C74_NUM_SAMPLES(~0u)},
+ {66427, S_028C74_NUM_FRAGMENTS(~0u)},
+ {66441, S_028C74_FORCE_DST_ALPHA_1(~0u)},
+ /* 2060 */
+ {52586, S_028C78_OVERWRITE_COMBINER_DISABLE(~0u)},
+ {66459, S_028C78_KEY_CLEAR_ENABLE(~0u)},
+ {66476, S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(~0u)},
+ {66504, S_028C78_MIN_COMPRESSED_BLOCK_SIZE(~0u)},
+ {66530, S_028C78_MAX_COMPRESSED_BLOCK_SIZE(~0u)},
+ {42723, S_028C78_COLOR_TRANSFORM(~0u)},
+ {66556, S_028C78_INDEPENDENT_64B_BLOCKS(~0u)},
+ {66579, S_028C78_LOSSY_RGB_PRECISION(~0u)},
+ {66599, S_028C78_LOSSY_ALPHA_PRECISION(~0u)},
/* 2069 */
- {37624, S_028BE4_PIX_CENTER(~0u)},
- {37685, S_028BE4_ROUND_MODE(~0u), 4, 1452},
- {37901, S_028BE4_QUANT_MODE(~0u), 8, 1456},
- /* 2072 */
- {37912, S_028BF8_S0_X(~0u)},
- {37917, S_028BF8_S0_Y(~0u)},
- {37922, S_028BF8_S1_X(~0u)},
- {37927, S_028BF8_S1_Y(~0u)},
- {37932, S_028BF8_S2_X(~0u)},
- {37937, S_028BF8_S2_Y(~0u)},
- {37942, S_028BF8_S3_X(~0u)},
- {37947, S_028BF8_S3_Y(~0u)},
- /* 2080 */
- {37952, S_028BFC_S4_X(~0u)},
- {37957, S_028BFC_S4_Y(~0u)},
- {37962, S_028BFC_S5_X(~0u)},
- {37967, S_028BFC_S5_Y(~0u)},
- {37972, S_028BFC_S6_X(~0u)},
- {37977, S_028BFC_S6_Y(~0u)},
- {37982, S_028BFC_S7_X(~0u)},
- {37987, S_028BFC_S7_Y(~0u)},
- /* 2088 */
- {37992, S_028C00_S8_X(~0u)},
- {37997, S_028C00_S8_Y(~0u)},
- {38002, S_028C00_S9_X(~0u)},
- {38007, S_028C00_S9_Y(~0u)},
- {38012, S_028C00_S10_X(~0u)},
- {38018, S_028C00_S10_Y(~0u)},
- {38024, S_028C00_S11_X(~0u)},
- {38030, S_028C00_S11_Y(~0u)},
+ {50194, S_028C80_TILE_MAX(~0u)},
+ /* 2070 */
+ {50194, S_028C88_TILE_MAX(~0u)},
+ /* 2071 */
+ {33200, S_008008_ME0PIPE1_CMDFIFO_AVAIL(~0u)},
+ {33223, S_008008_ME0PIPE1_CF_RQ_PENDING(~0u)},
+ {33246, S_008008_ME0PIPE1_PF_RQ_PENDING(~0u)},
+ {33269, S_008008_ME1PIPE0_RQ_PENDING(~0u)},
+ {33289, S_008008_ME1PIPE1_RQ_PENDING(~0u)},
+ {33309, S_008008_ME1PIPE2_RQ_PENDING(~0u)},
+ {33329, S_008008_ME1PIPE3_RQ_PENDING(~0u)},
+ {33349, S_008008_ME2PIPE0_RQ_PENDING(~0u)},
+ {33369, S_008008_ME2PIPE1_RQ_PENDING(~0u)},
+ {33389, S_008008_ME2PIPE2_RQ_PENDING(~0u)},
+ {33409, S_008008_ME2PIPE3_RQ_PENDING(~0u)},
+ {33429, S_008008_RLC_RQ_PENDING(~0u)},
+ {66621, S_008008_UTCL2_BUSY(~0u)},
+ {66632, S_008008_EA_BUSY(~0u)},
+ {66640, S_008008_RMI_BUSY(~0u)},
+ {66649, S_008008_UTCL2_RQ_PENDING(~0u)},
+ {66666, S_008008_CPF_RQ_PENDING(~0u)},
+ {66681, S_008008_EA_LINK_BUSY(~0u)},
+ {33444, S_008008_RLC_BUSY(~0u)},
+ {33453, S_008008_TC_BUSY(~0u)},
+ {33461, S_008008_TCC_CC_RESIDENT(~0u)},
+ {33477, S_008008_CPF_BUSY(~0u)},
+ {33486, S_008008_CPC_BUSY(~0u)},
+ {33495, S_008008_CPG_BUSY(~0u)},
+ {66694, S_008008_CPAXI_BUSY(~0u)},
/* 2096 */
- {38036, S_028C04_S12_X(~0u)},
- {38042, S_028C04_S12_Y(~0u)},
- {38048, S_028C04_S13_X(~0u)},
- {38054, S_028C04_S13_Y(~0u)},
- {38060, S_028C04_S14_X(~0u)},
- {38066, S_028C04_S14_Y(~0u)},
- {38072, S_028C04_S15_X(~0u)},
- {38078, S_028C04_S15_Y(~0u)},
- /* 2104 */
- {37912, S_028C08_S0_X(~0u)},
- {37917, S_028C08_S0_Y(~0u)},
- {37922, S_028C08_S1_X(~0u)},
- {37927, S_028C08_S1_Y(~0u)},
- {37932, S_028C08_S2_X(~0u)},
- {37937, S_028C08_S2_Y(~0u)},
- {37942, S_028C08_S3_X(~0u)},
- {37947, S_028C08_S3_Y(~0u)},
- /* 2112 */
- {37952, S_028C0C_S4_X(~0u)},
- {37957, S_028C0C_S4_Y(~0u)},
- {37962, S_028C0C_S5_X(~0u)},
- {37967, S_028C0C_S5_Y(~0u)},
- {37972, S_028C0C_S6_X(~0u)},
- {37977, S_028C0C_S6_Y(~0u)},
- {37982, S_028C0C_S7_X(~0u)},
- {37987, S_028C0C_S7_Y(~0u)},
+ {33504, S_008010_ME0PIPE0_CMDFIFO_AVAIL(~0u)},
+ {66705, S_008010_RSMU_RQ_PENDING(~0u)},
+ {33543, S_008010_ME0PIPE0_CF_RQ_PENDING(~0u)},
+ {33566, S_008010_ME0PIPE0_PF_RQ_PENDING(~0u)},
+ {33589, S_008010_GDS_DMA_RQ_PENDING(~0u)},
+ {33608, S_008010_DB_CLEAN(~0u)},
+ {33617, S_008010_CB_CLEAN(~0u)},
+ {33626, S_008010_TA_BUSY(~0u)},
+ {33634, S_008010_GDS_BUSY(~0u)},
+ {33643, S_008010_WD_BUSY_NO_DMA(~0u)},
+ {33658, S_008010_VGT_BUSY(~0u)},
+ {33667, S_008010_IA_BUSY_NO_DMA(~0u)},
+ {33682, S_008010_IA_BUSY(~0u)},
+ {33690, S_008010_SX_BUSY(~0u)},
+ {33698, S_008010_WD_BUSY(~0u)},
+ {33706, S_008010_SPI_BUSY(~0u)},
+ {33715, S_008010_BCI_BUSY(~0u)},
+ {33724, S_008010_SC_BUSY(~0u)},
+ {33732, S_008010_PA_BUSY(~0u)},
+ {33740, S_008010_DB_BUSY(~0u)},
+ {33748, S_008010_CP_COHERENCY_BUSY(~0u)},
+ {32311, S_008010_CP_BUSY(~0u)},
+ {32533, S_008010_CB_BUSY(~0u)},
+ {33766, S_008010_GUI_ACTIVE(~0u)},
/* 2120 */
- {37992, S_028C10_S8_X(~0u)},
- {37997, S_028C10_S8_Y(~0u)},
- {38002, S_028C10_S9_X(~0u)},
- {38007, S_028C10_S9_Y(~0u)},
- {38012, S_028C10_S10_X(~0u)},
- {38018, S_028C10_S10_Y(~0u)},
- {38024, S_028C10_S11_X(~0u)},
- {38030, S_028C10_S11_Y(~0u)},
- /* 2128 */
- {38036, S_028C14_S12_X(~0u)},
- {38042, S_028C14_S12_Y(~0u)},
- {38048, S_028C14_S13_X(~0u)},
- {38054, S_028C14_S13_Y(~0u)},
- {38060, S_028C14_S14_X(~0u)},
- {38066, S_028C14_S14_Y(~0u)},
- {38072, S_028C14_S15_X(~0u)},
- {38078, S_028C14_S15_Y(~0u)},
- /* 2136 */
- {37912, S_028C18_S0_X(~0u)},
- {37917, S_028C18_S0_Y(~0u)},
- {37922, S_028C18_S1_X(~0u)},
- {37927, S_028C18_S1_Y(~0u)},
- {37932, S_028C18_S2_X(~0u)},
- {37937, S_028C18_S2_Y(~0u)},
- {37942, S_028C18_S3_X(~0u)},
- {37947, S_028C18_S3_Y(~0u)},
+ {33608, S_008014_DB_CLEAN(~0u)},
+ {33617, S_008014_CB_CLEAN(~0u)},
+ {66640, S_008014_RMI_BUSY(~0u)},
+ {33715, S_008014_BCI_BUSY(~0u)},
+ {33658, S_008014_VGT_BUSY(~0u)},
+ {33732, S_008014_PA_BUSY(~0u)},
+ {33626, S_008014_TA_BUSY(~0u)},
+ {33690, S_008014_SX_BUSY(~0u)},
+ {33706, S_008014_SPI_BUSY(~0u)},
+ {33724, S_008014_SC_BUSY(~0u)},
+ {33740, S_008014_DB_BUSY(~0u)},
+ {32533, S_008014_CB_BUSY(~0u)},
+ /* 2132 */
+ {33608, S_008018_DB_CLEAN(~0u)},
+ {33617, S_008018_CB_CLEAN(~0u)},
+ {66640, S_008018_RMI_BUSY(~0u)},
+ {33715, S_008018_BCI_BUSY(~0u)},
+ {33658, S_008018_VGT_BUSY(~0u)},
+ {33732, S_008018_PA_BUSY(~0u)},
+ {33626, S_008018_TA_BUSY(~0u)},
+ {33690, S_008018_SX_BUSY(~0u)},
+ {33706, S_008018_SPI_BUSY(~0u)},
+ {33724, S_008018_SC_BUSY(~0u)},
+ {33740, S_008018_DB_BUSY(~0u)},
+ {32533, S_008018_CB_BUSY(~0u)},
/* 2144 */
- {37952, S_028C1C_S4_X(~0u)},
- {37957, S_028C1C_S4_Y(~0u)},
- {37962, S_028C1C_S5_X(~0u)},
- {37967, S_028C1C_S5_Y(~0u)},
- {37972, S_028C1C_S6_X(~0u)},
- {37977, S_028C1C_S6_Y(~0u)},
- {37982, S_028C1C_S7_X(~0u)},
- {37987, S_028C1C_S7_Y(~0u)},
- /* 2152 */
- {37992, S_028C20_S8_X(~0u)},
- {37997, S_028C20_S8_Y(~0u)},
- {38002, S_028C20_S9_X(~0u)},
- {38007, S_028C20_S9_Y(~0u)},
- {38012, S_028C20_S10_X(~0u)},
- {38018, S_028C20_S10_Y(~0u)},
- {38024, S_028C20_S11_X(~0u)},
- {38030, S_028C20_S11_Y(~0u)},
- /* 2160 */
- {38036, S_028C24_S12_X(~0u)},
- {38042, S_028C24_S12_Y(~0u)},
- {38048, S_028C24_S13_X(~0u)},
- {38054, S_028C24_S13_Y(~0u)},
- {38060, S_028C24_S14_X(~0u)},
- {38066, S_028C24_S14_Y(~0u)},
- {38072, S_028C24_S15_X(~0u)},
- {38078, S_028C24_S15_Y(~0u)},
+ {33608, S_008038_DB_CLEAN(~0u)},
+ {33617, S_008038_CB_CLEAN(~0u)},
+ {66640, S_008038_RMI_BUSY(~0u)},
+ {33715, S_008038_BCI_BUSY(~0u)},
+ {33658, S_008038_VGT_BUSY(~0u)},
+ {33732, S_008038_PA_BUSY(~0u)},
+ {33626, S_008038_TA_BUSY(~0u)},
+ {33690, S_008038_SX_BUSY(~0u)},
+ {33706, S_008038_SPI_BUSY(~0u)},
+ {33724, S_008038_SC_BUSY(~0u)},
+ {33740, S_008038_DB_BUSY(~0u)},
+ {32533, S_008038_CB_BUSY(~0u)},
+ /* 2156 */
+ {33608, S_00803C_DB_CLEAN(~0u)},
+ {33617, S_00803C_CB_CLEAN(~0u)},
+ {66640, S_00803C_RMI_BUSY(~0u)},
+ {33715, S_00803C_BCI_BUSY(~0u)},
+ {33658, S_00803C_VGT_BUSY(~0u)},
+ {33732, S_00803C_PA_BUSY(~0u)},
+ {33626, S_00803C_TA_BUSY(~0u)},
+ {33690, S_00803C_SX_BUSY(~0u)},
+ {33706, S_00803C_SPI_BUSY(~0u)},
+ {33724, S_00803C_SC_BUSY(~0u)},
+ {33740, S_00803C_DB_BUSY(~0u)},
+ {32533, S_00803C_CB_BUSY(~0u)},
/* 2168 */
- {37912, S_028C28_S0_X(~0u)},
- {37917, S_028C28_S0_Y(~0u)},
- {37922, S_028C28_S1_X(~0u)},
- {37927, S_028C28_S1_Y(~0u)},
- {37932, S_028C28_S2_X(~0u)},
- {37937, S_028C28_S2_Y(~0u)},
- {37942, S_028C28_S3_X(~0u)},
- {37947, S_028C28_S3_Y(~0u)},
- /* 2176 */
- {37952, S_028C2C_S4_X(~0u)},
- {37957, S_028C2C_S4_Y(~0u)},
- {37962, S_028C2C_S5_X(~0u)},
- {37967, S_028C2C_S5_Y(~0u)},
- {37972, S_028C2C_S6_X(~0u)},
- {37977, S_028C2C_S6_Y(~0u)},
- {37982, S_028C2C_S7_X(~0u)},
- {37987, S_028C2C_S7_Y(~0u)},
- /* 2184 */
- {37992, S_028C30_S8_X(~0u)},
- {37997, S_028C30_S8_Y(~0u)},
- {38002, S_028C30_S9_X(~0u)},
- {38007, S_028C30_S9_Y(~0u)},
- {38012, S_028C30_S10_X(~0u)},
- {38018, S_028C30_S10_Y(~0u)},
- {38024, S_028C30_S11_X(~0u)},
- {38030, S_028C30_S11_Y(~0u)},
- /* 2192 */
- {38036, S_028C34_S12_X(~0u)},
- {38042, S_028C34_S12_Y(~0u)},
- {38048, S_028C34_S13_X(~0u)},
- {38054, S_028C34_S13_Y(~0u)},
- {38060, S_028C34_S14_X(~0u)},
- {38066, S_028C34_S14_Y(~0u)},
- {38072, S_028C34_S15_X(~0u)},
- {38078, S_028C34_S15_Y(~0u)},
- /* 2200 */
- {38084, S_028C38_AA_MASK_X0Y0(~0u)},
- {38097, S_028C38_AA_MASK_X1Y0(~0u)},
- /* 2202 */
- {38110, S_028C3C_AA_MASK_X0Y1(~0u)},
- {38123, S_028C3C_AA_MASK_X1Y1(~0u)},
- /* 2204 */
- {38136, S_028C40_REALIGN_DQUADS_AFTER_N_WAVES(~0u)},
- {18019, S_028C40_LOAD_COLLISION_WAVEID(~0u)},
- {18041, S_028C40_LOAD_INTRAWAVE_COLLISION(~0u)},
- /* 2207 */
- {38165, S_028C58_VTX_REUSE_DEPTH(~0u)},
- /* 2208 */
- {38181, S_028C5C_DEALLOC_DIST(~0u)},
- /* 2209 */
- {21993, S_028C64_TILE_MAX(~0u)},
- {38194, S_028C64_FMASK_TILE_MAX(~0u)},
- /* 2211 */
- {21993, S_028C68_TILE_MAX(~0u)},
- /* 2212 */
- {20601, S_028C6C_SLICE_START(~0u)},
- {20613, S_028C6C_SLICE_MAX(~0u)},
- {38209, S_028C6C_MIP_LEVEL(~0u)},
- /* 2215 */
- {38270, S_028C70_ENDIAN(~0u), 4, 1464},
- {9248, S_028C70_FORMAT(~0u), 23, 1468},
- {16489, S_028C70_LINEAR_GENERAL(~0u)},
- {38637, S_028C70_NUMBER_TYPE(~0u), 8, 1491},
- {38693, S_028C70_COMP_SWAP(~0u), 4, 1499},
- {29505, S_028C70_FAST_CLEAR(~0u)},
- {21193, S_028C70_COMPRESSION(~0u)},
- {38703, S_028C70_BLEND_CLAMP(~0u)},
- {38715, S_028C70_BLEND_BYPASS(~0u)},
- {38728, S_028C70_SIMPLE_FLOAT(~0u)},
- {37685, S_028C70_ROUND_MODE(~0u)},
- {38741, S_028C70_CMASK_IS_LINEAR(~0u)},
- {38968, S_028C70_BLEND_OPT_DONT_RD_DST(~0u), 8, 1503},
- {38990, S_028C70_BLEND_OPT_DISCARD_PIXEL(~0u), 8, 1503},
- {39014, S_028C70_FMASK_COMPRESSION_DISABLE(~0u)},
- {39040, S_028C70_FMASK_COMPRESS_1FRAG_ONLY(~0u)},
- {39066, S_028C70_DCC_ENABLE(~0u)},
- {39077, S_028C70_CMASK_ADDR_TYPE(~0u)},
- /* 2233 */
- {21821, S_028C74_TILE_MODE_INDEX(~0u)},
- {39093, S_028C74_FMASK_TILE_MODE_INDEX(~0u)},
- {39115, S_028C74_FMASK_BANK_HEIGHT(~0u)},
- {21809, S_028C74_NUM_SAMPLES(~0u)},
- {39133, S_028C74_NUM_FRAGMENTS(~0u)},
- {39147, S_028C74_FORCE_DST_ALPHA_1(~0u)},
- {39165, S_028C74_MIP0_DEPTH(~0u)},
- {13803, S_028C74_META_LINEAR(~0u)},
- {39176, S_028C74_COLOR_SW_MODE(~0u)},
- {39190, S_028C74_FMASK_SW_MODE(~0u)},
- {39204, S_028C74_RESOURCE_TYPE(~0u), 4, 1511},
- {13838, S_028C74_RB_ALIGNED(~0u)},
- {13820, S_028C74_PIPE_ALIGNED(~0u)},
- /* 2246 */
- {24465, S_028C78_OVERWRITE_COMBINER_DISABLE(~0u)},
- {39218, S_028C78_KEY_CLEAR_ENABLE(~0u)},
- {39235, S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(~0u)},
- {39263, S_028C78_MIN_COMPRESSED_BLOCK_SIZE(~0u)},
- {39289, S_028C78_MAX_COMPRESSED_BLOCK_SIZE(~0u)},
- {13932, S_028C78_COLOR_TRANSFORM(~0u)},
- {39315, S_028C78_INDEPENDENT_64B_BLOCKS(~0u)},
- {39338, S_028C78_LOSSY_RGB_PRECISION(~0u)},
- {39358, S_028C78_LOSSY_ALPHA_PRECISION(~0u)},
- /* 2255 */
- {21993, S_028C80_TILE_MAX(~0u)},
- /* 2256 */
- {21993, S_028C88_TILE_MAX(~0u)},
- /* 2257 */
- {35747, S_03092C_RESET_EN(~0u)},
- {39380, S_03092C_MATCH_ALL_BITS(~0u)},
- /* 2259 */
- {39395, S_030944_BASE_HI(~0u)},
- /* 2260 */
- {39395, S_03094C_BASE_HI(~0u)},
- /* 2261 */
- {39395, S_030954_BASE_HI(~0u)},
- /* 2262 */
- {39395, S_03095C_BASE_HI(~0u)},
- /* 2263 */
- {35756, S_030960_PRIMGROUP_SIZE(~0u)},
- {35771, S_030960_PARTIAL_VS_WAVE_ON(~0u)},
- {35790, S_030960_SWITCH_ON_EOP(~0u)},
- {35804, S_030960_PARTIAL_ES_WAVE_ON(~0u)},
- {35823, S_030960_SWITCH_ON_EOI(~0u)},
- {35837, S_030960_WD_SWITCH_ON_EOP(~0u)},
- {39403, S_030960_EN_INST_OPT_BASIC(~0u)},
- {39421, S_030960_EN_INST_OPT_ADV(~0u)},
- {39437, S_030960_HW_USE_ONLY(~0u)},
- /* 2272 */
- {39449, S_030D24_DWB(~0u)},
- {21124, S_030D24_DIRTY(~0u)},
- /* 2274 */
- {39453, S_030E08_GRAD_ADJ_0(~0u)},
- {39464, S_030E08_GRAD_ADJ_1(~0u)},
- {39475, S_030E08_GRAD_ADJ_2(~0u)},
- {39486, S_030E08_GRAD_ADJ_3(~0u)},
- /* 2278 */
- {15469, S_031100_GPR_WRITE_PRIORITY(~0u)},
- {15488, S_031100_EXP_PRIORITY_ORDER(~0u)},
- {15507, S_031100_ENABLE_SQG_TOP_EVENTS(~0u)},
- {15529, S_031100_ENABLE_SQG_BOP_EVENTS(~0u)},
- {15551, S_031100_RSRC_MGMT_RESET(~0u)},
- {39497, S_031100_TTRACE_STALL_ALL(~0u)},
- {39514, S_031100_ALLOC_ARB_LRU_ENA(~0u)},
- {39532, S_031100_EXP_ARB_LRU_ENA(~0u)},
- {39548, S_031100_PS_PKR_PRIORITY_CNTL(~0u)},
+ {34175, S_0301F0_TC_NC_ACTION_ENA(~0u)},
+ {66721, S_0301F0_TC_WC_ACTION_ENA(~0u)},
+ {66738, S_0301F0_TC_INV_METADATA_ACTION_ENA(~0u)},
+ {34192, S_0301F0_TCL1_VOL_ACTION_ENA(~0u)},
+ {34230, S_0301F0_TC_WB_ACTION_ENA(~0u)},
+ {34021, S_0301F0_TCL1_ACTION_ENA(~0u)},
+ {34037, S_0301F0_TC_ACTION_ENA(~0u)},
+ {34051, S_0301F0_CB_ACTION_ENA(~0u)},
+ {34065, S_0301F0_DB_ACTION_ENA(~0u)},
+ {34079, S_0301F0_SH_KCACHE_ACTION_ENA(~0u)},
+ {34247, S_0301F0_SH_KCACHE_VOL_ACTION_ENA(~0u)},
+ {34100, S_0301F0_SH_ICACHE_ACTION_ENA(~0u)},
+ {34272, S_0301F0_SH_KCACHE_WB_ACTION_ENA(~0u)},
+ /* 2181 */
+ {34331, S_0301FC_MEID(~0u)},
+ {1005, S_0301FC_STATUS(~0u)},
+ /* 2183 */
+ {34350, S_008210_MEC1_BUSY(~0u)},
+ {34360, S_008210_MEC2_BUSY(~0u)},
+ {34370, S_008210_DC0_BUSY(~0u)},
+ {34379, S_008210_DC1_BUSY(~0u)},
+ {34388, S_008210_RCIU1_BUSY(~0u)},
+ {34399, S_008210_RCIU2_BUSY(~0u)},
+ {34410, S_008210_ROQ1_BUSY(~0u)},
+ {34420, S_008210_ROQ2_BUSY(~0u)},
+ {34430, S_008210_TCIU_BUSY(~0u)},
+ {34440, S_008210_SCRATCH_RAM_BUSY(~0u)},
+ {34457, S_008210_QU_BUSY(~0u)},
+ {66765, S_008210_UTCL2IU_BUSY(~0u)},
+ {66778, S_008210_SAVE_RESTORE_BUSY(~0u)},
+ {34478, S_008210_CPG_CPC_BUSY(~0u)},
+ {34491, S_008210_CPF_CPC_BUSY(~0u)},
+ {33486, S_008210_CPC_BUSY(~0u)},
+ /* 2199 */
+ {34988, S_008218_RCIU_TX_FREE_STALL(~0u)},
+ {35007, S_008218_RCIU_PRIV_VIOLATION(~0u)},
+ {35027, S_008218_TCIU_TX_FREE_STALL(~0u)},
+ {35046, S_008218_MEC1_DECODING_PACKET(~0u)},
+ {35067, S_008218_MEC1_WAIT_ON_RCIU(~0u)},
+ {35085, S_008218_MEC1_WAIT_ON_RCIU_READ(~0u)},
+ {35108, S_008218_MEC1_WAIT_ON_ROQ_DATA(~0u)},
+ {35130, S_008218_MEC2_DECODING_PACKET(~0u)},
+ {35151, S_008218_MEC2_WAIT_ON_RCIU(~0u)},
+ {35169, S_008218_MEC2_WAIT_ON_RCIU_READ(~0u)},
+ {35192, S_008218_MEC2_WAIT_ON_ROQ_DATA(~0u)},
+ {66796, S_008218_UTCL2IU_WAITING_ON_FREE(~0u)},
+ {66820, S_008218_UTCL2IU_WAITING_ON_TAGS(~0u)},
+ {66844, S_008218_UTCL1_WAITING_ON_TRANS(~0u)},
+ /* 2213 */
+ {35285, S_00821C_POST_WPTR_GFX_BUSY(~0u)},
+ {35304, S_00821C_CSF_BUSY(~0u)},
+ {35313, S_00821C_ROQ_ALIGN_BUSY(~0u)},
+ {35328, S_00821C_ROQ_RING_BUSY(~0u)},
+ {35342, S_00821C_ROQ_INDIRECT1_BUSY(~0u)},
+ {35361, S_00821C_ROQ_INDIRECT2_BUSY(~0u)},
+ {35380, S_00821C_ROQ_STATE_BUSY(~0u)},
+ {35395, S_00821C_ROQ_CE_RING_BUSY(~0u)},
+ {35412, S_00821C_ROQ_CE_INDIRECT1_BUSY(~0u)},
+ {35434, S_00821C_ROQ_CE_INDIRECT2_BUSY(~0u)},
+ {35456, S_00821C_SEMAPHORE_BUSY(~0u)},
+ {35471, S_00821C_INTERRUPT_BUSY(~0u)},
+ {34430, S_00821C_TCIU_BUSY(~0u)},
+ {35486, S_00821C_HQD_BUSY(~0u)},
+ {35495, S_00821C_PRT_BUSY(~0u)},
+ {66765, S_00821C_UTCL2IU_BUSY(~0u)},
+ {35504, S_00821C_CPF_GFX_BUSY(~0u)},
+ {35517, S_00821C_CPF_CMP_BUSY(~0u)},
+ {35530, S_00821C_GRBM_CPF_STAT_BUSY(~0u)},
+ {35549, S_00821C_CPC_CPF_BUSY(~0u)},
+ {33477, S_00821C_CPF_BUSY(~0u)},
+ /* 2234 */
+ {36137, S_008224_RING_FETCHING_DATA(~0u)},
+ {36156, S_008224_INDR1_FETCHING_DATA(~0u)},
+ {36176, S_008224_INDR2_FETCHING_DATA(~0u)},
+ {36196, S_008224_STATE_FETCHING_DATA(~0u)},
+ {36216, S_008224_TCIU_WAITING_ON_FREE(~0u)},
+ {36237, S_008224_TCIU_WAITING_ON_TAGS(~0u)},
+ {66796, S_008224_UTCL2IU_WAITING_ON_FREE(~0u)},
+ {66820, S_008224_UTCL2IU_WAITING_ON_TAGS(~0u)},
+ {66867, S_008224_GFX_UTCL1_WAITING_ON_TRANS(~0u)},
+ {66894, S_008224_CMP_UTCL1_WAITING_ON_TRANS(~0u)},
+ {66921, S_008224_RCIU_WAITING_ON_FREE(~0u)},
+ /* 2245 */
+ {37027, S_008670_CE_TO_CSF_NOT_RDY_TO_RCV(~0u)},
+ {37052, S_008670_CE_TO_RAM_INIT_FETCHER_NOT_RDY_TO_RCV(~0u)},
+ {37090, S_008670_CE_WAITING_ON_DATA_FROM_RAM_INIT_FETCHER(~0u)},
+ {37131, S_008670_CE_TO_RAM_INIT_NOT_RDY(~0u)},
+ {37154, S_008670_CE_TO_RAM_DUMP_NOT_RDY(~0u)},
+ {37177, S_008670_CE_TO_RAM_WRITE_NOT_RDY(~0u)},
+ {37201, S_008670_CE_TO_INC_FIFO_NOT_RDY_TO_RCV(~0u)},
+ {37231, S_008670_CE_TO_WR_FIFO_NOT_RDY_TO_RCV(~0u)},
+ {37260, S_008670_CE_WAITING_ON_BUFFER_DATA(~0u)},
+ {37286, S_008670_CE_WAITING_ON_CE_BUFFER_FLAG(~0u)},
+ {37315, S_008670_CE_WAITING_ON_DE_COUNTER(~0u)},
+ {37340, S_008670_CE_WAITING_ON_DE_COUNTER_UNDERFLOW(~0u)},
+ {36216, S_008670_TCIU_WAITING_ON_FREE(~0u)},
+ {36237, S_008670_TCIU_WAITING_ON_TAGS(~0u)},
+ {37375, S_008670_CE_STALLED_ON_TC_WR_CONFIRM(~0u)},
+ {37403, S_008670_CE_STALLED_ON_ATOMIC_RTN_DATA(~0u)},
+ {66796, S_008670_UTCL2IU_WAITING_ON_FREE(~0u)},
+ {66820, S_008670_UTCL2IU_WAITING_ON_TAGS(~0u)},
+ {66844, S_008670_UTCL1_WAITING_ON_TRANS(~0u)},
+ /* 2264 */
+ {35328, S_008680_ROQ_RING_BUSY(~0u)},
+ {35342, S_008680_ROQ_INDIRECT1_BUSY(~0u)},
+ {35361, S_008680_ROQ_INDIRECT2_BUSY(~0u)},
+ {35380, S_008680_ROQ_STATE_BUSY(~0u)},
+ {38633, S_008680_DC_BUSY(~0u)},
+ {66765, S_008680_UTCL2IU_BUSY(~0u)},
+ {38641, S_008680_PFP_BUSY(~0u)},
+ {38650, S_008680_MEQ_BUSY(~0u)},
+ {38659, S_008680_ME_BUSY(~0u)},
+ {38667, S_008680_QUERY_BUSY(~0u)},
+ {35456, S_008680_SEMAPHORE_BUSY(~0u)},
+ {35471, S_008680_INTERRUPT_BUSY(~0u)},
+ {38678, S_008680_SURFACE_SYNC_BUSY(~0u)},
+ {32235, S_008680_DMA_BUSY(~0u)},
+ {38696, S_008680_RCIU_BUSY(~0u)},
+ {34440, S_008680_SCRATCH_RAM_BUSY(~0u)},
+ {38719, S_008680_CE_BUSY(~0u)},
+ {34430, S_008680_TCIU_BUSY(~0u)},
+ {35395, S_008680_ROQ_CE_RING_BUSY(~0u)},
+ {35412, S_008680_ROQ_CE_INDIRECT1_BUSY(~0u)},
+ {35434, S_008680_ROQ_CE_INDIRECT2_BUSY(~0u)},
+ {32311, S_008680_CP_BUSY(~0u)},
+ /* 2286 */
+ {22130, S_030908_PRIM_TYPE(~0u), 29, 26},
/* 2287 */
- {15820, S_031104_VTX_DONE_DELAY(~0u)},
- {15835, S_031104_INTERP_ONE_PRIM_PER_ROW(~0u)},
- {39569, S_031104_BATON_RESET_DISABLE(~0u)},
- {15859, S_031104_PC_LIMIT_ENABLE(~0u)},
- {15875, S_031104_PC_LIMIT_STRICT(~0u)},
- {39589, S_031104_CRC_SIMD_ID_WADDR_DISABLE(~0u)},
- {39615, S_031104_LBPW_CU_CHK_MODE(~0u)},
- {39632, S_031104_LBPW_CU_CHK_CNT(~0u)},
- {39648, S_031104_CSC_PWR_SAVE_DISABLE(~0u)},
- {39669, S_031104_CSG_PWR_SAVE_DISABLE(~0u)},
- {15891, S_031104_PC_LIMIT_SIZE(~0u)},
- /* 2298 */
- {39690, S_031108_CONTEXT_SAVE_WAIT_GDS_REQUEST_CYCLE_OVHD(~0u)},
- {39731, S_031108_CONTEXT_SAVE_WAIT_GDS_GRANT_CYCLE_OVHD(~0u)},
- /* 2300 */
- {17939, S_00B1F0_SCRATCH_EN(~0u)},
- {17950, S_00B1F0_USER_SGPR(~0u)},
- {17960, S_00B1F0_TRAP_PRESENT(~0u)},
- {18011, S_00B1F0_EXCP_EN(~0u)},
- {18092, S_00B1F0_VGPR_COMP_CNT(~0u)},
- {18122, S_00B1F0_OC_LDS_EN(~0u)},
- {17991, S_00B1F0_LDS_SIZE(~0u)},
- {18066, S_00B1F0_SKIP_USGPR0(~0u)},
- {18078, S_00B1F0_USER_SGPR_MSB(~0u)},
- /* 2309 */
- {18214, S_00B204_GROUP_FIFO_DEPTH(~0u)},
- {39770, S_00B204_SPI_SHADER_LATE_ALLOC_GS(~0u)},
- /* 2311 */
- {17737, S_00B214_MEM_BASE(~0u)},
+ {203, S_03090C_INDEX_TYPE(~0u), 2, 55},
+ {66942, S_03090C_PRIMGEN_EN(~0u)},
+ /* 2289 */
+ {22655, S_03092C_RESET_EN(~0u)},
+ {66953, S_03092C_MATCH_ALL_BITS(~0u)},
+ /* 2291 */
+ {36906, S_03093C_OFFCHIP_BUFFERING(~0u)},
+ {38874, S_03093C_OFFCHIP_GRANULARITY(~0u)},
+ /* 2293 */
+ {880, S_030944_BASE_HI(~0u)},
+ /* 2294 */
+ {880, S_03094C_BASE_HI(~0u)},
+ /* 2295 */
+ {880, S_030954_BASE_HI(~0u)},
+ /* 2296 */
+ {880, S_03095C_BASE_HI(~0u)},
+ /* 2297 */
+ {63210, S_030960_PRIMGROUP_SIZE(~0u)},
+ {63225, S_030960_PARTIAL_VS_WAVE_ON(~0u)},
+ {63244, S_030960_SWITCH_ON_EOP(~0u)},
+ {63258, S_030960_PARTIAL_ES_WAVE_ON(~0u)},
+ {63277, S_030960_SWITCH_ON_EOI(~0u)},
+ {63291, S_030960_WD_SWITCH_ON_EOP(~0u)},
+ {66968, S_030960_EN_INST_OPT_BASIC(~0u)},
+ {66986, S_030960_EN_INST_OPT_ADV(~0u)},
+ {67002, S_030960_HW_USE_ONLY(~0u)},
+ /* 2306 */
+ {67014, S_030D20_TARGET_INST(~0u)},
+ {67026, S_030D20_TARGET_DATA(~0u)},
+ {39148, S_030D20_INVALIDATE(~0u)},
+ {26860, S_030D20_WRITEBACK(~0u)},
+ {46950, S_030D20_VOL(~0u)},
+ {67038, S_030D20_COMPLETE(~0u)},
/* 2312 */
- {18214, S_00B404_GROUP_FIFO_DEPTH(~0u)},
- /* 2313 */
- {17737, S_00B414_MEM_BASE(~0u)},
+ {67047, S_030D24_DWB(~0u)},
+ {49359, S_030D24_DIRTY(~0u)},
/* 2314 */
- {391, S_00B83C_DATA(~0u)},
- /* 2315 */
- {391, S_00B844_DATA(~0u)},
- /* 2316 */
- {441, S_036028_INDEX(~0u)},
- {14427, S_036028_ALWAYS(~0u)},
- {842, S_036028_ENABLE(~0u)},
- /* 2319 */
- {441, S_03602C_INDEX(~0u)},
- {14427, S_03602C_ALWAYS(~0u)},
- {842, S_03602C_ENABLE(~0u)},
- /* 2322 */
- {441, S_036030_INDEX(~0u)},
- {21631, S_036030_CLEAR(~0u)},
- {842, S_036030_ENABLE(~0u)},
- /* 2325 */
- {441, S_036034_INDEX(~0u)},
- {21631, S_036034_CLEAR(~0u)},
- {842, S_036034_ENABLE(~0u)},
- /* 2328 */
- {441, S_036038_INDEX(~0u)},
- {21631, S_036038_CLEAR(~0u)},
- {842, S_036038_ENABLE(~0u)},
- /* 2331 */
- {39395, S_028018_BASE_HI(~0u)},
- /* 2332 */
- {39795, S_02801C_X_MAX(~0u)},
- {39801, S_02801C_Y_MAX(~0u)},
- /* 2334 */
- {9248, S_028038_FORMAT(~0u)},
- {21809, S_028038_NUM_SAMPLES(~0u)},
- {13608, S_028038_SW_MODE(~0u)},
- {39807, S_028038_PARTIALLY_RESIDENT(~0u)},
- {39826, S_028038_FAULT_BEHAVIOR(~0u)},
- {39841, S_028038_ITERATE_FLUSH(~0u)},
- {39855, S_028038_MAXMIP(~0u)},
- {21837, S_028038_DECOMPRESS_ON_N_ZPLANES(~0u)},
- {21861, S_028038_ALLOW_EXPCLEAR(~0u)},
- {21876, S_028038_READ_SIZE(~0u)},
- {21886, S_028038_TILE_SURFACE_ENABLE(~0u)},
- {21906, S_028038_CLEAR_DISALLOWED(~0u)},
- {21923, S_028038_ZRANGE_PRECISION(~0u)},
+ {67051, S_030E08_GRAD_ADJ_0(~0u)},
+ {67062, S_030E08_GRAD_ADJ_1(~0u)},
+ {67073, S_030E08_GRAD_ADJ_2(~0u)},
+ {67084, S_030E08_GRAD_ADJ_3(~0u)},
+ /* 2318 */
+ {39448, S_008F0C_DST_SEL_X(~0u), 8, 186},
+ {39458, S_008F0C_DST_SEL_Y(~0u), 8, 186},
+ {39468, S_008F0C_DST_SEL_Z(~0u), 8, 186},
+ {39478, S_008F0C_DST_SEL_W(~0u), 8, 186},
+ {39662, S_008F0C_NUM_FORMAT(~0u), 8, 194},
+ {40054, S_008F0C_DATA_FORMAT(~0u), 16, 202},
+ {67095, S_008F0C_USER_VM_ENABLE(~0u)},
+ {67110, S_008F0C_USER_VM_MODE(~0u)},
+ {40079, S_008F0C_INDEX_STRIDE(~0u)},
+ {40092, S_008F0C_ADD_TID_ENABLE(~0u)},
+ {60841, S_008F0C_NV(~0u)},
+ {209, S_008F0C_TYPE(~0u), 4, 218},
+ /* 2330 */
+ {39313, S_008F14_BASE_ADDRESS_HI(~0u)},
+ {40203, S_008F14_MIN_LOD(~0u)},
+ {67695, S_008F14_DATA_FORMAT_GFX9(~0u), 64, 1389},
+ {67788, S_008F14_NUM_FORMAT_GFX9(~0u), 11, 1453},
+ {68023, S_008F14_NUM_FORMAT_FMASK(~0u), 13, 1464},
+ {68273, S_008F14_NUM_FORMAT_ASTC_2D(~0u), 14, 1477},
+ {68472, S_008F14_NUM_FORMAT_ASTC_3D(~0u), 10, 1491},
+ {60841, S_008F14_NV(~0u)},
+ {68491, S_008F14_META_DIRECT(~0u)},
+ /* 2339 */
+ {39448, S_008F1C_DST_SEL_X(~0u), 8, 186},
+ {39458, S_008F1C_DST_SEL_Y(~0u), 8, 186},
+ {39468, S_008F1C_DST_SEL_Z(~0u), 8, 186},
+ {39478, S_008F1C_DST_SEL_W(~0u), 8, 186},
+ {42269, S_008F1C_BASE_LEVEL(~0u)},
+ {42280, S_008F1C_LAST_LEVEL(~0u)},
+ {68503, S_008F1C_SW_MODE(~0u)},
+ {209, S_008F1C_TYPE(~0u), 16, 302},
/* 2347 */
- {9248, S_02803C_FORMAT(~0u)},
- {13608, S_02803C_SW_MODE(~0u)},
- {39807, S_02803C_PARTIALLY_RESIDENT(~0u)},
- {39826, S_02803C_FAULT_BEHAVIOR(~0u)},
- {39841, S_02803C_ITERATE_FLUSH(~0u)},
- {21861, S_02803C_ALLOW_EXPCLEAR(~0u)},
- {21966, S_02803C_TILE_STENCIL_DISABLE(~0u)},
- {21906, S_02803C_CLEAR_DISALLOWED(~0u)},
- /* 2355 */
- {39395, S_028044_BASE_HI(~0u)},
- /* 2356 */
- {39395, S_02804C_BASE_HI(~0u)},
+ {22114, S_008F20_DEPTH(~0u)},
+ {68511, S_008F20_PITCH_GFX9(~0u)},
+ {68618, S_008F20_BC_SWIZZLE(~0u), 6, 1501},
+ /* 2350 */
+ {42626, S_008F24_BASE_ARRAY(~0u)},
+ {68629, S_008F24_ARRAY_PITCH(~0u)},
+ {68641, S_008F24_META_DATA_ADDRESS(~0u)},
+ {68659, S_008F24_META_LINEAR(~0u)},
+ {68671, S_008F24_META_PIPE_ALIGNED(~0u)},
+ {68689, S_008F24_META_RB_ALIGNED(~0u)},
+ {68705, S_008F24_MAX_MIP(~0u)},
/* 2357 */
- {39395, S_028054_BASE_HI(~0u)},
- /* 2358 */
- {39395, S_02805C_BASE_HI(~0u)},
- /* 2359 */
- {39871, S_028060_PUNCHOUT_MODE(~0u), 4, 1515},
- {39885, S_028060_POPS_DRAIN_PS_ON_OVERLAP(~0u)},
- {39910, S_028060_DISALLOW_OVERFLOW(~0u)},
- /* 2362 */
- {39928, S_028064_PS_INVOKE_MASK(~0u)},
- /* 2363 */
- {39943, S_028068_EPITCH(~0u)},
- /* 2364 */
- {39943, S_02806C_EPITCH(~0u)},
- /* 2365 */
- {842, S_02835C_ENABLE(~0u)},
- {39950, S_02835C_NUM_SE(~0u)},
- {16326, S_02835C_NUM_RB_PER_SE(~0u)},
- {39957, S_02835C_DISABLE_SRBSL_DB_OPTIMIZED_PACKING(~0u)},
- /* 2369 */
- {39992, S_028360_PERFMON_ENABLE(~0u)},
+ {43397, S_008F38_LOD_BIAS(~0u)},
+ {43406, S_008F38_LOD_BIAS_SEC(~0u)},
+ {43468, S_008F38_XY_MAG_FILTER(~0u), 2, 334},
+ {43543, S_008F38_XY_MIN_FILTER(~0u), 4, 336},
+ {43623, S_008F38_Z_FILTER(~0u), 3, 340},
+ {43632, S_008F38_MIP_FILTER(~0u), 3, 340},
+ {43643, S_008F38_MIP_POINT_PRECLAMP(~0u)},
+ {68713, S_008F38_BLEND_ZERO_PRT(~0u)},
+ {43679, S_008F38_FILTER_PREC_FIX(~0u)},
+ {43695, S_008F38_ANISO_OVERRIDE(~0u)},
+ /* 2367 */
+ {43710, S_008F3C_BORDER_COLOR_PTR(~0u)},
+ {68728, S_008F3C_SKIP_DEGAMMA(~0u)},
+ {43869, S_008F3C_BORDER_COLOR_TYPE(~0u), 4, 343},
/* 2370 */
- {40007, S_0283A0_LEFT_QTR(~0u)},
- {40016, S_0283A0_LEFT_HALF(~0u)},
- {40026, S_0283A0_RIGHT_HALF(~0u)},
- {40037, S_0283A0_RIGHT_QTR(~0u)},
- /* 2374 */
- {40007, S_0283A4_LEFT_QTR(~0u)},
- {40016, S_0283A4_LEFT_HALF(~0u)},
- {40026, S_0283A4_RIGHT_HALF(~0u)},
- {40037, S_0283A4_RIGHT_QTR(~0u)},
- /* 2378 */
- {40047, S_0283A8_TOP_QTR(~0u)},
- {40055, S_0283A8_TOP_HALF(~0u)},
- {40064, S_0283A8_BOT_HALF(~0u)},
- {40073, S_0283A8_BOT_QTR(~0u)},
- /* 2382 */
- {40081, S_0283AC_LEFT_EYE_FOV_LEFT(~0u)},
- {40099, S_0283AC_LEFT_EYE_FOV_RIGHT(~0u)},
- {40118, S_0283AC_RIGHT_EYE_FOV_LEFT(~0u)},
- {40137, S_0283AC_RIGHT_EYE_FOV_RIGHT(~0u)},
- /* 2386 */
- {40157, S_0283B0_FOV_TOP(~0u)},
- {40165, S_0283B0_FOV_BOT(~0u)},
- /* 2388 */
- {39943, S_0287A0_EPITCH(~0u)},
- /* 2389 */
- {39943, S_0287A4_EPITCH(~0u)},
+ {44220, S_031100_GPR_WRITE_PRIORITY(~0u)},
+ {44239, S_031100_EXP_PRIORITY_ORDER(~0u)},
+ {44258, S_031100_ENABLE_SQG_TOP_EVENTS(~0u)},
+ {44280, S_031100_ENABLE_SQG_BOP_EVENTS(~0u)},
+ {44302, S_031100_RSRC_MGMT_RESET(~0u)},
+ {68741, S_031100_TTRACE_STALL_ALL(~0u)},
+ {68758, S_031100_ALLOC_ARB_LRU_ENA(~0u)},
+ {68776, S_031100_EXP_ARB_LRU_ENA(~0u)},
+ {68792, S_031100_PS_PKR_PRIORITY_CNTL(~0u)},
+ /* 2379 */
+ {44571, S_031104_VTX_DONE_DELAY(~0u), 16, 348},
+ {44586, S_031104_INTERP_ONE_PRIM_PER_ROW(~0u)},
+ {68813, S_031104_BATON_RESET_DISABLE(~0u)},
+ {44610, S_031104_PC_LIMIT_ENABLE(~0u)},
+ {44626, S_031104_PC_LIMIT_STRICT(~0u)},
+ {68833, S_031104_CRC_SIMD_ID_WADDR_DISABLE(~0u)},
+ {68859, S_031104_LBPW_CU_CHK_MODE(~0u)},
+ {68876, S_031104_LBPW_CU_CHK_CNT(~0u)},
+ {68892, S_031104_CSC_PWR_SAVE_DISABLE(~0u)},
+ {68913, S_031104_CSG_PWR_SAVE_DISABLE(~0u)},
+ {44642, S_031104_PC_LIMIT_SIZE(~0u)},
/* 2390 */
- {39943, S_0287A8_EPITCH(~0u)},
- /* 2391 */
- {39943, S_0287AC_EPITCH(~0u)},
+ {68934, S_031108_CONTEXT_SAVE_WAIT_GDS_REQUEST_CYCLE_OVHD(~0u)},
+ {68975, S_031108_CONTEXT_SAVE_WAIT_GDS_GRANT_CYCLE_OVHD(~0u)},
/* 2392 */
- {39943, S_0287B0_EPITCH(~0u)},
- /* 2393 */
- {39943, S_0287B4_EPITCH(~0u)},
- /* 2394 */
- {39943, S_0287B8_EPITCH(~0u)},
- /* 2395 */
- {39943, S_0287BC_EPITCH(~0u)},
- /* 2396 */
- {40173, S_028834_OBJ_ID_SEL(~0u)},
- {40184, S_028834_ADD_PIPED_PRIM_ID(~0u)},
- {40202, S_028834_EN_32BIT_OBJPRIMID(~0u)},
- /* 2399 */
- {40221, S_028838_VERTEX_REUSE_OFF(~0u)},
- {40238, S_028838_INDEX_BUF_EDGE_FLAG_ENA(~0u)},
- /* 2401 */
- {40262, S_02883C_DISCARD_0_AREA_TRIANGLES(~0u)},
- {40287, S_02883C_DISCARD_0_AREA_LINES(~0u)},
- {40308, S_02883C_DISCARD_0_AREA_POINTS(~0u)},
- {40330, S_02883C_DISCARD_0_AREA_RECTANGLES(~0u)},
- {40356, S_02883C_USE_PROVOKING_ZW(~0u)},
- /* 2406 */
- {40373, S_028A94_MAX_PRIMS_PER_SUBGROUP(~0u)},
- /* 2407 */
- {40396, S_028A98_OBJPRIM_ID_EN(~0u)},
- {40410, S_028A98_EN_REG_RT_INDEX(~0u)},
- {40426, S_028A98_EN_PIPELINE_PRIMID(~0u)},
- {40445, S_028A98_OBJECT_ID_INST_EN(~0u)},
- /* 2411 */
- {40463, S_028A9C_COMPOUND_INDEX_EN(~0u)},
- /* 2412 */
- {35689, S_028B9C_EVENT_TYPE(~0u)},
- {8893, S_028B9C_ADDRESS_HI(~0u)},
- {35716, S_028B9C_EXTENDED_EVENT(~0u)},
- /* 2415 */
- {40571, S_028C44_BINNING_MODE(~0u), 4, 1519},
- {40584, S_028C44_BIN_SIZE_X(~0u)},
- {40595, S_028C44_BIN_SIZE_Y(~0u)},
- {40606, S_028C44_BIN_SIZE_X_EXTEND(~0u)},
- {40624, S_028C44_BIN_SIZE_Y_EXTEND(~0u)},
- {40642, S_028C44_CONTEXT_STATES_PER_BIN(~0u)},
- {40665, S_028C44_PERSISTENT_STATES_PER_BIN(~0u)},
- {40691, S_028C44_DISABLE_START_OF_PRIM(~0u)},
- {40713, S_028C44_FPOVS_PER_BATCH(~0u)},
- {40729, S_028C44_OPTIMAL_BIN_SELECTION(~0u)},
+ {44818, S_0098F8_NUM_PIPES(~0u)},
+ {69014, S_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(~0u)},
+ {69040, S_0098F8_MAX_COMPRESSED_FRAGS(~0u)},
+ {44854, S_0098F8_BANK_INTERLEAVE_SIZE(~0u)},
+ {46305, S_0098F8_NUM_BANKS(~0u)},
+ {44899, S_0098F8_SHADER_ENGINE_TILE_SIZE(~0u)},
+ {69061, S_0098F8_NUM_SHADER_ENGINES_GFX9(~0u)},
+ {69085, S_0098F8_NUM_GPUS_GFX9(~0u)},
+ {44937, S_0098F8_MULTI_GPU_TILE_SIZE(~0u)},
+ {69099, S_0098F8_NUM_RB_PER_SE(~0u)},
+ {44957, S_0098F8_ROW_SIZE(~0u)},
+ {44966, S_0098F8_NUM_LOWER_PIPES(~0u)},
+ {69113, S_0098F8_SE_ENABLE(~0u)},
+ /* 2405 */
+ {45344, S_009910_ARRAY_MODE(~0u), 16, 368},
+ {45706, S_009910_PIPE_CONFIG(~0u), 15, 384},
+ {45896, S_009910_TILE_SPLIT(~0u), 7, 399},
+ {46346, S_009910_MICRO_TILE_MODE_NEW(~0u), 4, 422},
+ {46366, S_009910_SAMPLE_SPLIT(~0u)},
+ /* 2410 */
+ {43975, S_00B01C_CU_EN(~0u)},
+ {46388, S_00B01C_WAVE_LIMIT(~0u)},
+ {46399, S_00B01C_LOCK_LOW_THRESHOLD(~0u)},
+ {69123, S_00B01C_SIMD_DISABLE(~0u)},
+ /* 2414 */
+ {46418, S_00B028_VGPRS(~0u)},
+ {46424, S_00B028_SGPRS(~0u)},
+ {2676, S_00B028_PRIORITY(~0u)},
+ {46473, S_00B028_FLOAT_MODE(~0u), 241, 426},
+ {46484, S_00B028_PRIV(~0u)},
+ {46489, S_00B028_DX10_CLAMP(~0u)},
+ {46500, S_00B028_DEBUG_MODE(~0u)},
+ {46511, S_00B028_IEEE_MODE(~0u)},
+ {46521, S_00B028_CU_GROUP_DISABLE(~0u)},
+ {46548, S_00B028_CDBG_USER(~0u)},
+ {69136, S_00B028_FP16_OVFL(~0u)},
/* 2425 */
- {40751, S_028C48_MAX_ALLOC_COUNT(~0u)},
- {40767, S_028C48_MAX_PRIM_PER_BATCH(~0u)},
- /* 2427 */
- {40786, S_028C4C_OVER_RAST_ENABLE(~0u)},
- {40803, S_028C4C_OVER_RAST_SAMPLE_SELECT(~0u)},
- {40827, S_028C4C_UNDER_RAST_ENABLE(~0u)},
- {40845, S_028C4C_UNDER_RAST_SAMPLE_SELECT(~0u)},
- {40870, S_028C4C_PBB_UNCERTAINTY_REGION_ENABLE(~0u)},
- {40900, S_028C4C_ZMM_TRI_EXTENT(~0u)},
- {40915, S_028C4C_ZMM_TRI_OFFSET(~0u)},
- {40930, S_028C4C_OVERRIDE_OVER_RAST_INNER_TO_NORMAL(~0u)},
- {40965, S_028C4C_OVERRIDE_UNDER_RAST_INNER_TO_NORMAL(~0u)},
- {41001, S_028C4C_DEGENERATE_OVERRIDE_INNER_TO_NORMAL_DISABLE(~0u)},
- {41045, S_028C4C_UNCERTAINTY_REGION_MODE(~0u)},
- {41069, S_028C4C_OUTER_UNCERTAINTY_EDGERULE_OVERRIDE(~0u)},
- {41105, S_028C4C_INNER_UNCERTAINTY_EDGERULE_OVERRIDE(~0u)},
- {41141, S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(~0u)},
- {41167, S_028C4C_COVERAGE_AA_MASK_ENABLE(~0u)},
- {41191, S_028C4C_PREZ_AA_MASK_ENABLE(~0u)},
- {41211, S_028C4C_POSTZ_AA_MASK_ENABLE(~0u)},
- {41232, S_028C4C_CENTROID_SAMPLE_OVERRIDE(~0u)},
- /* 2445 */
- {41257, S_028C50_MAX_DEALLOCS_IN_WAVE(~0u)},
- /* 2446 */
- {41278, S_028C64_BASE_256B(~0u)},
- /* 2447 */
- {41288, S_028C68_MIP0_HEIGHT(~0u)},
- {41300, S_028C68_MIP0_WIDTH(~0u)},
- {13849, S_028C68_MAX_MIP(~0u)},
- /* 2450 */
- {41278, S_028C80_BASE_256B(~0u)},
+ {46558, S_00B02C_SCRATCH_EN(~0u)},
+ {46569, S_00B02C_USER_SGPR(~0u)},
+ {46579, S_00B02C_TRAP_PRESENT(~0u)},
+ {46592, S_00B02C_WAVE_CNT_EN(~0u)},
+ {46604, S_00B02C_EXTRA_LDS_SIZE(~0u)},
+ {46630, S_00B02C_EXCP_EN(~0u)},
+ {69146, S_00B02C_LOAD_COLLISION_WAVEID(~0u)},
+ {69168, S_00B02C_LOAD_INTRAWAVE_COLLISION(~0u)},
+ {69193, S_00B02C_SKIP_USGPR0(~0u)},
+ {69205, S_00B02C_USER_SGPR_MSB(~0u)},
+ /* 2435 */
+ {43975, S_00B118_CU_EN(~0u)},
+ {46388, S_00B118_WAVE_LIMIT(~0u)},
+ {46399, S_00B118_LOCK_LOW_THRESHOLD(~0u)},
+ {69123, S_00B118_SIMD_DISABLE(~0u)},
+ /* 2439 */
+ {46418, S_00B128_VGPRS(~0u)},
+ {46424, S_00B128_SGPRS(~0u)},
+ {2676, S_00B128_PRIORITY(~0u)},
+ {46473, S_00B128_FLOAT_MODE(~0u)},
+ {46484, S_00B128_PRIV(~0u)},
+ {46489, S_00B128_DX10_CLAMP(~0u)},
+ {46500, S_00B128_DEBUG_MODE(~0u)},
+ {46511, S_00B128_IEEE_MODE(~0u)},
+ {46638, S_00B128_VGPR_COMP_CNT(~0u)},
+ {46652, S_00B128_CU_GROUP_ENABLE(~0u)},
+ {46548, S_00B128_CDBG_USER(~0u)},
+ {69136, S_00B128_FP16_OVFL(~0u)},
/* 2451 */
- {41278, S_028C88_BASE_256B(~0u)},
- /* 2452 */
- {41278, S_028C98_BASE_256B(~0u)},
-};
-
-static const struct si_reg sid_reg_table[] = {
- {41311, R_2C3_DRAW_INDEX_LOC, 2, 0},
- {195, R_370_CONTROL, 4, 2},
- {1111, R_371_DST_ADDR_LO},
- {1123, R_372_DST_ADDR_HI},
- {41326, R_3F0_IB_BASE_LO},
- {41337, R_3F1_IB_BASE_HI},
- {195, R_3F2_CONTROL, 3, 6},
- {41348, R_410_CP_DMA_WORD0, 1, 9},
- {41361, R_411_CP_DMA_WORD1, 5, 10},
- {41374, R_412_CP_DMA_WORD2, 1, 15},
- {41387, R_413_CP_DMA_WORD3, 1, 16},
- {41400, R_414_COMMAND, 11, 17},
- {41408, R_500_DMA_DATA_WORD0, 4, 28},
- {1000, R_501_SRC_ADDR_LO},
- {1099, R_502_SRC_ADDR_HI},
- {1111, R_503_DST_ADDR_LO},
- {1123, R_504_DST_ADDR_HI},
- {41423, R_000E4C_SRBM_STATUS2, 21, 32},
- {41436, R_000E50_SRBM_STATUS, 20, 53},
- {41448, R_000E54_SRBM_STATUS3, 16, 73},
- {41461, R_00D034_SDMA0_STATUS_REG, 29, 89},
- {41478, R_00D834_SDMA1_STATUS_REG, 29, 89},
- {41495, R_008008_GRBM_STATUS2, 25, 118},
- {41508, R_008010_GRBM_STATUS, 25, 143},
- {41520, R_0084FC_CP_STRMOUT_CNTL, 1, 168},
- {41536, R_0085F0_CP_COHER_CNTL, 19, 169},
- {41550, R_0085F4_CP_COHER_SIZE},
- {41564, R_0085F8_CP_COHER_BASE},
- {41578, R_008014_GRBM_STATUS_SE0, 12, 188},
- {41594, R_008018_GRBM_STATUS_SE1, 12, 200},
- {41610, R_008038_GRBM_STATUS_SE2, 12, 212},
- {41626, R_00803C_GRBM_STATUS_SE3, 12, 224},
- {41520, R_0300FC_CP_STRMOUT_CNTL, 1, 236},
- {41642, R_0301E4_CP_COHER_BASE_HI, 1, 237},
- {41659, R_0301EC_CP_COHER_START_DELAY, 1, 238},
- {41536, R_0301F0_CP_COHER_CNTL, 29, 239},
- {41550, R_0301F4_CP_COHER_SIZE},
- {41564, R_0301F8_CP_COHER_BASE},
- {41680, R_0301FC_CP_COHER_STATUS, 4, 268},
- {41696, R_008210_CP_CPC_STATUS, 17, 272},
- {41710, R_008214_CP_CPC_BUSY_STAT, 28, 289},
- {41727, R_008218_CP_CPC_STALLED_STAT1, 17, 317},
- {41748, R_00821C_CP_CPF_STATUS, 22, 334},
- {41762, R_008220_CP_CPF_BUSY_STAT, 31, 356},
- {41779, R_008224_CP_CPF_STALLED_STAT1, 14, 387},
- {41800, R_030230_CP_COHER_SIZE_HI, 1, 401},
- {41817, R_0088B0_VGT_VTX_VECT_EJECT_REG, 1, 402},
- {41840, R_0088C4_VGT_CACHE_INVALIDATION, 3, 403},
- {41863, R_0088C8_VGT_ESGS_RING_SIZE},
- {41882, R_0088CC_VGT_GSVS_RING_SIZE},
- {41901, R_0088D4_VGT_GS_VERTEX_REUSE, 1, 406},
- {41921, R_008958_VGT_PRIMITIVE_TYPE, 1, 407},
- {41940, R_00895C_VGT_INDEX_TYPE, 1, 408},
- {41955, R_008960_VGT_STRMOUT_BUFFER_FILLED_SIZE_0},
- {41988, R_008964_VGT_STRMOUT_BUFFER_FILLED_SIZE_1},
- {42021, R_008968_VGT_STRMOUT_BUFFER_FILLED_SIZE_2},
- {42054, R_00896C_VGT_STRMOUT_BUFFER_FILLED_SIZE_3},
- {42087, R_008970_VGT_NUM_INDICES},
- {42103, R_008974_VGT_NUM_INSTANCES},
- {42121, R_008988_VGT_TF_RING_SIZE, 1, 409},
- {42138, R_0089B0_VGT_HS_OFFCHIP_PARAM, 1, 410},
- {42159, R_0089B8_VGT_TF_MEMORY_BASE},
- {42178, R_008A14_PA_CL_ENHANCE, 4, 411},
- {42192, R_008A60_PA_SU_LINE_STIPPLE_VALUE, 1, 415},
- {42217, R_008B10_PA_SC_LINE_STIPPLE_STATE, 2, 416},
- {42242, R_008670_CP_STALLED_STAT3, 22, 418},
- {42259, R_008674_CP_STALLED_STAT1, 16, 440},
- {42276, R_008678_CP_STALLED_STAT2, 29, 456},
- {42293, R_008680_CP_STAT, 24, 485},
- {42301, R_030800_GRBM_GFX_INDEX, 6, 509},
- {41863, R_030900_VGT_ESGS_RING_SIZE},
- {41882, R_030904_VGT_GSVS_RING_SIZE},
- {41921, R_030908_VGT_PRIMITIVE_TYPE, 1, 515},
- {41940, R_03090C_VGT_INDEX_TYPE, 2, 516},
- {41955, R_030910_VGT_STRMOUT_BUFFER_FILLED_SIZE_0},
- {41988, R_030914_VGT_STRMOUT_BUFFER_FILLED_SIZE_1},
- {42021, R_030918_VGT_STRMOUT_BUFFER_FILLED_SIZE_2},
- {42054, R_03091C_VGT_STRMOUT_BUFFER_FILLED_SIZE_3},
- {42087, R_030930_VGT_NUM_INDICES},
- {42103, R_030934_VGT_NUM_INSTANCES},
- {42121, R_030938_VGT_TF_RING_SIZE, 1, 518},
- {42138, R_03093C_VGT_HS_OFFCHIP_PARAM, 2, 519},
- {42159, R_030940_VGT_TF_MEMORY_BASE},
- {42192, R_030A00_PA_SU_LINE_STIPPLE_VALUE, 1, 521},
- {42217, R_030A04_PA_SC_LINE_STIPPLE_STATE, 2, 522},
- {42316, R_030A10_PA_SC_SCREEN_EXTENT_MIN_0, 2, 524},
- {42342, R_030A14_PA_SC_SCREEN_EXTENT_MAX_0, 2, 526},
- {42368, R_030A18_PA_SC_SCREEN_EXTENT_MIN_1, 2, 528},
- {42394, R_030A2C_PA_SC_SCREEN_EXTENT_MAX_1, 2, 530},
- {42420, R_008BF0_PA_SC_ENHANCE, 9, 532},
- {42434, R_008C08_SQC_CACHES, 2, 541},
- {42434, R_030D20_SQC_CACHES, 9, 543},
- {42445, R_008C0C_SQ_RANDOM_WAVE_PRI, 3, 552},
- {42464, R_008DFC_SQ_EXP_0, 6, 555},
- {42473, R_030E00_TA_CS_BC_BASE_ADDR},
- {42492, R_030E04_TA_CS_BC_BASE_ADDR_HI, 1, 561},
- {42514, R_030F00_DB_OCCLUSION_COUNT0_LOW},
- {42538, R_008F00_SQ_BUF_RSRC_WORD0},
- {42556, R_030F04_DB_OCCLUSION_COUNT0_HI, 1, 562},
- {42579, R_008F04_SQ_BUF_RSRC_WORD1, 4, 563},
- {42597, R_030F08_DB_OCCLUSION_COUNT1_LOW},
- {42621, R_008F08_SQ_BUF_RSRC_WORD2},
- {42639, R_030F0C_DB_OCCLUSION_COUNT1_HI, 1, 567},
- {42662, R_008F0C_SQ_BUF_RSRC_WORD3, 17, 568},
- {42680, R_030F10_DB_OCCLUSION_COUNT2_LOW},
- {42704, R_008F10_SQ_IMG_RSRC_WORD0},
- {42722, R_030F14_DB_OCCLUSION_COUNT2_HI, 1, 585},
- {42745, R_008F14_SQ_IMG_RSRC_WORD1, 12, 586},
- {42763, R_030F18_DB_OCCLUSION_COUNT3_LOW},
- {42787, R_008F18_SQ_IMG_RSRC_WORD2, 4, 598},
- {42805, R_030F1C_DB_OCCLUSION_COUNT3_HI, 1, 602},
- {42828, R_008F1C_SQ_IMG_RSRC_WORD3, 12, 603},
- {42846, R_008F20_SQ_IMG_RSRC_WORD4, 4, 615},
- {42864, R_008F24_SQ_IMG_RSRC_WORD5, 8, 619},
- {42882, R_008F28_SQ_IMG_RSRC_WORD6, 8, 627},
- {42900, R_008F2C_SQ_IMG_RSRC_WORD7},
- {42918, R_008F30_SQ_IMG_SAMP_WORD0, 14, 635},
- {42936, R_008F34_SQ_IMG_SAMP_WORD1, 4, 649},
- {42954, R_008F38_SQ_IMG_SAMP_WORD2, 11, 653},
- {42972, R_008F3C_SQ_IMG_SAMP_WORD3, 4, 664},
- {42990, R_0090DC_SPI_DYN_GPR_LOCK_EN, 5, 668},
- {43010, R_0090E0_SPI_STATIC_THREAD_MGMT_1, 2, 673},
- {43035, R_0090E4_SPI_STATIC_THREAD_MGMT_2, 2, 675},
- {43060, R_0090E8_SPI_STATIC_THREAD_MGMT_3, 1, 677},
- {43085, R_0090EC_SPI_PS_MAX_WAVE_ID, 1, 678},
- {43085, R_0090E8_SPI_PS_MAX_WAVE_ID, 1, 679},
- {43104, R_0090F0_SPI_ARB_PRIORITY, 3, 680},
- {43104, R_00C700_SPI_ARB_PRIORITY, 8, 683},
- {43121, R_0090F4_SPI_ARB_CYCLES_0, 2, 691},
- {43138, R_0090F8_SPI_ARB_CYCLES_1, 1, 693},
- {43155, R_008F40_SQ_FLAT_SCRATCH_WORD0, 1, 694},
- {43177, R_008F44_SQ_FLAT_SCRATCH_WORD1, 1, 695},
- {43199, R_030FF8_DB_ZPASS_COUNT_LOW},
- {43218, R_030FFC_DB_ZPASS_COUNT_HI, 1, 696},
- {43236, R_009100_SPI_CONFIG_CNTL, 5, 697},
- {43252, R_00913C_SPI_CONFIG_CNTL_1, 5, 702},
- {43270, R_00936C_SPI_RESOURCE_RESERVE_CU_AB_0, 12, 707},
- {42473, R_00950C_TA_CS_BC_BASE_ADDR},
- {43299, R_009858_DB_SUBTILE_CONTROL, 10, 719},
- {43318, R_0098F8_GB_ADDR_CONFIG, 16, 729},
- {43333, R_009910_GB_TILE_MODE0, 10, 745},
- {43347, R_009914_GB_TILE_MODE1, 10, 745},
- {43361, R_009918_GB_TILE_MODE2, 10, 745},
- {43375, R_00991C_GB_TILE_MODE3, 10, 745},
- {43389, R_009920_GB_TILE_MODE4, 10, 745},
- {43403, R_009924_GB_TILE_MODE5, 10, 745},
- {43417, R_009928_GB_TILE_MODE6, 10, 745},
- {43431, R_00992C_GB_TILE_MODE7, 10, 745},
- {43445, R_009930_GB_TILE_MODE8, 10, 745},
- {43459, R_009934_GB_TILE_MODE9, 10, 745},
- {43473, R_009938_GB_TILE_MODE10, 10, 745},
- {43488, R_00993C_GB_TILE_MODE11, 10, 745},
- {43503, R_009940_GB_TILE_MODE12, 10, 745},
- {43518, R_009944_GB_TILE_MODE13, 10, 745},
- {43533, R_009948_GB_TILE_MODE14, 10, 745},
- {43548, R_00994C_GB_TILE_MODE15, 10, 745},
- {43563, R_009950_GB_TILE_MODE16, 10, 745},
- {43578, R_009954_GB_TILE_MODE17, 10, 745},
- {43593, R_009958_GB_TILE_MODE18, 10, 745},
- {43608, R_00995C_GB_TILE_MODE19, 10, 745},
- {43623, R_009960_GB_TILE_MODE20, 10, 745},
- {43638, R_009964_GB_TILE_MODE21, 10, 745},
- {43653, R_009968_GB_TILE_MODE22, 10, 745},
- {43668, R_00996C_GB_TILE_MODE23, 10, 745},
- {43683, R_009970_GB_TILE_MODE24, 10, 745},
- {43698, R_009974_GB_TILE_MODE25, 10, 745},
- {43713, R_009978_GB_TILE_MODE26, 10, 745},
- {43728, R_00997C_GB_TILE_MODE27, 10, 745},
- {43743, R_009980_GB_TILE_MODE28, 10, 745},
- {43758, R_009984_GB_TILE_MODE29, 10, 745},
- {43773, R_009988_GB_TILE_MODE30, 10, 745},
- {43788, R_00998C_GB_TILE_MODE31, 10, 745},
- {43803, R_009990_GB_MACROTILE_MODE0, 4, 755},
- {43822, R_009994_GB_MACROTILE_MODE1, 4, 755},
- {43841, R_009998_GB_MACROTILE_MODE2, 4, 755},
- {43860, R_00999C_GB_MACROTILE_MODE3, 4, 755},
- {43879, R_0099A0_GB_MACROTILE_MODE4, 4, 755},
- {43898, R_0099A4_GB_MACROTILE_MODE5, 4, 755},
- {43917, R_0099A8_GB_MACROTILE_MODE6, 4, 755},
- {43936, R_0099AC_GB_MACROTILE_MODE7, 4, 755},
- {43955, R_0099B0_GB_MACROTILE_MODE8, 4, 755},
- {43974, R_0099B4_GB_MACROTILE_MODE9, 4, 755},
- {43993, R_0099B8_GB_MACROTILE_MODE10, 4, 755},
- {44013, R_0099BC_GB_MACROTILE_MODE11, 4, 755},
- {44033, R_0099C0_GB_MACROTILE_MODE12, 4, 755},
- {44053, R_0099C4_GB_MACROTILE_MODE13, 4, 755},
- {44073, R_0099C8_GB_MACROTILE_MODE14, 4, 755},
- {44093, R_0099CC_GB_MACROTILE_MODE15, 4, 755},
- {44113, R_00B000_SPI_SHADER_TBA_LO_PS},
- {44134, R_00B004_SPI_SHADER_TBA_HI_PS, 1, 759},
- {44155, R_00B008_SPI_SHADER_TMA_LO_PS},
- {44176, R_00B00C_SPI_SHADER_TMA_HI_PS, 1, 760},
- {44197, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, 4, 761},
- {44221, R_00B020_SPI_SHADER_PGM_LO_PS},
- {44242, R_00B024_SPI_SHADER_PGM_HI_PS, 1, 765},
- {44263, R_00B028_SPI_SHADER_PGM_RSRC1_PS, 12, 766},
- {44287, R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 11, 778},
- {44311, R_00B030_SPI_SHADER_USER_DATA_PS_0},
- {44337, R_00B034_SPI_SHADER_USER_DATA_PS_1},
- {44363, R_00B038_SPI_SHADER_USER_DATA_PS_2},
- {44389, R_00B03C_SPI_SHADER_USER_DATA_PS_3},
- {44415, R_00B040_SPI_SHADER_USER_DATA_PS_4},
- {44441, R_00B044_SPI_SHADER_USER_DATA_PS_5},
- {44467, R_00B048_SPI_SHADER_USER_DATA_PS_6},
- {44493, R_00B04C_SPI_SHADER_USER_DATA_PS_7},
- {44519, R_00B050_SPI_SHADER_USER_DATA_PS_8},
- {44545, R_00B054_SPI_SHADER_USER_DATA_PS_9},
- {44571, R_00B058_SPI_SHADER_USER_DATA_PS_10},
- {44598, R_00B05C_SPI_SHADER_USER_DATA_PS_11},
- {44625, R_00B060_SPI_SHADER_USER_DATA_PS_12},
- {44652, R_00B064_SPI_SHADER_USER_DATA_PS_13},
- {44679, R_00B068_SPI_SHADER_USER_DATA_PS_14},
- {44706, R_00B06C_SPI_SHADER_USER_DATA_PS_15},
- {44733, R_00B100_SPI_SHADER_TBA_LO_VS},
- {44754, R_00B104_SPI_SHADER_TBA_HI_VS, 1, 789},
- {44775, R_00B108_SPI_SHADER_TMA_LO_VS},
- {44796, R_00B10C_SPI_SHADER_TMA_HI_VS, 1, 790},
- {44817, R_00B118_SPI_SHADER_PGM_RSRC3_VS, 4, 791},
- {44841, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, 1, 795},
- {44866, R_00B120_SPI_SHADER_PGM_LO_VS},
- {44887, R_00B124_SPI_SHADER_PGM_HI_VS, 1, 796},
- {44908, R_00B128_SPI_SHADER_PGM_RSRC1_VS, 13, 797},
- {44932, R_00B12C_SPI_SHADER_PGM_RSRC2_VS, 15, 810},
- {44956, R_00B130_SPI_SHADER_USER_DATA_VS_0},
- {44982, R_00B134_SPI_SHADER_USER_DATA_VS_1},
- {45008, R_00B138_SPI_SHADER_USER_DATA_VS_2},
- {45034, R_00B13C_SPI_SHADER_USER_DATA_VS_3},
- {45060, R_00B140_SPI_SHADER_USER_DATA_VS_4},
- {45086, R_00B144_SPI_SHADER_USER_DATA_VS_5},
- {45112, R_00B148_SPI_SHADER_USER_DATA_VS_6},
- {45138, R_00B14C_SPI_SHADER_USER_DATA_VS_7},
- {45164, R_00B150_SPI_SHADER_USER_DATA_VS_8},
- {45190, R_00B154_SPI_SHADER_USER_DATA_VS_9},
- {45216, R_00B158_SPI_SHADER_USER_DATA_VS_10},
- {45243, R_00B15C_SPI_SHADER_USER_DATA_VS_11},
- {45270, R_00B160_SPI_SHADER_USER_DATA_VS_12},
- {45297, R_00B164_SPI_SHADER_USER_DATA_VS_13},
- {45324, R_00B168_SPI_SHADER_USER_DATA_VS_14},
- {45351, R_00B16C_SPI_SHADER_USER_DATA_VS_15},
- {45378, R_00B200_SPI_SHADER_TBA_LO_GS},
- {45399, R_00B204_SPI_SHADER_TBA_HI_GS, 1, 825},
- {45420, R_00B208_SPI_SHADER_TMA_LO_GS},
- {45441, R_00B20C_SPI_SHADER_TMA_HI_GS, 1, 826},
- {45462, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, 5, 827},
- {45486, R_00B220_SPI_SHADER_PGM_LO_GS},
- {45507, R_00B224_SPI_SHADER_PGM_HI_GS, 1, 832},
- {45528, R_00B228_SPI_SHADER_PGM_RSRC1_GS, 13, 833},
- {45552, R_00B22C_SPI_SHADER_PGM_RSRC2_GS, 10, 846},
- {45576, R_00B230_SPI_SHADER_USER_DATA_GS_0},
- {45602, R_00B234_SPI_SHADER_USER_DATA_GS_1},
- {45628, R_00B238_SPI_SHADER_USER_DATA_GS_2},
- {45654, R_00B23C_SPI_SHADER_USER_DATA_GS_3},
- {45680, R_00B240_SPI_SHADER_USER_DATA_GS_4},
- {45706, R_00B244_SPI_SHADER_USER_DATA_GS_5},
- {45732, R_00B248_SPI_SHADER_USER_DATA_GS_6},
- {45758, R_00B24C_SPI_SHADER_USER_DATA_GS_7},
- {45784, R_00B250_SPI_SHADER_USER_DATA_GS_8},
- {45810, R_00B254_SPI_SHADER_USER_DATA_GS_9},
- {45836, R_00B258_SPI_SHADER_USER_DATA_GS_10},
- {45863, R_00B25C_SPI_SHADER_USER_DATA_GS_11},
- {45890, R_00B260_SPI_SHADER_USER_DATA_GS_12},
- {45917, R_00B264_SPI_SHADER_USER_DATA_GS_13},
- {45944, R_00B268_SPI_SHADER_USER_DATA_GS_14},
- {45971, R_00B26C_SPI_SHADER_USER_DATA_GS_15},
- {45998, R_00B300_SPI_SHADER_TBA_LO_ES},
- {46019, R_00B304_SPI_SHADER_TBA_HI_ES, 1, 856},
- {46040, R_00B308_SPI_SHADER_TMA_LO_ES},
- {46061, R_00B30C_SPI_SHADER_TMA_HI_ES, 1, 857},
- {46082, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, 4, 858},
- {46106, R_00B320_SPI_SHADER_PGM_LO_ES},
- {46127, R_00B324_SPI_SHADER_PGM_HI_ES, 1, 862},
- {46148, R_00B328_SPI_SHADER_PGM_RSRC1_ES, 12, 863},
- {46172, R_00B32C_SPI_SHADER_PGM_RSRC2_ES, 7, 875},
- {46196, R_00B330_SPI_SHADER_USER_DATA_ES_0},
- {46222, R_00B334_SPI_SHADER_USER_DATA_ES_1},
- {46248, R_00B338_SPI_SHADER_USER_DATA_ES_2},
- {46274, R_00B33C_SPI_SHADER_USER_DATA_ES_3},
- {46300, R_00B340_SPI_SHADER_USER_DATA_ES_4},
- {46326, R_00B344_SPI_SHADER_USER_DATA_ES_5},
- {46352, R_00B348_SPI_SHADER_USER_DATA_ES_6},
- {46378, R_00B34C_SPI_SHADER_USER_DATA_ES_7},
- {46404, R_00B350_SPI_SHADER_USER_DATA_ES_8},
- {46430, R_00B354_SPI_SHADER_USER_DATA_ES_9},
- {46456, R_00B358_SPI_SHADER_USER_DATA_ES_10},
- {46483, R_00B35C_SPI_SHADER_USER_DATA_ES_11},
- {46510, R_00B360_SPI_SHADER_USER_DATA_ES_12},
- {46537, R_00B364_SPI_SHADER_USER_DATA_ES_13},
- {46564, R_00B368_SPI_SHADER_USER_DATA_ES_14},
- {46591, R_00B36C_SPI_SHADER_USER_DATA_ES_15},
- {46618, R_00B400_SPI_SHADER_TBA_LO_HS},
- {46639, R_00B404_SPI_SHADER_TBA_HI_HS, 1, 882},
- {46660, R_00B408_SPI_SHADER_TMA_LO_HS},
- {46681, R_00B40C_SPI_SHADER_TMA_HI_HS, 1, 883},
- {46702, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 5, 884},
- {46726, R_00B420_SPI_SHADER_PGM_LO_HS},
- {46747, R_00B424_SPI_SHADER_PGM_HI_HS, 1, 889},
- {46768, R_00B428_SPI_SHADER_PGM_RSRC1_HS, 12, 890},
- {46792, R_00B42C_SPI_SHADER_PGM_RSRC2_HS, 11, 902},
- {46816, R_00B430_SPI_SHADER_USER_DATA_HS_0},
- {46842, R_00B434_SPI_SHADER_USER_DATA_HS_1},
- {46868, R_00B438_SPI_SHADER_USER_DATA_HS_2},
- {46894, R_00B43C_SPI_SHADER_USER_DATA_HS_3},
- {46920, R_00B440_SPI_SHADER_USER_DATA_HS_4},
- {46946, R_00B444_SPI_SHADER_USER_DATA_HS_5},
- {46972, R_00B448_SPI_SHADER_USER_DATA_HS_6},
- {46998, R_00B44C_SPI_SHADER_USER_DATA_HS_7},
- {47024, R_00B450_SPI_SHADER_USER_DATA_HS_8},
- {47050, R_00B454_SPI_SHADER_USER_DATA_HS_9},
- {47076, R_00B458_SPI_SHADER_USER_DATA_HS_10},
- {47103, R_00B45C_SPI_SHADER_USER_DATA_HS_11},
- {47130, R_00B460_SPI_SHADER_USER_DATA_HS_12},
- {47157, R_00B464_SPI_SHADER_USER_DATA_HS_13},
- {47184, R_00B468_SPI_SHADER_USER_DATA_HS_14},
- {47211, R_00B46C_SPI_SHADER_USER_DATA_HS_15},
- {47238, R_00B500_SPI_SHADER_TBA_LO_LS},
- {47259, R_00B504_SPI_SHADER_TBA_HI_LS, 1, 913},
- {47280, R_00B508_SPI_SHADER_TMA_LO_LS},
- {47301, R_00B50C_SPI_SHADER_TMA_HI_LS, 1, 914},
- {47322, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, 4, 915},
- {47346, R_00B520_SPI_SHADER_PGM_LO_LS},
- {47367, R_00B524_SPI_SHADER_PGM_HI_LS, 1, 919},
- {47388, R_00B528_SPI_SHADER_PGM_RSRC1_LS, 11, 920},
- {47412, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, 6, 931},
- {47436, R_00B530_SPI_SHADER_USER_DATA_LS_0},
- {47462, R_00B534_SPI_SHADER_USER_DATA_LS_1},
- {47488, R_00B538_SPI_SHADER_USER_DATA_LS_2},
- {47514, R_00B53C_SPI_SHADER_USER_DATA_LS_3},
- {47540, R_00B540_SPI_SHADER_USER_DATA_LS_4},
- {47566, R_00B544_SPI_SHADER_USER_DATA_LS_5},
- {47592, R_00B548_SPI_SHADER_USER_DATA_LS_6},
- {47618, R_00B54C_SPI_SHADER_USER_DATA_LS_7},
- {47644, R_00B550_SPI_SHADER_USER_DATA_LS_8},
- {47670, R_00B554_SPI_SHADER_USER_DATA_LS_9},
- {47696, R_00B558_SPI_SHADER_USER_DATA_LS_10},
- {47723, R_00B55C_SPI_SHADER_USER_DATA_LS_11},
- {47750, R_00B560_SPI_SHADER_USER_DATA_LS_12},
- {47777, R_00B564_SPI_SHADER_USER_DATA_LS_13},
- {47804, R_00B568_SPI_SHADER_USER_DATA_LS_14},
- {47831, R_00B56C_SPI_SHADER_USER_DATA_LS_15},
- {47858, R_00B800_COMPUTE_DISPATCH_INITIATOR, 13, 937},
- {47885, R_00B804_COMPUTE_DIM_X},
- {47899, R_00B808_COMPUTE_DIM_Y},
- {47913, R_00B80C_COMPUTE_DIM_Z},
- {47927, R_00B810_COMPUTE_START_X},
- {47943, R_00B814_COMPUTE_START_Y},
- {47959, R_00B818_COMPUTE_START_Z},
- {47975, R_00B81C_COMPUTE_NUM_THREAD_X, 2, 950},
- {47996, R_00B820_COMPUTE_NUM_THREAD_Y, 2, 952},
- {48017, R_00B824_COMPUTE_NUM_THREAD_Z, 2, 954},
- {48038, R_00B82C_COMPUTE_MAX_WAVE_ID, 1, 956},
- {48058, R_00B828_COMPUTE_PIPELINESTAT_ENABLE, 1, 957},
- {48086, R_00B82C_COMPUTE_PERFCOUNT_ENABLE, 1, 958},
- {48111, R_00B830_COMPUTE_PGM_LO},
- {48126, R_00B834_COMPUTE_PGM_HI, 2, 959},
- {48141, R_00B838_COMPUTE_TBA_LO},
- {48156, R_00B83C_COMPUTE_TBA_HI, 1, 961},
- {48171, R_00B840_COMPUTE_TMA_LO},
- {48186, R_00B844_COMPUTE_TMA_HI, 1, 962},
- {48201, R_00B848_COMPUTE_PGM_RSRC1, 11, 963},
- {48219, R_00B84C_COMPUTE_PGM_RSRC2, 12, 974},
- {48237, R_00B850_COMPUTE_VMID, 1, 986},
- {48250, R_00B854_COMPUTE_RESOURCE_LIMITS, 8, 987},
- {48274, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0, 2, 995},
- {48305, R_00B85C_COMPUTE_STATIC_THREAD_MGMT_SE1, 2, 997},
- {48336, R_00B860_COMPUTE_TMPRING_SIZE, 2, 999},
- {48357, R_00B864_COMPUTE_STATIC_THREAD_MGMT_SE2, 2, 1001},
- {48388, R_00B868_COMPUTE_STATIC_THREAD_MGMT_SE3, 2, 1003},
- {48419, R_00B86C_COMPUTE_RESTART_X},
- {48437, R_00B870_COMPUTE_RESTART_Y},
- {48455, R_00B874_COMPUTE_RESTART_Z},
- {48473, R_00B87C_COMPUTE_MISC_RESERVED, 5, 1005},
- {48495, R_00B880_COMPUTE_DISPATCH_ID},
- {48515, R_00B884_COMPUTE_THREADGROUP_ID},
- {48538, R_00B888_COMPUTE_RELAUNCH, 3, 1010},
- {48555, R_00B88C_COMPUTE_WAVE_RESTORE_ADDR_LO},
- {48584, R_00B890_COMPUTE_WAVE_RESTORE_ADDR_HI, 1, 1013},
- {48613, R_00B894_COMPUTE_WAVE_RESTORE_CONTROL, 2, 1014},
- {48642, R_00B900_COMPUTE_USER_DATA_0},
- {48662, R_00B904_COMPUTE_USER_DATA_1},
- {48682, R_00B908_COMPUTE_USER_DATA_2},
- {48702, R_00B90C_COMPUTE_USER_DATA_3},
- {48722, R_00B910_COMPUTE_USER_DATA_4},
- {48742, R_00B914_COMPUTE_USER_DATA_5},
- {48762, R_00B918_COMPUTE_USER_DATA_6},
- {48782, R_00B91C_COMPUTE_USER_DATA_7},
- {48802, R_00B920_COMPUTE_USER_DATA_8},
- {48822, R_00B924_COMPUTE_USER_DATA_9},
- {48842, R_00B928_COMPUTE_USER_DATA_10},
- {48863, R_00B92C_COMPUTE_USER_DATA_11},
- {48884, R_00B930_COMPUTE_USER_DATA_12},
- {48905, R_00B934_COMPUTE_USER_DATA_13},
- {48926, R_00B938_COMPUTE_USER_DATA_14},
- {48947, R_00B93C_COMPUTE_USER_DATA_15},
- {48968, R_00B9FC_COMPUTE_NOWHERE},
- {48984, R_034000_CPG_PERFCOUNTER1_LO},
- {49004, R_034004_CPG_PERFCOUNTER1_HI},
- {49024, R_034008_CPG_PERFCOUNTER0_LO},
- {49044, R_03400C_CPG_PERFCOUNTER0_HI},
- {49064, R_034010_CPC_PERFCOUNTER1_LO},
- {49084, R_034014_CPC_PERFCOUNTER1_HI},
- {49104, R_034018_CPC_PERFCOUNTER0_LO},
- {49124, R_03401C_CPC_PERFCOUNTER0_HI},
- {49144, R_034020_CPF_PERFCOUNTER1_LO},
- {49164, R_034024_CPF_PERFCOUNTER1_HI},
- {49184, R_034028_CPF_PERFCOUNTER0_LO},
- {49204, R_03402C_CPF_PERFCOUNTER0_HI},
- {49224, R_034100_GRBM_PERFCOUNTER0_LO},
- {49245, R_034104_GRBM_PERFCOUNTER0_HI},
- {49266, R_03410C_GRBM_PERFCOUNTER1_LO},
- {49287, R_034110_GRBM_PERFCOUNTER1_HI},
- {49308, R_034114_GRBM_SE0_PERFCOUNTER_LO},
- {49332, R_034118_GRBM_SE0_PERFCOUNTER_HI},
- {49356, R_03411C_GRBM_SE1_PERFCOUNTER_LO},
- {49380, R_034120_GRBM_SE1_PERFCOUNTER_HI},
- {49404, R_034124_GRBM_SE2_PERFCOUNTER_LO},
- {49428, R_034128_GRBM_SE2_PERFCOUNTER_HI},
- {49452, R_03412C_GRBM_SE3_PERFCOUNTER_LO},
- {49476, R_034130_GRBM_SE3_PERFCOUNTER_HI},
- {49500, R_034200_WD_PERFCOUNTER0_LO},
- {49519, R_034204_WD_PERFCOUNTER0_HI},
- {49538, R_034208_WD_PERFCOUNTER1_LO},
- {49557, R_03420C_WD_PERFCOUNTER1_HI},
- {49576, R_034210_WD_PERFCOUNTER2_LO},
- {49595, R_034214_WD_PERFCOUNTER2_HI},
- {49614, R_034218_WD_PERFCOUNTER3_LO},
- {49633, R_03421C_WD_PERFCOUNTER3_HI},
- {49652, R_034220_IA_PERFCOUNTER0_LO},
- {49671, R_034224_IA_PERFCOUNTER0_HI},
- {49690, R_034228_IA_PERFCOUNTER1_LO},
- {49709, R_03422C_IA_PERFCOUNTER1_HI},
- {49728, R_034230_IA_PERFCOUNTER2_LO},
- {49747, R_034234_IA_PERFCOUNTER2_HI},
- {49766, R_034238_IA_PERFCOUNTER3_LO},
- {49785, R_03423C_IA_PERFCOUNTER3_HI},
- {49804, R_034240_VGT_PERFCOUNTER0_LO},
- {49824, R_034244_VGT_PERFCOUNTER0_HI},
- {49844, R_034248_VGT_PERFCOUNTER1_LO},
- {49864, R_03424C_VGT_PERFCOUNTER1_HI},
- {49884, R_034250_VGT_PERFCOUNTER2_LO},
- {49904, R_034254_VGT_PERFCOUNTER2_HI},
- {49924, R_034258_VGT_PERFCOUNTER3_LO},
- {49944, R_03425C_VGT_PERFCOUNTER3_HI},
- {49964, R_034400_PA_SU_PERFCOUNTER0_LO},
- {49986, R_034404_PA_SU_PERFCOUNTER0_HI, 1, 1016},
- {50008, R_034408_PA_SU_PERFCOUNTER1_LO},
- {50030, R_03440C_PA_SU_PERFCOUNTER1_HI, 1, 1016},
- {50052, R_034410_PA_SU_PERFCOUNTER2_LO},
- {50074, R_034414_PA_SU_PERFCOUNTER2_HI, 1, 1016},
- {50096, R_034418_PA_SU_PERFCOUNTER3_LO},
- {50118, R_03441C_PA_SU_PERFCOUNTER3_HI, 1, 1016},
- {50140, R_034500_PA_SC_PERFCOUNTER0_LO},
- {50162, R_034504_PA_SC_PERFCOUNTER0_HI},
- {50184, R_034508_PA_SC_PERFCOUNTER1_LO},
- {50206, R_03450C_PA_SC_PERFCOUNTER1_HI},
- {50228, R_034510_PA_SC_PERFCOUNTER2_LO},
- {50250, R_034514_PA_SC_PERFCOUNTER2_HI},
- {50272, R_034518_PA_SC_PERFCOUNTER3_LO},
- {50294, R_03451C_PA_SC_PERFCOUNTER3_HI},
- {50316, R_034520_PA_SC_PERFCOUNTER4_LO},
- {50338, R_034524_PA_SC_PERFCOUNTER4_HI},
- {50360, R_034528_PA_SC_PERFCOUNTER5_LO},
- {50382, R_03452C_PA_SC_PERFCOUNTER5_HI},
- {50404, R_034530_PA_SC_PERFCOUNTER6_LO},
- {50426, R_034534_PA_SC_PERFCOUNTER6_HI},
- {50448, R_034538_PA_SC_PERFCOUNTER7_LO},
- {50470, R_03453C_PA_SC_PERFCOUNTER7_HI},
- {50492, R_034600_SPI_PERFCOUNTER0_HI},
- {50512, R_034604_SPI_PERFCOUNTER0_LO},
- {50532, R_034608_SPI_PERFCOUNTER1_HI},
- {50552, R_03460C_SPI_PERFCOUNTER1_LO},
- {50572, R_034610_SPI_PERFCOUNTER2_HI},
- {50592, R_034614_SPI_PERFCOUNTER2_LO},
- {50612, R_034618_SPI_PERFCOUNTER3_HI},
- {50632, R_03461C_SPI_PERFCOUNTER3_LO},
- {50652, R_034620_SPI_PERFCOUNTER4_HI},
- {50672, R_034624_SPI_PERFCOUNTER4_LO},
- {50692, R_034628_SPI_PERFCOUNTER5_HI},
- {50712, R_03462C_SPI_PERFCOUNTER5_LO},
- {50732, R_034700_SQ_PERFCOUNTER0_LO},
- {50751, R_034704_SQ_PERFCOUNTER0_HI},
- {50770, R_034708_SQ_PERFCOUNTER1_LO},
- {50789, R_03470C_SQ_PERFCOUNTER1_HI},
- {50808, R_034710_SQ_PERFCOUNTER2_LO},
- {50827, R_034714_SQ_PERFCOUNTER2_HI},
- {50846, R_034718_SQ_PERFCOUNTER3_LO},
- {50865, R_03471C_SQ_PERFCOUNTER3_HI},
- {50884, R_034720_SQ_PERFCOUNTER4_LO},
- {50903, R_034724_SQ_PERFCOUNTER4_HI},
- {50922, R_034728_SQ_PERFCOUNTER5_LO},
- {50941, R_03472C_SQ_PERFCOUNTER5_HI},
- {50960, R_034730_SQ_PERFCOUNTER6_LO},
- {50979, R_034734_SQ_PERFCOUNTER6_HI},
- {50998, R_034738_SQ_PERFCOUNTER7_LO},
- {51017, R_03473C_SQ_PERFCOUNTER7_HI},
- {51036, R_034740_SQ_PERFCOUNTER8_LO},
- {51055, R_034744_SQ_PERFCOUNTER8_HI},
- {51074, R_034748_SQ_PERFCOUNTER9_LO},
- {51093, R_03474C_SQ_PERFCOUNTER9_HI},
- {51112, R_034750_SQ_PERFCOUNTER10_LO},
- {51132, R_034754_SQ_PERFCOUNTER10_HI},
- {51152, R_034758_SQ_PERFCOUNTER11_LO},
- {51172, R_03475C_SQ_PERFCOUNTER11_HI},
- {51192, R_034760_SQ_PERFCOUNTER12_LO},
- {51212, R_034764_SQ_PERFCOUNTER12_HI},
- {51232, R_034768_SQ_PERFCOUNTER13_LO},
- {51252, R_03476C_SQ_PERFCOUNTER13_HI},
- {51272, R_034770_SQ_PERFCOUNTER14_LO},
- {51292, R_034774_SQ_PERFCOUNTER14_HI},
- {51312, R_034778_SQ_PERFCOUNTER15_LO},
- {51332, R_03477C_SQ_PERFCOUNTER15_HI},
- {51352, R_034900_SX_PERFCOUNTER0_LO},
- {51371, R_034904_SX_PERFCOUNTER0_HI},
- {51390, R_034908_SX_PERFCOUNTER1_LO},
- {51409, R_03490C_SX_PERFCOUNTER1_HI},
- {51428, R_034910_SX_PERFCOUNTER2_LO},
- {51447, R_034914_SX_PERFCOUNTER2_HI},
- {51466, R_034918_SX_PERFCOUNTER3_LO},
- {51485, R_03491C_SX_PERFCOUNTER3_HI},
- {51504, R_034A00_GDS_PERFCOUNTER0_LO},
- {51524, R_034A04_GDS_PERFCOUNTER0_HI},
- {51544, R_034A08_GDS_PERFCOUNTER1_LO},
- {51564, R_034A0C_GDS_PERFCOUNTER1_HI},
- {51584, R_034A10_GDS_PERFCOUNTER2_LO},
- {51604, R_034A14_GDS_PERFCOUNTER2_HI},
- {51624, R_034A18_GDS_PERFCOUNTER3_LO},
- {51644, R_034A1C_GDS_PERFCOUNTER3_HI},
- {51664, R_034B00_TA_PERFCOUNTER0_LO},
- {51683, R_034B04_TA_PERFCOUNTER0_HI},
- {51702, R_034B08_TA_PERFCOUNTER1_LO},
- {51721, R_034B0C_TA_PERFCOUNTER1_HI},
- {51740, R_034C00_TD_PERFCOUNTER0_LO},
- {51759, R_034C04_TD_PERFCOUNTER0_HI},
- {51778, R_034C08_TD_PERFCOUNTER1_LO},
- {51797, R_034C0C_TD_PERFCOUNTER1_HI},
- {51816, R_034D00_TCP_PERFCOUNTER0_LO},
- {51836, R_034D04_TCP_PERFCOUNTER0_HI},
- {51856, R_034D08_TCP_PERFCOUNTER1_LO},
- {51876, R_034D0C_TCP_PERFCOUNTER1_HI},
- {51896, R_034D10_TCP_PERFCOUNTER2_LO},
- {51916, R_034D14_TCP_PERFCOUNTER2_HI},
- {51936, R_034D18_TCP_PERFCOUNTER3_LO},
- {51956, R_034D1C_TCP_PERFCOUNTER3_HI},
- {51976, R_034E00_TCC_PERFCOUNTER0_LO},
- {51996, R_034E04_TCC_PERFCOUNTER0_HI},
- {52016, R_034E08_TCC_PERFCOUNTER1_LO},
- {52036, R_034E0C_TCC_PERFCOUNTER1_HI},
- {52056, R_034E10_TCC_PERFCOUNTER2_LO},
- {52076, R_034E14_TCC_PERFCOUNTER2_HI},
- {52096, R_034E18_TCC_PERFCOUNTER3_LO},
- {52116, R_034E1C_TCC_PERFCOUNTER3_HI},
- {52136, R_034E40_TCA_PERFCOUNTER0_LO},
- {52156, R_034E44_TCA_PERFCOUNTER0_HI},
- {52176, R_034E48_TCA_PERFCOUNTER1_LO},
- {52196, R_034E4C_TCA_PERFCOUNTER1_HI},
- {52216, R_034E50_TCA_PERFCOUNTER2_LO},
- {52236, R_034E54_TCA_PERFCOUNTER2_HI},
- {52256, R_034E58_TCA_PERFCOUNTER3_LO},
- {52276, R_034E5C_TCA_PERFCOUNTER3_HI},
- {52296, R_035018_CB_PERFCOUNTER0_LO},
- {52315, R_03501C_CB_PERFCOUNTER0_HI},
- {52334, R_035020_CB_PERFCOUNTER1_LO},
- {52353, R_035024_CB_PERFCOUNTER1_HI},
- {52372, R_035028_CB_PERFCOUNTER2_LO},
- {52391, R_03502C_CB_PERFCOUNTER2_HI},
- {52410, R_035030_CB_PERFCOUNTER3_LO},
- {52429, R_035034_CB_PERFCOUNTER3_HI},
- {52448, R_035100_DB_PERFCOUNTER0_LO},
- {52467, R_035104_DB_PERFCOUNTER0_HI},
- {52486, R_035108_DB_PERFCOUNTER1_LO},
- {52505, R_03510C_DB_PERFCOUNTER1_HI},
- {52524, R_035110_DB_PERFCOUNTER2_LO},
- {52543, R_035114_DB_PERFCOUNTER2_HI},
- {52562, R_035118_DB_PERFCOUNTER3_LO},
- {52581, R_03511C_DB_PERFCOUNTER3_HI},
- {52600, R_035200_RLC_PERFCOUNTER0_LO},
- {52620, R_035204_RLC_PERFCOUNTER0_HI},
- {52640, R_035208_RLC_PERFCOUNTER1_LO},
- {52660, R_03520C_RLC_PERFCOUNTER1_HI},
- {52680, R_036000_CPG_PERFCOUNTER1_SELECT, 8, 1023},
- {52704, R_036004_CPG_PERFCOUNTER0_SELECT1, 6, 1017},
- {52729, R_036008_CPG_PERFCOUNTER0_SELECT, 8, 1023},
- {52753, R_03600C_CPC_PERFCOUNTER1_SELECT, 8, 1055},
- {52777, R_036010_CPC_PERFCOUNTER0_SELECT1, 6, 1031},
- {52802, R_036014_CPF_PERFCOUNTER1_SELECT, 8, 1043},
- {52826, R_036018_CPF_PERFCOUNTER0_SELECT1, 6, 1037},
- {52851, R_03601C_CPF_PERFCOUNTER0_SELECT, 8, 1043},
- {52875, R_036020_CP_PERFMON_CNTL, 4, 1051},
- {52891, R_036024_CPC_PERFCOUNTER0_SELECT, 8, 1055},
- {52915, R_036100_GRBM_PERFCOUNTER0_SELECT, 22, 1063},
- {52940, R_036104_GRBM_PERFCOUNTER1_SELECT, 22, 1063},
- {52965, R_036108_GRBM_SE0_PERFCOUNTER_SELECT, 13, 1085},
- {52993, R_03610C_GRBM_SE1_PERFCOUNTER_SELECT, 13, 1098},
- {53021, R_036110_GRBM_SE2_PERFCOUNTER_SELECT, 13, 1111},
- {53049, R_036114_GRBM_SE3_PERFCOUNTER_SELECT, 13, 1124},
- {53077, R_036200_WD_PERFCOUNTER0_SELECT, 2, 1137},
- {53100, R_036204_WD_PERFCOUNTER1_SELECT, 2, 1137},
- {53123, R_036208_WD_PERFCOUNTER2_SELECT, 2, 1137},
- {53146, R_03620C_WD_PERFCOUNTER3_SELECT, 2, 1137},
- {53169, R_036210_IA_PERFCOUNTER0_SELECT, 5, 1139},
- {53192, R_036214_IA_PERFCOUNTER1_SELECT, 5, 1139},
- {53215, R_036218_IA_PERFCOUNTER2_SELECT, 5, 1139},
- {53238, R_03621C_IA_PERFCOUNTER3_SELECT, 5, 1139},
- {53261, R_036220_IA_PERFCOUNTER0_SELECT1, 4, 1144},
- {53285, R_036230_VGT_PERFCOUNTER0_SELECT, 5, 1148},
- {53309, R_036234_VGT_PERFCOUNTER1_SELECT, 5, 1148},
- {53333, R_036238_VGT_PERFCOUNTER2_SELECT, 5, 1148},
- {53357, R_03623C_VGT_PERFCOUNTER3_SELECT, 5, 1148},
- {53381, R_036240_VGT_PERFCOUNTER0_SELECT1, 4, 1153},
- {53406, R_036244_VGT_PERFCOUNTER1_SELECT1},
- {53431, R_036250_VGT_PERFCOUNTER_SEID_MASK, 1, 1157},
- {53457, R_036400_PA_SU_PERFCOUNTER0_SELECT, 3, 1158},
- {53483, R_036404_PA_SU_PERFCOUNTER0_SELECT1, 2, 1161},
- {53510, R_036408_PA_SU_PERFCOUNTER1_SELECT, 3, 1158},
- {53536, R_03640C_PA_SU_PERFCOUNTER1_SELECT1},
- {53563, R_036410_PA_SU_PERFCOUNTER2_SELECT, 3, 1158},
- {53589, R_036414_PA_SU_PERFCOUNTER3_SELECT, 3, 1158},
- {53615, R_036500_PA_SC_PERFCOUNTER0_SELECT, 3, 1163},
- {53641, R_036504_PA_SC_PERFCOUNTER0_SELECT1, 2, 1166},
- {53668, R_036508_PA_SC_PERFCOUNTER1_SELECT, 3, 1163},
- {53694, R_03650C_PA_SC_PERFCOUNTER2_SELECT, 3, 1163},
- {53720, R_036510_PA_SC_PERFCOUNTER3_SELECT, 3, 1163},
- {53746, R_036514_PA_SC_PERFCOUNTER4_SELECT, 3, 1163},
- {53772, R_036518_PA_SC_PERFCOUNTER5_SELECT, 3, 1163},
- {53798, R_03651C_PA_SC_PERFCOUNTER6_SELECT, 3, 1163},
- {53824, R_036520_PA_SC_PERFCOUNTER7_SELECT, 3, 1163},
- {53850, R_036600_SPI_PERFCOUNTER0_SELECT, 5, 1168},
- {53874, R_036604_SPI_PERFCOUNTER1_SELECT, 5, 1168},
- {53898, R_036608_SPI_PERFCOUNTER2_SELECT, 5, 1168},
- {53922, R_03660C_SPI_PERFCOUNTER3_SELECT, 5, 1168},
- {53946, R_036610_SPI_PERFCOUNTER0_SELECT1, 4, 1173},
- {53971, R_036614_SPI_PERFCOUNTER1_SELECT1},
- {53996, R_036618_SPI_PERFCOUNTER2_SELECT1},
- {54021, R_03661C_SPI_PERFCOUNTER3_SELECT1},
- {54046, R_036620_SPI_PERFCOUNTER4_SELECT, 5, 1168},
- {54070, R_036624_SPI_PERFCOUNTER5_SELECT, 5, 1168},
- {54094, R_036628_SPI_PERFCOUNTER_BINS, 8, 1177},
- {54115, R_036700_SQ_PERFCOUNTER0_SELECT, 6, 1185},
- {54138, R_036704_SQ_PERFCOUNTER1_SELECT, 6, 1185},
- {54161, R_036708_SQ_PERFCOUNTER2_SELECT, 6, 1185},
- {54184, R_03670C_SQ_PERFCOUNTER3_SELECT, 6, 1185},
- {54207, R_036710_SQ_PERFCOUNTER4_SELECT, 6, 1185},
- {54230, R_036714_SQ_PERFCOUNTER5_SELECT, 6, 1185},
- {54253, R_036718_SQ_PERFCOUNTER6_SELECT, 6, 1185},
- {54276, R_03671C_SQ_PERFCOUNTER7_SELECT, 6, 1185},
- {54299, R_036720_SQ_PERFCOUNTER8_SELECT, 6, 1185},
- {54322, R_036724_SQ_PERFCOUNTER9_SELECT, 6, 1185},
- {54345, R_036728_SQ_PERFCOUNTER10_SELECT, 6, 1185},
- {54369, R_03672C_SQ_PERFCOUNTER11_SELECT, 6, 1185},
- {54393, R_036730_SQ_PERFCOUNTER12_SELECT, 6, 1185},
- {54417, R_036734_SQ_PERFCOUNTER13_SELECT, 6, 1185},
- {54441, R_036738_SQ_PERFCOUNTER14_SELECT, 6, 1185},
- {54465, R_03673C_SQ_PERFCOUNTER15_SELECT, 6, 1185},
- {54489, R_036780_SQ_PERFCOUNTER_CTRL, 9, 1191},
- {54509, R_036784_SQ_PERFCOUNTER_MASK, 2, 1200},
- {54529, R_036788_SQ_PERFCOUNTER_CTRL2, 1, 1202},
- {54550, R_036900_SX_PERFCOUNTER0_SELECT, 3, 1203},
- {54573, R_036904_SX_PERFCOUNTER1_SELECT, 3, 1203},
- {54596, R_036908_SX_PERFCOUNTER2_SELECT, 3, 1203},
- {54619, R_03690C_SX_PERFCOUNTER3_SELECT, 3, 1203},
- {54642, R_036910_SX_PERFCOUNTER0_SELECT1, 2, 1206},
- {54666, R_036914_SX_PERFCOUNTER1_SELECT1},
- {54690, R_036A00_GDS_PERFCOUNTER0_SELECT, 3, 1208},
- {54714, R_036A04_GDS_PERFCOUNTER1_SELECT, 3, 1208},
- {54738, R_036A08_GDS_PERFCOUNTER2_SELECT, 3, 1208},
- {54762, R_036A0C_GDS_PERFCOUNTER3_SELECT, 3, 1208},
- {54786, R_036A10_GDS_PERFCOUNTER0_SELECT1, 2, 1211},
- {54811, R_036B00_TA_PERFCOUNTER0_SELECT, 5, 1213},
- {54834, R_036B04_TA_PERFCOUNTER0_SELECT1, 4, 1218},
- {54858, R_036B08_TA_PERFCOUNTER1_SELECT, 5, 1213},
- {54881, R_036C00_TD_PERFCOUNTER0_SELECT, 5, 1222},
- {54904, R_036C04_TD_PERFCOUNTER0_SELECT1, 4, 1227},
- {54928, R_036C08_TD_PERFCOUNTER1_SELECT, 5, 1222},
- {54951, R_036D00_TCP_PERFCOUNTER0_SELECT, 5, 1231},
- {54975, R_036D04_TCP_PERFCOUNTER0_SELECT1, 4, 1236},
- {55000, R_036D08_TCP_PERFCOUNTER1_SELECT, 5, 1231},
- {55024, R_036D0C_TCP_PERFCOUNTER1_SELECT1},
- {55049, R_036D10_TCP_PERFCOUNTER2_SELECT, 5, 1231},
- {55073, R_036D14_TCP_PERFCOUNTER3_SELECT, 5, 1231},
- {55097, R_036E00_TCC_PERFCOUNTER0_SELECT, 5, 1240},
- {55121, R_036E04_TCC_PERFCOUNTER0_SELECT1, 4, 1245},
- {55146, R_036E08_TCC_PERFCOUNTER1_SELECT, 5, 1240},
- {55170, R_036E0C_TCC_PERFCOUNTER1_SELECT1},
- {55195, R_036E10_TCC_PERFCOUNTER2_SELECT, 5, 1240},
- {55219, R_036E14_TCC_PERFCOUNTER3_SELECT, 5, 1240},
- {55243, R_036E40_TCA_PERFCOUNTER0_SELECT, 5, 1249},
- {55267, R_036E44_TCA_PERFCOUNTER0_SELECT1, 4, 1254},
- {55292, R_036E48_TCA_PERFCOUNTER1_SELECT, 5, 1249},
- {55316, R_036E4C_TCA_PERFCOUNTER1_SELECT1},
- {55341, R_036E50_TCA_PERFCOUNTER2_SELECT, 5, 1249},
- {55365, R_036E54_TCA_PERFCOUNTER3_SELECT, 5, 1249},
- {55389, R_037000_CB_PERFCOUNTER_FILTER, 12, 1258},
- {55411, R_037004_CB_PERFCOUNTER0_SELECT, 5, 1270},
- {55434, R_037008_CB_PERFCOUNTER0_SELECT1, 4, 1275},
- {55458, R_03700C_CB_PERFCOUNTER1_SELECT, 5, 1270},
- {55481, R_037010_CB_PERFCOUNTER2_SELECT, 5, 1270},
- {55504, R_037014_CB_PERFCOUNTER3_SELECT, 5, 1270},
- {55527, R_037100_DB_PERFCOUNTER0_SELECT, 5, 1279},
- {55550, R_037104_DB_PERFCOUNTER0_SELECT1, 4, 1284},
- {55574, R_037108_DB_PERFCOUNTER1_SELECT, 5, 1279},
- {55597, R_03710C_DB_PERFCOUNTER1_SELECT1},
- {55621, R_037110_DB_PERFCOUNTER2_SELECT, 5, 1279},
- {55644, R_037118_DB_PERFCOUNTER3_SELECT, 5, 1279},
- {55667, R_028000_DB_RENDER_CONTROL, 10, 1288},
- {55685, R_028004_DB_COUNT_CONTROL, 9, 1298},
- {55702, R_028008_DB_DEPTH_VIEW, 5, 1307},
- {55716, R_02800C_DB_RENDER_OVERRIDE, 23, 1312},
- {55735, R_028010_DB_RENDER_OVERRIDE2, 16, 1335},
- {55755, R_028014_DB_HTILE_DATA_BASE},
- {55774, R_028020_DB_DEPTH_BOUNDS_MIN},
- {55794, R_028024_DB_DEPTH_BOUNDS_MAX},
- {55814, R_028028_DB_STENCIL_CLEAR, 1, 1351},
- {55831, R_02802C_DB_DEPTH_CLEAR},
- {55846, R_028030_PA_SC_SCREEN_SCISSOR_TL, 2, 1352},
- {55870, R_028034_PA_SC_SCREEN_SCISSOR_BR, 2, 1354},
- {55894, R_02803C_DB_DEPTH_INFO, 7, 1356},
- {55908, R_028040_DB_Z_INFO, 10, 1363},
- {55918, R_028044_DB_STENCIL_INFO, 6, 1373},
- {55934, R_028048_DB_Z_READ_BASE},
- {55949, R_02804C_DB_STENCIL_READ_BASE},
- {55970, R_028050_DB_Z_WRITE_BASE},
- {55986, R_028054_DB_STENCIL_WRITE_BASE},
- {56008, R_028058_DB_DEPTH_SIZE, 2, 1379},
- {56022, R_02805C_DB_DEPTH_SLICE, 1, 1381},
- {56037, R_028080_TA_BC_BASE_ADDR},
- {56053, R_028084_TA_BC_BASE_ADDR_HI, 1, 1382},
- {56072, R_0281E8_COHER_DEST_BASE_HI_0, 1, 1383},
- {56093, R_0281EC_COHER_DEST_BASE_HI_1, 1, 1384},
- {56114, R_0281F0_COHER_DEST_BASE_HI_2, 1, 1385},
- {56135, R_0281F4_COHER_DEST_BASE_HI_3, 1, 1386},
- {56156, R_0281F8_COHER_DEST_BASE_2},
- {56174, R_0281FC_COHER_DEST_BASE_3},
- {56192, R_028200_PA_SC_WINDOW_OFFSET, 2, 1387},
- {56212, R_028204_PA_SC_WINDOW_SCISSOR_TL, 3, 1389},
- {56236, R_028208_PA_SC_WINDOW_SCISSOR_BR, 2, 1392},
- {56260, R_02820C_PA_SC_CLIPRECT_RULE, 1, 1394},
- {56280, R_028210_PA_SC_CLIPRECT_0_TL, 2, 1395},
- {56300, R_028214_PA_SC_CLIPRECT_0_BR, 2, 1397},
- {56320, R_028218_PA_SC_CLIPRECT_1_TL, 2, 1395},
- {56340, R_02821C_PA_SC_CLIPRECT_1_BR, 2, 1397},
- {56360, R_028220_PA_SC_CLIPRECT_2_TL, 2, 1395},
- {56380, R_028224_PA_SC_CLIPRECT_2_BR, 2, 1397},
- {56400, R_028228_PA_SC_CLIPRECT_3_TL, 2, 1395},
- {56420, R_02822C_PA_SC_CLIPRECT_3_BR, 2, 1397},
- {56440, R_028230_PA_SC_EDGERULE, 7, 1399},
- {56455, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 2, 1406},
- {56484, R_028238_CB_TARGET_MASK, 8, 1408},
- {56499, R_02823C_CB_SHADER_MASK, 8, 1416},
- {56514, R_028240_PA_SC_GENERIC_SCISSOR_TL, 3, 1424},
- {56539, R_028244_PA_SC_GENERIC_SCISSOR_BR, 2, 1427},
- {56564, R_028248_COHER_DEST_BASE_0},
- {56582, R_02824C_COHER_DEST_BASE_1},
- {56600, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 3, 1429},
- {56625, R_028254_PA_SC_VPORT_SCISSOR_0_BR, 2, 1432},
- {56650, R_028258_PA_SC_VPORT_SCISSOR_1_TL, 3, 1429},
- {56675, R_02825C_PA_SC_VPORT_SCISSOR_1_BR, 2, 1432},
- {56700, R_028260_PA_SC_VPORT_SCISSOR_2_TL, 3, 1429},
- {56725, R_028264_PA_SC_VPORT_SCISSOR_2_BR, 2, 1432},
- {56750, R_028268_PA_SC_VPORT_SCISSOR_3_TL, 3, 1429},
- {56775, R_02826C_PA_SC_VPORT_SCISSOR_3_BR, 2, 1432},
- {56800, R_028270_PA_SC_VPORT_SCISSOR_4_TL, 3, 1429},
- {56825, R_028274_PA_SC_VPORT_SCISSOR_4_BR, 2, 1432},
- {56850, R_028278_PA_SC_VPORT_SCISSOR_5_TL, 3, 1429},
- {56875, R_02827C_PA_SC_VPORT_SCISSOR_5_BR, 2, 1432},
- {56900, R_028280_PA_SC_VPORT_SCISSOR_6_TL, 3, 1429},
- {56925, R_028284_PA_SC_VPORT_SCISSOR_6_BR, 2, 1432},
- {56950, R_028288_PA_SC_VPORT_SCISSOR_7_TL, 3, 1429},
- {56975, R_02828C_PA_SC_VPORT_SCISSOR_7_BR, 2, 1432},
- {57000, R_028290_PA_SC_VPORT_SCISSOR_8_TL, 3, 1429},
- {57025, R_028294_PA_SC_VPORT_SCISSOR_8_BR, 2, 1432},
- {57050, R_028298_PA_SC_VPORT_SCISSOR_9_TL, 3, 1429},
- {57075, R_02829C_PA_SC_VPORT_SCISSOR_9_BR, 2, 1432},
- {57100, R_0282A0_PA_SC_VPORT_SCISSOR_10_TL, 3, 1429},
- {57126, R_0282A4_PA_SC_VPORT_SCISSOR_10_BR, 2, 1432},
- {57152, R_0282A8_PA_SC_VPORT_SCISSOR_11_TL, 3, 1429},
- {57178, R_0282AC_PA_SC_VPORT_SCISSOR_11_BR, 2, 1432},
- {57204, R_0282B0_PA_SC_VPORT_SCISSOR_12_TL, 3, 1429},
- {57230, R_0282B4_PA_SC_VPORT_SCISSOR_12_BR, 2, 1432},
- {57256, R_0282B8_PA_SC_VPORT_SCISSOR_13_TL, 3, 1429},
- {57282, R_0282BC_PA_SC_VPORT_SCISSOR_13_BR, 2, 1432},
- {57308, R_0282C0_PA_SC_VPORT_SCISSOR_14_TL, 3, 1429},
- {57334, R_0282C4_PA_SC_VPORT_SCISSOR_14_BR, 2, 1432},
- {57360, R_0282C8_PA_SC_VPORT_SCISSOR_15_TL, 3, 1429},
- {57386, R_0282CC_PA_SC_VPORT_SCISSOR_15_BR, 2, 1432},
- {57412, R_0282D0_PA_SC_VPORT_ZMIN_0},
- {57431, R_0282D4_PA_SC_VPORT_ZMAX_0},
- {57450, R_0282D8_PA_SC_VPORT_ZMIN_1},
- {57469, R_0282DC_PA_SC_VPORT_ZMAX_1},
- {57488, R_0282E0_PA_SC_VPORT_ZMIN_2},
- {57507, R_0282E4_PA_SC_VPORT_ZMAX_2},
- {57526, R_0282E8_PA_SC_VPORT_ZMIN_3},
- {57545, R_0282EC_PA_SC_VPORT_ZMAX_3},
- {57564, R_0282F0_PA_SC_VPORT_ZMIN_4},
- {57583, R_0282F4_PA_SC_VPORT_ZMAX_4},
- {57602, R_0282F8_PA_SC_VPORT_ZMIN_5},
- {57621, R_0282FC_PA_SC_VPORT_ZMAX_5},
- {57640, R_028300_PA_SC_VPORT_ZMIN_6},
- {57659, R_028304_PA_SC_VPORT_ZMAX_6},
- {57678, R_028308_PA_SC_VPORT_ZMIN_7},
- {57697, R_02830C_PA_SC_VPORT_ZMAX_7},
- {57716, R_028310_PA_SC_VPORT_ZMIN_8},
- {57735, R_028314_PA_SC_VPORT_ZMAX_8},
- {57754, R_028318_PA_SC_VPORT_ZMIN_9},
- {57773, R_02831C_PA_SC_VPORT_ZMAX_9},
- {57792, R_028320_PA_SC_VPORT_ZMIN_10},
- {57812, R_028324_PA_SC_VPORT_ZMAX_10},
- {57832, R_028328_PA_SC_VPORT_ZMIN_11},
- {57852, R_02832C_PA_SC_VPORT_ZMAX_11},
- {57872, R_028330_PA_SC_VPORT_ZMIN_12},
- {57892, R_028334_PA_SC_VPORT_ZMAX_12},
- {57912, R_028338_PA_SC_VPORT_ZMIN_13},
- {57932, R_02833C_PA_SC_VPORT_ZMAX_13},
- {57952, R_028340_PA_SC_VPORT_ZMIN_14},
- {57972, R_028344_PA_SC_VPORT_ZMAX_14},
- {57992, R_028348_PA_SC_VPORT_ZMIN_15},
- {58012, R_02834C_PA_SC_VPORT_ZMAX_15},
- {58032, R_028350_PA_SC_RASTER_CONFIG, 17, 1434},
- {58052, R_028354_PA_SC_RASTER_CONFIG_1, 5, 1451},
- {58074, R_028358_PA_SC_SCREEN_EXTENT_CONTROL, 2, 1456},
- {58102, R_028400_VGT_MAX_VTX_INDX},
- {58119, R_028404_VGT_MIN_VTX_INDX},
- {58136, R_028408_VGT_INDX_OFFSET},
- {58152, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX},
- {58181, R_028414_CB_BLEND_RED},
- {58194, R_028418_CB_BLEND_GREEN},
- {58209, R_02841C_CB_BLEND_BLUE},
- {58223, R_028420_CB_BLEND_ALPHA},
- {58238, R_028424_CB_DCC_CONTROL, 3, 1458},
- {58253, R_02842C_DB_STENCIL_CONTROL, 6, 1461},
- {58272, R_028430_DB_STENCILREFMASK, 4, 1467},
- {58290, R_028434_DB_STENCILREFMASK_BF, 4, 1471},
- {58311, R_02843C_PA_CL_VPORT_XSCALE},
- {58330, R_028440_PA_CL_VPORT_XOFFSET},
- {58350, R_028444_PA_CL_VPORT_YSCALE},
- {58369, R_028448_PA_CL_VPORT_YOFFSET},
- {58389, R_02844C_PA_CL_VPORT_ZSCALE},
- {58408, R_028450_PA_CL_VPORT_ZOFFSET},
- {58428, R_028454_PA_CL_VPORT_XSCALE_1},
- {58449, R_028458_PA_CL_VPORT_XOFFSET_1},
- {58471, R_02845C_PA_CL_VPORT_YSCALE_1},
- {58492, R_028460_PA_CL_VPORT_YOFFSET_1},
- {58514, R_028464_PA_CL_VPORT_ZSCALE_1},
- {58535, R_028468_PA_CL_VPORT_ZOFFSET_1},
- {58557, R_02846C_PA_CL_VPORT_XSCALE_2},
- {58578, R_028470_PA_CL_VPORT_XOFFSET_2},
- {58600, R_028474_PA_CL_VPORT_YSCALE_2},
- {58621, R_028478_PA_CL_VPORT_YOFFSET_2},
- {58643, R_02847C_PA_CL_VPORT_ZSCALE_2},
- {58664, R_028480_PA_CL_VPORT_ZOFFSET_2},
- {58686, R_028484_PA_CL_VPORT_XSCALE_3},
- {58707, R_028488_PA_CL_VPORT_XOFFSET_3},
- {58729, R_02848C_PA_CL_VPORT_YSCALE_3},
- {58750, R_028490_PA_CL_VPORT_YOFFSET_3},
- {58772, R_028494_PA_CL_VPORT_ZSCALE_3},
- {58793, R_028498_PA_CL_VPORT_ZOFFSET_3},
- {58815, R_02849C_PA_CL_VPORT_XSCALE_4},
- {58836, R_0284A0_PA_CL_VPORT_XOFFSET_4},
- {58858, R_0284A4_PA_CL_VPORT_YSCALE_4},
- {58879, R_0284A8_PA_CL_VPORT_YOFFSET_4},
- {58901, R_0284AC_PA_CL_VPORT_ZSCALE_4},
- {58922, R_0284B0_PA_CL_VPORT_ZOFFSET_4},
- {58944, R_0284B4_PA_CL_VPORT_XSCALE_5},
- {58965, R_0284B8_PA_CL_VPORT_XOFFSET_5},
- {58987, R_0284BC_PA_CL_VPORT_YSCALE_5},
- {59008, R_0284C0_PA_CL_VPORT_YOFFSET_5},
- {59030, R_0284C4_PA_CL_VPORT_ZSCALE_5},
- {59051, R_0284C8_PA_CL_VPORT_ZOFFSET_5},
- {59073, R_0284CC_PA_CL_VPORT_XSCALE_6},
- {59094, R_0284D0_PA_CL_VPORT_XOFFSET_6},
- {59116, R_0284D4_PA_CL_VPORT_YSCALE_6},
- {59137, R_0284D8_PA_CL_VPORT_YOFFSET_6},
- {59159, R_0284DC_PA_CL_VPORT_ZSCALE_6},
- {59180, R_0284E0_PA_CL_VPORT_ZOFFSET_6},
- {59202, R_0284E4_PA_CL_VPORT_XSCALE_7},
- {59223, R_0284E8_PA_CL_VPORT_XOFFSET_7},
- {59245, R_0284EC_PA_CL_VPORT_YSCALE_7},
- {59266, R_0284F0_PA_CL_VPORT_YOFFSET_7},
- {59288, R_0284F4_PA_CL_VPORT_ZSCALE_7},
- {59309, R_0284F8_PA_CL_VPORT_ZOFFSET_7},
- {59331, R_0284FC_PA_CL_VPORT_XSCALE_8},
- {59352, R_028500_PA_CL_VPORT_XOFFSET_8},
- {59374, R_028504_PA_CL_VPORT_YSCALE_8},
- {59395, R_028508_PA_CL_VPORT_YOFFSET_8},
- {59417, R_02850C_PA_CL_VPORT_ZSCALE_8},
- {59438, R_028510_PA_CL_VPORT_ZOFFSET_8},
- {59460, R_028514_PA_CL_VPORT_XSCALE_9},
- {59481, R_028518_PA_CL_VPORT_XOFFSET_9},
- {59503, R_02851C_PA_CL_VPORT_YSCALE_9},
- {59524, R_028520_PA_CL_VPORT_YOFFSET_9},
- {59546, R_028524_PA_CL_VPORT_ZSCALE_9},
- {59567, R_028528_PA_CL_VPORT_ZOFFSET_9},
- {59589, R_02852C_PA_CL_VPORT_XSCALE_10},
- {59611, R_028530_PA_CL_VPORT_XOFFSET_10},
- {59634, R_028534_PA_CL_VPORT_YSCALE_10},
- {59656, R_028538_PA_CL_VPORT_YOFFSET_10},
- {59679, R_02853C_PA_CL_VPORT_ZSCALE_10},
- {59701, R_028540_PA_CL_VPORT_ZOFFSET_10},
- {59724, R_028544_PA_CL_VPORT_XSCALE_11},
- {59746, R_028548_PA_CL_VPORT_XOFFSET_11},
- {59769, R_02854C_PA_CL_VPORT_YSCALE_11},
- {59791, R_028550_PA_CL_VPORT_YOFFSET_11},
- {59814, R_028554_PA_CL_VPORT_ZSCALE_11},
- {59836, R_028558_PA_CL_VPORT_ZOFFSET_11},
- {59859, R_02855C_PA_CL_VPORT_XSCALE_12},
- {59881, R_028560_PA_CL_VPORT_XOFFSET_12},
- {59904, R_028564_PA_CL_VPORT_YSCALE_12},
- {59926, R_028568_PA_CL_VPORT_YOFFSET_12},
- {59949, R_02856C_PA_CL_VPORT_ZSCALE_12},
- {59971, R_028570_PA_CL_VPORT_ZOFFSET_12},
- {59994, R_028574_PA_CL_VPORT_XSCALE_13},
- {60016, R_028578_PA_CL_VPORT_XOFFSET_13},
- {60039, R_02857C_PA_CL_VPORT_YSCALE_13},
- {60061, R_028580_PA_CL_VPORT_YOFFSET_13},
- {60084, R_028584_PA_CL_VPORT_ZSCALE_13},
- {60106, R_028588_PA_CL_VPORT_ZOFFSET_13},
- {60129, R_02858C_PA_CL_VPORT_XSCALE_14},
- {60151, R_028590_PA_CL_VPORT_XOFFSET_14},
- {60174, R_028594_PA_CL_VPORT_YSCALE_14},
- {60196, R_028598_PA_CL_VPORT_YOFFSET_14},
- {60219, R_02859C_PA_CL_VPORT_ZSCALE_14},
- {60241, R_0285A0_PA_CL_VPORT_ZOFFSET_14},
- {60264, R_0285A4_PA_CL_VPORT_XSCALE_15},
- {60286, R_0285A8_PA_CL_VPORT_XOFFSET_15},
- {60309, R_0285AC_PA_CL_VPORT_YSCALE_15},
- {60331, R_0285B0_PA_CL_VPORT_YOFFSET_15},
- {60354, R_0285B4_PA_CL_VPORT_ZSCALE_15},
- {60376, R_0285B8_PA_CL_VPORT_ZOFFSET_15},
- {60399, R_0285BC_PA_CL_UCP_0_X},
- {60413, R_0285C0_PA_CL_UCP_0_Y},
- {60427, R_0285C4_PA_CL_UCP_0_Z},
- {60441, R_0285C8_PA_CL_UCP_0_W},
- {60455, R_0285CC_PA_CL_UCP_1_X},
- {60469, R_0285D0_PA_CL_UCP_1_Y},
- {60483, R_0285D4_PA_CL_UCP_1_Z},
- {60497, R_0285D8_PA_CL_UCP_1_W},
- {60511, R_0285DC_PA_CL_UCP_2_X},
- {60525, R_0285E0_PA_CL_UCP_2_Y},
- {60539, R_0285E4_PA_CL_UCP_2_Z},
- {60553, R_0285E8_PA_CL_UCP_2_W},
- {60567, R_0285EC_PA_CL_UCP_3_X},
- {60581, R_0285F0_PA_CL_UCP_3_Y},
- {60595, R_0285F4_PA_CL_UCP_3_Z},
- {60609, R_0285F8_PA_CL_UCP_3_W},
- {60623, R_0285FC_PA_CL_UCP_4_X},
- {60637, R_028600_PA_CL_UCP_4_Y},
- {60651, R_028604_PA_CL_UCP_4_Z},
- {60665, R_028608_PA_CL_UCP_4_W},
- {60679, R_02860C_PA_CL_UCP_5_X},
- {60693, R_028610_PA_CL_UCP_5_Y},
- {60707, R_028614_PA_CL_UCP_5_Z},
- {60721, R_028618_PA_CL_UCP_5_W},
- {60735, R_028644_SPI_PS_INPUT_CNTL_0, 12, 1475},
- {60755, R_028648_SPI_PS_INPUT_CNTL_1, 12, 1475},
- {60775, R_02864C_SPI_PS_INPUT_CNTL_2, 12, 1475},
- {60795, R_028650_SPI_PS_INPUT_CNTL_3, 12, 1475},
- {60815, R_028654_SPI_PS_INPUT_CNTL_4, 12, 1475},
- {60835, R_028658_SPI_PS_INPUT_CNTL_5, 12, 1475},
- {60855, R_02865C_SPI_PS_INPUT_CNTL_6, 12, 1475},
- {60875, R_028660_SPI_PS_INPUT_CNTL_7, 12, 1475},
- {60895, R_028664_SPI_PS_INPUT_CNTL_8, 12, 1475},
- {60915, R_028668_SPI_PS_INPUT_CNTL_9, 12, 1475},
- {60935, R_02866C_SPI_PS_INPUT_CNTL_10, 12, 1475},
- {60956, R_028670_SPI_PS_INPUT_CNTL_11, 12, 1475},
- {60977, R_028674_SPI_PS_INPUT_CNTL_12, 12, 1475},
- {60998, R_028678_SPI_PS_INPUT_CNTL_13, 12, 1475},
- {61019, R_02867C_SPI_PS_INPUT_CNTL_14, 12, 1475},
- {61040, R_028680_SPI_PS_INPUT_CNTL_15, 12, 1475},
- {61061, R_028684_SPI_PS_INPUT_CNTL_16, 12, 1475},
- {61082, R_028688_SPI_PS_INPUT_CNTL_17, 12, 1475},
- {61103, R_02868C_SPI_PS_INPUT_CNTL_18, 12, 1475},
- {61124, R_028690_SPI_PS_INPUT_CNTL_19, 12, 1475},
- {61145, R_028694_SPI_PS_INPUT_CNTL_20, 12, 1475},
- {61166, R_028698_SPI_PS_INPUT_CNTL_21, 12, 1475},
- {61187, R_02869C_SPI_PS_INPUT_CNTL_22, 12, 1475},
- {61208, R_0286A0_SPI_PS_INPUT_CNTL_23, 12, 1475},
- {61229, R_0286A4_SPI_PS_INPUT_CNTL_24, 12, 1475},
- {61250, R_0286A8_SPI_PS_INPUT_CNTL_25, 12, 1475},
- {61271, R_0286AC_SPI_PS_INPUT_CNTL_26, 12, 1475},
- {61292, R_0286B0_SPI_PS_INPUT_CNTL_27, 12, 1475},
- {61313, R_0286B4_SPI_PS_INPUT_CNTL_28, 12, 1475},
- {61334, R_0286B8_SPI_PS_INPUT_CNTL_29, 12, 1475},
- {61355, R_0286BC_SPI_PS_INPUT_CNTL_30, 12, 1475},
- {61376, R_0286C0_SPI_PS_INPUT_CNTL_31, 12, 1475},
- {61397, R_0286C4_SPI_VS_OUT_CONFIG, 4, 1487},
- {61415, R_0286CC_SPI_PS_INPUT_ENA, 16, 1491},
- {61432, R_0286D0_SPI_PS_INPUT_ADDR, 16, 1507},
- {61450, R_0286D4_SPI_INTERP_CONTROL_0, 7, 1523},
- {61471, R_0286D8_SPI_PS_IN_CONTROL, 7, 1530},
- {61489, R_0286E0_SPI_BARYC_CNTL, 7, 1537},
- {61504, R_0286E8_SPI_TMPRING_SIZE, 2, 1544},
- {61521, R_028704_SPI_WAVE_MGMT_1, 5, 1546},
- {61537, R_028708_SPI_WAVE_MGMT_2, 1, 1551},
- {61553, R_02870C_SPI_SHADER_POS_FORMAT, 4, 1552},
- {61575, R_028710_SPI_SHADER_Z_FORMAT, 1, 1556},
- {61595, R_028714_SPI_SHADER_COL_FORMAT, 8, 1557},
- {61617, R_028754_SX_PS_DOWNCONVERT, 8, 1565},
- {61635, R_028758_SX_BLEND_OPT_EPSILON, 8, 1573},
- {61656, R_02875C_SX_BLEND_OPT_CONTROL, 17, 1581},
- {61677, R_028760_SX_MRT0_BLEND_OPT, 6, 1598},
- {61695, R_028764_SX_MRT1_BLEND_OPT, 6, 1604},
- {61713, R_028768_SX_MRT2_BLEND_OPT, 6, 1610},
- {61731, R_02876C_SX_MRT3_BLEND_OPT, 6, 1616},
- {61749, R_028770_SX_MRT4_BLEND_OPT, 6, 1622},
- {61767, R_028774_SX_MRT5_BLEND_OPT, 6, 1628},
- {61785, R_028778_SX_MRT6_BLEND_OPT, 6, 1634},
- {61803, R_02877C_SX_MRT7_BLEND_OPT, 6, 1640},
- {61821, R_028780_CB_BLEND0_CONTROL, 9, 1646},
- {61839, R_028784_CB_BLEND1_CONTROL, 9, 1646},
- {61857, R_028788_CB_BLEND2_CONTROL, 9, 1646},
- {61875, R_02878C_CB_BLEND3_CONTROL, 9, 1646},
- {61893, R_028790_CB_BLEND4_CONTROL, 9, 1646},
- {61911, R_028794_CB_BLEND5_CONTROL, 9, 1646},
- {61929, R_028798_CB_BLEND6_CONTROL, 9, 1646},
- {61947, R_02879C_CB_BLEND7_CONTROL, 9, 1646},
- {61965, R_0287CC_CS_COPY_STATE, 1, 1655},
- {61979, R_0287D4_PA_CL_POINT_X_RAD},
- {61997, R_0287D8_PA_CL_POINT_Y_RAD},
- {62015, R_0287DC_PA_CL_POINT_SIZE},
- {62032, R_0287E0_PA_CL_POINT_CULL_RAD},
- {62053, R_0287E4_VGT_DMA_BASE_HI, 2, 1656},
- {62069, R_0287E8_VGT_DMA_BASE},
- {62082, R_0287F0_VGT_DRAW_INITIATOR, 8, 1658},
- {62101, R_0287F4_VGT_IMMED_DATA},
- {62116, R_0287F8_VGT_EVENT_ADDRESS_REG, 1, 1666},
- {62138, R_028800_DB_DEPTH_CONTROL, 10, 1667},
- {62155, R_028804_DB_EQAA, 12, 1677},
- {62163, R_028808_CB_COLOR_CONTROL, 4, 1689},
- {62180, R_02880C_DB_SHADER_CONTROL, 16, 1693},
- {62198, R_028810_PA_CL_CLIP_CNTL, 19, 1709},
- {62214, R_028814_PA_SU_SC_MODE_CNTL, 15, 1728},
- {62233, R_028818_PA_CL_VTE_CNTL, 10, 1743},
- {62248, R_02881C_PA_CL_VS_OUT_CNTL, 28, 1753},
- {62266, R_028820_PA_CL_NANINF_CNTL, 16, 1781},
- {62284, R_028824_PA_SU_LINE_STIPPLE_CNTL, 4, 1797},
- {62308, R_028828_PA_SU_LINE_STIPPLE_SCALE},
- {62333, R_02882C_PA_SU_PRIM_FILTER_CNTL, 11, 1801},
- {62356, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL, 6, 1812},
- {62385, R_028A00_PA_SU_POINT_SIZE, 2, 1818},
- {62402, R_028A04_PA_SU_POINT_MINMAX, 2, 1820},
- {62421, R_028A08_PA_SU_LINE_CNTL, 1, 1822},
- {62437, R_028A0C_PA_SC_LINE_STIPPLE, 4, 1823},
- {62456, R_028A10_VGT_OUTPUT_PATH_CNTL, 1, 1827},
- {62477, R_028A14_VGT_HOS_CNTL, 1, 1828},
- {62490, R_028A18_VGT_HOS_MAX_TESS_LEVEL},
- {62513, R_028A1C_VGT_HOS_MIN_TESS_LEVEL},
- {62536, R_028A20_VGT_HOS_REUSE_DEPTH, 1, 1829},
- {62556, R_028A24_VGT_GROUP_PRIM_TYPE, 4, 1830},
- {62576, R_028A28_VGT_GROUP_FIRST_DECR, 1, 1834},
- {62597, R_028A2C_VGT_GROUP_DECR, 1, 1835},
- {62612, R_028A30_VGT_GROUP_VECT_0_CNTL, 6, 1836},
- {62634, R_028A34_VGT_GROUP_VECT_1_CNTL, 6, 1842},
- {62656, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 8, 1848},
- {62682, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 8, 1856},
- {62708, R_028A40_VGT_GS_MODE, 18, 1864},
- {62720, R_028A44_VGT_GS_ONCHIP_CNTL, 3, 1882},
- {62739, R_028A48_PA_SC_MODE_CNTL_0, 7, 1885},
- {62757, R_028A4C_PA_SC_MODE_CNTL_1, 24, 1892},
- {62775, R_028A50_VGT_ENHANCE},
- {62787, R_028A54_VGT_GS_PER_ES, 1, 1916},
- {62801, R_028A58_VGT_ES_PER_GS, 1, 1917},
- {62815, R_028A5C_VGT_GS_PER_VS, 1, 1918},
- {62829, R_028A60_VGT_GSVS_RING_OFFSET_1, 1, 1919},
- {62852, R_028A64_VGT_GSVS_RING_OFFSET_2, 1, 1920},
- {62875, R_028A68_VGT_GSVS_RING_OFFSET_3, 1, 1921},
- {62898, R_028A6C_VGT_GS_OUT_PRIM_TYPE, 5, 1922},
- {62919, R_028A70_IA_ENHANCE},
- {62930, R_028A74_VGT_DMA_SIZE},
- {62943, R_028A78_VGT_DMA_MAX_SIZE},
- {62960, R_028A7C_VGT_DMA_INDEX_TYPE, 10, 1927},
- {62979, R_028A80_WD_ENHANCE},
- {62990, R_028A84_VGT_PRIMITIVEID_EN, 3, 1937},
- {63009, R_028A88_VGT_DMA_NUM_INSTANCES},
- {63031, R_028A8C_VGT_PRIMITIVEID_RESET},
- {63053, R_028A90_VGT_EVENT_INITIATOR, 4, 1940},
- {63073, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, 1, 1944},
- {63100, R_028AA0_VGT_INSTANCE_STEP_RATE_0},
- {63125, R_028AA4_VGT_INSTANCE_STEP_RATE_1},
- {63150, R_028AA8_IA_MULTI_VGT_PARAM, 7, 1945},
- {63169, R_028AAC_VGT_ESGS_RING_ITEMSIZE, 1, 1952},
- {63192, R_028AB0_VGT_GSVS_RING_ITEMSIZE, 1, 1953},
- {63215, R_028AB4_VGT_REUSE_OFF, 1, 1954},
- {63229, R_028AB8_VGT_VTX_CNT_EN, 1, 1955},
- {63244, R_028ABC_DB_HTILE_SURFACE, 10, 1956},
- {63261, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 4, 1966},
- {63288, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 4, 1970},
- {63315, R_028AC8_DB_PRELOAD_CONTROL, 4, 1974},
- {63334, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0},
- {63360, R_028AD4_VGT_STRMOUT_VTX_STRIDE_0, 1, 1978},
- {63385, R_028ADC_VGT_STRMOUT_BUFFER_OFFSET_0},
- {63413, R_028AE0_VGT_STRMOUT_BUFFER_SIZE_1},
- {63439, R_028AE4_VGT_STRMOUT_VTX_STRIDE_1, 1, 1979},
- {63464, R_028AEC_VGT_STRMOUT_BUFFER_OFFSET_1},
- {63492, R_028AF0_VGT_STRMOUT_BUFFER_SIZE_2},
- {63518, R_028AF4_VGT_STRMOUT_VTX_STRIDE_2, 1, 1980},
- {63543, R_028AFC_VGT_STRMOUT_BUFFER_OFFSET_2},
- {63571, R_028B00_VGT_STRMOUT_BUFFER_SIZE_3},
- {63597, R_028B04_VGT_STRMOUT_VTX_STRIDE_3, 1, 1981},
- {63622, R_028B0C_VGT_STRMOUT_BUFFER_OFFSET_3},
- {63650, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET},
- {63681, R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE},
- {63724, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE, 1, 1982},
- {63762, R_028B38_VGT_GS_MAX_VERT_OUT, 1, 1983},
- {63782, R_028B50_VGT_TESS_DISTRIBUTION, 5, 1984},
- {63804, R_028B54_VGT_SHADER_STAGES_EN, 14, 1989},
- {63825, R_028B58_VGT_LS_HS_CONFIG, 3, 2003},
- {63842, R_028B5C_VGT_GS_VERT_ITEMSIZE, 1, 2006},
- {63863, R_028B60_VGT_GS_VERT_ITEMSIZE_1, 1, 2007},
- {63886, R_028B64_VGT_GS_VERT_ITEMSIZE_2, 1, 2008},
- {63909, R_028B68_VGT_GS_VERT_ITEMSIZE_3, 1, 2009},
- {63932, R_028B6C_VGT_TF_PARAM, 11, 2010},
- {63945, R_028B70_DB_ALPHA_TO_MASK, 6, 2021},
- {63962, R_028B74_VGT_DISPATCH_DRAW_INDEX},
- {63986, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, 2, 2027},
- {64016, R_028B7C_PA_SU_POLY_OFFSET_CLAMP},
- {64040, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE},
- {64070, R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET},
- {64101, R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE},
- {64130, R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET},
- {64160, R_028B90_VGT_GS_INSTANCE_CNT, 2, 2029},
- {64180, R_028B94_VGT_STRMOUT_CONFIG, 8, 2031},
- {64199, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 4, 2039},
- {64225, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 8, 2043},
- {64251, R_028BD8_PA_SC_CENTROID_PRIORITY_1, 8, 2051},
- {64277, R_028BDC_PA_SC_LINE_CNTL, 4, 2059},
- {64293, R_028BE0_PA_SC_AA_CONFIG, 6, 2063},
- {64309, R_028BE4_PA_SU_VTX_CNTL, 3, 2069},
- {64324, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ},
- {64347, R_028BEC_PA_CL_GB_VERT_DISC_ADJ},
- {64370, R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ},
- {64393, R_028BF4_PA_CL_GB_HORZ_DISC_ADJ},
- {64416, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 8, 2072},
- {64450, R_028BFC_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1, 8, 2080},
- {64484, R_028C00_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2, 8, 2088},
- {64518, R_028C04_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3, 8, 2096},
- {64552, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, 8, 2104},
- {64586, R_028C0C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1, 8, 2112},
- {64620, R_028C10_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2, 8, 2120},
- {64654, R_028C14_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3, 8, 2128},
- {64688, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, 8, 2136},
- {64722, R_028C1C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1, 8, 2144},
- {64756, R_028C20_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2, 8, 2152},
- {64790, R_028C24_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3, 8, 2160},
- {64824, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, 8, 2168},
- {64858, R_028C2C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1, 8, 2176},
- {64892, R_028C30_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2, 8, 2184},
- {64926, R_028C34_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3, 8, 2192},
- {64960, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2, 2200},
- {64984, R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1, 2, 2202},
- {65008, R_028C40_PA_SC_SHADER_CONTROL, 3, 2204},
- {65029, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 1, 2207},
- {65057, R_028C5C_VGT_OUT_DEALLOC_CNTL, 1, 2208},
- {65078, R_028C60_CB_COLOR0_BASE},
- {65093, R_028C64_CB_COLOR0_PITCH, 2, 2209},
- {65109, R_028C68_CB_COLOR0_SLICE, 1, 2211},
- {65125, R_028C6C_CB_COLOR0_VIEW, 3, 2212},
- {65140, R_028C70_CB_COLOR0_INFO, 18, 2215},
- {65155, R_028C74_CB_COLOR0_ATTRIB, 13, 2233},
- {65172, R_028C78_CB_COLOR0_DCC_CONTROL, 9, 2246},
- {65194, R_028C7C_CB_COLOR0_CMASK},
- {65210, R_028C80_CB_COLOR0_CMASK_SLICE, 1, 2255},
- {65232, R_028C84_CB_COLOR0_FMASK},
- {65248, R_028C88_CB_COLOR0_FMASK_SLICE, 1, 2256},
- {65270, R_028C8C_CB_COLOR0_CLEAR_WORD0},
- {65292, R_028C90_CB_COLOR0_CLEAR_WORD1},
- {65314, R_028C94_CB_COLOR0_DCC_BASE},
- {65333, R_028C9C_CB_COLOR1_BASE},
- {65348, R_028CA0_CB_COLOR1_PITCH, 2, 2209},
- {65364, R_028CA4_CB_COLOR1_SLICE, 1, 2211},
- {65380, R_028CA8_CB_COLOR1_VIEW, 3, 2212},
- {65395, R_028CAC_CB_COLOR1_INFO, 18, 2215},
- {65410, R_028CB0_CB_COLOR1_ATTRIB, 13, 2233},
- {65427, R_028CB4_CB_COLOR1_DCC_CONTROL, 9, 2246},
- {65449, R_028CB8_CB_COLOR1_CMASK},
- {65465, R_028CBC_CB_COLOR1_CMASK_SLICE, 1, 2255},
- {65487, R_028CC0_CB_COLOR1_FMASK},
- {65503, R_028CC4_CB_COLOR1_FMASK_SLICE, 1, 2256},
- {65525, R_028CC8_CB_COLOR1_CLEAR_WORD0},
- {65547, R_028CCC_CB_COLOR1_CLEAR_WORD1},
- {65569, R_028CD0_CB_COLOR1_DCC_BASE},
- {65588, R_028CD8_CB_COLOR2_BASE},
- {65603, R_028CDC_CB_COLOR2_PITCH, 2, 2209},
- {65619, R_028CE0_CB_COLOR2_SLICE, 1, 2211},
- {65635, R_028CE4_CB_COLOR2_VIEW, 3, 2212},
- {65650, R_028CE8_CB_COLOR2_INFO, 18, 2215},
- {65665, R_028CEC_CB_COLOR2_ATTRIB, 13, 2233},
- {65682, R_028CF0_CB_COLOR2_DCC_CONTROL, 9, 2246},
- {65704, R_028CF4_CB_COLOR2_CMASK},
- {65720, R_028CF8_CB_COLOR2_CMASK_SLICE, 1, 2255},
- {65742, R_028CFC_CB_COLOR2_FMASK},
- {65758, R_028D00_CB_COLOR2_FMASK_SLICE, 1, 2256},
- {65780, R_028D04_CB_COLOR2_CLEAR_WORD0},
- {65802, R_028D08_CB_COLOR2_CLEAR_WORD1},
- {65824, R_028D0C_CB_COLOR2_DCC_BASE},
- {65843, R_028D14_CB_COLOR3_BASE},
- {65858, R_028D18_CB_COLOR3_PITCH, 2, 2209},
- {65874, R_028D1C_CB_COLOR3_SLICE, 1, 2211},
- {65890, R_028D20_CB_COLOR3_VIEW, 3, 2212},
- {65905, R_028D24_CB_COLOR3_INFO, 18, 2215},
- {65920, R_028D28_CB_COLOR3_ATTRIB, 13, 2233},
- {65937, R_028D2C_CB_COLOR3_DCC_CONTROL, 9, 2246},
- {65959, R_028D30_CB_COLOR3_CMASK},
- {65975, R_028D34_CB_COLOR3_CMASK_SLICE, 1, 2255},
- {65997, R_028D38_CB_COLOR3_FMASK},
- {66013, R_028D3C_CB_COLOR3_FMASK_SLICE, 1, 2256},
- {66035, R_028D40_CB_COLOR3_CLEAR_WORD0},
- {66057, R_028D44_CB_COLOR3_CLEAR_WORD1},
- {66079, R_028D48_CB_COLOR3_DCC_BASE},
- {66098, R_028D50_CB_COLOR4_BASE},
- {66113, R_028D54_CB_COLOR4_PITCH, 2, 2209},
- {66129, R_028D58_CB_COLOR4_SLICE, 1, 2211},
- {66145, R_028D5C_CB_COLOR4_VIEW, 3, 2212},
- {66160, R_028D60_CB_COLOR4_INFO, 18, 2215},
- {66175, R_028D64_CB_COLOR4_ATTRIB, 13, 2233},
- {66192, R_028D68_CB_COLOR4_DCC_CONTROL, 9, 2246},
- {66214, R_028D6C_CB_COLOR4_CMASK},
- {66230, R_028D70_CB_COLOR4_CMASK_SLICE, 1, 2255},
- {66252, R_028D74_CB_COLOR4_FMASK},
- {66268, R_028D78_CB_COLOR4_FMASK_SLICE, 1, 2256},
- {66290, R_028D7C_CB_COLOR4_CLEAR_WORD0},
- {66312, R_028D80_CB_COLOR4_CLEAR_WORD1},
- {66334, R_028D84_CB_COLOR4_DCC_BASE},
- {66353, R_028D8C_CB_COLOR5_BASE},
- {66368, R_028D90_CB_COLOR5_PITCH, 2, 2209},
- {66384, R_028D94_CB_COLOR5_SLICE, 1, 2211},
- {66400, R_028D98_CB_COLOR5_VIEW, 3, 2212},
- {66415, R_028D9C_CB_COLOR5_INFO, 18, 2215},
- {66430, R_028DA0_CB_COLOR5_ATTRIB, 13, 2233},
- {66447, R_028DA4_CB_COLOR5_DCC_CONTROL, 9, 2246},
- {66469, R_028DA8_CB_COLOR5_CMASK},
- {66485, R_028DAC_CB_COLOR5_CMASK_SLICE, 1, 2255},
- {66507, R_028DB0_CB_COLOR5_FMASK},
- {66523, R_028DB4_CB_COLOR5_FMASK_SLICE, 1, 2256},
- {66545, R_028DB8_CB_COLOR5_CLEAR_WORD0},
- {66567, R_028DBC_CB_COLOR5_CLEAR_WORD1},
- {66589, R_028DC0_CB_COLOR5_DCC_BASE},
- {66608, R_028DC8_CB_COLOR6_BASE},
- {66623, R_028DCC_CB_COLOR6_PITCH, 2, 2209},
- {66639, R_028DD0_CB_COLOR6_SLICE, 1, 2211},
- {66655, R_028DD4_CB_COLOR6_VIEW, 3, 2212},
- {66670, R_028DD8_CB_COLOR6_INFO, 18, 2215},
- {66685, R_028DDC_CB_COLOR6_ATTRIB, 13, 2233},
- {66702, R_028DE0_CB_COLOR6_DCC_CONTROL, 9, 2246},
- {66724, R_028DE4_CB_COLOR6_CMASK},
- {66740, R_028DE8_CB_COLOR6_CMASK_SLICE, 1, 2255},
- {66762, R_028DEC_CB_COLOR6_FMASK},
- {66778, R_028DF0_CB_COLOR6_FMASK_SLICE, 1, 2256},
- {66800, R_028DF4_CB_COLOR6_CLEAR_WORD0},
- {66822, R_028DF8_CB_COLOR6_CLEAR_WORD1},
- {66844, R_028DFC_CB_COLOR6_DCC_BASE},
- {66863, R_028E04_CB_COLOR7_BASE},
- {66878, R_028E08_CB_COLOR7_PITCH, 2, 2209},
- {66894, R_028E0C_CB_COLOR7_SLICE, 1, 2211},
- {66910, R_028E10_CB_COLOR7_VIEW, 3, 2212},
- {66925, R_028E14_CB_COLOR7_INFO, 18, 2215},
- {66940, R_028E18_CB_COLOR7_ATTRIB, 13, 2233},
- {66957, R_028E1C_CB_COLOR7_DCC_CONTROL, 9, 2246},
- {66979, R_028E20_CB_COLOR7_CMASK},
- {66995, R_028E24_CB_COLOR7_CMASK_SLICE, 1, 2255},
- {67017, R_028E28_CB_COLOR7_FMASK},
- {67033, R_028E2C_CB_COLOR7_FMASK_SLICE, 1, 2256},
- {67055, R_028E30_CB_COLOR7_CLEAR_WORD0},
- {67077, R_028E34_CB_COLOR7_CLEAR_WORD1},
- {67099, R_028E38_CB_COLOR7_DCC_BASE},
- {58102, R_030920_VGT_MAX_VTX_INDX},
- {58119, R_030924_VGT_MIN_VTX_INDX},
- {58136, R_030928_VGT_INDX_OFFSET},
- {63073, R_03092C_VGT_MULTI_PRIM_IB_RESET_EN, 2, 2257},
- {67118, R_030944_VGT_TF_MEMORY_BASE_HI, 1, 2259},
- {67140, R_030948_WD_POS_BUF_BASE},
- {67156, R_03094C_WD_POS_BUF_BASE_HI, 1, 2260},
- {67175, R_030950_WD_CNTL_SB_BUF_BASE},
- {67195, R_030954_WD_CNTL_SB_BUF_BASE_HI, 1, 2261},
- {67218, R_030958_WD_INDEX_BUF_BASE},
- {67236, R_03095C_WD_INDEX_BUF_BASE_HI, 1, 2262},
- {63150, R_030960_IA_MULTI_VGT_PARAM, 9, 2263},
- {67257, R_030964_VGT_OBJECT_ID},
- {67271, R_030968_VGT_INSTANCE_BASE_ID},
- {67292, R_030D24_SQC_WRITEBACK, 2, 2272},
- {67306, R_030E08_TA_GRAD_ADJ_UCONFIG, 4, 2274},
- {43236, R_031100_SPI_CONFIG_CNTL, 9, 2278},
- {43252, R_031104_SPI_CONFIG_CNTL_1, 11, 2287},
- {67326, R_031108_SPI_CONFIG_CNTL_2, 2, 2298},
- {67344, R_00B070_SPI_SHADER_USER_DATA_PS_16},
- {67371, R_00B074_SPI_SHADER_USER_DATA_PS_17},
- {67398, R_00B078_SPI_SHADER_USER_DATA_PS_18},
- {67425, R_00B07C_SPI_SHADER_USER_DATA_PS_19},
- {67452, R_00B080_SPI_SHADER_USER_DATA_PS_20},
- {67479, R_00B084_SPI_SHADER_USER_DATA_PS_21},
- {67506, R_00B088_SPI_SHADER_USER_DATA_PS_22},
- {67533, R_00B08C_SPI_SHADER_USER_DATA_PS_23},
- {67560, R_00B090_SPI_SHADER_USER_DATA_PS_24},
- {67587, R_00B094_SPI_SHADER_USER_DATA_PS_25},
- {67614, R_00B098_SPI_SHADER_USER_DATA_PS_26},
- {67641, R_00B09C_SPI_SHADER_USER_DATA_PS_27},
- {67668, R_00B0A0_SPI_SHADER_USER_DATA_PS_28},
- {67695, R_00B0A4_SPI_SHADER_USER_DATA_PS_29},
- {67722, R_00B0A8_SPI_SHADER_USER_DATA_PS_30},
- {67749, R_00B0AC_SPI_SHADER_USER_DATA_PS_31},
- {67776, R_00B170_SPI_SHADER_USER_DATA_VS_16},
- {67803, R_00B174_SPI_SHADER_USER_DATA_VS_17},
- {67830, R_00B178_SPI_SHADER_USER_DATA_VS_18},
- {67857, R_00B17C_SPI_SHADER_USER_DATA_VS_19},
- {67884, R_00B180_SPI_SHADER_USER_DATA_VS_20},
- {67911, R_00B184_SPI_SHADER_USER_DATA_VS_21},
- {67938, R_00B188_SPI_SHADER_USER_DATA_VS_22},
- {67965, R_00B18C_SPI_SHADER_USER_DATA_VS_23},
- {67992, R_00B190_SPI_SHADER_USER_DATA_VS_24},
- {68019, R_00B194_SPI_SHADER_USER_DATA_VS_25},
- {68046, R_00B198_SPI_SHADER_USER_DATA_VS_26},
- {68073, R_00B19C_SPI_SHADER_USER_DATA_VS_27},
- {68100, R_00B1A0_SPI_SHADER_USER_DATA_VS_28},
- {68127, R_00B1A4_SPI_SHADER_USER_DATA_VS_29},
- {68154, R_00B1A8_SPI_SHADER_USER_DATA_VS_30},
- {68181, R_00B1AC_SPI_SHADER_USER_DATA_VS_31},
- {68208, R_00B1F0_SPI_SHADER_PGM_RSRC2_GS_VS, 9, 2300},
- {68235, R_00B204_SPI_SHADER_PGM_RSRC4_GS, 2, 2309},
- {68259, R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS},
- {68291, R_00B20C_SPI_SHADER_USER_DATA_ADDR_HI_GS},
- {46106, R_00B210_SPI_SHADER_PGM_LO_ES},
- {46127, R_00B214_SPI_SHADER_PGM_HI_ES, 1, 2311},
- {68323, R_00B370_SPI_SHADER_USER_DATA_ES_16},
- {68350, R_00B374_SPI_SHADER_USER_DATA_ES_17},
- {68377, R_00B378_SPI_SHADER_USER_DATA_ES_18},
- {68404, R_00B37C_SPI_SHADER_USER_DATA_ES_19},
- {68431, R_00B380_SPI_SHADER_USER_DATA_ES_20},
- {68458, R_00B384_SPI_SHADER_USER_DATA_ES_21},
- {68485, R_00B388_SPI_SHADER_USER_DATA_ES_22},
- {68512, R_00B38C_SPI_SHADER_USER_DATA_ES_23},
- {68539, R_00B390_SPI_SHADER_USER_DATA_ES_24},
- {68566, R_00B394_SPI_SHADER_USER_DATA_ES_25},
- {68593, R_00B398_SPI_SHADER_USER_DATA_ES_26},
- {68620, R_00B39C_SPI_SHADER_USER_DATA_ES_27},
- {68647, R_00B3A0_SPI_SHADER_USER_DATA_ES_28},
- {68674, R_00B3A4_SPI_SHADER_USER_DATA_ES_29},
- {68701, R_00B3A8_SPI_SHADER_USER_DATA_ES_30},
- {68728, R_00B3AC_SPI_SHADER_USER_DATA_ES_31},
- {68755, R_00B404_SPI_SHADER_PGM_RSRC4_HS, 1, 2312},
- {68779, R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS},
- {68811, R_00B40C_SPI_SHADER_USER_DATA_ADDR_HI_HS},
- {47346, R_00B410_SPI_SHADER_PGM_LO_LS},
- {47367, R_00B414_SPI_SHADER_PGM_HI_LS, 1, 2313},
- {47436, R_00B430_SPI_SHADER_USER_DATA_LS_0},
- {47462, R_00B434_SPI_SHADER_USER_DATA_LS_1},
- {47488, R_00B438_SPI_SHADER_USER_DATA_LS_2},
- {47514, R_00B43C_SPI_SHADER_USER_DATA_LS_3},
- {47540, R_00B440_SPI_SHADER_USER_DATA_LS_4},
- {47566, R_00B444_SPI_SHADER_USER_DATA_LS_5},
- {47592, R_00B448_SPI_SHADER_USER_DATA_LS_6},
- {47618, R_00B44C_SPI_SHADER_USER_DATA_LS_7},
- {47644, R_00B450_SPI_SHADER_USER_DATA_LS_8},
- {47670, R_00B454_SPI_SHADER_USER_DATA_LS_9},
- {47696, R_00B458_SPI_SHADER_USER_DATA_LS_10},
- {47723, R_00B45C_SPI_SHADER_USER_DATA_LS_11},
- {47750, R_00B460_SPI_SHADER_USER_DATA_LS_12},
- {47777, R_00B464_SPI_SHADER_USER_DATA_LS_13},
- {47804, R_00B468_SPI_SHADER_USER_DATA_LS_14},
- {47831, R_00B46C_SPI_SHADER_USER_DATA_LS_15},
- {68843, R_00B470_SPI_SHADER_USER_DATA_LS_16},
- {68870, R_00B474_SPI_SHADER_USER_DATA_LS_17},
- {68897, R_00B478_SPI_SHADER_USER_DATA_LS_18},
- {68924, R_00B47C_SPI_SHADER_USER_DATA_LS_19},
- {68951, R_00B480_SPI_SHADER_USER_DATA_LS_20},
- {68978, R_00B484_SPI_SHADER_USER_DATA_LS_21},
- {69005, R_00B488_SPI_SHADER_USER_DATA_LS_22},
- {69032, R_00B48C_SPI_SHADER_USER_DATA_LS_23},
- {69059, R_00B490_SPI_SHADER_USER_DATA_LS_24},
- {69086, R_00B494_SPI_SHADER_USER_DATA_LS_25},
- {69113, R_00B498_SPI_SHADER_USER_DATA_LS_26},
- {69140, R_00B49C_SPI_SHADER_USER_DATA_LS_27},
- {69167, R_00B4A0_SPI_SHADER_USER_DATA_LS_28},
- {69194, R_00B4A4_SPI_SHADER_USER_DATA_LS_29},
- {69221, R_00B4A8_SPI_SHADER_USER_DATA_LS_30},
- {69248, R_00B4AC_SPI_SHADER_USER_DATA_LS_31},
- {69275, R_00B530_SPI_SHADER_USER_DATA_COMMON_0},
- {69305, R_00B534_SPI_SHADER_USER_DATA_COMMON_1},
- {69335, R_00B538_SPI_SHADER_USER_DATA_COMMON_2},
- {69365, R_00B53C_SPI_SHADER_USER_DATA_COMMON_3},
- {69395, R_00B540_SPI_SHADER_USER_DATA_COMMON_4},
- {69425, R_00B544_SPI_SHADER_USER_DATA_COMMON_5},
- {69455, R_00B548_SPI_SHADER_USER_DATA_COMMON_6},
- {69485, R_00B54C_SPI_SHADER_USER_DATA_COMMON_7},
- {69515, R_00B550_SPI_SHADER_USER_DATA_COMMON_8},
- {69545, R_00B554_SPI_SHADER_USER_DATA_COMMON_9},
- {69575, R_00B558_SPI_SHADER_USER_DATA_COMMON_10},
- {69606, R_00B55C_SPI_SHADER_USER_DATA_COMMON_11},
- {69637, R_00B560_SPI_SHADER_USER_DATA_COMMON_12},
- {69668, R_00B564_SPI_SHADER_USER_DATA_COMMON_13},
- {69699, R_00B568_SPI_SHADER_USER_DATA_COMMON_14},
- {69730, R_00B56C_SPI_SHADER_USER_DATA_COMMON_15},
- {69761, R_00B570_SPI_SHADER_USER_DATA_COMMON_16},
- {69792, R_00B574_SPI_SHADER_USER_DATA_COMMON_17},
- {69823, R_00B578_SPI_SHADER_USER_DATA_COMMON_18},
- {69854, R_00B57C_SPI_SHADER_USER_DATA_COMMON_19},
- {69885, R_00B580_SPI_SHADER_USER_DATA_COMMON_20},
- {69916, R_00B584_SPI_SHADER_USER_DATA_COMMON_21},
- {69947, R_00B588_SPI_SHADER_USER_DATA_COMMON_22},
- {69978, R_00B58C_SPI_SHADER_USER_DATA_COMMON_23},
- {70009, R_00B590_SPI_SHADER_USER_DATA_COMMON_24},
- {70040, R_00B594_SPI_SHADER_USER_DATA_COMMON_25},
- {70071, R_00B598_SPI_SHADER_USER_DATA_COMMON_26},
- {70102, R_00B59C_SPI_SHADER_USER_DATA_COMMON_27},
- {70133, R_00B5A0_SPI_SHADER_USER_DATA_COMMON_28},
- {70164, R_00B5A4_SPI_SHADER_USER_DATA_COMMON_29},
- {70195, R_00B5A8_SPI_SHADER_USER_DATA_COMMON_30},
- {70226, R_00B5AC_SPI_SHADER_USER_DATA_COMMON_31},
- {70257, R_00B838_COMPUTE_DISPATCH_PKT_ADDR_LO},
- {70286, R_00B83C_COMPUTE_DISPATCH_PKT_ADDR_HI, 1, 2314},
- {70315, R_00B840_COMPUTE_DISPATCH_SCRATCH_BASE_LO},
- {70348, R_00B844_COMPUTE_DISPATCH_SCRATCH_BASE_HI, 1, 2315},
- {70381, R_034030_CPF_LATENCY_STATS_DATA},
- {70404, R_034034_CPG_LATENCY_STATS_DATA},
- {70427, R_034038_CPC_LATENCY_STATS_DATA},
- {70450, R_036028_CPF_TC_PERF_COUNTER_WINDOW_SELECT, 3, 2316},
- {70484, R_03602C_CPG_TC_PERF_COUNTER_WINDOW_SELECT, 3, 2319},
- {70518, R_036030_CPF_LATENCY_STATS_SELECT, 3, 2322},
- {70543, R_036034_CPG_LATENCY_STATS_SELECT, 3, 2325},
- {70568, R_036038_CPC_LATENCY_STATS_SELECT, 3, 2328},
- {70593, R_028018_DB_HTILE_DATA_BASE_HI, 1, 2331},
- {56008, R_02801C_DB_DEPTH_SIZE, 2, 2332},
- {55908, R_028038_DB_Z_INFO, 13, 2334},
- {55918, R_02803C_DB_STENCIL_INFO, 8, 2347},
- {55934, R_028040_DB_Z_READ_BASE},
- {70615, R_028044_DB_Z_READ_BASE_HI, 1, 2355},
- {55949, R_028048_DB_STENCIL_READ_BASE},
- {70633, R_02804C_DB_STENCIL_READ_BASE_HI, 1, 2356},
- {70657, R_028054_DB_Z_WRITE_BASE_HI, 1, 2357},
- {55986, R_028058_DB_STENCIL_WRITE_BASE},
- {70676, R_02805C_DB_STENCIL_WRITE_BASE_HI, 1, 2358},
- {70701, R_028060_DB_DFSM_CONTROL, 3, 2359},
- {70717, R_028064_DB_RENDER_FILTER, 1, 2362},
- {70734, R_028068_DB_Z_INFO2, 1, 2363},
- {70745, R_02806C_DB_STENCIL_INFO2, 1, 2364},
- {70762, R_02835C_PA_SC_TILE_STEERING_OVERRIDE, 4, 2365},
- {70791, R_028360_CP_PERFMON_CNTX_CNTL, 1, 2369},
- {70812, R_0283A0_PA_SC_RIGHT_VERT_GRID, 4, 2370},
- {70834, R_0283A4_PA_SC_LEFT_VERT_GRID, 4, 2374},
- {70855, R_0283A8_PA_SC_HORIZ_GRID, 4, 2378},
- {70872, R_0283AC_PA_SC_FOV_WINDOW_LR, 4, 2382},
- {70892, R_0283B0_PA_SC_FOV_WINDOW_TB, 2, 2386},
- {70912, R_0287A0_CB_MRT0_EPITCH, 1, 2388},
- {70927, R_0287A4_CB_MRT1_EPITCH, 1, 2389},
- {70942, R_0287A8_CB_MRT2_EPITCH, 1, 2390},
- {70957, R_0287AC_CB_MRT3_EPITCH, 1, 2391},
- {70972, R_0287B0_CB_MRT4_EPITCH, 1, 2392},
- {70987, R_0287B4_CB_MRT5_EPITCH, 1, 2393},
- {71002, R_0287B8_CB_MRT6_EPITCH, 1, 2394},
- {71017, R_0287BC_CB_MRT7_EPITCH, 1, 2395},
- {71032, R_028834_PA_CL_OBJPRIM_ID_CNTL, 3, 2396},
- {71054, R_028838_PA_CL_NGG_CNTL, 2, 2399},
- {71069, R_02883C_PA_SU_OVER_RASTERIZATION_CNTL, 5, 2401},
- {71099, R_028A94_VGT_GS_MAX_PRIMS_PER_SUBGROUP, 1, 2406},
- {71129, R_028A98_VGT_DRAW_PAYLOAD_CNTL, 4, 2407},
- {71151, R_028A9C_VGT_INDEX_PAYLOAD_CNTL, 1, 2411},
- {71174, R_028B9C_VGT_DMA_EVENT_INITIATOR, 3, 2412},
- {71198, R_028C44_PA_SC_BINNER_CNTL_0, 10, 2415},
- {71218, R_028C48_PA_SC_BINNER_CNTL_1, 2, 2425},
- {71238, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL, 18, 2427},
- {71276, R_028C50_PA_SC_NGG_MODE_CNTL, 1, 2445},
- {71296, R_028C64_CB_COLOR0_BASE_EXT, 1, 2446},
- {71315, R_028C68_CB_COLOR0_ATTRIB2, 3, 2447},
- {71333, R_028C80_CB_COLOR0_CMASK_BASE_EXT, 1, 2450},
- {71358, R_028C88_CB_COLOR0_FMASK_BASE_EXT, 1, 2451},
- {71383, R_028C98_CB_COLOR0_DCC_BASE_EXT, 1, 2452},
- {71406, R_028CA0_CB_COLOR1_BASE_EXT, 1, 2446},
- {71425, R_028CA4_CB_COLOR1_ATTRIB2},
- {71443, R_028CBC_CB_COLOR1_CMASK_BASE_EXT, 1, 2450},
- {71468, R_028CC4_CB_COLOR1_FMASK_BASE_EXT, 1, 2451},
- {71493, R_028CD4_CB_COLOR1_DCC_BASE_EXT, 1, 2452},
- {71516, R_028CDC_CB_COLOR2_BASE_EXT, 1, 2446},
- {71535, R_028CE0_CB_COLOR2_ATTRIB2},
- {71553, R_028CF8_CB_COLOR2_CMASK_BASE_EXT, 1, 2450},
- {71578, R_028D00_CB_COLOR2_FMASK_BASE_EXT, 1, 2451},
- {71603, R_028D10_CB_COLOR2_DCC_BASE_EXT, 1, 2452},
- {71626, R_028D18_CB_COLOR3_BASE_EXT, 1, 2446},
- {71645, R_028D1C_CB_COLOR3_ATTRIB2},
- {71663, R_028D34_CB_COLOR3_CMASK_BASE_EXT, 1, 2450},
- {71688, R_028D3C_CB_COLOR3_FMASK_BASE_EXT, 1, 2451},
- {71713, R_028D4C_CB_COLOR3_DCC_BASE_EXT, 1, 2452},
- {71736, R_028D54_CB_COLOR4_BASE_EXT, 1, 2446},
- {71755, R_028D58_CB_COLOR4_ATTRIB2},
- {71773, R_028D70_CB_COLOR4_CMASK_BASE_EXT, 1, 2450},
- {71798, R_028D78_CB_COLOR4_FMASK_BASE_EXT, 1, 2451},
- {71823, R_028D88_CB_COLOR4_DCC_BASE_EXT, 1, 2452},
- {71846, R_028D90_CB_COLOR5_BASE_EXT, 1, 2446},
- {71865, R_028D94_CB_COLOR5_ATTRIB2},
- {71883, R_028DAC_CB_COLOR5_CMASK_BASE_EXT, 1, 2450},
- {71908, R_028DB4_CB_COLOR5_FMASK_BASE_EXT, 1, 2451},
- {71933, R_028DC4_CB_COLOR5_DCC_BASE_EXT, 1, 2452},
- {71956, R_028DCC_CB_COLOR6_BASE_EXT, 1, 2446},
- {71975, R_028DD0_CB_COLOR6_ATTRIB2},
- {71993, R_028DE8_CB_COLOR6_CMASK_BASE_EXT, 1, 2450},
- {72018, R_028DF0_CB_COLOR6_FMASK_BASE_EXT, 1, 2451},
- {72043, R_028E00_CB_COLOR6_DCC_BASE_EXT, 1, 2452},
- {72066, R_028E08_CB_COLOR7_BASE_EXT, 1, 2446},
- {72085, R_028E0C_CB_COLOR7_ATTRIB2},
- {72103, R_028E24_CB_COLOR7_CMASK_BASE_EXT, 1, 2450},
- {72128, R_028E2C_CB_COLOR7_FMASK_BASE_EXT, 1, 2451},
- {72153, R_028E3C_CB_COLOR7_DCC_BASE_EXT, 1, 2452},
+ {46558, S_00B12C_SCRATCH_EN(~0u)},
+ {46569, S_00B12C_USER_SGPR(~0u)},
+ {46579, S_00B12C_TRAP_PRESENT(~0u)},
+ {46668, S_00B12C_OC_LDS_EN(~0u)},
+ {46678, S_00B12C_SO_BASE0_EN(~0u)},
+ {46690, S_00B12C_SO_BASE1_EN(~0u)},
+ {46702, S_00B12C_SO_BASE2_EN(~0u)},
+ {46714, S_00B12C_SO_BASE3_EN(~0u)},
+ {46726, S_00B12C_SO_EN(~0u)},
+ {46630, S_00B12C_EXCP_EN(~0u)},
+ {69219, S_00B12C_PC_BASE_EN(~0u)},
+ {46732, S_00B12C_DISPATCH_DRAW_EN(~0u)},
+ {69193, S_00B12C_SKIP_USGPR0(~0u)},
+ {69205, S_00B12C_USER_SGPR_MSB(~0u)},
+ /* 2465 */
+ {46558, S_00B1F0_SCRATCH_EN(~0u)},
+ {46569, S_00B1F0_USER_SGPR(~0u)},
+ {46579, S_00B1F0_TRAP_PRESENT(~0u)},
+ {46630, S_00B1F0_EXCP_EN(~0u)},
+ {46638, S_00B1F0_VGPR_COMP_CNT(~0u)},
+ {46668, S_00B1F0_OC_LDS_EN(~0u)},
+ {46610, S_00B1F0_LDS_SIZE(~0u)},
+ {69193, S_00B1F0_SKIP_USGPR0(~0u)},
+ {69205, S_00B1F0_USER_SGPR_MSB(~0u)},
+ /* 2474 */
+ {46749, S_00B204_GROUP_FIFO_DEPTH(~0u)},
+ {69230, S_00B204_SPI_SHADER_LATE_ALLOC_GS(~0u)},
+ /* 2476 */
+ {46379, S_00B214_MEM_BASE(~0u)},
+ /* 2477 */
+ {43975, S_00B21C_CU_EN(~0u)},
+ {46388, S_00B21C_WAVE_LIMIT(~0u)},
+ {46399, S_00B21C_LOCK_LOW_THRESHOLD(~0u)},
+ {69123, S_00B21C_SIMD_DISABLE(~0u)},
+ /* 2481 */
+ {46418, S_00B228_VGPRS(~0u)},
+ {46424, S_00B228_SGPRS(~0u)},
+ {2676, S_00B228_PRIORITY(~0u)},
+ {46473, S_00B228_FLOAT_MODE(~0u)},
+ {46484, S_00B228_PRIV(~0u)},
+ {46489, S_00B228_DX10_CLAMP(~0u)},
+ {46500, S_00B228_DEBUG_MODE(~0u)},
+ {46511, S_00B228_IEEE_MODE(~0u)},
+ {46652, S_00B228_CU_GROUP_ENABLE(~0u)},
+ {46548, S_00B228_CDBG_USER(~0u)},
+ {69255, S_00B228_GS_VGPR_COMP_CNT(~0u)},
+ {69136, S_00B228_FP16_OVFL(~0u)},
+ /* 2493 */
+ {46558, S_00B22C_SCRATCH_EN(~0u)},
+ {46569, S_00B22C_USER_SGPR(~0u)},
+ {46579, S_00B22C_TRAP_PRESENT(~0u)},
+ {46630, S_00B22C_EXCP_EN(~0u)},
+ {69272, S_00B22C_ES_VGPR_COMP_CNT(~0u)},
+ {46668, S_00B22C_OC_LDS_EN(~0u)},
+ {46610, S_00B22C_LDS_SIZE(~0u)},
+ {69193, S_00B22C_SKIP_USGPR0(~0u)},
+ {69205, S_00B22C_USER_SGPR_MSB(~0u)},
+ /* 2502 */
+ {46749, S_00B404_GROUP_FIFO_DEPTH(~0u)},
+ /* 2503 */
+ {46379, S_00B414_MEM_BASE(~0u)},
+ /* 2504 */
+ {46388, S_00B41C_WAVE_LIMIT(~0u)},
+ {46399, S_00B41C_LOCK_LOW_THRESHOLD(~0u)},
+ {69123, S_00B41C_SIMD_DISABLE(~0u)},
+ {43975, S_00B41C_CU_EN(~0u)},
+ /* 2508 */
+ {46418, S_00B428_VGPRS(~0u)},
+ {46424, S_00B428_SGPRS(~0u)},
+ {2676, S_00B428_PRIORITY(~0u)},
+ {46473, S_00B428_FLOAT_MODE(~0u)},
+ {46484, S_00B428_PRIV(~0u)},
+ {46489, S_00B428_DX10_CLAMP(~0u)},
+ {46500, S_00B428_DEBUG_MODE(~0u)},
+ {46511, S_00B428_IEEE_MODE(~0u)},
+ {46548, S_00B428_CDBG_USER(~0u)},
+ {69289, S_00B428_LS_VGPR_COMP_CNT(~0u)},
+ {69136, S_00B428_FP16_OVFL(~0u)},
+ /* 2519 */
+ {46558, S_00B42C_SCRATCH_EN(~0u)},
+ {46569, S_00B42C_USER_SGPR(~0u)},
+ {46579, S_00B42C_TRAP_PRESENT(~0u)},
+ {46630, S_00B42C_EXCP_EN(~0u)},
+ {46610, S_00B42C_LDS_SIZE(~0u)},
+ {69193, S_00B42C_SKIP_USGPR0(~0u)},
+ {69205, S_00B42C_USER_SGPR_MSB(~0u)},
+ /* 2526 */
+ {46792, S_00B800_COMPUTE_SHADER_EN(~0u)},
+ {46810, S_00B800_PARTIAL_TG_EN(~0u)},
+ {46824, S_00B800_FORCE_START_AT_000(~0u)},
+ {46843, S_00B800_ORDERED_APPEND_ENBL(~0u)},
+ {46863, S_00B800_ORDERED_APPEND_MODE(~0u)},
+ {46883, S_00B800_USE_THREAD_DIMENSIONS(~0u)},
+ {46905, S_00B800_ORDER_MODE(~0u)},
+ {46936, S_00B800_SCALAR_L1_INV_VOL(~0u)},
+ {46954, S_00B800_VECTOR_L1_INV_VOL(~0u)},
+ {8050, S_00B800_RESERVED(~0u)},
+ {46981, S_00B800_RESTORE(~0u)},
+ /* 2537 */
+ {46418, S_00B848_VGPRS(~0u)},
+ {46424, S_00B848_SGPRS(~0u)},
+ {2676, S_00B848_PRIORITY(~0u)},
+ {46473, S_00B848_FLOAT_MODE(~0u)},
+ {46484, S_00B848_PRIV(~0u)},
+ {46489, S_00B848_DX10_CLAMP(~0u)},
+ {46500, S_00B848_DEBUG_MODE(~0u)},
+ {46511, S_00B848_IEEE_MODE(~0u)},
+ {47033, S_00B848_BULKY(~0u)},
+ {46548, S_00B848_CDBG_USER(~0u)},
+ {69136, S_00B848_FP16_OVFL(~0u)},
+ /* 2548 */
+ {46558, S_00B84C_SCRATCH_EN(~0u)},
+ {46569, S_00B84C_USER_SGPR(~0u)},
+ {46579, S_00B84C_TRAP_PRESENT(~0u)},
+ {47039, S_00B84C_TGID_X_EN(~0u)},
+ {47049, S_00B84C_TGID_Y_EN(~0u)},
+ {47059, S_00B84C_TGID_Z_EN(~0u)},
+ {46766, S_00B84C_TG_SIZE_EN(~0u)},
+ {47069, S_00B84C_TIDIG_COMP_CNT(~0u)},
+ {47084, S_00B84C_EXCP_EN_MSB(~0u)},
+ {46610, S_00B84C_LDS_SIZE(~0u)},
+ {46630, S_00B84C_EXCP_EN(~0u)},
+ {69193, S_00B84C_SKIP_USGPR0(~0u)},
+ /* 2560 */
+ {47112, S_00B854_WAVES_PER_SH(~0u)},
+ {47125, S_00B854_TG_PER_CU(~0u)},
+ {47135, S_00B854_LOCK_THRESHOLD(~0u)},
+ {47150, S_00B854_SIMD_DEST_CNTL(~0u)},
+ {47165, S_00B854_FORCE_SIMD_DIST(~0u)},
+ {47181, S_00B854_CU_GROUP_COUNT(~0u)},
+ {69123, S_00B854_SIMD_DISABLE(~0u)},
+ /* 2567 */
+ {69306, S_036008_CNTR_SEL0(~0u)},
+ {69316, S_036008_CNTR_SEL1(~0u)},
+ {48114, S_036008_SPM_MODE(~0u)},
+ {69326, S_036008_CNTR_MODE1(~0u)},
+ {69337, S_036008_CNTR_MODE0(~0u)},
+ /* 2572 */
+ {69348, S_036004_CNTR_SEL2(~0u)},
+ {69358, S_036004_CNTR_SEL3(~0u)},
+ {69368, S_036004_CNTR_MODE3(~0u)},
+ {69379, S_036004_CNTR_MODE2(~0u)},
+ /* 2576 */
+ {69306, S_036024_CNTR_SEL0(~0u)},
+ {69316, S_036024_CNTR_SEL1(~0u)},
+ {48114, S_036024_SPM_MODE(~0u)},
+ {69326, S_036024_CNTR_MODE1(~0u)},
+ {69337, S_036024_CNTR_MODE0(~0u)},
+ /* 2581 */
+ {69348, S_036010_CNTR_SEL2(~0u)},
+ {69358, S_036010_CNTR_SEL3(~0u)},
+ {69368, S_036010_CNTR_MODE3(~0u)},
+ {69379, S_036010_CNTR_MODE2(~0u)},
+ /* 2585 */
+ {69306, S_03601C_CNTR_SEL0(~0u)},
+ {69316, S_03601C_CNTR_SEL1(~0u)},
+ {48114, S_03601C_SPM_MODE(~0u)},
+ {69326, S_03601C_CNTR_MODE1(~0u)},
+ {69337, S_03601C_CNTR_MODE0(~0u)},
+ /* 2590 */
+ {69348, S_036018_CNTR_SEL2(~0u)},
+ {69358, S_036018_CNTR_SEL3(~0u)},
+ {69368, S_036018_CNTR_MODE3(~0u)},
+ {69379, S_036018_CNTR_MODE2(~0u)},
+ /* 2594 */
+ {441, S_036028_INDEX(~0u)},
+ {43218, S_036028_ALWAYS(~0u)},
+ {7643, S_036028_ENABLE(~0u)},
+ /* 2597 */
+ {441, S_03602C_INDEX(~0u)},
+ {43218, S_03602C_ALWAYS(~0u)},
+ {7643, S_03602C_ENABLE(~0u)},
+ /* 2600 */
+ {441, S_036030_INDEX(~0u)},
+ {15389, S_036030_CLEAR(~0u)},
+ {7643, S_036030_ENABLE(~0u)},
+ /* 2603 */
+ {441, S_036034_INDEX(~0u)},
+ {15389, S_036034_CLEAR(~0u)},
+ {7643, S_036034_ENABLE(~0u)},
+ /* 2606 */
+ {441, S_036038_INDEX(~0u)},
+ {15389, S_036038_CLEAR(~0u)},
+ {7643, S_036038_ENABLE(~0u)},
+ /* 2609 */
+ {47300, S_036100_PERF_SEL(~0u)},
+ {47470, S_036100_DB_CLEAN_USER_DEFINED_MASK(~0u)},
+ {47497, S_036100_CB_CLEAN_USER_DEFINED_MASK(~0u)},
+ {47524, S_036100_VGT_BUSY_USER_DEFINED_MASK(~0u)},
+ {47551, S_036100_TA_BUSY_USER_DEFINED_MASK(~0u)},
+ {47577, S_036100_SX_BUSY_USER_DEFINED_MASK(~0u)},
+ {47603, S_036100_SPI_BUSY_USER_DEFINED_MASK(~0u)},
+ {47630, S_036100_SC_BUSY_USER_DEFINED_MASK(~0u)},
+ {47656, S_036100_PA_BUSY_USER_DEFINED_MASK(~0u)},
+ {47682, S_036100_GRBM_BUSY_USER_DEFINED_MASK(~0u)},
+ {47710, S_036100_DB_BUSY_USER_DEFINED_MASK(~0u)},
+ {47736, S_036100_CB_BUSY_USER_DEFINED_MASK(~0u)},
+ {47762, S_036100_CP_BUSY_USER_DEFINED_MASK(~0u)},
+ {47788, S_036100_IA_BUSY_USER_DEFINED_MASK(~0u)},
+ {47814, S_036100_GDS_BUSY_USER_DEFINED_MASK(~0u)},
+ {47841, S_036100_BCI_BUSY_USER_DEFINED_MASK(~0u)},
+ {47868, S_036100_RLC_BUSY_USER_DEFINED_MASK(~0u)},
+ {47895, S_036100_TC_BUSY_USER_DEFINED_MASK(~0u)},
+ {47921, S_036100_WD_BUSY_USER_DEFINED_MASK(~0u)},
+ {69390, S_036100_UTCL2_BUSY_USER_DEFINED_MASK(~0u)},
+ {69419, S_036100_EA_BUSY_USER_DEFINED_MASK(~0u)},
+ {69445, S_036100_RMI_BUSY_USER_DEFINED_MASK(~0u)},
+ /* 2631 */
+ {47300, S_036108_PERF_SEL(~0u)},
+ {47470, S_036108_DB_CLEAN_USER_DEFINED_MASK(~0u)},
+ {47497, S_036108_CB_CLEAN_USER_DEFINED_MASK(~0u)},
+ {47551, S_036108_TA_BUSY_USER_DEFINED_MASK(~0u)},
+ {47577, S_036108_SX_BUSY_USER_DEFINED_MASK(~0u)},
+ {47603, S_036108_SPI_BUSY_USER_DEFINED_MASK(~0u)},
+ {47630, S_036108_SC_BUSY_USER_DEFINED_MASK(~0u)},
+ {47710, S_036108_DB_BUSY_USER_DEFINED_MASK(~0u)},
+ {47736, S_036108_CB_BUSY_USER_DEFINED_MASK(~0u)},
+ {47524, S_036108_VGT_BUSY_USER_DEFINED_MASK(~0u)},
+ {47656, S_036108_PA_BUSY_USER_DEFINED_MASK(~0u)},
+ {47841, S_036108_BCI_BUSY_USER_DEFINED_MASK(~0u)},
+ {69445, S_036108_RMI_BUSY_USER_DEFINED_MASK(~0u)},
+ /* 2644 */
+ {47300, S_03610C_PERF_SEL(~0u)},
+ {47470, S_03610C_DB_CLEAN_USER_DEFINED_MASK(~0u)},
+ {47497, S_03610C_CB_CLEAN_USER_DEFINED_MASK(~0u)},
+ {47551, S_03610C_TA_BUSY_USER_DEFINED_MASK(~0u)},
+ {47577, S_03610C_SX_BUSY_USER_DEFINED_MASK(~0u)},
+ {47603, S_03610C_SPI_BUSY_USER_DEFINED_MASK(~0u)},
+ {47630, S_03610C_SC_BUSY_USER_DEFINED_MASK(~0u)},
+ {47710, S_03610C_DB_BUSY_USER_DEFINED_MASK(~0u)},
+ {47736, S_03610C_CB_BUSY_USER_DEFINED_MASK(~0u)},
+ {47524, S_03610C_VGT_BUSY_USER_DEFINED_MASK(~0u)},
+ {47656, S_03610C_PA_BUSY_USER_DEFINED_MASK(~0u)},
+ {47841, S_03610C_BCI_BUSY_USER_DEFINED_MASK(~0u)},
+ {69445, S_03610C_RMI_BUSY_USER_DEFINED_MASK(~0u)},
+ /* 2657 */
+ {47300, S_036110_PERF_SEL(~0u)},
+ {47470, S_036110_DB_CLEAN_USER_DEFINED_MASK(~0u)},
+ {47497, S_036110_CB_CLEAN_USER_DEFINED_MASK(~0u)},
+ {47551, S_036110_TA_BUSY_USER_DEFINED_MASK(~0u)},
+ {47577, S_036110_SX_BUSY_USER_DEFINED_MASK(~0u)},
+ {47603, S_036110_SPI_BUSY_USER_DEFINED_MASK(~0u)},
+ {47630, S_036110_SC_BUSY_USER_DEFINED_MASK(~0u)},
+ {47710, S_036110_DB_BUSY_USER_DEFINED_MASK(~0u)},
+ {47736, S_036110_CB_BUSY_USER_DEFINED_MASK(~0u)},
+ {47524, S_036110_VGT_BUSY_USER_DEFINED_MASK(~0u)},
+ {47656, S_036110_PA_BUSY_USER_DEFINED_MASK(~0u)},
+ {47841, S_036110_BCI_BUSY_USER_DEFINED_MASK(~0u)},
+ {69445, S_036110_RMI_BUSY_USER_DEFINED_MASK(~0u)},
+ /* 2670 */
+ {47300, S_036114_PERF_SEL(~0u)},
+ {47470, S_036114_DB_CLEAN_USER_DEFINED_MASK(~0u)},
+ {47497, S_036114_CB_CLEAN_USER_DEFINED_MASK(~0u)},
+ {47551, S_036114_TA_BUSY_USER_DEFINED_MASK(~0u)},
+ {47577, S_036114_SX_BUSY_USER_DEFINED_MASK(~0u)},
+ {47603, S_036114_SPI_BUSY_USER_DEFINED_MASK(~0u)},
+ {47630, S_036114_SC_BUSY_USER_DEFINED_MASK(~0u)},
+ {47710, S_036114_DB_BUSY_USER_DEFINED_MASK(~0u)},
+ {47736, S_036114_CB_BUSY_USER_DEFINED_MASK(~0u)},
+ {47524, S_036114_VGT_BUSY_USER_DEFINED_MASK(~0u)},
+ {47656, S_036114_PA_BUSY_USER_DEFINED_MASK(~0u)},
+ {47841, S_036114_BCI_BUSY_USER_DEFINED_MASK(~0u)},
+ {69445, S_036114_RMI_BUSY_USER_DEFINED_MASK(~0u)},
+ /* 2683 */
+ {47300, S_036600_PERF_SEL(~0u)},
+ {47309, S_036600_PERF_SEL1(~0u)},
+ {47319, S_036600_CNTR_MODE(~0u)},
+ {47957, S_036600_PERF_MODE1(~0u)},
+ {47947, S_036600_PERF_MODE(~0u)},
+ /* 2688 */
+ {47329, S_036610_PERF_SEL2(~0u)},
+ {47339, S_036610_PERF_SEL3(~0u)},
+ {47968, S_036610_PERF_MODE3(~0u)},
+ {47979, S_036610_PERF_MODE2(~0u)},
+ /* 2692 */
+ {48842, S_028008_SLICE_START(~0u)},
+ {48854, S_028008_SLICE_MAX(~0u)},
+ {48864, S_028008_Z_READ_ONLY(~0u)},
+ {48876, S_028008_STENCIL_READ_ONLY(~0u)},
+ {69472, S_028008_MIPID(~0u)},
+ /* 2697 */
+ {49493, S_028010_PARTIAL_SQUAD_LAUNCH_CONTROL(~0u), 4, 678},
+ {49522, S_028010_PARTIAL_SQUAD_LAUNCH_COUNTDOWN(~0u)},
+ {49553, S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(~0u)},
+ {49589, S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(~0u)},
+ {49624, S_028010_DISABLE_COLOR_ON_VALIDATION(~0u)},
+ {49652, S_028010_DECOMPRESS_Z_ON_FLUSH(~0u)},
+ {49674, S_028010_DISABLE_REG_SNOOP(~0u)},
+ {49692, S_028010_DEPTH_BOUNDS_HIER_DEPTH_DISABLE(~0u)},
+ {49724, S_028010_SEPARATE_HIZS_FUNC_ENABLE(~0u)},
+ {49750, S_028010_HIZ_ZFUNC(~0u)},
+ {49760, S_028010_HIS_SFUNC_FF(~0u)},
+ {49773, S_028010_HIS_SFUNC_BF(~0u)},
+ {49786, S_028010_PRESERVE_ZRANGE(~0u)},
+ {49802, S_028010_PRESERVE_SRESULTS(~0u)},
+ {49820, S_028010_DISABLE_FAST_PASS(~0u)},
+ {69478, S_028010_ALLOW_PARTIAL_RES_HIER_KILL(~0u)},
+ /* 2713 */
+ {880, S_028018_BASE_HI(~0u)},
+ /* 2714 */
+ {69506, S_02801C_X_MAX(~0u)},
+ {69512, S_02801C_Y_MAX(~0u)},
+ /* 2716 */
+ {21132, S_028038_FORMAT(~0u), 4, 707},
+ {50010, S_028038_NUM_SAMPLES(~0u)},
+ {68503, S_028038_SW_MODE(~0u)},
+ {69518, S_028038_PARTIALLY_RESIDENT(~0u)},
+ {69537, S_028038_FAULT_BEHAVIOR(~0u)},
+ {69552, S_028038_ITERATE_FLUSH(~0u)},
+ {69566, S_028038_MAXMIP(~0u)},
+ {50038, S_028038_DECOMPRESS_ON_N_ZPLANES(~0u)},
+ {50062, S_028038_ALLOW_EXPCLEAR(~0u)},
+ {50077, S_028038_READ_SIZE(~0u)},
+ {50087, S_028038_TILE_SURFACE_ENABLE(~0u)},
+ {50107, S_028038_CLEAR_DISALLOWED(~0u)},
+ {50124, S_028038_ZRANGE_PRECISION(~0u)},
+ /* 2729 */
+ {21132, S_02803C_FORMAT(~0u), 2, 711},
+ {68503, S_02803C_SW_MODE(~0u)},
+ {69518, S_02803C_PARTIALLY_RESIDENT(~0u)},
+ {69537, S_02803C_FAULT_BEHAVIOR(~0u)},
+ {69552, S_02803C_ITERATE_FLUSH(~0u)},
+ {50062, S_02803C_ALLOW_EXPCLEAR(~0u)},
+ {50167, S_02803C_TILE_STENCIL_DISABLE(~0u)},
+ {50107, S_02803C_CLEAR_DISALLOWED(~0u)},
+ /* 2737 */
+ {880, S_028044_BASE_HI(~0u)},
+ /* 2738 */
+ {880, S_02804C_BASE_HI(~0u)},
+ /* 2739 */
+ {880, S_028054_BASE_HI(~0u)},
+ /* 2740 */
+ {880, S_02805C_BASE_HI(~0u)},
+ /* 2741 */
+ {69582, S_028060_PUNCHOUT_MODE(~0u), 4, 1507},
+ {69596, S_028060_POPS_DRAIN_PS_ON_OVERLAP(~0u)},
+ {69621, S_028060_DISALLOW_OVERFLOW(~0u)},
+ /* 2744 */
+ {69639, S_028064_PS_INVOKE_MASK(~0u)},
+ /* 2745 */
+ {30484, S_028068_EPITCH(~0u)},
+ /* 2746 */
+ {30484, S_02806C_EPITCH(~0u)},
+ /* 2747 */
+ {69654, S_0281E8_DEST_BASE_HI_256B(~0u)},
+ /* 2748 */
+ {69654, S_0281EC_DEST_BASE_HI_256B(~0u)},
+ /* 2749 */
+ {69654, S_0281F0_DEST_BASE_HI_256B(~0u)},
+ /* 2750 */
+ {69654, S_0281F4_DEST_BASE_HI_256B(~0u)},
+ /* 2751 */
+ {50727, S_028350_RB_MAP_PKR0(~0u), 4, 713},
+ {50739, S_028350_RB_MAP_PKR1(~0u), 4, 713},
+ {50851, S_028350_RB_XSEL2(~0u), 4, 717},
+ {50860, S_028350_RB_XSEL(~0u)},
+ {50868, S_028350_RB_YSEL(~0u)},
+ {50972, S_028350_PKR_MAP(~0u), 4, 721},
+ {51080, S_028350_PKR_XSEL(~0u), 4, 725},
+ {51189, S_028350_PKR_YSEL(~0u), 4, 729},
+ {51302, S_028350_PKR_XSEL2(~0u), 4, 733},
+ {51404, S_028350_SC_MAP(~0u), 4, 737},
+ {51550, S_028350_SC_XSEL(~0u), 4, 741},
+ {51697, S_028350_SC_YSEL(~0u), 4, 745},
+ {51797, S_028350_SE_MAP(~0u), 4, 749},
+ {69672, S_028350_SE_XSEL_GFX9(~0u)},
+ {69685, S_028350_SE_YSEL_GFX9(~0u)},
+ /* 2766 */
+ {52220, S_028354_SE_PAIR_MAP(~0u), 4, 761},
+ {69698, S_028354_SE_PAIR_XSEL_GFX9(~0u)},
+ {69716, S_028354_SE_PAIR_YSEL_GFX9(~0u)},
+ /* 2769 */
+ {7643, S_02835C_ENABLE(~0u)},
+ {69734, S_02835C_NUM_SE(~0u)},
+ {69099, S_02835C_NUM_RB_PER_SE(~0u)},
+ {69741, S_02835C_DISABLE_SRBSL_DB_OPTIMIZED_PACKING(~0u)},
+ /* 2773 */
+ {69776, S_028360_PERFMON_ENABLE(~0u)},
+ /* 2774 */
+ {69791, S_0283A0_LEFT_QTR(~0u)},
+ {69800, S_0283A0_LEFT_HALF(~0u)},
+ {69810, S_0283A0_RIGHT_HALF(~0u)},
+ {69821, S_0283A0_RIGHT_QTR(~0u)},
+ /* 2778 */
+ {69791, S_0283A4_LEFT_QTR(~0u)},
+ {69800, S_0283A4_LEFT_HALF(~0u)},
+ {69810, S_0283A4_RIGHT_HALF(~0u)},
+ {69821, S_0283A4_RIGHT_QTR(~0u)},
+ /* 2782 */
+ {69831, S_0283A8_TOP_QTR(~0u)},
+ {69839, S_0283A8_TOP_HALF(~0u)},
+ {69848, S_0283A8_BOT_HALF(~0u)},
+ {69857, S_0283A8_BOT_QTR(~0u)},
+ /* 2786 */
+ {69865, S_0283AC_LEFT_EYE_FOV_LEFT(~0u)},
+ {69883, S_0283AC_LEFT_EYE_FOV_RIGHT(~0u)},
+ {69902, S_0283AC_RIGHT_EYE_FOV_LEFT(~0u)},
+ {69921, S_0283AC_RIGHT_EYE_FOV_RIGHT(~0u)},
+ /* 2790 */
+ {69941, S_0283B0_FOV_TOP(~0u)},
+ {69949, S_0283B0_FOV_BOT(~0u)},
+ /* 2792 */
+ {53855, S_0286D8_NUM_INTERP(~0u)},
+ {53866, S_0286D8_PARAM_GEN(~0u)},
+ {69957, S_0286D8_OFFCHIP_PARAM_EN(~0u)},
+ {69974, S_0286D8_LATE_PC_DEALLOC(~0u)},
+ {53885, S_0286D8_BC_OPTIMIZE_DISABLE(~0u)},
+ /* 2797 */
+ {30484, S_0287A0_EPITCH(~0u)},
+ /* 2798 */
+ {30484, S_0287A4_EPITCH(~0u)},
+ /* 2799 */
+ {30484, S_0287A8_EPITCH(~0u)},
+ /* 2800 */
+ {30484, S_0287AC_EPITCH(~0u)},
+ /* 2801 */
+ {30484, S_0287B0_EPITCH(~0u)},
+ /* 2802 */
+ {30484, S_0287B4_EPITCH(~0u)},
+ /* 2803 */
+ {30484, S_0287B8_EPITCH(~0u)},
+ /* 2804 */
+ {30484, S_0287BC_EPITCH(~0u)},
+ /* 2805 */
+ {69990, S_0287E4_BASE_ADDR_GFX9(~0u)},
+ /* 2806 */
+ {56739, S_0287F0_SOURCE_SELECT(~0u), 4, 880},
+ {56785, S_0287F0_MAJOR_MODE(~0u), 2, 884},
+ {70005, S_0287F0_SPRITE_EN_R6XX(~0u)},
+ {56796, S_0287F0_NOT_EOP(~0u)},
+ {56804, S_0287F0_USE_OPAQUE(~0u)},
+ {70020, S_0287F0_UNROLLED_INST(~0u)},
+ {70034, S_0287F0_GRBM_SKEW_NO_DEC(~0u)},
+ {70051, S_0287F0_REG_RT_INDEX(~0u)},
+ /* 2814 */
+ {57775, S_02880C_Z_EXPORT_ENABLE(~0u)},
+ {57791, S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(~0u)},
+ {57822, S_02880C_STENCIL_OP_VAL_EXPORT_ENABLE(~0u)},
+ {49012, S_02880C_Z_ORDER(~0u), 4, 1165},
+ {57901, S_02880C_KILL_ENABLE(~0u)},
+ {57913, S_02880C_COVERAGE_TO_MASK_ENABLE(~0u)},
+ {57937, S_02880C_MASK_EXPORT_ENABLE(~0u)},
+ {57956, S_02880C_EXEC_ON_HIER_FAIL(~0u)},
+ {57974, S_02880C_EXEC_ON_NOOP(~0u)},
+ {57987, S_02880C_ALPHA_TO_MASK_DISABLE(~0u)},
+ {58009, S_02880C_DEPTH_BEFORE_SHADER(~0u)},
+ {58099, S_02880C_CONSERVATIVE_Z_EXPORT(~0u), 4, 1169},
+ {58121, S_02880C_DUAL_QUAD_DISABLE(~0u)},
+ {70064, S_02880C_PRIMITIVE_ORDERED_PIXEL_SHADER(~0u)},
+ {70095, S_02880C_EXEC_IF_OVERLAPPED(~0u)},
+ {70114, S_02880C_POPS_OVERLAP_NUM_SAMPLES(~0u)},
+ /* 2830 */
+ {58443, S_028814_CULL_FRONT(~0u)},
+ {58454, S_028814_CULL_BACK(~0u)},
+ {22820, S_028814_FACE(~0u)},
+ {58474, S_028814_POLY_MODE(~0u), 2, 1173},
+ {58540, S_028814_POLYMODE_FRONT_PTYPE(~0u), 3, 1175},
+ {58561, S_028814_POLYMODE_BACK_PTYPE(~0u), 3, 1175},
+ {58581, S_028814_POLY_OFFSET_FRONT_ENABLE(~0u)},
+ {58606, S_028814_POLY_OFFSET_BACK_ENABLE(~0u)},
+ {58630, S_028814_POLY_OFFSET_PARA_ENABLE(~0u)},
+ {58654, S_028814_VTX_WINDOW_OFFSET_ENABLE(~0u)},
+ {58679, S_028814_PROVOKING_VTX_LAST(~0u)},
+ {58698, S_028814_PERSP_CORR_DIS(~0u)},
+ {58713, S_028814_MULTI_PRIM_IB_ENA(~0u)},
+ {70139, S_028814_RIGHT_TRIANGLE_ALTERNATE_GRADIENT_REF(~0u)},
+ {70177, S_028814_NEW_QUAD_DECOMPOSITION(~0u)},
+ /* 2845 */
+ {58731, S_028818_VPORT_X_SCALE_ENA(~0u)},
+ {58749, S_028818_VPORT_X_OFFSET_ENA(~0u)},
+ {58768, S_028818_VPORT_Y_SCALE_ENA(~0u)},
+ {58786, S_028818_VPORT_Y_OFFSET_ENA(~0u)},
+ {58805, S_028818_VPORT_Z_SCALE_ENA(~0u)},
+ {58823, S_028818_VPORT_Z_OFFSET_ENA(~0u)},
+ {58842, S_028818_VTX_XY_FMT(~0u)},
+ {58853, S_028818_VTX_Z_FMT(~0u)},
+ {58863, S_028818_VTX_W0_FMT(~0u)},
+ {70200, S_028818_PERFCOUNTER_REF(~0u)},
+ /* 2855 */
+ {58874, S_02881C_CLIP_DIST_ENA_0(~0u)},
+ {58890, S_02881C_CLIP_DIST_ENA_1(~0u)},
+ {58906, S_02881C_CLIP_DIST_ENA_2(~0u)},
+ {58922, S_02881C_CLIP_DIST_ENA_3(~0u)},
+ {58938, S_02881C_CLIP_DIST_ENA_4(~0u)},
+ {58954, S_02881C_CLIP_DIST_ENA_5(~0u)},
+ {58970, S_02881C_CLIP_DIST_ENA_6(~0u)},
+ {58986, S_02881C_CLIP_DIST_ENA_7(~0u)},
+ {59002, S_02881C_CULL_DIST_ENA_0(~0u)},
+ {59018, S_02881C_CULL_DIST_ENA_1(~0u)},
+ {59034, S_02881C_CULL_DIST_ENA_2(~0u)},
+ {59050, S_02881C_CULL_DIST_ENA_3(~0u)},
+ {59066, S_02881C_CULL_DIST_ENA_4(~0u)},
+ {59082, S_02881C_CULL_DIST_ENA_5(~0u)},
+ {59098, S_02881C_CULL_DIST_ENA_6(~0u)},
+ {59114, S_02881C_CULL_DIST_ENA_7(~0u)},
+ {59130, S_02881C_USE_VTX_POINT_SIZE(~0u)},
+ {59149, S_02881C_USE_VTX_EDGE_FLAG(~0u)},
+ {59167, S_02881C_USE_VTX_RENDER_TARGET_INDX(~0u)},
+ {59194, S_02881C_USE_VTX_VIEWPORT_INDX(~0u)},
+ {59216, S_02881C_USE_VTX_KILL_FLAG(~0u)},
+ {59234, S_02881C_VS_OUT_MISC_VEC_ENA(~0u)},
+ {59254, S_02881C_VS_OUT_CCDIST0_VEC_ENA(~0u)},
+ {59277, S_02881C_VS_OUT_CCDIST1_VEC_ENA(~0u)},
+ {59300, S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(~0u)},
+ {59325, S_02881C_USE_VTX_GS_CUT_FLAG(~0u)},
+ {59345, S_02881C_USE_VTX_LINE_WIDTH(~0u)},
+ {70216, S_02881C_USE_VTX_SHD_OBJPRIM_ID(~0u)},
+ /* 2883 */
+ {59951, S_028830_SMALL_PRIM_FILTER_ENABLE(~0u)},
+ {59723, S_028830_TRIANGLE_FILTER_DISABLE(~0u)},
+ {59747, S_028830_LINE_FILTER_DISABLE(~0u)},
+ {59767, S_028830_POINT_FILTER_DISABLE(~0u)},
+ {59788, S_028830_RECTANGLE_FILTER_DISABLE(~0u)},
+ {70239, S_028830_SRBSL_ENABLE(~0u)},
+ /* 2889 */
+ {70252, S_028834_OBJ_ID_SEL(~0u)},
+ {70263, S_028834_ADD_PIPED_PRIM_ID(~0u)},
+ {70281, S_028834_EN_32BIT_OBJPRIMID(~0u)},
+ /* 2892 */
+ {70300, S_028838_VERTEX_REUSE_OFF(~0u)},
+ {70317, S_028838_INDEX_BUF_EDGE_FLAG_ENA(~0u)},
+ /* 2894 */
+ {70341, S_02883C_DISCARD_0_AREA_TRIANGLES(~0u)},
+ {70366, S_02883C_DISCARD_0_AREA_LINES(~0u)},
+ {70387, S_02883C_DISCARD_0_AREA_POINTS(~0u)},
+ {70409, S_02883C_DISCARD_0_AREA_RECTANGLES(~0u)},
+ {70435, S_02883C_USE_PROVOKING_ZW(~0u)},
+ /* 2899 */
+ {22279, S_028A40_MODE(~0u), 6, 1215},
+ {39383, S_028A40_RESERVED_0(~0u)},
+ {60992, S_028A40_CUT_MODE(~0u), 4, 1221},
+ {39401, S_028A40_RESERVED_1(~0u)},
+ {61001, S_028A40_GS_C_PACK_EN(~0u)},
+ {61014, S_028A40_RESERVED_2(~0u)},
+ {61025, S_028A40_ES_PASSTHRU(~0u)},
+ {70452, S_028A40_RESERVED_3(~0u)},
+ {70463, S_028A40_RESERVED_4(~0u)},
+ {70474, S_028A40_RESERVED_5(~0u)},
+ {61084, S_028A40_PARTIAL_THD_AT_EOI(~0u)},
+ {61103, S_028A40_SUPPRESS_CUTS(~0u)},
+ {61117, S_028A40_ES_WRITE_OPTIMIZE(~0u)},
+ {61135, S_028A40_GS_WRITE_OPTIMIZE(~0u)},
+ {61186, S_028A40_ONCHIP(~0u), 4, 1225},
+ /* 2914 */
+ {61193, S_028A44_ES_VERTS_PER_SUBGRP(~0u)},
+ {61213, S_028A44_GS_PRIMS_PER_SUBGRP(~0u)},
+ {70485, S_028A44_GS_INST_PRIMS_IN_SUBGRP(~0u)},
+ /* 2917 */
+ {61233, S_028A48_MSAA_ENABLE(~0u)},
+ {61245, S_028A48_VPORT_SCISSOR_ENABLE(~0u)},
+ {61266, S_028A48_LINE_STIPPLE_ENABLE(~0u)},
+ {61286, S_028A48_SEND_UNLIT_STILES_TO_PKR(~0u)},
+ {70509, S_028A48_SCALE_LINE_WIDTH_PAD(~0u)},
+ {70530, S_028A48_ALTERNATE_RBS_PER_TILE(~0u)},
+ {70553, S_028A48_COARSE_TILE_STARTS_ON_EVEN_RB(~0u)},
+ /* 2924 */
+ {203, S_028A7C_INDEX_TYPE(~0u), 3, 1232},
+ {62070, S_028A7C_SWAP_MODE(~0u), 4, 1235},
+ {62131, S_028A7C_BUF_TYPE(~0u), 3, 1239},
+ {62190, S_028A7C_RDREQ_POLICY(~0u)},
+ {66942, S_028A7C_PRIMGEN_EN(~0u)},
+ {56796, S_028A7C_NOT_EOP(~0u)},
+ {62203, S_028A7C_REQ_PATH(~0u)},
+ /* 2931 */
+ {22558, S_028A84_PRIMITIVEID_EN(~0u)},
+ {62212, S_028A84_DISABLE_RESET_ON_EOI(~0u)},
+ {70583, S_028A84_NGG_DISABLE_PROVOK_REUSE(~0u)},
+ /* 2934 */
+ {63168, S_028A90_EVENT_TYPE(~0u), 63, 1511},
+ {70708, S_028A90_ADDRESS_HI_GFX9(~0u)},
+ {63195, S_028A90_EXTENDED_EVENT(~0u)},
+ /* 2937 */
+ {30670, S_028A94_MAX_PRIMS_PER_SUBGROUP(~0u)},
+ /* 2938 */
+ {70724, S_028A98_OBJPRIM_ID_EN(~0u)},
+ {70738, S_028A98_EN_REG_RT_INDEX(~0u)},
+ {70754, S_028A98_EN_PIPELINE_PRIMID(~0u)},
+ {70773, S_028A98_OBJECT_ID_INST_EN(~0u)},
+ /* 2942 */
+ {70791, S_028A9C_COMPOUND_INDEX_EN(~0u)},
+ /* 2943 */
+ {63328, S_028ABC_FULL_CACHE(~0u)},
+ {63339, S_028ABC_HTILE_USES_PRELOAD_WIN(~0u)},
+ {63362, S_028ABC_PRELOAD(~0u)},
+ {63370, S_028ABC_PREFETCH_WIDTH(~0u)},
+ {63385, S_028ABC_PREFETCH_HEIGHT(~0u)},
+ {63401, S_028ABC_DST_OUTSIDE_ZERO_TO_ONE(~0u)},
+ {68676, S_028ABC_PIPE_ALIGNED(~0u)},
+ {68694, S_028ABC_RB_ALIGNED(~0u)},
+ /* 2951 */
+ {48157, S_028B54_LS_EN(~0u), 3, 1303},
+ {48151, S_028B54_HS_EN(~0u)},
+ {23383, S_028B54_ES_EN(~0u), 3, 1306},
+ {48145, S_028B54_GS_EN(~0u)},
+ {48139, S_028B54_VS_EN(~0u), 3, 1309},
+ {46732, S_028B54_DISPATCH_DRAW_EN(~0u)},
+ {63723, S_028B54_DIS_DEALLOC_ACCUM_0(~0u)},
+ {63743, S_028B54_DIS_DEALLOC_ACCUM_1(~0u)},
+ {63763, S_028B54_VS_WAVE_ID_EN(~0u)},
+ {66942, S_028B54_PRIMGEN_EN(~0u)},
+ {70809, S_028B54_ORDERED_ID_MODE(~0u)},
+ {63308, S_028B54_MAX_PRIMGRP_IN_WAVE(~0u)},
+ {70825, S_028B54_GS_FAST_LAUNCH(~0u)},
+ /* 2964 */
+ {209, S_028B6C_TYPE(~0u), 3, 1312},
+ {63911, S_028B6C_PARTITIONING(~0u), 4, 1315},
+ {63988, S_028B6C_TOPOLOGY(~0u), 4, 1319},
+ {63997, S_028B6C_RESERVED_REDUC_AXIS(~0u)},
+ {64017, S_028B6C_DEPRECATED(~0u)},
+ {64050, S_028B6C_DISABLE_DONUTS(~0u)},
+ {62190, S_028B6C_RDREQ_POLICY(~0u)},
+ {64189, S_028B6C_DISTRIBUTION_MODE(~0u), 4, 1326},
+ /* 2972 */
+ {64385, S_028B94_STREAMOUT_0_EN(~0u)},
+ {64400, S_028B94_STREAMOUT_1_EN(~0u)},
+ {64415, S_028B94_STREAMOUT_2_EN(~0u)},
+ {64430, S_028B94_STREAMOUT_3_EN(~0u)},
+ {64445, S_028B94_RAST_STREAM(~0u)},
+ {70840, S_028B94_EN_PRIMS_NEEDED_CNT(~0u)},
+ {64457, S_028B94_RAST_STREAM_MASK(~0u)},
+ {64474, S_028B94_USE_RAST_STREAM_MASK(~0u)},
+ /* 2980 */
+ {63168, S_028B9C_EVENT_TYPE(~0u)},
+ {39318, S_028B9C_ADDRESS_HI(~0u)},
+ {63195, S_028B9C_EXTENDED_EVENT(~0u)},
+ /* 2983 */
+ {64829, S_028BE0_MSAA_NUM_SAMPLES(~0u)},
+ {64846, S_028BE0_AA_MASK_CENTROID_DTMN(~0u)},
+ {64868, S_028BE0_MAX_SAMPLE_DIST(~0u)},
+ {64884, S_028BE0_MSAA_EXPOSED_SAMPLES(~0u)},
+ {64905, S_028BE0_DETAIL_TO_EXPOSED_MODE(~0u)},
+ {70860, S_028BE0_COVERAGE_TO_SHADER_SELECT(~0u)},
+ /* 2989 */
+ {65440, S_028C40_REALIGN_DQUADS_AFTER_N_WAVES(~0u)},
+ {69146, S_028C40_LOAD_COLLISION_WAVEID(~0u)},
+ {69168, S_028C40_LOAD_INTRAWAVE_COLLISION(~0u)},
+ /* 2992 */
+ {70976, S_028C44_BINNING_MODE(~0u), 4, 1574},
+ {70989, S_028C44_BIN_SIZE_X(~0u)},
+ {71000, S_028C44_BIN_SIZE_Y(~0u)},
+ {71011, S_028C44_BIN_SIZE_X_EXTEND(~0u)},
+ {71029, S_028C44_BIN_SIZE_Y_EXTEND(~0u)},
+ {71047, S_028C44_CONTEXT_STATES_PER_BIN(~0u)},
+ {71070, S_028C44_PERSISTENT_STATES_PER_BIN(~0u)},
+ {71096, S_028C44_DISABLE_START_OF_PRIM(~0u)},
+ {71118, S_028C44_FPOVS_PER_BATCH(~0u)},
+ {71134, S_028C44_OPTIMAL_BIN_SELECTION(~0u)},
+ /* 3002 */
+ {71156, S_028C48_MAX_ALLOC_COUNT(~0u)},
+ {71172, S_028C48_MAX_PRIM_PER_BATCH(~0u)},
+ /* 3004 */
+ {71191, S_028C4C_OVER_RAST_ENABLE(~0u)},
+ {71208, S_028C4C_OVER_RAST_SAMPLE_SELECT(~0u)},
+ {71232, S_028C4C_UNDER_RAST_ENABLE(~0u)},
+ {71250, S_028C4C_UNDER_RAST_SAMPLE_SELECT(~0u)},
+ {71275, S_028C4C_PBB_UNCERTAINTY_REGION_ENABLE(~0u)},
+ {71305, S_028C4C_ZMM_TRI_EXTENT(~0u)},
+ {71320, S_028C4C_ZMM_TRI_OFFSET(~0u)},
+ {71335, S_028C4C_OVERRIDE_OVER_RAST_INNER_TO_NORMAL(~0u)},
+ {71370, S_028C4C_OVERRIDE_UNDER_RAST_INNER_TO_NORMAL(~0u)},
+ {71406, S_028C4C_DEGENERATE_OVERRIDE_INNER_TO_NORMAL_DISABLE(~0u)},
+ {71450, S_028C4C_UNCERTAINTY_REGION_MODE(~0u)},
+ {71474, S_028C4C_OUTER_UNCERTAINTY_EDGERULE_OVERRIDE(~0u)},
+ {71510, S_028C4C_INNER_UNCERTAINTY_EDGERULE_OVERRIDE(~0u)},
+ {71546, S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(~0u)},
+ {71572, S_028C4C_COVERAGE_AA_MASK_ENABLE(~0u)},
+ {71596, S_028C4C_PREZ_AA_MASK_ENABLE(~0u)},
+ {71616, S_028C4C_POSTZ_AA_MASK_ENABLE(~0u)},
+ {71637, S_028C4C_CENTROID_SAMPLE_OVERRIDE(~0u)},
+ /* 3022 */
+ {71662, S_028C50_MAX_DEALLOCS_IN_WAVE(~0u)},
+ /* 3023 */
+ {71683, S_028C64_BASE_256B(~0u)},
+ /* 3024 */
+ {71693, S_028C68_MIP0_HEIGHT(~0u)},
+ {71705, S_028C68_MIP0_WIDTH(~0u)},
+ {68705, S_028C68_MAX_MIP(~0u)},
+ /* 3027 */
+ {48842, S_028C6C_SLICE_START(~0u)},
+ {48854, S_028C6C_SLICE_MAX(~0u)},
+ {71716, S_028C6C_MIP_LEVEL(~0u)},
+ /* 3030 */
+ {65564, S_028C70_ENDIAN(~0u), 4, 1342},
+ {21132, S_028C70_FORMAT(~0u), 23, 1346},
+ {65931, S_028C70_NUMBER_TYPE(~0u), 8, 1369},
+ {65987, S_028C70_COMP_SWAP(~0u), 4, 1377},
+ {57519, S_028C70_FAST_CLEAR(~0u)},
+ {49428, S_028C70_COMPRESSION(~0u)},
+ {65997, S_028C70_BLEND_CLAMP(~0u)},
+ {66009, S_028C70_BLEND_BYPASS(~0u)},
+ {66022, S_028C70_SIMPLE_FLOAT(~0u)},
+ {64989, S_028C70_ROUND_MODE(~0u)},
+ {66262, S_028C70_BLEND_OPT_DONT_RD_DST(~0u), 8, 1381},
+ {66284, S_028C70_BLEND_OPT_DISCARD_PIXEL(~0u), 8, 1381},
+ {66308, S_028C70_FMASK_COMPRESSION_DISABLE(~0u)},
+ {66334, S_028C70_FMASK_COMPRESS_1FRAG_ONLY(~0u)},
+ {66360, S_028C70_DCC_ENABLE(~0u)},
+ {66371, S_028C70_CMASK_ADDR_TYPE(~0u)},
+ /* 3046 */
+ {71726, S_028C74_MIP0_DEPTH(~0u)},
+ {68659, S_028C74_META_LINEAR(~0u)},
+ {50010, S_028C74_NUM_SAMPLES(~0u)},
+ {66427, S_028C74_NUM_FRAGMENTS(~0u)},
+ {66441, S_028C74_FORCE_DST_ALPHA_1(~0u)},
+ {71737, S_028C74_COLOR_SW_MODE(~0u)},
+ {71751, S_028C74_FMASK_SW_MODE(~0u)},
+ {71765, S_028C74_RESOURCE_TYPE(~0u), 4, 1578},
+ {68694, S_028C74_RB_ALIGNED(~0u)},
+ {68676, S_028C74_PIPE_ALIGNED(~0u)},
+ /* 3056 */
+ {71683, S_028C80_BASE_256B(~0u)},
+ /* 3057 */
+ {71683, S_028C88_BASE_256B(~0u)},
+ /* 3058 */
+ {71683, S_028C98_BASE_256B(~0u)},
};
static const char sid_strings[] =
@@ -4564,3927 +5405,3889 @@
"IT_OPCODE_C\0" /* 799 */
"CP_DMA\0" /* 811 */
"DMA_DATA\0" /* 818 */
- "COUNT_INDIRECT_ENABLE\0" /* 842, 827 */
- "DRAW_INDEX_ENABLE\0" /* 849 */
- "PFP\0" /* 867 */
- "CE\0" /* 871 */
- "DE\0" /* 874 */
- "ENGINE_SEL\0" /* 877 */
- "WR_CONFIRM\0" /* 888 */
- "WR_ONE_ADDR\0" /* 906, 899 */
- "MEM_MAPPED_REGISTER\0" /* 922, 911 */
- "MEMORY_SYNC\0" /* 931 */
- "TC_L2\0" /* 943 */
- "GDS\0" /* 949 */
- "RESERVED\0" /* 953 */
- "MEM_ASYNC\0" /* 962 */
- "DST_SEL\0" /* 972 */
- "IB_SIZE\0" /* 980 */
- "CHAIN\0" /* 988 */
- "VALID\0" /* 994 */
- "SRC_ADDR_LO\0" /* 1000 */
- "CP_SYNC\0" /* 1012 */
- "SRC_ADDR\0" /* 1020 */
- "SRC_ADDR_TC_L2\0" /* 1029 */
- "SRC_SEL\0" /* 1044 */
- "ENGINE\0" /* 1052 */
- "DST_ADDR\0" /* 1059 */
- "NOWHERE\0" /* 1068 */
- "DST_ADDR_TC_L2\0" /* 1076 */
- "DSL_SEL\0" /* 1091 */
- "SRC_ADDR_HI\0" /* 1099 */
- "DST_ADDR_LO\0" /* 1111 */
- "DST_ADDR_HI\0" /* 1123 */
- "BYTE_COUNT_GFX6\0" /* 1135 */
- "BYTE_COUNT_GFX9\0" /* 1151 */
- "DISABLE_WR_CONFIRM_GFX6\0" /* 1167 */
- "NONE\0" /* 1191 */
- "8_IN_16\0" /* 1196 */
- "8_IN_32\0" /* 1204 */
- "8_IN_64\0" /* 1212 */
- "SRC_SWAP\0" /* 1220 */
- "DST_SWAP\0" /* 1229 */
- "MEMORY\0" /* 1238 */
- "SAS\0" /* 1245 */
- "DAS\0" /* 1249 */
- "INCREMENT\0" /* 1253 */
- "NO_INCREMENT\0" /* 1263 */
- "SAIC\0" /* 1276 */
- "DAIC\0" /* 1281 */
- "RAW_WAIT\0" /* 1286 */
- "DISABLE_WR_CONFIRM_GFX9\0" /* 1295 */
- "SDMA_RQ_PENDING\0" /* 1319 */
- "TST_RQ_PENDING\0" /* 1335 */
- "SDMA1_RQ_PENDING\0" /* 1350 */
- "VCE0_RQ_PENDING\0" /* 1367 */
- "VP8_BUSY\0" /* 1383 */
- "SDMA_BUSY\0" /* 1392, 1393 */
- "SDMA1_BUSY\0" /* 1402 */
- "VCE0_BUSY\0" /* 1413 */
- "XDMA_BUSY\0" /* 1423 */
- "CHUB_BUSY\0" /* 1433 */
- "SDMA2_BUSY\0" /* 1443 */
- "SDMA3_BUSY\0" /* 1454 */
- "SAMSCP_BUSY\0" /* 1465, 1469 */
- "ISP_BUSY\0" /* 1477 */
- "VCE1_BUSY\0" /* 1486 */
- "ODE_BUSY\0" /* 1496 */
- "SDMA2_RQ_PENDING\0" /* 1505 */
- "SDMA3_RQ_PENDING\0" /* 1522 */
- "SAMSCP_RQ_PENDING\0" /* 1539 */
- "ISP_RQ_PENDING\0" /* 1557 */
- "VCE1_RQ_PENDING\0" /* 1572 */
- "UVD_RQ_PENDING\0" /* 1588 */
- "SAMMSP_RQ_PENDING\0" /* 1603 */
- "ACP_RQ_PENDING\0" /* 1621 */
- "SMU_RQ_PENDING\0" /* 1636 */
- "GRBM_RQ_PENDING\0" /* 1651 */
- "HI_RQ_PENDING\0" /* 1667 */
- "VMC_BUSY\0" /* 1681 */
- "MCB_BUSY\0" /* 1690, 1691 */
- "MCB_NON_DISPLAY_BUSY\0" /* 1699 */
- "MCC_BUSY\0" /* 1720 */
- "MCD_BUSY\0" /* 1729 */
- "VMC1_BUSY\0" /* 1738 */
- "SEM_BUSY\0" /* 1748 */
- "ACP_BUSY\0" /* 1757 */
- "IH_BUSY\0" /* 1766 */
- "UVD_BUSY\0" /* 1774 */
- "SAMMSP_BUSY\0" /* 1783 */
- "GCATCL2_BUSY\0" /* 1795 */
- "OSATCL2_BUSY\0" /* 1808 */
- "BIF_BUSY\0" /* 1821 */
- "MCC0_BUSY\0" /* 1830 */
- "MCC1_BUSY\0" /* 1840 */
- "MCC2_BUSY\0" /* 1850 */
- "MCC3_BUSY\0" /* 1860 */
- "MCC4_BUSY\0" /* 1870 */
- "MCC5_BUSY\0" /* 1880 */
- "MCC6_BUSY\0" /* 1890 */
- "MCC7_BUSY\0" /* 1900 */
- "MCD0_BUSY\0" /* 1910 */
- "MCD1_BUSY\0" /* 1920 */
- "MCD2_BUSY\0" /* 1930 */
- "MCD3_BUSY\0" /* 1940 */
- "MCD4_BUSY\0" /* 1950 */
- "MCD5_BUSY\0" /* 1960 */
- "MCD6_BUSY\0" /* 1970 */
- "MCD7_BUSY\0" /* 1980 */
- "IDLE\0" /* 1990 */
- "REG_IDLE\0" /* 1995 */
- "RB_EMPTY\0" /* 2004 */
- "RB_FULL\0" /* 2013 */
- "RB_CMD_IDLE\0" /* 2021 */
- "RB_CMD_FULL\0" /* 2033 */
- "IB_CMD_IDLE\0" /* 2045 */
- "IB_CMD_FULL\0" /* 2057 */
- "BLOCK_IDLE\0" /* 2069 */
- "INSIDE_IB\0" /* 2080 */
- "EX_IDLE\0" /* 2090 */
- "EX_IDLE_POLL_TIMER_EXPIRE\0" /* 2098 */
- "PACKET_READY\0" /* 2124 */
- "MC_WR_IDLE\0" /* 2137 */
- "SRBM_IDLE\0" /* 2148 */
- "CONTEXT_EMPTY\0" /* 2158 */
- "DELTA_RPTR_FULL\0" /* 2172 */
- "RB_MC_RREQ_IDLE\0" /* 2188 */
- "IB_MC_RREQ_IDLE\0" /* 2204 */
- "MC_RD_IDLE\0" /* 2220 */
- "DELTA_RPTR_EMPTY\0" /* 2231 */
- "MC_RD_RET_STALL\0" /* 2248 */
- "MC_RD_NO_POLL_IDLE\0" /* 2264 */
- "PREV_CMD_IDLE\0" /* 2283 */
- "SEM_IDLE\0" /* 2297 */
- "SEM_REQ_STALL\0" /* 2306 */
- "SEM_RESP_STATE\0" /* 2320 */
- "INT_IDLE\0" /* 2335 */
- "INT_REQ_STALL\0" /* 2344 */
- "ME0PIPE1_CMDFIFO_AVAIL\0" /* 2358 */
- "ME0PIPE1_CF_RQ_PENDING\0" /* 2381 */
- "ME0PIPE1_PF_RQ_PENDING\0" /* 2404 */
- "ME1PIPE0_RQ_PENDING\0" /* 2427 */
- "ME1PIPE1_RQ_PENDING\0" /* 2447 */
- "ME1PIPE2_RQ_PENDING\0" /* 2467 */
- "ME1PIPE3_RQ_PENDING\0" /* 2487 */
- "ME2PIPE0_RQ_PENDING\0" /* 2507 */
- "ME2PIPE1_RQ_PENDING\0" /* 2527 */
- "ME2PIPE2_RQ_PENDING\0" /* 2547 */
- "ME2PIPE3_RQ_PENDING\0" /* 2567 */
- "RLC_RQ_PENDING\0" /* 2587 */
- "RLC_BUSY\0" /* 2602 */
- "TC_BUSY\0" /* 2611 */
- "TCC_CC_RESIDENT\0" /* 2619 */
- "CPF_BUSY\0" /* 2635 */
- "CPC_BUSY\0" /* 2644 */
- "CPG_BUSY\0" /* 2653 */
- "UTCL2_BUSY\0" /* 2662 */
- "EA_BUSY\0" /* 2673 */
- "RMI_BUSY\0" /* 2681 */
- "UTCL2_RQ_PENDING\0" /* 2690 */
- "CPF_RQ_PENDING\0" /* 2707 */
- "EA_LINK_BUSY\0" /* 2722 */
- "CPAXI_BUSY\0" /* 2735 */
- "ME0PIPE0_CMDFIFO_AVAIL\0" /* 2746 */
- "SRBM_RQ_PENDING\0" /* 2769 */
- "ME0PIPE0_CF_RQ_PENDING\0" /* 2785 */
- "ME0PIPE0_PF_RQ_PENDING\0" /* 2808 */
- "GDS_DMA_RQ_PENDING\0" /* 2831 */
- "DB_CLEAN\0" /* 2850 */
- "CB_CLEAN\0" /* 2859 */
- "TA_BUSY\0" /* 2868 */
- "GDS_BUSY\0" /* 2876 */
- "WD_BUSY_NO_DMA\0" /* 2885 */
- "VGT_BUSY\0" /* 2900 */
- "IA_BUSY_NO_DMA\0" /* 2909 */
- "IA_BUSY\0" /* 2924 */
- "SX_BUSY\0" /* 2932 */
- "WD_BUSY\0" /* 2940 */
- "SPI_BUSY\0" /* 2948 */
- "BCI_BUSY\0" /* 2957 */
- "SC_BUSY\0" /* 2966 */
- "PA_BUSY\0" /* 2974 */
- "DB_BUSY\0" /* 2982 */
- "CP_COHERENCY_BUSY\0" /* 2990 */
- "GUI_ACTIVE\0" /* 3008 */
- "RSMU_RQ_PENDING\0" /* 3019 */
- "OFFSET_UPDATE_DONE\0" /* 3049, 3035 */
- "DEST_BASE_0_ENA\0" /* 3054 */
- "DEST_BASE_1_ENA\0" /* 3070 */
- "CB0_DEST_BASE_ENA\0" /* 3086 */
- "CB1_DEST_BASE_ENA\0" /* 3104 */
- "CB2_DEST_BASE_ENA\0" /* 3122 */
- "CB3_DEST_BASE_ENA\0" /* 3140 */
- "CB4_DEST_BASE_ENA\0" /* 3158 */
- "CB5_DEST_BASE_ENA\0" /* 3176 */
- "CB6_DEST_BASE_ENA\0" /* 3194 */
- "CB7_DEST_BASE_ENA\0" /* 3212 */
- "DB_DEST_BASE_ENA\0" /* 3230 */
- "DEST_BASE_2_ENA\0" /* 3247 */
- "DEST_BASE_3_ENA\0" /* 3263 */
- "TCL1_ACTION_ENA\0" /* 3279 */
- "TC_ACTION_ENA\0" /* 3295 */
- "CB_ACTION_ENA\0" /* 3309 */
- "DB_ACTION_ENA\0" /* 3323 */
- "SH_KCACHE_ACTION_ENA\0" /* 3337 */
- "SH_ICACHE_ACTION_ENA\0" /* 3358 */
- "COHER_BASE_HI_256B\0" /* 3379 */
- "START_DELAY_COUNT\0" /* 3398 */
- "TC_SD_ACTION_ENA\0" /* 3416 */
- "TC_NC_ACTION_ENA\0" /* 3433 */
- "TCL1_VOL_ACTION_ENA\0" /* 3450 */
- "TC_VOL_ACTION_ENA\0" /* 3470 */
- "TC_WB_ACTION_ENA\0" /* 3488 */
- "SH_KCACHE_VOL_ACTION_ENA\0" /* 3505 */
- "SH_KCACHE_WB_ACTION_ENA\0" /* 3530 */
- "SH_SD_ACTION_ENA\0" /* 3554 */
- "TC_WC_ACTION_ENA\0" /* 3571 */
- "TC_INV_METADATA_ACTION_ENA\0" /* 3588 */
- "MATCHING_GFX_CNTX\0" /* 3615 */
- "MEID\0" /* 3633 */
- "PHASE1_STATUS\0" /* 3645, 3638 */
- "MEC1_BUSY\0" /* 3652 */
- "MEC2_BUSY\0" /* 3662 */
- "DC0_BUSY\0" /* 3672 */
- "DC1_BUSY\0" /* 3681 */
- "RCIU1_BUSY\0" /* 3690 */
- "RCIU2_BUSY\0" /* 3701 */
- "ROQ1_BUSY\0" /* 3712 */
- "ROQ2_BUSY\0" /* 3722 */
- "TCIU_BUSY\0" /* 3732 */
- "SCRATCH_RAM_BUSY\0" /* 3742 */
- "QU_BUSY\0" /* 3759 */
- "ATCL2IU_BUSY\0" /* 3767 */
- "CPG_CPC_BUSY\0" /* 3780 */
- "CPF_CPC_BUSY\0" /* 3793 */
- "UTCL2IU_BUSY\0" /* 3806 */
- "SAVE_RESTORE_BUSY\0" /* 3819 */
- "MEC1_LOAD_BUSY\0" /* 3837 */
- "MEC1_SEMAPOHRE_BUSY\0" /* 3852 */
- "MEC1_MUTEX_BUSY\0" /* 3872 */
- "MEC1_MESSAGE_BUSY\0" /* 3888 */
- "MEC1_EOP_QUEUE_BUSY\0" /* 3906 */
- "MEC1_IQ_QUEUE_BUSY\0" /* 3926 */
- "MEC1_IB_QUEUE_BUSY\0" /* 3945 */
- "MEC1_TC_BUSY\0" /* 3964 */
- "MEC1_DMA_BUSY\0" /* 3977 */
- "MEC1_PARTIAL_FLUSH_BUSY\0" /* 3991 */
- "MEC1_PIPE0_BUSY\0" /* 4015 */
- "MEC1_PIPE1_BUSY\0" /* 4031 */
- "MEC1_PIPE2_BUSY\0" /* 4047 */
- "MEC1_PIPE3_BUSY\0" /* 4063 */
- "MEC2_LOAD_BUSY\0" /* 4079 */
- "MEC2_SEMAPOHRE_BUSY\0" /* 4094 */
- "MEC2_MUTEX_BUSY\0" /* 4114 */
- "MEC2_MESSAGE_BUSY\0" /* 4130 */
- "MEC2_EOP_QUEUE_BUSY\0" /* 4148 */
- "MEC2_IQ_QUEUE_BUSY\0" /* 4168 */
- "MEC2_IB_QUEUE_BUSY\0" /* 4187 */
- "MEC2_TC_BUSY\0" /* 4206 */
- "MEC2_DMA_BUSY\0" /* 4219 */
- "MEC2_PARTIAL_FLUSH_BUSY\0" /* 4233 */
- "MEC2_PIPE0_BUSY\0" /* 4257 */
- "MEC2_PIPE1_BUSY\0" /* 4273 */
- "MEC2_PIPE2_BUSY\0" /* 4289 */
- "MEC2_PIPE3_BUSY\0" /* 4305 */
- "RCIU_TX_FREE_STALL\0" /* 4321 */
- "RCIU_PRIV_VIOLATION\0" /* 4340 */
- "TCIU_TX_FREE_STALL\0" /* 4360 */
- "MEC1_DECODING_PACKET\0" /* 4379 */
- "MEC1_WAIT_ON_RCIU\0" /* 4400 */
- "MEC1_WAIT_ON_RCIU_READ\0" /* 4418 */
- "MEC1_WAIT_ON_ROQ_DATA\0" /* 4441 */
- "MEC2_DECODING_PACKET\0" /* 4463 */
- "MEC2_WAIT_ON_RCIU\0" /* 4484 */
- "MEC2_WAIT_ON_RCIU_READ\0" /* 4502 */
- "MEC2_WAIT_ON_ROQ_DATA\0" /* 4525 */
- "ATCL2IU_WAITING_ON_FREE\0" /* 4547 */
- "ATCL2IU_WAITING_ON_TAGS\0" /* 4571 */
- "ATCL1_WAITING_ON_TRANS\0" /* 4595 */
- "UTCL2IU_WAITING_ON_FREE\0" /* 4618 */
- "UTCL2IU_WAITING_ON_TAGS\0" /* 4642 */
- "UTCL1_WAITING_ON_TRANS\0" /* 4666 */
- "POST_WPTR_GFX_BUSY\0" /* 4689 */
- "CSF_BUSY\0" /* 4708 */
- "ROQ_ALIGN_BUSY\0" /* 4717 */
- "ROQ_RING_BUSY\0" /* 4732 */
- "ROQ_INDIRECT1_BUSY\0" /* 4746 */
- "ROQ_INDIRECT2_BUSY\0" /* 4765 */
- "ROQ_STATE_BUSY\0" /* 4784 */
- "ROQ_CE_RING_BUSY\0" /* 4799 */
- "ROQ_CE_INDIRECT1_BUSY\0" /* 4816 */
- "ROQ_CE_INDIRECT2_BUSY\0" /* 4838 */
- "SEMAPHORE_BUSY\0" /* 4860 */
- "INTERRUPT_BUSY\0" /* 4875 */
- "HQD_BUSY\0" /* 4890 */
- "PRT_BUSY\0" /* 4899 */
- "CPF_GFX_BUSY\0" /* 4908 */
- "CPF_CMP_BUSY\0" /* 4921 */
- "GRBM_CPF_STAT_BUSY\0" /* 4934 */
- "CPC_CPF_BUSY\0" /* 4953 */
- "REG_BUS_FIFO_BUSY\0" /* 4966 */
- "CSF_RING_BUSY\0" /* 4984 */
- "CSF_INDIRECT1_BUSY\0" /* 4998 */
- "CSF_INDIRECT2_BUSY\0" /* 5017 */
- "CSF_STATE_BUSY\0" /* 5036 */
- "CSF_CE_INDR1_BUSY\0" /* 5051 */
- "CSF_CE_INDR2_BUSY\0" /* 5069 */
- "CSF_ARBITER_BUSY\0" /* 5087 */
- "CSF_INPUT_BUSY\0" /* 5104 */
- "OUTSTANDING_READ_TAGS\0" /* 5119 */
- "HPD_PROCESSING_EOP_BUSY\0" /* 5141 */
- "HQD_DISPATCH_BUSY\0" /* 5165 */
- "HQD_IQ_TIMER_BUSY\0" /* 5183 */
- "HQD_DMA_OFFLOAD_BUSY\0" /* 5201 */
- "HQD_WAIT_SEMAPHORE_BUSY\0" /* 5222 */
- "HQD_SIGNAL_SEMAPHORE_BUSY\0" /* 5246 */
- "HQD_MESSAGE_BUSY\0" /* 5272 */
- "HQD_PQ_FETCHER_BUSY\0" /* 5289 */
- "HQD_IB_FETCHER_BUSY\0" /* 5309 */
- "HQD_IQ_FETCHER_BUSY\0" /* 5329 */
- "HQD_EOP_FETCHER_BUSY\0" /* 5349 */
- "HQD_CONSUMED_RPTR_BUSY\0" /* 5370 */
- "HQD_FETCHER_ARB_BUSY\0" /* 5393 */
- "HQD_ROQ_ALIGN_BUSY\0" /* 5414 */
- "HQD_ROQ_EOP_BUSY\0" /* 5433 */
- "HQD_ROQ_IQ_BUSY\0" /* 5450 */
- "HQD_ROQ_PQ_BUSY\0" /* 5466 */
- "HQD_ROQ_IB_BUSY\0" /* 5482 */
- "HQD_WPTR_POLL_BUSY\0" /* 5498 */
- "HQD_PQ_BUSY\0" /* 5517 */
- "HQD_IB_BUSY\0" /* 5529 */
- "RING_FETCHING_DATA\0" /* 5541 */
- "INDR1_FETCHING_DATA\0" /* 5560 */
- "INDR2_FETCHING_DATA\0" /* 5580 */
- "STATE_FETCHING_DATA\0" /* 5600 */
- "TCIU_WAITING_ON_FREE\0" /* 5620 */
- "TCIU_WAITING_ON_TAGS\0" /* 5641 */
- "GFX_UTCL1_WAITING_ON_TRANS\0" /* 5662 */
- "CMP_UTCL1_WAITING_ON_TRANS\0" /* 5689 */
- "RCIU_WAITING_ON_FREE\0" /* 5716 */
- "COHER_SIZE_HI_256B\0" /* 5737 */
- "PRIM_COUNT\0" /* 5756 */
- "VS_NO_EXTRA_BUFFER\0" /* 5767 */
- "STREAMOUT_FULL_FLUSH\0" /* 5786 */
- "ES_LIMIT\0" /* 5810, 5807 */
- "VERT_REUSE\0" /* 5816 */
- "DI_PT_NONE\0" /* 5827 */
- "DI_PT_POINTLIST\0" /* 5838 */
- "DI_PT_LINELIST\0" /* 5854 */
- "DI_PT_LINESTRIP\0" /* 5869 */
- "DI_PT_TRILIST\0" /* 5885 */
- "DI_PT_TRIFAN\0" /* 5899 */
- "DI_PT_TRISTRIP\0" /* 5912 */
- "DI_PT_UNUSED_0\0" /* 5927 */
- "DI_PT_UNUSED_1\0" /* 5942 */
- "DI_PT_PATCH\0" /* 5957 */
- "DI_PT_LINELIST_ADJ\0" /* 5969 */
- "DI_PT_LINESTRIP_ADJ\0" /* 5988 */
- "DI_PT_TRILIST_ADJ\0" /* 6008 */
- "DI_PT_TRISTRIP_ADJ\0" /* 6026 */
- "DI_PT_UNUSED_3\0" /* 6045 */
- "DI_PT_UNUSED_4\0" /* 6060 */
- "DI_PT_TRI_WITH_WFLAGS\0" /* 6075 */
- "DI_PT_RECTLIST\0" /* 6097 */
- "DI_PT_LINELOOP\0" /* 6112 */
- "DI_PT_QUADLIST\0" /* 6127 */
- "DI_PT_QUADSTRIP\0" /* 6142 */
- "DI_PT_POLYGON\0" /* 6158 */
- "DI_PT_2D_COPY_RECT_LIST_V0\0" /* 6172 */
- "DI_PT_2D_COPY_RECT_LIST_V1\0" /* 6199 */
- "DI_PT_2D_COPY_RECT_LIST_V2\0" /* 6226 */
- "DI_PT_2D_COPY_RECT_LIST_V3\0" /* 6253 */
- "DI_PT_2D_FILL_RECT_LIST\0" /* 6280 */
- "DI_PT_2D_LINE_STRIP\0" /* 6304 */
- "DI_PT_2D_TRI_STRIP\0" /* 6324 */
- "PRIM_TYPE\0" /* 6343 */
- "DI_INDEX_SIZE_16_BIT\0" /* 6353 */
- "DI_INDEX_SIZE_32_BIT\0" /* 6374 */
- "OFFCHIP_BUFFERING\0" /* 6395 */
- "CLIP_VTX_REORDER_ENA\0" /* 6413 */
- "NUM_CLIP_SEQ\0" /* 6434 */
- "CLIPPED_PRIM_SEQ_STALL\0" /* 6447 */
- "VE_NAN_PROC_DISABLE\0" /* 6470 */
- "LINE_STIPPLE_VALUE\0" /* 6490 */
- "CURRENT_PTR\0" /* 6509 */
- "CURRENT_COUNT\0" /* 6521 */
- "CE_TO_CSF_NOT_RDY_TO_RCV\0" /* 6535 */
- "CE_TO_RAM_INIT_FETCHER_NOT_RDY_TO_RCV\0" /* 6560 */
- "CE_WAITING_ON_DATA_FROM_RAM_INIT_FETCHER\0" /* 6598 */
- "CE_TO_RAM_INIT_NOT_RDY\0" /* 6639 */
- "CE_TO_RAM_DUMP_NOT_RDY\0" /* 6662 */
- "CE_TO_RAM_WRITE_NOT_RDY\0" /* 6685 */
- "CE_TO_INC_FIFO_NOT_RDY_TO_RCV\0" /* 6709 */
- "CE_TO_WR_FIFO_NOT_RDY_TO_RCV\0" /* 6739 */
- "CE_WAITING_ON_BUFFER_DATA\0" /* 6768 */
- "CE_WAITING_ON_CE_BUFFER_FLAG\0" /* 6794 */
- "CE_WAITING_ON_DE_COUNTER\0" /* 6823 */
- "CE_WAITING_ON_DE_COUNTER_UNDERFLOW\0" /* 6848 */
- "CE_STALLED_ON_TC_WR_CONFIRM\0" /* 6883 */
- "CE_STALLED_ON_ATOMIC_RTN_DATA\0" /* 6911 */
- "RBIU_TO_DMA_NOT_RDY_TO_RCV\0" /* 6941 */
- "RBIU_TO_SEM_NOT_RDY_TO_RCV\0" /* 6968 */
- "RBIU_TO_MEMWR_NOT_RDY_TO_RCV\0" /* 6995 */
- "ME_HAS_ACTIVE_CE_BUFFER_FLAG\0" /* 7024 */
- "ME_HAS_ACTIVE_DE_BUFFER_FLAG\0" /* 7053 */
- "ME_STALLED_ON_TC_WR_CONFIRM\0" /* 7082 */
- "ME_STALLED_ON_ATOMIC_RTN_DATA\0" /* 7110 */
- "ME_WAITING_ON_TC_READ_DATA\0" /* 7140 */
- "ME_WAITING_ON_REG_READ_DATA\0" /* 7167 */
- "RCIU_WAITING_ON_GDS_FREE\0" /* 7195 */
- "RCIU_WAITING_ON_GRBM_FREE\0" /* 7220 */
- "RCIU_WAITING_ON_VGT_FREE\0" /* 7246 */
- "RCIU_STALLED_ON_ME_READ\0" /* 7271 */
- "RCIU_STALLED_ON_DMA_READ\0" /* 7295 */
- "RCIU_STALLED_ON_APPEND_READ\0" /* 7320 */
- "RCIU_HALTED_BY_REG_VIOLATION\0" /* 7348 */
- "PFP_TO_CSF_NOT_RDY_TO_RCV\0" /* 7377 */
- "PFP_TO_MEQ_NOT_RDY_TO_RCV\0" /* 7403 */
- "PFP_TO_RCIU_NOT_RDY_TO_RCV\0" /* 7429 */
- "PFP_TO_VGT_WRITES_PENDING\0" /* 7456 */
- "PFP_RCIU_READ_PENDING\0" /* 7482 */
- "PFP_WAITING_ON_BUFFER_DATA\0" /* 7504 */
- "ME_WAIT_ON_CE_COUNTER\0" /* 7531 */
- "ME_WAIT_ON_AVAIL_BUFFER\0" /* 7553 */
- "GFX_CNTX_NOT_AVAIL_TO_ME\0" /* 7577 */
- "ME_RCIU_NOT_RDY_TO_RCV\0" /* 7602 */
- "ME_TO_CONST_NOT_RDY_TO_RCV\0" /* 7625 */
- "ME_WAITING_DATA_FROM_PFP\0" /* 7652 */
- "ME_WAITING_ON_PARTIAL_FLUSH\0" /* 7677 */
- "MEQ_TO_ME_NOT_RDY_TO_RCV\0" /* 7705 */
- "STQ_TO_ME_NOT_RDY_TO_RCV\0" /* 7730 */
- "ME_WAITING_DATA_FROM_STQ\0" /* 7755 */
- "PFP_STALLED_ON_TC_WR_CONFIRM\0" /* 7780 */
- "PFP_STALLED_ON_ATOMIC_RTN_DATA\0" /* 7809 */
- "EOPD_FIFO_NEEDS_SC_EOP_DONE\0" /* 7840 */
- "EOPD_FIFO_NEEDS_WR_CONFIRM\0" /* 7868 */
- "STRMO_WR_OF_PRIM_DATA_PENDING\0" /* 7895 */
- "PIPE_STATS_WR_DATA_PENDING\0" /* 7925 */
- "APPEND_RDY_WAIT_ON_CS_DONE\0" /* 7952, 7971 */
- "APPEND_RDY_WAIT_ON_PS_DONE\0" /* 7979, 7998 */
- "APPEND_WAIT_ON_WR_CONFIRM\0" /* 8006 */
- "APPEND_ACTIVE_PARTITION\0" /* 8032 */
- "APPEND_WAITING_TO_SEND_MEMWRITE\0" /* 8056 */
- "SURF_SYNC_NEEDS_IDLE_CNTXS\0" /* 8088 */
- "SURF_SYNC_NEEDS_ALL_CLEAN\0" /* 8115 */
- "DC_BUSY\0" /* 8141 */
- "PFP_BUSY\0" /* 8149 */
- "MEQ_BUSY\0" /* 8158 */
- "ME_BUSY\0" /* 8167 */
- "QUERY_BUSY\0" /* 8175 */
- "SURFACE_SYNC_BUSY\0" /* 8186 */
- "RCIU_BUSY\0" /* 8204 */
- "CPC_CPG_BUSY\0" /* 8214 */
- "CE_BUSY\0" /* 8227 */
- "INSTANCE_INDEX\0" /* 8235 */
- "SH_INDEX\0" /* 8250 */
- "SE_INDEX\0" /* 8259 */
- "SH_BROADCAST_WRITES\0" /* 8268 */
- "INSTANCE_BROADCAST_WRITES\0" /* 8288 */
- "SE_BROADCAST_WRITES\0" /* 8314 */
- "PRIMGEN_EN\0" /* 8342, 8334 */
- "X_8K_DWORDS\0" /* 8345 */
- "X_4K_DWORDS\0" /* 8357 */
- "X_2K_DWORDS\0" /* 8369 */
- "X_1K_DWORDS\0" /* 8381 */
- "OFFCHIP_GRANULARITY\0" /* 8393 */
- "ENABLE_PA_SC_OUT_OF_ORDER\0" /* 8413 */
- "DISABLE_SC_DB_TILE_FIX\0" /* 8439 */
- "DISABLE_AA_MASK_FULL_FIX\0" /* 8462 */
- "ENABLE_1XMSAA_SAMPLE_LOCATIONS\0" /* 8487 */
- "ENABLE_1XMSAA_SAMPLE_LOC_CENTROID\0" /* 8518 */
- "DISABLE_SCISSOR_FIX\0" /* 8552 */
- "DISABLE_PW_BUBBLE_COLLAPSE\0" /* 8572 */
- "SEND_UNLIT_STILES_TO_PACKER\0" /* 8599 */
- "DISABLE_DUALGRAD_PERF_OPTIMIZATION\0" /* 8627 */
- "INST_INVALIDATE\0" /* 8667, 8662 */
- "DATA_INVALIDATE\0" /* 8678 */
- "INVALIDATE_VOLATILE\0" /* 8694 */
- "TARGET_INST\0" /* 8714 */
- "TARGET_DATA\0" /* 8726 */
- "WRITEBACK\0" /* 8738 */
- "VOL\0" /* 8748 */
- "COMPLETE\0" /* 8752 */
- "RET\0" /* 8761 */
- "RUI\0" /* 8765 */
- "RNG\0" /* 8769 */
- "SQ_EXP_MRT\0" /* 8773 */
- "SQ_EXP_MRTZ\0" /* 8784 */
- "SQ_EXP_NULL\0" /* 8796 */
- "SQ_EXP_POS\0" /* 8808 */
- "SQ_EXP_PARAM\0" /* 8819 */
- "TGT\0" /* 8832 */
- "COMPR\0" /* 8836 */
- "VM\0" /* 8842 */
- "SQ_ENC_EXP_FIELD\0" /* 8845 */
- "ENCODING\0" /* 8862 */
- "ADDRESS\0" /* 8871 */
- "COUNT_HI\0" /* 8879 */
- "BASE_ADDRESS_HI\0" /* 8888, 8893 */
- "STRIDE\0" /* 8904 */
- "CACHE_SWIZZLE\0" /* 8911 */
- "SWIZZLE_ENABLE\0" /* 8925 */
- "SQ_SEL_0\0" /* 8940 */
- "SQ_SEL_1\0" /* 8949 */
- "SQ_SEL_RESERVED_0\0" /* 8965, 8958 */
- "SQ_SEL_RESERVED_1\0" /* 8976, 8983 */
- "SQ_SEL_X\0" /* 8994 */
- "SQ_SEL_Y\0" /* 9003 */
- "SQ_SEL_Z\0" /* 9012 */
- "SQ_SEL_W\0" /* 9021 */
- "DST_SEL_X\0" /* 9030 */
- "DST_SEL_Y\0" /* 9040 */
- "DST_SEL_Z\0" /* 9050 */
- "DST_SEL_W\0" /* 9060 */
- "BUF_NUM_FORMAT_UNORM\0" /* 9070 */
- "BUF_NUM_FORMAT_SNORM\0" /* 9091 */
- "BUF_NUM_FORMAT_USCALED\0" /* 9112 */
- "BUF_NUM_FORMAT_SSCALED\0" /* 9135 */
- "BUF_NUM_FORMAT_UINT\0" /* 9158 */
- "BUF_NUM_FORMAT_SINT\0" /* 9178 */
- "BUF_NUM_FORMAT_SNORM_OGL\0" /* 9198 */
- "BUF_NUM_FORMAT_FLOAT\0" /* 9223 */
- "NUM_FORMAT\0" /* 9248, 9244 */
- "BUF_DATA_FORMAT_INVALID\0" /* 9255 */
- "BUF_DATA_FORMAT_8\0" /* 9279 */
- "BUF_DATA_FORMAT_16\0" /* 9297 */
- "BUF_DATA_FORMAT_8_8\0" /* 9316 */
- "BUF_DATA_FORMAT_32\0" /* 9336 */
- "BUF_DATA_FORMAT_16_16\0" /* 9355 */
- "BUF_DATA_FORMAT_10_11_11\0" /* 9377 */
- "BUF_DATA_FORMAT_11_11_10\0" /* 9402 */
- "BUF_DATA_FORMAT_10_10_10_2\0" /* 9427 */
- "BUF_DATA_FORMAT_2_10_10_10\0" /* 9454 */
- "BUF_DATA_FORMAT_8_8_8_8\0" /* 9481 */
- "BUF_DATA_FORMAT_32_32\0" /* 9505 */
- "BUF_DATA_FORMAT_16_16_16_16\0" /* 9527 */
- "BUF_DATA_FORMAT_32_32_32\0" /* 9555 */
- "BUF_DATA_FORMAT_32_32_32_32\0" /* 9580 */
- "BUF_DATA_FORMAT_RESERVED_15\0" /* 9608 */
- "DATA_FORMAT\0" /* 9636 */
- "ELEMENT_SIZE\0" /* 9648 */
- "INDEX_STRIDE\0" /* 9661 */
- "ADD_TID_ENABLE\0" /* 9674 */
- "ATC\0" /* 9689 */
- "HASH_ENABLE\0" /* 9693 */
- "HEAP\0" /* 9705 */
- "MTYPE\0" /* 9710 */
- "SQ_RSRC_BUF\0" /* 9716 */
- "SQ_RSRC_BUF_RSVD_1\0" /* 9728 */
- "SQ_RSRC_BUF_RSVD_2\0" /* 9747 */
- "SQ_RSRC_BUF_RSVD_3\0" /* 9766 */
- "USER_VM_ENABLE\0" /* 9785 */
- "USER_VM_MODE\0" /* 9800, 9808 */
- "NV\0" /* 9813 */
- "MIN_LOD\0" /* 9816 */
- "IMG_DATA_FORMAT_INVALID\0" /* 9824 */
- "IMG_DATA_FORMAT_8\0" /* 9848 */
- "IMG_DATA_FORMAT_16\0" /* 9866 */
- "IMG_DATA_FORMAT_8_8\0" /* 9885 */
- "IMG_DATA_FORMAT_32\0" /* 9905 */
- "IMG_DATA_FORMAT_16_16\0" /* 9924 */
- "IMG_DATA_FORMAT_10_11_11\0" /* 9946 */
- "IMG_DATA_FORMAT_11_11_10\0" /* 9971 */
- "IMG_DATA_FORMAT_10_10_10_2\0" /* 9996 */
- "IMG_DATA_FORMAT_2_10_10_10\0" /* 10023 */
- "IMG_DATA_FORMAT_8_8_8_8\0" /* 10050 */
- "IMG_DATA_FORMAT_32_32\0" /* 10074 */
- "IMG_DATA_FORMAT_16_16_16_16\0" /* 10096 */
- "IMG_DATA_FORMAT_32_32_32\0" /* 10124 */
- "IMG_DATA_FORMAT_32_32_32_32\0" /* 10149 */
- "IMG_DATA_FORMAT_RESERVED_15\0" /* 10177 */
- "IMG_DATA_FORMAT_5_6_5\0" /* 10205 */
- "IMG_DATA_FORMAT_1_5_5_5\0" /* 10227 */
- "IMG_DATA_FORMAT_5_5_5_1\0" /* 10251 */
- "IMG_DATA_FORMAT_4_4_4_4\0" /* 10275 */
- "IMG_DATA_FORMAT_8_24\0" /* 10299 */
- "IMG_DATA_FORMAT_24_8\0" /* 10320 */
- "IMG_DATA_FORMAT_X24_8_32\0" /* 10341 */
- "IMG_DATA_FORMAT_8_AS_8_8_8_8\0" /* 10366 */
- "IMG_DATA_FORMAT_ETC2_RGB\0" /* 10395 */
- "IMG_DATA_FORMAT_ETC2_RGBA\0" /* 10420 */
- "IMG_DATA_FORMAT_ETC2_R\0" /* 10446 */
- "IMG_DATA_FORMAT_ETC2_RG\0" /* 10469 */
- "IMG_DATA_FORMAT_ETC2_RGBA1\0" /* 10493 */
- "IMG_DATA_FORMAT_RESERVED_29\0" /* 10520 */
- "IMG_DATA_FORMAT_RESERVED_30\0" /* 10548 */
- "IMG_DATA_FORMAT_RESERVED_31\0" /* 10576 */
- "IMG_DATA_FORMAT_GB_GR\0" /* 10604 */
- "IMG_DATA_FORMAT_BG_RG\0" /* 10626 */
- "IMG_DATA_FORMAT_5_9_9_9\0" /* 10648 */
- "IMG_DATA_FORMAT_BC1\0" /* 10672 */
- "IMG_DATA_FORMAT_BC2\0" /* 10692 */
- "IMG_DATA_FORMAT_BC3\0" /* 10712 */
- "IMG_DATA_FORMAT_BC4\0" /* 10732 */
- "IMG_DATA_FORMAT_BC5\0" /* 10752 */
- "IMG_DATA_FORMAT_BC6\0" /* 10772 */
- "IMG_DATA_FORMAT_BC7\0" /* 10792 */
- "IMG_DATA_FORMAT_16_AS_16_16_16_16_GFX6\0" /* 10812 */
- "IMG_DATA_FORMAT_16_AS_32_32_32_32_GFX6\0" /* 10851 */
- "IMG_DATA_FORMAT_FMASK8_S2_F1\0" /* 10890 */
- "IMG_DATA_FORMAT_FMASK8_S4_F1\0" /* 10919 */
- "IMG_DATA_FORMAT_FMASK8_S8_F1\0" /* 10948 */
- "IMG_DATA_FORMAT_FMASK8_S2_F2\0" /* 10977 */
- "IMG_DATA_FORMAT_FMASK8_S4_F2\0" /* 11006 */
- "IMG_DATA_FORMAT_FMASK8_S4_F4\0" /* 11035 */
- "IMG_DATA_FORMAT_FMASK16_S16_F1\0" /* 11064 */
- "IMG_DATA_FORMAT_FMASK16_S8_F2\0" /* 11095 */
- "IMG_DATA_FORMAT_FMASK32_S16_F2\0" /* 11125 */
- "IMG_DATA_FORMAT_FMASK32_S8_F4\0" /* 11156 */
- "IMG_DATA_FORMAT_FMASK32_S8_F8\0" /* 11186 */
- "IMG_DATA_FORMAT_FMASK64_S16_F4\0" /* 11216 */
- "IMG_DATA_FORMAT_FMASK64_S16_F8\0" /* 11247 */
- "IMG_DATA_FORMAT_4_4\0" /* 11278 */
- "IMG_DATA_FORMAT_6_5_5\0" /* 11298 */
- "IMG_DATA_FORMAT_1\0" /* 11320 */
- "IMG_DATA_FORMAT_1_REVERSED\0" /* 11338 */
- "IMG_DATA_FORMAT_32_AS_8\0" /* 11365 */
- "IMG_DATA_FORMAT_32_AS_8_8\0" /* 11389 */
- "IMG_DATA_FORMAT_32_AS_32_32_32_32\0" /* 11415 */
- "DATA_FORMAT_GFX6\0" /* 11449 */
- "IMG_NUM_FORMAT_UNORM\0" /* 11466 */
- "IMG_NUM_FORMAT_SNORM\0" /* 11487 */
- "IMG_NUM_FORMAT_USCALED\0" /* 11508 */
- "IMG_NUM_FORMAT_SSCALED\0" /* 11531 */
- "IMG_NUM_FORMAT_UINT\0" /* 11554 */
- "IMG_NUM_FORMAT_SINT\0" /* 11574 */
- "IMG_NUM_FORMAT_SNORM_OGL\0" /* 11594 */
- "IMG_NUM_FORMAT_FLOAT\0" /* 11619 */
- "IMG_NUM_FORMAT_RESERVED_8\0" /* 11640 */
- "IMG_NUM_FORMAT_SRGB\0" /* 11666 */
- "IMG_NUM_FORMAT_UBNORM\0" /* 11686 */
- "IMG_NUM_FORMAT_UBNORM_OGL\0" /* 11708 */
- "IMG_NUM_FORMAT_UBINT\0" /* 11734 */
- "IMG_NUM_FORMAT_UBSCALED\0" /* 11755 */
- "IMG_NUM_FORMAT_RESERVED_14\0" /* 11779 */
- "IMG_NUM_FORMAT_RESERVED_15\0" /* 11806 */
- "NUM_FORMAT_GFX6\0" /* 11833 */
- "IMG_DATA_FORMAT_6E4\0" /* 11849 */
- "IMG_DATA_FORMAT_16_AS_32_32\0" /* 11869 */
- "IMG_DATA_FORMAT_16_AS_16_16_16_16_GFX9\0" /* 11897 */
- "IMG_DATA_FORMAT_16_AS_32_32_32_32_GFX9\0" /* 11936 */
- "IMG_DATA_FORMAT_FMASK\0" /* 11975 */
- "IMG_DATA_FORMAT_ASTC_2D_LDR\0" /* 11997 */
- "IMG_DATA_FORMAT_ASTC_2D_HDR\0" /* 12025 */
- "IMG_DATA_FORMAT_ASTC_2D_LDR_SRGB\0" /* 12053 */
- "IMG_DATA_FORMAT_ASTC_3D_LDR\0" /* 12086 */
- "IMG_DATA_FORMAT_ASTC_3D_HDR\0" /* 12114 */
- "IMG_DATA_FORMAT_ASTC_3D_LDR_SRGB\0" /* 12142 */
- "IMG_DATA_FORMAT_N_IN_16\0" /* 12175 */
- "IMG_DATA_FORMAT_N_IN_16_16\0" /* 12199 */
- "IMG_DATA_FORMAT_N_IN_16_16_16_16\0" /* 12226 */
- "IMG_DATA_FORMAT_N_IN_16_AS_16_16_16_16\0" /* 12259 */
- "IMG_DATA_FORMAT_RESERVED_56\0" /* 12298 */
- "IMG_DATA_FORMAT_S8_16\0" /* 12326 */
- "IMG_DATA_FORMAT_S8_32\0" /* 12348 */
- "IMG_DATA_FORMAT_8_AS_32\0" /* 12370 */
- "IMG_DATA_FORMAT_8_AS_32_32\0" /* 12394 */
- "DATA_FORMAT_GFX9\0" /* 12421 */
- "IMG_NUM_FORMAT_RESERVED_6\0" /* 12438 */
- "IMG_NUM_FORMAT_METADATA\0" /* 12464 */
- "IMG_NUM_FORMAT_UNORM_UINT\0" /* 12488 */
- "NUM_FORMAT_GFX9\0" /* 12514 */
- "IMG_FMASK_8_2_1\0" /* 12530 */
- "IMG_FMASK_8_4_1\0" /* 12546 */
- "IMG_FMASK_8_8_1\0" /* 12562 */
- "IMG_FMASK_8_2_2\0" /* 12578 */
- "IMG_FMASK_8_4_2\0" /* 12594 */
- "IMG_FMASK_8_4_4\0" /* 12610 */
- "IMG_FMASK_16_16_1\0" /* 12626 */
- "IMG_FMASK_16_8_2\0" /* 12644 */
- "IMG_FMASK_32_16_2\0" /* 12661 */
- "IMG_FMASK_32_8_4\0" /* 12679 */
- "IMG_FMASK_32_8_8\0" /* 12696 */
- "IMG_FMASK_64_16_4\0" /* 12713 */
- "IMG_FMASK_64_16_8\0" /* 12731 */
- "NUM_FORMAT_FMASK\0" /* 12749 */
- "IMG_ASTC_2D_4x4\0" /* 12766 */
- "IMG_ASTC_2D_5x4\0" /* 12782 */
- "IMG_ASTC_2D_5x5\0" /* 12798 */
- "IMG_ASTC_2D_6x5\0" /* 12814 */
- "IMG_ASTC_2D_6x6\0" /* 12830 */
- "IMG_ASTC_2D_8x5\0" /* 12846 */
- "IMG_ASTC_2D_8x6\0" /* 12862 */
- "IMG_ASTC_2D_8x8\0" /* 12878 */
- "IMG_ASTC_2D_10x5\0" /* 12894 */
- "IMG_ASTC_2D_10x6\0" /* 12911 */
- "IMG_ASTC_2D_10x8\0" /* 12928 */
- "IMG_ASTC_2D_10x10\0" /* 12945 */
- "IMG_ASTC_2D_12x10\0" /* 12963 */
- "IMG_ASTC_2D_12x12\0" /* 12981 */
- "NUM_FORMAT_ASTC_2D\0" /* 13015, 12999 */
- "IMG_ASTC_3D_3x3x3\0" /* 13018 */
- "IMG_ASTC_3D_4x3x3\0" /* 13036 */
- "IMG_ASTC_3D_4x4x3\0" /* 13054 */
- "IMG_ASTC_3D_4x4x4\0" /* 13072 */
- "IMG_ASTC_3D_5x4x4\0" /* 13090 */
- "IMG_ASTC_3D_5x5x4\0" /* 13108 */
- "IMG_ASTC_3D_5x5x5\0" /* 13126 */
- "IMG_ASTC_3D_6x5x5\0" /* 13144 */
- "IMG_ASTC_3D_6x6x5\0" /* 13162 */
- "IMG_ASTC_3D_6x6x6\0" /* 13180 */
- "NUM_FORMAT_ASTC_3D\0" /* 13214, 13198 */
- "META_DIRECT\0" /* 13217 */
- "WIDTH\0" /* 13229 */
- "HEIGHT\0" /* 13235 */
- "PERF_MOD\0" /* 13242 */
- "INTERLACED\0" /* 13251 */
- "BASE_LEVEL\0" /* 13262 */
- "LAST_LEVEL\0" /* 13273 */
- "TILING_INDEX\0" /* 13284 */
- "POW2_PAD\0" /* 13297 */
- "SQ_RSRC_IMG_RSVD_0\0" /* 13306 */
- "SQ_RSRC_IMG_RSVD_1\0" /* 13325 */
- "SQ_RSRC_IMG_RSVD_2\0" /* 13344 */
- "SQ_RSRC_IMG_RSVD_3\0" /* 13363 */
- "SQ_RSRC_IMG_RSVD_4\0" /* 13382 */
- "SQ_RSRC_IMG_RSVD_5\0" /* 13401 */
- "SQ_RSRC_IMG_RSVD_6\0" /* 13420 */
- "SQ_RSRC_IMG_RSVD_7\0" /* 13439 */
- "SQ_RSRC_IMG_1D\0" /* 13458, 13470 */
- "SQ_RSRC_IMG_2D\0" /* 13473 */
- "SQ_RSRC_IMG_3D\0" /* 13488 */
- "SQ_RSRC_IMG_CUBE\0" /* 13503 */
- "SQ_RSRC_IMG_1D_ARRAY\0" /* 13520 */
- "SQ_RSRC_IMG_2D_ARRAY\0" /* 13541 */
- "SQ_RSRC_IMG_2D_MSAA\0" /* 13562 */
- "SQ_RSRC_IMG_2D_MSAA_ARRAY\0" /* 13582 */
- "SW_MODE\0" /* 13608 */
- "DEPTH\0" /* 13616 */
- "PITCH_GFX6\0" /* 13622 */
- "PITCH_GFX9\0" /* 13633 */
- "BC_SWIZZLE_XYZW\0" /* 13644 */
- "BC_SWIZZLE_XWYZ\0" /* 13660 */
- "BC_SWIZZLE_WZYX\0" /* 13676 */
- "BC_SWIZZLE_WXYZ\0" /* 13692 */
- "BC_SWIZZLE_ZYXW\0" /* 13708 */
- "BC_SWIZZLE_YXWZ\0" /* 13724 */
- "BC_SWIZZLE\0" /* 13740 */
- "BASE_ARRAY\0" /* 13751 */
- "LAST_ARRAY\0" /* 13762 */
- "ARRAY_PITCH\0" /* 13773 */
- "META_DATA_ADDRESS\0" /* 13785 */
- "META_LINEAR\0" /* 13808, 13803 */
- "META_PIPE_ALIGNED\0" /* 13820, 13815 */
- "META_RB_ALIGNED\0" /* 13833, 13838 */
- "MAX_MIP\0" /* 13849 */
- "MIN_LOD_WARN\0" /* 13857 */
- "COUNTER_BANK_ID\0" /* 13870 */
- "LOD_HDW_CNT_EN\0" /* 13886 */
- "COMPRESSION_EN\0" /* 13901 */
- "ALPHA_IS_ON_MSB\0" /* 13916 */
- "COLOR_TRANSFORM\0" /* 13932 */
- "LOST_ALPHA_BITS\0" /* 13948 */
- "LOST_COLOR_BITS\0" /* 13964 */
- "SQ_TEX_WRAP\0" /* 13980 */
- "SQ_TEX_MIRROR\0" /* 13992 */
- "SQ_TEX_CLAMP_LAST_TEXEL\0" /* 14006 */
- "SQ_TEX_MIRROR_ONCE_LAST_TEXEL\0" /* 14030 */
- "SQ_TEX_CLAMP_HALF_BORDER\0" /* 14060 */
- "SQ_TEX_MIRROR_ONCE_HALF_BORDER\0" /* 14085 */
- "SQ_TEX_CLAMP_BORDER\0" /* 14116 */
- "SQ_TEX_MIRROR_ONCE_BORDER\0" /* 14136 */
- "CLAMP_X\0" /* 14162 */
- "CLAMP_Y\0" /* 14170 */
- "CLAMP_Z\0" /* 14178 */
- "MAX_ANISO_RATIO\0" /* 14186 */
- "SQ_TEX_DEPTH_COMPARE_NEVER\0" /* 14202 */
- "SQ_TEX_DEPTH_COMPARE_LESS\0" /* 14229 */
- "SQ_TEX_DEPTH_COMPARE_EQUAL\0" /* 14255 */
- "SQ_TEX_DEPTH_COMPARE_LESSEQUAL\0" /* 14282 */
- "SQ_TEX_DEPTH_COMPARE_GREATER\0" /* 14313 */
- "SQ_TEX_DEPTH_COMPARE_NOTEQUAL\0" /* 14342 */
- "SQ_TEX_DEPTH_COMPARE_GREATEREQUAL\0" /* 14372 */
- "SQ_TEX_DEPTH_COMPARE_ALWAYS\0" /* 14427, 14406 */
- "DEPTH_COMPARE_FUNC\0" /* 14434 */
- "FORCE_UNNORMALIZED\0" /* 14453 */
- "ANISO_THRESHOLD\0" /* 14472 */
- "MC_COORD_TRUNC\0" /* 14488 */
- "FORCE_DEGAMMA\0" /* 14503 */
- "ANISO_BIAS\0" /* 14517 */
- "TRUNC_COORD\0" /* 14528 */
- "DISABLE_CUBE_WRAP\0" /* 14540 */
- "FILTER_MODE\0" /* 14558 */
- "COMPAT_MODE\0" /* 14570 */
- "MAX_LOD\0" /* 14582 */
- "PERF_MIP\0" /* 14590 */
- "PERF_Z\0" /* 14599 */
- "LOD_BIAS\0" /* 14606 */
- "LOD_BIAS_SEC\0" /* 14615 */
- "SQ_TEX_XY_FILTER_POINT\0" /* 14642, 14628 */
- "SQ_TEX_XY_FILTER_BILINEAR\0" /* 14651 */
- "XY_MAG_FILTER\0" /* 14677 */
- "SQ_TEX_XY_FILTER_ANISO_POINT\0" /* 14691 */
- "SQ_TEX_XY_FILTER_ANISO_BILINEAR\0" /* 14720 */
- "XY_MIN_FILTER\0" /* 14752 */
- "SQ_TEX_Z_FILTER_NONE\0" /* 14766 */
- "SQ_TEX_Z_FILTER_POINT\0" /* 14787 */
- "SQ_TEX_Z_FILTER_LINEAR\0" /* 14809 */
- "Z_FILTER\0" /* 14832 */
- "MIP_FILTER\0" /* 14841 */
- "MIP_POINT_PRECLAMP\0" /* 14852 */
- "DISABLE_LSB_CEIL\0" /* 14871 */
- "FILTER_PREC_FIX\0" /* 14888 */
- "ANISO_OVERRIDE\0" /* 14904 */
- "BLEND_ZERO_PRT\0" /* 14919 */
- "BORDER_COLOR_PTR\0" /* 14934 */
- "UPGRADED_DEPTH\0" /* 14951 */
- "SQ_TEX_BORDER_COLOR_TRANS_BLACK\0" /* 14966 */
- "SQ_TEX_BORDER_COLOR_OPAQUE_BLACK\0" /* 14998 */
- "SQ_TEX_BORDER_COLOR_OPAQUE_WHITE\0" /* 15031 */
- "SQ_TEX_BORDER_COLOR_REGISTER\0" /* 15064 */
- "BORDER_COLOR_TYPE\0" /* 15093 */
- "SKIP_DEGAMMA\0" /* 15111 */
- "VS_LOW_THRESHOLD\0" /* 15124 */
- "GS_LOW_THRESHOLD\0" /* 15141 */
- "ES_LOW_THRESHOLD\0" /* 15158 */
- "HS_LOW_THRESHOLD\0" /* 15175 */
- "LS_LOW_THRESHOLD\0" /* 15192 */
- "PS_CU_EN\0" /* 15209, 15212 */
- "VS_CU_EN\0" /* 15218 */
- "GS_CU_EN\0" /* 15227 */
- "ES_CU_EN\0" /* 15236 */
- "LSHS_CU_EN\0" /* 15245 */
- "MAX_WAVE_ID\0" /* 15256 */
- "X_R0\0" /* 15268 */
- "RING_ORDER_TS0\0" /* 15273 */
- "RING_ORDER_TS1\0" /* 15288 */
- "RING_ORDER_TS2\0" /* 15303 */
- "PIPE_ORDER_TS0\0" /* 15318 */
- "PIPE_ORDER_TS1\0" /* 15333 */
- "PIPE_ORDER_TS2\0" /* 15348 */
- "PIPE_ORDER_TS3\0" /* 15363 */
- "TS0_DUR_MULT\0" /* 15378 */
- "TS1_DUR_MULT\0" /* 15391 */
- "TS2_DUR_MULT\0" /* 15404 */
- "TS3_DUR_MULT\0" /* 15417 */
- "TS0_DURATION\0" /* 15430 */
- "TS1_DURATION\0" /* 15443 */
- "TS2_DURATION\0" /* 15456 */
- "GPR_WRITE_PRIORITY\0" /* 15469, 15479 */
- "EXP_PRIORITY_ORDER\0" /* 15488 */
- "ENABLE_SQG_TOP_EVENTS\0" /* 15507 */
- "ENABLE_SQG_BOP_EVENTS\0" /* 15529 */
- "RSRC_MGMT_RESET\0" /* 15551 */
- "X_DELAY_14_CLKS\0" /* 15567 */
- "X_DELAY_16_CLKS\0" /* 15583 */
- "X_DELAY_18_CLKS\0" /* 15599 */
- "X_DELAY_20_CLKS\0" /* 15615 */
- "X_DELAY_22_CLKS\0" /* 15631 */
- "X_DELAY_24_CLKS\0" /* 15647 */
- "X_DELAY_26_CLKS\0" /* 15663 */
- "X_DELAY_28_CLKS\0" /* 15679 */
- "X_DELAY_30_CLKS\0" /* 15695 */
- "X_DELAY_32_CLKS\0" /* 15711 */
- "X_DELAY_34_CLKS\0" /* 15727 */
- "X_DELAY_4_CLKS\0" /* 15743 */
- "X_DELAY_6_CLKS\0" /* 15758 */
- "X_DELAY_8_CLKS\0" /* 15773 */
- "X_DELAY_10_CLKS\0" /* 15788 */
- "X_DELAY_12_CLKS\0" /* 15804 */
- "VTX_DONE_DELAY\0" /* 15820 */
- "INTERP_ONE_PRIM_PER_ROW\0" /* 15835 */
- "PC_LIMIT_ENABLE\0" /* 15859 */
- "PC_LIMIT_STRICT\0" /* 15875 */
- "PC_LIMIT_SIZE\0" /* 15891 */
- "TYPE_A\0" /* 15905 */
- "VGPR_A\0" /* 15912 */
- "SGPR_A\0" /* 15919 */
- "LDS_A\0" /* 15926 */
- "WAVES_A\0" /* 15932 */
- "EN_A\0" /* 15940 */
- "TYPE_B\0" /* 15945 */
- "VGPR_B\0" /* 15952 */
- "SGPR_B\0" /* 15959 */
- "LDS_B\0" /* 15966 */
- "WAVES_B\0" /* 15972 */
- "EN_B\0" /* 15980 */
- "MSAA1_X\0" /* 15985 */
- "MSAA1_Y\0" /* 15993 */
- "MSAA2_X\0" /* 16001 */
- "MSAA2_Y\0" /* 16009 */
- "MSAA4_X\0" /* 16017 */
- "MSAA4_Y\0" /* 16025 */
- "MSAA8_X\0" /* 16033 */
- "MSAA8_Y\0" /* 16041 */
- "MSAA16_X\0" /* 16049 */
- "MSAA16_Y\0" /* 16058 */
- "NUM_PIPES\0" /* 16067 */
- "PIPE_INTERLEAVE_SIZE_GFX6\0" /* 16077 */
- "BANK_INTERLEAVE_SIZE\0" /* 16103 */
- "NUM_SHADER_ENGINES_GFX6\0" /* 16124 */
- "SHADER_ENGINE_TILE_SIZE\0" /* 16148 */
- "NUM_GPUS_GFX6\0" /* 16172 */
- "MULTI_GPU_TILE_SIZE\0" /* 16186 */
- "ROW_SIZE\0" /* 16206 */
- "NUM_LOWER_PIPES\0" /* 16215 */
- "PIPE_INTERLEAVE_SIZE_GFX9\0" /* 16231 */
- "MAX_COMPRESSED_FRAGS\0" /* 16257 */
- "NUM_BANKS\0" /* 16278 */
- "NUM_SHADER_ENGINES_GFX9\0" /* 16288 */
- "NUM_GPUS_GFX9\0" /* 16312 */
- "NUM_RB_PER_SE\0" /* 16326 */
- "SE_ENABLE\0" /* 16340 */
- "ADDR_SURF_DISPLAY_MICRO_TILING\0" /* 16350 */
- "ADDR_SURF_THIN_MICRO_TILING\0" /* 16381 */
- "ADDR_SURF_DEPTH_MICRO_TILING\0" /* 16409 */
- "ADDR_SURF_THICK_MICRO_TILING\0" /* 16438 */
- "MICRO_TILE_MODE\0" /* 16467 */
- "ARRAY_LINEAR_GENERAL\0" /* 16489, 16483 */
- "ARRAY_LINEAR_ALIGNED\0" /* 16504 */
- "ARRAY_1D_TILED_THIN1\0" /* 16525 */
- "ARRAY_1D_TILED_THICK\0" /* 16546 */
- "ARRAY_2D_TILED_THIN1\0" /* 16567 */
- "ARRAY_2D_TILED_THICK\0" /* 16588 */
- "ARRAY_2D_TILED_XTHICK\0" /* 16609 */
- "ARRAY_3D_TILED_THIN1\0" /* 16631 */
- "ARRAY_3D_TILED_THICK\0" /* 16652 */
- "ARRAY_3D_TILED_XTHICK\0" /* 16673 */
- "ARRAY_POWER_SAVE\0" /* 16695 */
- "ARRAY_MODE\0" /* 16712 */
- "ADDR_SURF_P2\0" /* 16723 */
- "ADDR_SURF_P2_RESERVED0\0" /* 16736 */
- "ADDR_SURF_P2_RESERVED1\0" /* 16759 */
- "ADDR_SURF_P2_RESERVED2\0" /* 16795, 16782 */
- "X_ADDR_SURF_P4_8X16\0" /* 16805 */
- "X_ADDR_SURF_P4_16X16\0" /* 16825 */
- "X_ADDR_SURF_P4_16X32\0" /* 16846 */
- "X_ADDR_SURF_P4_32X32\0" /* 16867 */
- "X_ADDR_SURF_P8_16X16_8X16\0" /* 16888 */
- "X_ADDR_SURF_P8_16X32_8X16\0" /* 16914 */
- "X_ADDR_SURF_P8_32X32_8X16\0" /* 16940 */
- "X_ADDR_SURF_P8_16X32_16X16\0" /* 16966 */
- "X_ADDR_SURF_P8_32X32_16X16\0" /* 16993 */
- "X_ADDR_SURF_P8_32X32_16X32\0" /* 17020 */
- "X_ADDR_SURF_P8_32X64_32X32\0" /* 17047 */
- "PIPE_CONFIG\0" /* 17074 */
- "ADDR_SURF_TILE_SPLIT_64B\0" /* 17086 */
- "ADDR_SURF_TILE_SPLIT_128B\0" /* 17111 */
- "ADDR_SURF_TILE_SPLIT_256B\0" /* 17137 */
- "ADDR_SURF_TILE_SPLIT_512B\0" /* 17163 */
- "ADDR_SURF_TILE_SPLIT_1KB\0" /* 17189 */
- "ADDR_SURF_TILE_SPLIT_2KB\0" /* 17214 */
- "ADDR_SURF_TILE_SPLIT_4KB\0" /* 17239 */
- "TILE_SPLIT\0" /* 17264 */
- "ADDR_SURF_BANK_WIDTH_1\0" /* 17275 */
- "ADDR_SURF_BANK_WIDTH_2\0" /* 17298 */
- "ADDR_SURF_BANK_WIDTH_4\0" /* 17321 */
- "ADDR_SURF_BANK_WIDTH_8\0" /* 17344 */
- "BANK_WIDTH\0" /* 17367 */
- "ADDR_SURF_BANK_HEIGHT_1\0" /* 17378 */
- "ADDR_SURF_BANK_HEIGHT_2\0" /* 17402 */
- "ADDR_SURF_BANK_HEIGHT_4\0" /* 17426 */
- "ADDR_SURF_BANK_HEIGHT_8\0" /* 17450 */
- "BANK_HEIGHT\0" /* 17474 */
- "ADDR_SURF_MACRO_ASPECT_1\0" /* 17486 */
- "ADDR_SURF_MACRO_ASPECT_2\0" /* 17511 */
- "ADDR_SURF_MACRO_ASPECT_4\0" /* 17536 */
- "ADDR_SURF_MACRO_ASPECT_8\0" /* 17561 */
- "MACRO_TILE_ASPECT\0" /* 17586 */
- "ADDR_SURF_2_BANK\0" /* 17604 */
- "ADDR_SURF_4_BANK\0" /* 17621 */
- "ADDR_SURF_8_BANK\0" /* 17638 */
- "ADDR_SURF_16_BANK\0" /* 17655 */
- "ADDR_SURF_ROTATED_MICRO_TILING\0" /* 17673 */
- "MICRO_TILE_MODE_NEW\0" /* 17704 */
- "SAMPLE_SPLIT\0" /* 17724 */
- "MEM_BASE\0" /* 17737 */
- "WAVE_LIMIT\0" /* 17746 */
- "LOCK_LOW_THRESHOLD\0" /* 17757 */
- "SIMD_DISABLE\0" /* 17776 */
- "VGPRS\0" /* 17789 */
- "SGPRS\0" /* 17795 */
- "FP_32_DENORMS\0" /* 17801 */
- "FP_64_DENORMS\0" /* 17815 */
- "FP_ALL_DENORMS\0" /* 17829 */
- "FLOAT_MODE\0" /* 17844 */
- "PRIV\0" /* 17855 */
- "DX10_CLAMP\0" /* 17860 */
- "DEBUG_MODE\0" /* 17871 */
- "IEEE_MODE\0" /* 17882 */
- "CU_GROUP_DISABLE\0" /* 17892 */
- "CACHE_CTL\0" /* 17909 */
- "CDBG_USER\0" /* 17919 */
- "FP16_OVFL\0" /* 17929 */
- "SCRATCH_EN\0" /* 17939 */
- "USER_SGPR\0" /* 17950 */
- "TRAP_PRESENT\0" /* 17960 */
- "WAVE_CNT_EN\0" /* 17973 */
- "EXTRA_LDS_SIZE\0" /* 17985, 17991 */
- "EXCP_EN_SI\0" /* 18000 */
- "EXCP_EN\0" /* 18011 */
- "LOAD_COLLISION_WAVEID\0" /* 18019 */
- "LOAD_INTRAWAVE_COLLISION\0" /* 18041 */
- "SKIP_USGPR0\0" /* 18066 */
- "USER_SGPR_MSB\0" /* 18078 */
- "VGPR_COMP_CNT\0" /* 18092, 18102 */
- "CU_GROUP_ENABLE\0" /* 18106 */
- "OC_LDS_EN\0" /* 18122 */
- "SO_BASE0_EN\0" /* 18132 */
- "SO_BASE1_EN\0" /* 18144 */
- "SO_BASE2_EN\0" /* 18156 */
- "SO_BASE3_EN\0" /* 18168 */
- "SO_EN\0" /* 18180 */
- "DISPATCH_DRAW_EN\0" /* 18186 */
- "PC_BASE_EN\0" /* 18203 */
- "GROUP_FIFO_DEPTH\0" /* 18214 */
- "GS_VGPR_COMP_CNT\0" /* 18231 */
- "ES_VGPR_COMP_CNT\0" /* 18248 */
- "LS_VGPR_COMP_CNT\0" /* 18265 */
- "TG_SIZE_EN\0" /* 18282 */
- "EXCP_EN_CIK_VI\0" /* 18293 */
- "COMPUTE_SHADER_EN\0" /* 18308 */
- "PARTIAL_TG_EN\0" /* 18326 */
- "FORCE_START_AT_000\0" /* 18340 */
- "ORDERED_APPEND_ENBL\0" /* 18359 */
- "ORDERED_APPEND_MODE\0" /* 18379 */
- "USE_THREAD_DIMENSIONS\0" /* 18399 */
- "ORDER_MODE\0" /* 18421 */
- "DISPATCH_CACHE_CNTL\0" /* 18432 */
- "SCALAR_L1_INV_VOL\0" /* 18452 */
- "VECTOR_L1_INV_VOL\0" /* 18470 */
- "DATA_ATC\0" /* 18488 */
- "RESTORE\0" /* 18497 */
- "NUM_THREAD_FULL\0" /* 18505 */
- "NUM_THREAD_PARTIAL\0" /* 18521 */
- "PIPELINESTAT_ENABLE\0" /* 18540 */
- "PERFCOUNT_ENABLE\0" /* 18560 */
- "INST_ATC\0" /* 18577 */
- "BULKY\0" /* 18586 */
- "TGID_X_EN\0" /* 18592 */
- "TGID_Y_EN\0" /* 18602 */
- "TGID_Z_EN\0" /* 18612 */
- "TIDIG_COMP_CNT\0" /* 18622 */
- "EXCP_EN_MSB\0" /* 18637 */
- "WAVES_PER_SH_SI\0" /* 18649 */
- "WAVES_PER_SH\0" /* 18665 */
- "TG_PER_CU\0" /* 18678 */
- "LOCK_THRESHOLD\0" /* 18688 */
- "SIMD_DEST_CNTL\0" /* 18703 */
- "FORCE_SIMD_DIST\0" /* 18718 */
- "CU_GROUP_COUNT\0" /* 18734 */
- "SH0_CU_EN\0" /* 18749 */
- "SH1_CU_EN\0" /* 18759 */
- "WAVES\0" /* 18769 */
- "WAVESIZE\0" /* 18775 */
- "SEND_SEID\0" /* 18784 */
- "RESERVED3\0" /* 18794 */
- "RESERVED4\0" /* 18804 */
- "WAVE_ID_BASE\0" /* 18814 */
- "PAYLOAD\0" /* 18827 */
- "IS_EVENT\0" /* 18835 */
- "IS_STATE\0" /* 18844 */
- "PERFCOUNTER_HI\0" /* 18853 */
- "PERF_SEL2\0" /* 18868 */
- "PERF_SEL3\0" /* 18878 */
- "CNTR_SEL2\0" /* 18888 */
- "CNTR_SEL3\0" /* 18898 */
- "CNTR_MODE3\0" /* 18908 */
- "CNTR_MODE2\0" /* 18919 */
- "PERF_SEL\0" /* 18930 */
- "PERF_SEL1\0" /* 18939 */
- "CNTR_MODE\0" /* 18949 */
- "CNTR_SEL0\0" /* 18959 */
- "CNTR_SEL1\0" /* 18969 */
- "SPM_MODE\0" /* 18979 */
- "CNTR_MODE1\0" /* 18988 */
- "CNTR_MODE0\0" /* 18999 */
- "DISABLE_AND_RESET\0" /* 19010 */
- "START_COUNTING\0" /* 19028 */
- "STOP_COUNTING\0" /* 19043 */
- "PERFMON_STATE\0" /* 19057 */
- "SPM_PERFMON_STATE\0" /* 19071 */
- "PERFMON_ENABLE_MODE\0" /* 19089 */
- "PERFMON_SAMPLE_ENABLE\0" /* 19109 */
- "DB_CLEAN_USER_DEFINED_MASK\0" /* 19131 */
- "CB_CLEAN_USER_DEFINED_MASK\0" /* 19158 */
- "VGT_BUSY_USER_DEFINED_MASK\0" /* 19185 */
- "TA_BUSY_USER_DEFINED_MASK\0" /* 19212 */
- "SX_BUSY_USER_DEFINED_MASK\0" /* 19238 */
- "SPI_BUSY_USER_DEFINED_MASK\0" /* 19264 */
- "SC_BUSY_USER_DEFINED_MASK\0" /* 19291 */
- "PA_BUSY_USER_DEFINED_MASK\0" /* 19317 */
- "GRBM_BUSY_USER_DEFINED_MASK\0" /* 19343 */
- "DB_BUSY_USER_DEFINED_MASK\0" /* 19371 */
- "CB_BUSY_USER_DEFINED_MASK\0" /* 19397 */
- "CP_BUSY_USER_DEFINED_MASK\0" /* 19423 */
- "IA_BUSY_USER_DEFINED_MASK\0" /* 19449 */
- "GDS_BUSY_USER_DEFINED_MASK\0" /* 19475 */
- "BCI_BUSY_USER_DEFINED_MASK\0" /* 19502 */
- "RLC_BUSY_USER_DEFINED_MASK\0" /* 19529 */
- "TC_BUSY_USER_DEFINED_MASK\0" /* 19556 */
- "WD_BUSY_USER_DEFINED_MASK\0" /* 19582 */
- "UTCL2_BUSY_USER_DEFINED_MASK\0" /* 19608 */
- "EA_BUSY_USER_DEFINED_MASK\0" /* 19637 */
- "RMI_BUSY_USER_DEFINED_MASK\0" /* 19663 */
- "PERF_MODE\0" /* 19690 */
- "PERF_MODE1\0" /* 19700 */
- "PERF_MODE3\0" /* 19711 */
- "PERF_MODE2\0" /* 19722 */
- "PERF_SEID_IGNORE_MASK\0" /* 19733 */
- "BIN0_MIN\0" /* 19755 */
- "BIN0_MAX\0" /* 19764 */
- "BIN1_MIN\0" /* 19773 */
- "BIN1_MAX\0" /* 19782 */
- "BIN2_MIN\0" /* 19791 */
- "BIN2_MAX\0" /* 19800 */
- "BIN3_MIN\0" /* 19809 */
- "BIN3_MAX\0" /* 19818 */
- "SQC_BANK_MASK\0" /* 19827 */
- "SQC_CLIENT_MASK\0" /* 19841 */
- "SIMD_MASK\0" /* 19857 */
- "PS_EN\0" /* 19867 */
- "VS_EN\0" /* 19873 */
- "GS_EN\0" /* 19879 */
- "ES_EN\0" /* 19885 */
- "HS_EN\0" /* 19891 */
- "LS_EN\0" /* 19897 */
- "CS_EN\0" /* 19903 */
- "CNTR_RATE\0" /* 19909 */
- "DISABLE_FLUSH\0" /* 19919 */
- "SH0_MASK\0" /* 19933 */
- "SH1_MASK\0" /* 19942 */
- "FORCE_EN\0" /* 19951 */
- "PERFCOUNTER_SELECT\0" /* 19960 */
- "PERFCOUNTER_SELECT1\0" /* 19979 */
- "PERFCOUNTER_SELECT2\0" /* 19999 */
- "PERFCOUNTER_SELECT3\0" /* 20019 */
- "OP_FILTER_ENABLE\0" /* 20039 */
- "OP_FILTER_SEL\0" /* 20056 */
- "FORMAT_FILTER_ENABLE\0" /* 20070 */
- "FORMAT_FILTER_SEL\0" /* 20091 */
- "CLEAR_FILTER_ENABLE\0" /* 20109 */
- "CLEAR_FILTER_SEL\0" /* 20129 */
- "MRT_FILTER_ENABLE\0" /* 20146 */
- "MRT_FILTER_SEL\0" /* 20164 */
- "NUM_SAMPLES_FILTER_ENABLE\0" /* 20179 */
- "NUM_SAMPLES_FILTER_SEL\0" /* 20205 */
- "NUM_FRAGMENTS_FILTER_ENABLE\0" /* 20228 */
- "NUM_FRAGMENTS_FILTER_SEL\0" /* 20256 */
- "DEPTH_CLEAR_ENABLE\0" /* 20281 */
- "STENCIL_CLEAR_ENABLE\0" /* 20300 */
- "DEPTH_COPY\0" /* 20321 */
- "STENCIL_COPY\0" /* 20332 */
- "RESUMMARIZE_ENABLE\0" /* 20345 */
- "STENCIL_COMPRESS_DISABLE\0" /* 20364 */
- "DEPTH_COMPRESS_DISABLE\0" /* 20389 */
- "COPY_CENTROID\0" /* 20412 */
- "COPY_SAMPLE\0" /* 20426 */
- "DECOMPRESS_ENABLE\0" /* 20438 */
- "ZPASS_INCREMENT_DISABLE\0" /* 20456 */
- "PERFECT_ZPASS_COUNTS\0" /* 20480 */
- "SAMPLE_RATE\0" /* 20501 */
- "ZPASS_ENABLE\0" /* 20513 */
- "ZFAIL_ENABLE\0" /* 20526 */
- "SFAIL_ENABLE\0" /* 20539 */
- "DBFAIL_ENABLE\0" /* 20552 */
- "SLICE_EVEN_ENABLE\0" /* 20566 */
- "SLICE_ODD_ENABLE\0" /* 20584 */
- "SLICE_START\0" /* 20601 */
- "SLICE_MAX\0" /* 20613 */
- "Z_READ_ONLY\0" /* 20623 */
- "STENCIL_READ_ONLY\0" /* 20635 */
- "MIPID\0" /* 20653 */
- "FORCE_OFF\0" /* 20659 */
- "FORCE_ENABLE\0" /* 20669 */
- "FORCE_DISABLE\0" /* 20682 */
- "FORCE_RESERVED\0" /* 20696 */
- "FORCE_HIZ_ENABLE\0" /* 20719, 20711 */
- "FORCE_HIS_ENABLE0\0" /* 20728, 20738 */
- "FORCE_HIS_ENABLE1\0" /* 20746, 20756 */
- "FORCE_SHADER_Z_ORDER\0" /* 20777, 20764 */
- "FAST_Z_DISABLE\0" /* 20785 */
- "FAST_STENCIL_DISABLE\0" /* 20800 */
- "NOOP_CULL_DISABLE\0" /* 20821 */
- "FORCE_COLOR_KILL\0" /* 20839 */
- "FORCE_Z_READ\0" /* 20856 */
- "FORCE_STENCIL_READ\0" /* 20869 */
- "FORCE_FULL_Z_RANGE\0" /* 20888 */
- "FORCE_QC_SMASK_CONFLICT\0" /* 20907 */
- "DISABLE_VIEWPORT_CLAMP\0" /* 20931 */
- "IGNORE_SC_ZRANGE\0" /* 20954 */
- "DISABLE_FULLY_COVERED\0" /* 20971 */
- "FORCE_SUMM_OFF\0" /* 20993 */
- "FORCE_SUMM_MINZ\0" /* 21008 */
- "FORCE_SUMM_MAXZ\0" /* 21024 */
- "FORCE_SUMM_BOTH\0" /* 21040 */
- "FORCE_Z_LIMIT_SUMM\0" /* 21056 */
- "MAX_TILES_IN_DTT\0" /* 21075 */
- "DISABLE_TILE_RATE_TILES\0" /* 21092 */
- "FORCE_Z_DIRTY\0" /* 21124, 21116 */
- "FORCE_STENCIL_DIRTY\0" /* 21130 */
- "FORCE_Z_VALID\0" /* 21150 */
- "FORCE_STENCIL_VALID\0" /* 21164 */
- "PRESERVE_COMPRESSION\0" /* 21184, 21193 */
- "PSLC_AUTO\0" /* 21205 */
- "PSLC_ON_HANG_ONLY\0" /* 21215 */
- "PSLC_ASAP\0" /* 21233 */
- "PSLC_COUNTDOWN\0" /* 21243 */
- "PARTIAL_SQUAD_LAUNCH_CONTROL\0" /* 21258 */
- "PARTIAL_SQUAD_LAUNCH_COUNTDOWN\0" /* 21287 */
- "DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION\0" /* 21318 */
- "DISABLE_SMEM_EXPCLEAR_OPTIMIZATION\0" /* 21354 */
- "DISABLE_COLOR_ON_VALIDATION\0" /* 21389 */
- "DECOMPRESS_Z_ON_FLUSH\0" /* 21417 */
- "DISABLE_REG_SNOOP\0" /* 21439 */
- "DEPTH_BOUNDS_HIER_DEPTH_DISABLE\0" /* 21457 */
- "SEPARATE_HIZS_FUNC_ENABLE\0" /* 21489 */
- "HIZ_ZFUNC\0" /* 21515, 21519 */
- "HIS_SFUNC_FF\0" /* 21525 */
- "HIS_SFUNC_BF\0" /* 21538 */
- "PRESERVE_ZRANGE\0" /* 21551 */
- "PRESERVE_SRESULTS\0" /* 21567 */
- "DISABLE_FAST_PASS\0" /* 21585 */
- "ALLOW_PARTIAL_RES_HIER_KILL\0" /* 21603 */
- "CLEAR\0" /* 21631 */
- "TL_X\0" /* 21637 */
- "TL_Y\0" /* 21642 */
- "BR_X\0" /* 21647 */
- "BR_Y\0" /* 21652 */
- "ADDR5_SWIZZLE_MASK\0" /* 21657 */
- "ARRAY_PRT_TILED_THIN1\0" /* 21676 */
- "ARRAY_PRT_2D_TILED_THIN1\0" /* 21698 */
- "X_ADDR_SURF_P16_32X32_8X16\0" /* 21723 */
- "X_ADDR_SURF_P16_32X32_16X16\0" /* 21750 */
- "Z_INVALID\0" /* 21778 */
- "Z_16\0" /* 21788 */
- "Z_24\0" /* 21793 */
- "Z_32_FLOAT\0" /* 21798 */
- "NUM_SAMPLES\0" /* 21809 */
- "TILE_MODE_INDEX\0" /* 21821 */
- "DECOMPRESS_ON_N_ZPLANES\0" /* 21837 */
- "ALLOW_EXPCLEAR\0" /* 21861 */
- "READ_SIZE\0" /* 21876 */
- "TILE_SURFACE_ENABLE\0" /* 21886 */
- "CLEAR_DISALLOWED\0" /* 21906 */
- "ZRANGE_PRECISION\0" /* 21923 */
- "STENCIL_INVALID\0" /* 21940 */
- "STENCIL_8\0" /* 21956 */
- "TILE_STENCIL_DISABLE\0" /* 21966 */
- "PITCH_TILE_MAX\0" /* 21993, 21987 */
- "HEIGHT_TILE_MAX\0" /* 22002 */
- "SLICE_TILE_MAX\0" /* 22018 */
- "DEST_BASE_HI_256B\0" /* 22033 */
- "WINDOW_X_OFFSET\0" /* 22058, 22051 */
- "WINDOW_Y_OFFSET\0" /* 22074, 22067 */
- "WINDOW_OFFSET_DISABLE\0" /* 22083 */
- "CLIP_RULE\0" /* 22105 */
- "ER_TRI\0" /* 22115 */
- "ER_RECT\0" /* 22122 */
- "ER_LINE_LR\0" /* 22130 */
- "ER_LINE_RL\0" /* 22141 */
- "ER_LINE_TB\0" /* 22152 */
- "ER_LINE_BT\0" /* 22163 */
- "HW_SCREEN_OFFSET_X\0" /* 22174 */
- "HW_SCREEN_OFFSET_Y\0" /* 22193 */
- "TARGET0_ENABLE\0" /* 22212 */
- "TARGET1_ENABLE\0" /* 22227 */
- "TARGET2_ENABLE\0" /* 22242 */
- "TARGET3_ENABLE\0" /* 22257 */
- "TARGET4_ENABLE\0" /* 22272 */
- "TARGET5_ENABLE\0" /* 22287 */
- "TARGET6_ENABLE\0" /* 22302 */
- "TARGET7_ENABLE\0" /* 22317 */
- "OUTPUT0_ENABLE\0" /* 22332 */
- "OUTPUT1_ENABLE\0" /* 22347 */
- "OUTPUT2_ENABLE\0" /* 22362 */
- "OUTPUT3_ENABLE\0" /* 22377 */
- "OUTPUT4_ENABLE\0" /* 22392 */
- "OUTPUT5_ENABLE\0" /* 22407 */
- "OUTPUT6_ENABLE\0" /* 22422 */
- "OUTPUT7_ENABLE\0" /* 22437 */
- "RASTER_CONFIG_RB_MAP_0\0" /* 22452 */
- "RASTER_CONFIG_RB_MAP_1\0" /* 22475 */
- "RASTER_CONFIG_RB_MAP_2\0" /* 22498 */
- "RASTER_CONFIG_RB_MAP_3\0" /* 22521 */
- "RB_MAP_PKR0\0" /* 22544 */
- "RB_MAP_PKR1\0" /* 22556 */
- "RASTER_CONFIG_RB_XSEL2_0\0" /* 22568 */
- "RASTER_CONFIG_RB_XSEL2_1\0" /* 22593 */
- "RASTER_CONFIG_RB_XSEL2_2\0" /* 22618 */
- "RASTER_CONFIG_RB_XSEL2_3\0" /* 22643 */
- "RB_XSEL2\0" /* 22668 */
- "RB_XSEL\0" /* 22677 */
- "RB_YSEL\0" /* 22685 */
- "RASTER_CONFIG_PKR_MAP_0\0" /* 22693 */
- "RASTER_CONFIG_PKR_MAP_1\0" /* 22717 */
- "RASTER_CONFIG_PKR_MAP_2\0" /* 22741 */
- "RASTER_CONFIG_PKR_MAP_3\0" /* 22765 */
- "PKR_MAP\0" /* 22789 */
- "RASTER_CONFIG_PKR_XSEL_0\0" /* 22797 */
- "RASTER_CONFIG_PKR_XSEL_1\0" /* 22822 */
- "RASTER_CONFIG_PKR_XSEL_2\0" /* 22847 */
- "RASTER_CONFIG_PKR_XSEL_3\0" /* 22872 */
- "PKR_XSEL\0" /* 22897 */
- "RASTER_CONFIG_PKR_YSEL_0\0" /* 22906 */
- "RASTER_CONFIG_PKR_YSEL_1\0" /* 22931 */
- "RASTER_CONFIG_PKR_YSEL_2\0" /* 22956 */
- "RASTER_CONFIG_PKR_YSEL_3\0" /* 22981 */
- "PKR_YSEL\0" /* 23006 */
- "RASTER_CONFIG_PKR_XSEL2_0\0" /* 23015 */
- "RASTER_CONFIG_PKR_XSEL2_1\0" /* 23041 */
- "RASTER_CONFIG_PKR_XSEL2_2\0" /* 23067 */
- "RASTER_CONFIG_PKR_XSEL2_3\0" /* 23093 */
- "PKR_XSEL2\0" /* 23119 */
- "RASTER_CONFIG_SC_MAP_0\0" /* 23129 */
- "RASTER_CONFIG_SC_MAP_1\0" /* 23152 */
- "RASTER_CONFIG_SC_MAP_2\0" /* 23175 */
- "RASTER_CONFIG_SC_MAP_3\0" /* 23198 */
- "SC_MAP\0" /* 23221 */
- "RASTER_CONFIG_SC_XSEL_8_WIDE_TILE\0" /* 23228 */
- "RASTER_CONFIG_SC_XSEL_16_WIDE_TILE\0" /* 23262 */
- "RASTER_CONFIG_SC_XSEL_32_WIDE_TILE\0" /* 23297 */
- "RASTER_CONFIG_SC_XSEL_64_WIDE_TILE\0" /* 23332 */
- "SC_XSEL\0" /* 23367 */
- "RASTER_CONFIG_SC_YSEL_8_WIDE_TILE\0" /* 23375 */
- "RASTER_CONFIG_SC_YSEL_16_WIDE_TILE\0" /* 23409 */
- "RASTER_CONFIG_SC_YSEL_32_WIDE_TILE\0" /* 23444 */
- "RASTER_CONFIG_SC_YSEL_64_WIDE_TILE\0" /* 23479 */
- "SC_YSEL\0" /* 23514 */
- "RASTER_CONFIG_SE_MAP_0\0" /* 23522 */
- "RASTER_CONFIG_SE_MAP_1\0" /* 23545 */
- "RASTER_CONFIG_SE_MAP_2\0" /* 23568 */
- "RASTER_CONFIG_SE_MAP_3\0" /* 23591 */
- "SE_MAP\0" /* 23614 */
- "RASTER_CONFIG_SE_XSEL_8_WIDE_TILE\0" /* 23621 */
- "RASTER_CONFIG_SE_XSEL_16_WIDE_TILE\0" /* 23655 */
- "RASTER_CONFIG_SE_XSEL_32_WIDE_TILE\0" /* 23690 */
- "RASTER_CONFIG_SE_XSEL_64_WIDE_TILE\0" /* 23725 */
- "SE_XSEL_GFX6\0" /* 23760 */
- "RASTER_CONFIG_SE_YSEL_8_WIDE_TILE\0" /* 23773 */
- "RASTER_CONFIG_SE_YSEL_16_WIDE_TILE\0" /* 23807 */
- "RASTER_CONFIG_SE_YSEL_32_WIDE_TILE\0" /* 23842 */
- "RASTER_CONFIG_SE_YSEL_64_WIDE_TILE\0" /* 23877 */
- "SE_YSEL_GFX6\0" /* 23912 */
- "SE_XSEL_GFX9\0" /* 23925 */
- "SE_YSEL_GFX9\0" /* 23938 */
- "RASTER_CONFIG_SE_PAIR_MAP_0\0" /* 23951 */
- "RASTER_CONFIG_SE_PAIR_MAP_1\0" /* 23979 */
- "RASTER_CONFIG_SE_PAIR_MAP_2\0" /* 24007 */
- "RASTER_CONFIG_SE_PAIR_MAP_3\0" /* 24035 */
- "SE_PAIR_MAP\0" /* 24063 */
- "RASTER_CONFIG_SE_PAIR_XSEL_8_WIDE_TILE\0" /* 24075 */
- "RASTER_CONFIG_SE_PAIR_XSEL_16_WIDE_TILE\0" /* 24114 */
- "RASTER_CONFIG_SE_PAIR_XSEL_32_WIDE_TILE\0" /* 24154 */
- "RASTER_CONFIG_SE_PAIR_XSEL_64_WIDE_TILE\0" /* 24194 */
- "SE_PAIR_XSEL_GFX6\0" /* 24234 */
- "RASTER_CONFIG_SE_PAIR_YSEL_8_WIDE_TILE\0" /* 24252 */
- "RASTER_CONFIG_SE_PAIR_YSEL_16_WIDE_TILE\0" /* 24291 */
- "RASTER_CONFIG_SE_PAIR_YSEL_32_WIDE_TILE\0" /* 24331 */
- "RASTER_CONFIG_SE_PAIR_YSEL_64_WIDE_TILE\0" /* 24371 */
- "SE_PAIR_YSEL_GFX6\0" /* 24411 */
- "SE_PAIR_XSEL_GFX9\0" /* 24429 */
- "SE_PAIR_YSEL_GFX9\0" /* 24447 */
- "OVERWRITE_COMBINER_DISABLE\0" /* 24465 */
- "OVERWRITE_COMBINER_MRT_SHARING_DISABLE\0" /* 24492 */
- "OVERWRITE_COMBINER_WATERMARK\0" /* 24531 */
- "STENCIL_KEEP\0" /* 24560 */
- "STENCIL_ZERO\0" /* 24573 */
- "STENCIL_ONES\0" /* 24586 */
- "STENCIL_REPLACE_TEST\0" /* 24599 */
- "STENCIL_REPLACE_OP\0" /* 24620 */
- "STENCIL_ADD_CLAMP\0" /* 24639 */
- "STENCIL_SUB_CLAMP\0" /* 24657 */
- "STENCIL_INVERT\0" /* 24675 */
- "STENCIL_ADD_WRAP\0" /* 24690 */
- "STENCIL_SUB_WRAP\0" /* 24707 */
- "STENCIL_AND\0" /* 24724 */
- "STENCIL_OR\0" /* 24736 */
- "STENCIL_XOR\0" /* 24747 */
- "STENCIL_NAND\0" /* 24759 */
- "STENCIL_NOR\0" /* 24772 */
- "STENCIL_XNOR\0" /* 24784 */
- "STENCILFAIL\0" /* 24797 */
- "STENCILZPASS\0" /* 24809 */
- "STENCILZFAIL\0" /* 24822 */
- "STENCILFAIL_BF\0" /* 24835 */
- "STENCILZPASS_BF\0" /* 24850 */
- "STENCILZFAIL_BF\0" /* 24866 */
- "STENCILTESTVAL\0" /* 24882 */
- "STENCILMASK\0" /* 24897 */
- "STENCILWRITEMASK\0" /* 24909 */
- "STENCILOPVAL\0" /* 24926 */
- "STENCILTESTVAL_BF\0" /* 24939 */
- "STENCILMASK_BF\0" /* 24957 */
- "STENCILWRITEMASK_BF\0" /* 24972 */
- "STENCILOPVAL_BF\0" /* 24992 */
- "X_0_0F\0" /* 25008 */
- "DEFAULT_VAL\0" /* 25015 */
- "FLAT_SHADE\0" /* 25027 */
- "CYL_WRAP\0" /* 25038 */
- "PT_SPRITE_TEX\0" /* 25047 */
- "DUP\0" /* 25061 */
- "FP16_INTERP_MODE\0" /* 25065 */
- "USE_DEFAULT_ATTR1\0" /* 25082 */
- "DEFAULT_VAL_ATTR1\0" /* 25100 */
- "PT_SPRITE_TEX_ATTR1\0" /* 25118 */
- "ATTR0_VALID\0" /* 25138 */
- "ATTR1_VALID\0" /* 25150 */
- "VS_EXPORT_COUNT\0" /* 25162 */
- "VS_HALF_PACK\0" /* 25178 */
- "VS_EXPORTS_FOG\0" /* 25191 */
- "VS_OUT_FOG_VEC_ADDR\0" /* 25206 */
- "PERSP_SAMPLE_ENA\0" /* 25226 */
- "PERSP_CENTER_ENA\0" /* 25243 */
- "PERSP_CENTROID_ENA\0" /* 25260 */
- "PERSP_PULL_MODEL_ENA\0" /* 25279 */
- "LINEAR_SAMPLE_ENA\0" /* 25300 */
- "LINEAR_CENTER_ENA\0" /* 25318 */
- "LINEAR_CENTROID_ENA\0" /* 25336 */
- "LINE_STIPPLE_TEX_ENA\0" /* 25356 */
- "POS_X_FLOAT_ENA\0" /* 25377 */
- "POS_Y_FLOAT_ENA\0" /* 25393 */
- "POS_Z_FLOAT_ENA\0" /* 25409 */
- "POS_W_FLOAT_ENA\0" /* 25425 */
- "FRONT_FACE_ENA\0" /* 25441 */
- "ANCILLARY_ENA\0" /* 25456 */
- "SAMPLE_COVERAGE_ENA\0" /* 25470 */
- "POS_FIXED_PT_ENA\0" /* 25490 */
- "FLAT_SHADE_ENA\0" /* 25507 */
- "PNT_SPRITE_ENA\0" /* 25522 */
- "SPI_PNT_SPRITE_SEL_0\0" /* 25537 */
- "SPI_PNT_SPRITE_SEL_1\0" /* 25558 */
- "SPI_PNT_SPRITE_SEL_S\0" /* 25579 */
- "SPI_PNT_SPRITE_SEL_T\0" /* 25600 */
- "SPI_PNT_SPRITE_SEL_NONE\0" /* 25621 */
- "PNT_SPRITE_OVRD_X\0" /* 25645 */
- "PNT_SPRITE_OVRD_Y\0" /* 25663 */
- "PNT_SPRITE_OVRD_Z\0" /* 25681 */
- "PNT_SPRITE_OVRD_W\0" /* 25699 */
- "PNT_SPRITE_TOP_1\0" /* 25717 */
- "NUM_INTERP\0" /* 25734 */
- "PARAM_GEN\0" /* 25745 */
- "FOG_ADDR\0" /* 25755 */
- "BC_OPTIMIZE_DISABLE\0" /* 25764 */
- "PASS_FOG_THROUGH_PS\0" /* 25784 */
- "OFFCHIP_PARAM_EN\0" /* 25804 */
- "LATE_PC_DEALLOC\0" /* 25821 */
- "PERSP_CENTER_CNTL\0" /* 25837 */
- "PERSP_CENTROID_CNTL\0" /* 25855 */
- "LINEAR_CENTER_CNTL\0" /* 25875 */
- "LINEAR_CENTROID_CNTL\0" /* 25894 */
- "X_CALCULATE_PER_PIXEL_FLOATING_POINT_POSITION_AT\0" /* 25915 */
- "POS_FLOAT_LOCATION\0" /* 25964 */
- "POS_FLOAT_ULC\0" /* 25983 */
- "FRONT_FACE_ALL_BITS\0" /* 25997 */
- "NUM_PS_WAVES\0" /* 26017 */
- "NUM_VS_WAVES\0" /* 26030 */
- "NUM_GS_WAVES\0" /* 26043 */
- "NUM_ES_WAVES\0" /* 26056 */
- "NUM_HS_WAVES\0" /* 26069 */
- "NUM_LS_WAVES\0" /* 26082 */
- "SPI_SHADER_NONE\0" /* 26095 */
- "SPI_SHADER_1COMP\0" /* 26111 */
- "SPI_SHADER_2COMP\0" /* 26128 */
- "SPI_SHADER_4COMPRESS\0" /* 26145 */
- "SPI_SHADER_4COMP\0" /* 26166 */
- "POS0_EXPORT_FORMAT\0" /* 26183 */
- "POS1_EXPORT_FORMAT\0" /* 26202 */
- "POS2_EXPORT_FORMAT\0" /* 26221 */
- "POS3_EXPORT_FORMAT\0" /* 26240 */
- "SPI_SHADER_ZERO\0" /* 26259 */
- "SPI_SHADER_32_R\0" /* 26275 */
- "SPI_SHADER_32_GR\0" /* 26291 */
- "SPI_SHADER_32_AR\0" /* 26308 */
- "SPI_SHADER_FP16_ABGR\0" /* 26325 */
- "SPI_SHADER_UNORM16_ABGR\0" /* 26346 */
- "SPI_SHADER_SNORM16_ABGR\0" /* 26370 */
- "SPI_SHADER_UINT16_ABGR\0" /* 26394 */
- "SPI_SHADER_SINT16_ABGR\0" /* 26417 */
- "SPI_SHADER_32_ABGR\0" /* 26440 */
- "Z_EXPORT_FORMAT\0" /* 26459 */
- "COL0_EXPORT_FORMAT\0" /* 26475 */
- "COL1_EXPORT_FORMAT\0" /* 26494 */
- "COL2_EXPORT_FORMAT\0" /* 26513 */
- "COL3_EXPORT_FORMAT\0" /* 26532 */
- "COL4_EXPORT_FORMAT\0" /* 26551 */
- "COL5_EXPORT_FORMAT\0" /* 26570 */
- "COL6_EXPORT_FORMAT\0" /* 26589 */
- "COL7_EXPORT_FORMAT\0" /* 26608 */
- "SX_RT_EXPORT_NO_CONVERSION\0" /* 26627 */
- "SX_RT_EXPORT_32_R\0" /* 26654 */
- "SX_RT_EXPORT_32_A\0" /* 26672 */
- "SX_RT_EXPORT_10_11_11\0" /* 26690 */
- "SX_RT_EXPORT_2_10_10_10\0" /* 26712 */
- "SX_RT_EXPORT_8_8_8_8\0" /* 26736 */
- "SX_RT_EXPORT_5_6_5\0" /* 26757 */
- "SX_RT_EXPORT_1_5_5_5\0" /* 26776 */
- "SX_RT_EXPORT_4_4_4_4\0" /* 26797 */
- "SX_RT_EXPORT_16_16_GR\0" /* 26818 */
- "SX_RT_EXPORT_16_16_AR\0" /* 26840 */
- "MRT0\0" /* 26862 */
- "MRT1\0" /* 26867 */
- "MRT2\0" /* 26872 */
- "MRT3\0" /* 26877 */
- "MRT4\0" /* 26882 */
- "MRT5\0" /* 26887 */
- "MRT6\0" /* 26892 */
- "MRT7\0" /* 26897 */
- "EXACT\0" /* 26902 */
- "11BIT_FORMAT\0" /* 26908 */
- "10BIT_FORMAT\0" /* 26921 */
- "8BIT_FORMAT\0" /* 26934 */
- "6BIT_FORMAT\0" /* 26946 */
- "5BIT_FORMAT\0" /* 26958 */
- "4BIT_FORMAT\0" /* 26970 */
- "MRT0_EPSILON\0" /* 26982 */
- "MRT1_EPSILON\0" /* 26995 */
- "MRT2_EPSILON\0" /* 27008 */
- "MRT3_EPSILON\0" /* 27021 */
- "MRT4_EPSILON\0" /* 27034 */
- "MRT5_EPSILON\0" /* 27047 */
- "MRT6_EPSILON\0" /* 27060 */
- "MRT7_EPSILON\0" /* 27073 */
- "MRT0_COLOR_OPT_DISABLE\0" /* 27086 */
- "MRT0_ALPHA_OPT_DISABLE\0" /* 27109 */
- "MRT1_COLOR_OPT_DISABLE\0" /* 27132 */
- "MRT1_ALPHA_OPT_DISABLE\0" /* 27155 */
- "MRT2_COLOR_OPT_DISABLE\0" /* 27178 */
- "MRT2_ALPHA_OPT_DISABLE\0" /* 27201 */
- "MRT3_COLOR_OPT_DISABLE\0" /* 27224 */
- "MRT3_ALPHA_OPT_DISABLE\0" /* 27247 */
- "MRT4_COLOR_OPT_DISABLE\0" /* 27270 */
- "MRT4_ALPHA_OPT_DISABLE\0" /* 27293 */
- "MRT5_COLOR_OPT_DISABLE\0" /* 27316 */
- "MRT5_ALPHA_OPT_DISABLE\0" /* 27339 */
- "MRT6_COLOR_OPT_DISABLE\0" /* 27362 */
- "MRT6_ALPHA_OPT_DISABLE\0" /* 27385 */
- "MRT7_COLOR_OPT_DISABLE\0" /* 27408 */
- "MRT7_ALPHA_OPT_DISABLE\0" /* 27431 */
- "PIXEN_ZERO_OPT_DISABLE\0" /* 27454 */
- "BLEND_OPT_PRESERVE_NONE_IGNORE_ALL\0" /* 27477 */
- "BLEND_OPT_PRESERVE_ALL_IGNORE_NONE\0" /* 27512 */
- "BLEND_OPT_PRESERVE_C1_IGNORE_C0\0" /* 27547 */
- "BLEND_OPT_PRESERVE_C0_IGNORE_C1\0" /* 27579 */
- "BLEND_OPT_PRESERVE_A1_IGNORE_A0\0" /* 27611 */
- "BLEND_OPT_PRESERVE_A0_IGNORE_A1\0" /* 27643 */
- "BLEND_OPT_PRESERVE_NONE_IGNORE_A0\0" /* 27675 */
- "BLEND_OPT_PRESERVE_NONE_IGNORE_NONE\0" /* 27709 */
- "COLOR_SRC_OPT\0" /* 27745 */
- "COLOR_DST_OPT\0" /* 27759 */
- "OPT_COMB_NONE\0" /* 27773 */
- "OPT_COMB_ADD\0" /* 27787 */
- "OPT_COMB_SUBTRACT\0" /* 27800 */
- "OPT_COMB_MIN\0" /* 27818 */
- "OPT_COMB_MAX\0" /* 27831 */
- "OPT_COMB_REVSUBTRACT\0" /* 27844 */
- "OPT_COMB_BLEND_DISABLED\0" /* 27865 */
- "OPT_COMB_SAFE_ADD\0" /* 27889 */
- "COLOR_COMB_FCN\0" /* 27907 */
- "ALPHA_SRC_OPT\0" /* 27922 */
- "ALPHA_DST_OPT\0" /* 27936 */
- "ALPHA_COMB_FCN\0" /* 27950 */
- "BLEND_ZERO\0" /* 27965 */
- "BLEND_ONE\0" /* 27976 */
- "BLEND_SRC_COLOR\0" /* 27986 */
- "BLEND_ONE_MINUS_SRC_COLOR\0" /* 28002 */
- "BLEND_SRC_ALPHA\0" /* 28028 */
- "BLEND_ONE_MINUS_SRC_ALPHA\0" /* 28044 */
- "BLEND_DST_ALPHA\0" /* 28070 */
- "BLEND_ONE_MINUS_DST_ALPHA\0" /* 28086 */
- "BLEND_DST_COLOR\0" /* 28112 */
- "BLEND_ONE_MINUS_DST_COLOR\0" /* 28128 */
- "BLEND_SRC_ALPHA_SATURATE\0" /* 28154 */
- "BLEND_CONSTANT_COLOR\0" /* 28179 */
- "BLEND_ONE_MINUS_CONSTANT_COLOR\0" /* 28200 */
- "BLEND_SRC1_COLOR\0" /* 28231 */
- "BLEND_INV_SRC1_COLOR\0" /* 28248 */
- "BLEND_SRC1_ALPHA\0" /* 28269 */
- "BLEND_INV_SRC1_ALPHA\0" /* 28286 */
- "BLEND_CONSTANT_ALPHA\0" /* 28307 */
- "BLEND_ONE_MINUS_CONSTANT_ALPHA\0" /* 28328 */
- "COLOR_SRCBLEND\0" /* 28359 */
- "COMB_DST_PLUS_SRC\0" /* 28374 */
- "COMB_SRC_MINUS_DST\0" /* 28392 */
- "COMB_MIN_DST_SRC\0" /* 28411 */
- "COMB_MAX_DST_SRC\0" /* 28428 */
- "COMB_DST_MINUS_SRC\0" /* 28445 */
- "COLOR_DESTBLEND\0" /* 28464 */
- "ALPHA_SRCBLEND\0" /* 28480 */
- "ALPHA_DESTBLEND\0" /* 28495 */
- "SEPARATE_ALPHA_BLEND\0" /* 28511 */
- "DISABLE_ROP3\0" /* 28540, 28532 */
- "SRC_STATE_ID\0" /* 28545 */
- "BASE_ADDR_GFX6\0" /* 28558 */
- "BASE_ADDR_GFX9\0" /* 28573 */
- "DI_SRC_SEL_DMA\0" /* 28588 */
- "DI_SRC_SEL_IMMEDIATE\0" /* 28603 */
- "DI_SRC_SEL_AUTO_INDEX\0" /* 28624 */
- "DI_SRC_SEL_RESERVED\0" /* 28646 */
- "SOURCE_SELECT\0" /* 28666 */
- "DI_MAJOR_MODE_0\0" /* 28680 */
- "DI_MAJOR_MODE_1\0" /* 28696 */
- "MAJOR_MODE\0" /* 28712 */
- "NOT_EOP\0" /* 28723 */
- "USE_OPAQUE\0" /* 28731 */
- "SPRITE_EN_R6XX\0" /* 28742 */
- "UNROLLED_INST\0" /* 28757 */
- "GRBM_SKEW_NO_DEC\0" /* 28771 */
- "REG_RT_INDEX\0" /* 28788 */
- "ADDRESS_LOW\0" /* 28801 */
- "STENCIL_ENABLE\0" /* 28813 */
- "Z_WRITE_ENABLE\0" /* 28828 */
- "DEPTH_BOUNDS_ENABLE\0" /* 28843 */
- "FRAG_NEVER\0" /* 28863 */
- "FRAG_LESS\0" /* 28874 */
- "FRAG_EQUAL\0" /* 28884 */
- "FRAG_LEQUAL\0" /* 28895 */
- "FRAG_GREATER\0" /* 28907 */
- "FRAG_NOTEQUAL\0" /* 28920 */
- "FRAG_GEQUAL\0" /* 28934 */
- "FRAG_ALWAYS\0" /* 28946 */
- "BACKFACE_ENABLE\0" /* 28958 */
- "REF_NEVER\0" /* 28974 */
- "REF_LESS\0" /* 28984 */
- "REF_EQUAL\0" /* 28993 */
- "REF_LEQUAL\0" /* 29003 */
- "REF_GREATER\0" /* 29014 */
- "REF_NOTEQUAL\0" /* 29026 */
- "REF_GEQUAL\0" /* 29039 */
- "REF_ALWAYS\0" /* 29050 */
- "STENCILFUNC\0" /* 29061 */
- "STENCILFUNC_BF\0" /* 29073 */
- "ENABLE_COLOR_WRITES_ON_DEPTH_FAIL\0" /* 29088 */
- "DISABLE_COLOR_WRITES_ON_DEPTH_PASS\0" /* 29122 */
- "MAX_ANCHOR_SAMPLES\0" /* 29157 */
- "PS_ITER_SAMPLES\0" /* 29176 */
- "MASK_EXPORT_NUM_SAMPLES\0" /* 29192 */
- "ALPHA_TO_MASK_NUM_SAMPLES\0" /* 29216 */
- "HIGH_QUALITY_INTERSECTIONS\0" /* 29242 */
- "INCOHERENT_EQAA_READS\0" /* 29269 */
- "INTERPOLATE_COMP_Z\0" /* 29291 */
- "INTERPOLATE_SRC_Z\0" /* 29310 */
- "STATIC_ANCHOR_ASSOCIATIONS\0" /* 29328 */
- "ALPHA_TO_MASK_EQAA_DISABLE\0" /* 29355 */
- "OVERRASTERIZATION_AMOUNT\0" /* 29382 */
- "ENABLE_POSTZ_OVERRASTERIZATION\0" /* 29407 */
- "DISABLE_DUAL_QUAD\0" /* 29438 */
- "DEGAMMA_ENABLE\0" /* 29456 */
- "CB_DISABLE\0" /* 29471 */
- "CB_NORMAL\0" /* 29482 */
- "CB_ELIMINATE_FAST_CLEAR\0" /* 29505, 29492 */
- "CB_RESOLVE\0" /* 29516 */
- "CB_FMASK_DECOMPRESS\0" /* 29527 */
- "CB_DCC_DECOMPRESS\0" /* 29547 */
- "X_0X00\0" /* 29565 */
- "X_0X05\0" /* 29572 */
- "X_0X0A\0" /* 29579 */
- "X_0X0F\0" /* 29586 */
- "X_0X11\0" /* 29593 */
- "X_0X22\0" /* 29600 */
- "X_0X33\0" /* 29607 */
- "X_0X44\0" /* 29614 */
- "X_0X50\0" /* 29621 */
- "X_0X55\0" /* 29628 */
- "X_0X5A\0" /* 29635 */
- "X_0X5F\0" /* 29642 */
- "X_0X66\0" /* 29649 */
- "X_0X77\0" /* 29656 */
- "X_0X88\0" /* 29663 */
- "X_0X99\0" /* 29670 */
- "X_0XA0\0" /* 29677 */
- "X_0XA5\0" /* 29684 */
- "X_0XAA\0" /* 29691 */
- "X_0XAF\0" /* 29698 */
- "X_0XBB\0" /* 29705 */
- "X_0XCC\0" /* 29712 */
- "X_0XDD\0" /* 29719 */
- "X_0XEE\0" /* 29726 */
- "X_0XF0\0" /* 29733 */
- "X_0XF5\0" /* 29740 */
- "X_0XFA\0" /* 29747 */
- "X_0XFF\0" /* 29754 */
- "Z_EXPORT_ENABLE\0" /* 29761 */
- "STENCIL_TEST_VAL_EXPORT_ENABLE\0" /* 29777 */
- "STENCIL_OP_VAL_EXPORT_ENABLE\0" /* 29808 */
- "LATE_Z\0" /* 29837 */
- "EARLY_Z_THEN_LATE_Z\0" /* 29844 */
- "RE_Z\0" /* 29864 */
- "EARLY_Z_THEN_RE_Z\0" /* 29869 */
- "KILL_ENABLE\0" /* 29887 */
- "COVERAGE_TO_MASK_ENABLE\0" /* 29899 */
- "MASK_EXPORT_ENABLE\0" /* 29923 */
- "EXEC_ON_HIER_FAIL\0" /* 29942 */
- "EXEC_ON_NOOP\0" /* 29960 */
- "ALPHA_TO_MASK_DISABLE\0" /* 29973 */
- "DEPTH_BEFORE_SHADER\0" /* 29995 */
- "EXPORT_ANY_Z\0" /* 30015 */
- "EXPORT_LESS_THAN_Z\0" /* 30028 */
- "EXPORT_GREATER_THAN_Z\0" /* 30047 */
- "EXPORT_RESERVED\0" /* 30069 */
- "CONSERVATIVE_Z_EXPORT\0" /* 30085 */
- "DUAL_QUAD_DISABLE\0" /* 30107 */
- "PRIMITIVE_ORDERED_PIXEL_SHADER\0" /* 30125 */
- "EXEC_IF_OVERLAPPED\0" /* 30156 */
- "POPS_OVERLAP_NUM_SAMPLES\0" /* 30175 */
- "UCP_ENA_0\0" /* 30200 */
- "UCP_ENA_1\0" /* 30210 */
- "UCP_ENA_2\0" /* 30220 */
- "UCP_ENA_3\0" /* 30230 */
- "UCP_ENA_4\0" /* 30240 */
- "UCP_ENA_5\0" /* 30250 */
- "PS_UCP_Y_SCALE_NEG\0" /* 30260 */
- "PS_UCP_MODE\0" /* 30279 */
- "CLIP_DISABLE\0" /* 30291 */
- "UCP_CULL_ONLY_ENA\0" /* 30304 */
- "BOUNDARY_EDGE_FLAG_ENA\0" /* 30322 */
- "DX_CLIP_SPACE_DEF\0" /* 30345 */
- "DIS_CLIP_ERR_DETECT\0" /* 30363 */
- "VTX_KILL_OR\0" /* 30383 */
- "DX_RASTERIZATION_KILL\0" /* 30395 */
- "DX_LINEAR_ATTR_CLIP_ENA\0" /* 30417 */
- "VTE_VPORT_PROVOKE_DISABLE\0" /* 30441 */
- "ZCLIP_NEAR_DISABLE\0" /* 30467 */
- "ZCLIP_FAR_DISABLE\0" /* 30486 */
- "CULL_FRONT\0" /* 30504 */
- "CULL_BACK\0" /* 30515 */
- "FACE\0" /* 30525 */
- "X_DISABLE_POLY_MODE\0" /* 30530, 30540 */
- "X_DUAL_MODE\0" /* 30550 */
- "X_DRAW_POINTS\0" /* 30562 */
- "X_DRAW_LINES\0" /* 30576 */
- "X_DRAW_TRIANGLES\0" /* 30589 */
- "POLYMODE_FRONT_PTYPE\0" /* 30606 */
- "POLYMODE_BACK_PTYPE\0" /* 30627 */
- "POLY_OFFSET_FRONT_ENABLE\0" /* 30647 */
- "POLY_OFFSET_BACK_ENABLE\0" /* 30672 */
- "POLY_OFFSET_PARA_ENABLE\0" /* 30696 */
- "VTX_WINDOW_OFFSET_ENABLE\0" /* 30720 */
- "PROVOKING_VTX_LAST\0" /* 30745 */
- "PERSP_CORR_DIS\0" /* 30764 */
- "MULTI_PRIM_IB_ENA\0" /* 30779 */
- "RIGHT_TRIANGLE_ALTERNATE_GRADIENT_REF\0" /* 30797 */
- "NEW_QUAD_DECOMPOSITION\0" /* 30835 */
- "VPORT_X_SCALE_ENA\0" /* 30858 */
- "VPORT_X_OFFSET_ENA\0" /* 30876 */
- "VPORT_Y_SCALE_ENA\0" /* 30895 */
- "VPORT_Y_OFFSET_ENA\0" /* 30913 */
- "VPORT_Z_SCALE_ENA\0" /* 30932 */
- "VPORT_Z_OFFSET_ENA\0" /* 30950 */
- "VTX_XY_FMT\0" /* 30969 */
- "VTX_Z_FMT\0" /* 30980 */
- "VTX_W0_FMT\0" /* 30990 */
- "PERFCOUNTER_REF\0" /* 31001 */
- "CLIP_DIST_ENA_0\0" /* 31017 */
- "CLIP_DIST_ENA_1\0" /* 31033 */
- "CLIP_DIST_ENA_2\0" /* 31049 */
- "CLIP_DIST_ENA_3\0" /* 31065 */
- "CLIP_DIST_ENA_4\0" /* 31081 */
- "CLIP_DIST_ENA_5\0" /* 31097 */
- "CLIP_DIST_ENA_6\0" /* 31113 */
- "CLIP_DIST_ENA_7\0" /* 31129 */
- "CULL_DIST_ENA_0\0" /* 31145 */
- "CULL_DIST_ENA_1\0" /* 31161 */
- "CULL_DIST_ENA_2\0" /* 31177 */
- "CULL_DIST_ENA_3\0" /* 31193 */
- "CULL_DIST_ENA_4\0" /* 31209 */
- "CULL_DIST_ENA_5\0" /* 31225 */
- "CULL_DIST_ENA_6\0" /* 31241 */
- "CULL_DIST_ENA_7\0" /* 31257 */
- "USE_VTX_POINT_SIZE\0" /* 31273 */
- "USE_VTX_EDGE_FLAG\0" /* 31292 */
- "USE_VTX_RENDER_TARGET_INDX\0" /* 31310 */
- "USE_VTX_VIEWPORT_INDX\0" /* 31337 */
- "USE_VTX_KILL_FLAG\0" /* 31359 */
- "VS_OUT_MISC_VEC_ENA\0" /* 31377 */
- "VS_OUT_CCDIST0_VEC_ENA\0" /* 31397 */
- "VS_OUT_CCDIST1_VEC_ENA\0" /* 31420 */
- "VS_OUT_MISC_SIDE_BUS_ENA\0" /* 31443 */
- "USE_VTX_GS_CUT_FLAG\0" /* 31468 */
- "USE_VTX_LINE_WIDTH\0" /* 31488 */
- "USE_VTX_SHD_OBJPRIM_ID\0" /* 31507 */
- "VTE_XY_INF_DISCARD\0" /* 31530 */
- "VTE_Z_INF_DISCARD\0" /* 31549 */
- "VTE_W_INF_DISCARD\0" /* 31567 */
- "VTE_0XNANINF_IS_0\0" /* 31585 */
- "VTE_XY_NAN_RETAIN\0" /* 31603 */
- "VTE_Z_NAN_RETAIN\0" /* 31621 */
- "VTE_W_NAN_RETAIN\0" /* 31638 */
- "VTE_W_RECIP_NAN_IS_0\0" /* 31655 */
- "VS_XY_NAN_TO_INF\0" /* 31676 */
- "VS_XY_INF_RETAIN\0" /* 31693 */
- "VS_Z_NAN_TO_INF\0" /* 31710 */
- "VS_Z_INF_RETAIN\0" /* 31726 */
- "VS_W_NAN_TO_INF\0" /* 31742 */
- "VS_W_INF_RETAIN\0" /* 31758 */
- "VS_CLIP_DIST_INF_DISCARD\0" /* 31774 */
- "VTE_NO_OUTPUT_NEG_0\0" /* 31799 */
- "LINE_STIPPLE_RESET\0" /* 31819 */
- "EXPAND_FULL_LENGTH\0" /* 31838 */
- "FRACTIONAL_ACCUM\0" /* 31857 */
- "DIAMOND_ADJUST\0" /* 31874 */
- "TRIANGLE_FILTER_DISABLE\0" /* 31889 */
- "LINE_FILTER_DISABLE\0" /* 31913 */
- "POINT_FILTER_DISABLE\0" /* 31933 */
- "RECTANGLE_FILTER_DISABLE\0" /* 31954 */
- "TRIANGLE_EXPAND_ENA\0" /* 31979 */
- "LINE_EXPAND_ENA\0" /* 31999 */
- "POINT_EXPAND_ENA\0" /* 32015 */
- "RECTANGLE_EXPAND_ENA\0" /* 32032 */
- "PRIM_EXPAND_CONSTANT\0" /* 32053 */
- "XMAX_RIGHT_EXCLUSION\0" /* 32074 */
- "YMAX_BOTTOM_EXCLUSION\0" /* 32095 */
- "SMALL_PRIM_FILTER_ENABLE\0" /* 32117 */
- "SRBSL_ENABLE\0" /* 32142 */
- "MIN_SIZE\0" /* 32155 */
- "MAX_SIZE\0" /* 32164 */
- "LINE_PATTERN\0" /* 32173 */
- "REPEAT_COUNT\0" /* 32186 */
- "PATTERN_BIT_ORDER\0" /* 32199 */
- "AUTO_RESET_CNTL\0" /* 32217 */
- "VGT_OUTPATH_VTX_REUSE\0" /* 32233 */
- "VGT_OUTPATH_TESS_EN\0" /* 32255 */
- "VGT_OUTPATH_PASSTHRU\0" /* 32275 */
- "VGT_OUTPATH_GS_BLOCK\0" /* 32296 */
- "VGT_OUTPATH_HS_BLOCK\0" /* 32317 */
- "PATH_SELECT\0" /* 32338 */
- "TESS_MODE\0" /* 32350 */
- "REUSE_DEPTH\0" /* 32360 */
- "VGT_GRP_3D_POINT\0" /* 32372 */
- "VGT_GRP_3D_LINE\0" /* 32389 */
- "VGT_GRP_3D_TRI\0" /* 32405 */
- "VGT_GRP_3D_RECT\0" /* 32420 */
- "VGT_GRP_3D_QUAD\0" /* 32436 */
- "VGT_GRP_2D_COPY_RECT_V0\0" /* 32452 */
- "VGT_GRP_2D_COPY_RECT_V1\0" /* 32476 */
- "VGT_GRP_2D_COPY_RECT_V2\0" /* 32500 */
- "VGT_GRP_2D_COPY_RECT_V3\0" /* 32524 */
- "VGT_GRP_2D_FILL_RECT\0" /* 32548 */
- "VGT_GRP_2D_LINE\0" /* 32569 */
- "VGT_GRP_2D_TRI\0" /* 32585 */
- "VGT_GRP_PRIM_INDEX_LINE\0" /* 32600 */
- "VGT_GRP_PRIM_INDEX_TRI\0" /* 32624 */
- "VGT_GRP_PRIM_INDEX_QUAD\0" /* 32647 */
- "VGT_GRP_3D_LINE_ADJ\0" /* 32671 */
- "VGT_GRP_3D_TRI_ADJ\0" /* 32691 */
- "VGT_GRP_3D_PATCH\0" /* 32710 */
- "RETAIN_ORDER\0" /* 32727 */
- "RETAIN_QUADS\0" /* 32740 */
- "VGT_GRP_LIST\0" /* 32753 */
- "VGT_GRP_STRIP\0" /* 32766 */
- "VGT_GRP_FAN\0" /* 32780 */
- "VGT_GRP_LOOP\0" /* 32792 */
- "VGT_GRP_POLYGON\0" /* 32805 */
- "PRIM_ORDER\0" /* 32821 */
- "FIRST_DECR\0" /* 32832, 32838 */
- "COMP_X_EN\0" /* 32843 */
- "COMP_Y_EN\0" /* 32853 */
- "COMP_Z_EN\0" /* 32863 */
- "COMP_W_EN\0" /* 32873 */
- "SHIFT\0" /* 32883 */
- "VGT_GRP_INDEX_16\0" /* 32889 */
- "VGT_GRP_INDEX_32\0" /* 32906 */
- "VGT_GRP_UINT_16\0" /* 32923 */
- "VGT_GRP_UINT_32\0" /* 32939 */
- "VGT_GRP_SINT_16\0" /* 32955 */
- "VGT_GRP_SINT_32\0" /* 32971 */
- "VGT_GRP_FLOAT_32\0" /* 32987 */
- "VGT_GRP_AUTO_PRIM\0" /* 33004 */
- "VGT_GRP_FIX_1_23_TO_FLOAT\0" /* 33022 */
- "X_CONV\0" /* 33048 */
- "Y_CONV\0" /* 33055 */
- "Z_CONV\0" /* 33062 */
- "Z_OFFSET\0" /* 33069 */
- "W_CONV\0" /* 33078 */
- "W_OFFSET\0" /* 33085 */
- "GS_OFF\0" /* 33094 */
- "GS_SCENARIO_A\0" /* 33101 */
- "GS_SCENARIO_B\0" /* 33115 */
- "GS_SCENARIO_G\0" /* 33129 */
- "GS_SCENARIO_C\0" /* 33143 */
- "SPRITE_EN\0" /* 33157 */
- "GS_CUT_1024\0" /* 33167 */
- "GS_CUT_512\0" /* 33179 */
- "GS_CUT_256\0" /* 33190 */
- "GS_CUT_128\0" /* 33201 */
- "CUT_MODE\0" /* 33212 */
- "GS_C_PACK_EN\0" /* 33221 */
- "RESERVED_2\0" /* 33234 */
- "ES_PASSTHRU\0" /* 33245 */
- "COMPUTE_MODE\0" /* 33257 */
- "FAST_COMPUTE_MODE\0" /* 33270 */
- "ELEMENT_INFO_EN\0" /* 33288 */
- "PARTIAL_THD_AT_EOI\0" /* 33304 */
- "SUPPRESS_CUTS\0" /* 33323 */
- "ES_WRITE_OPTIMIZE\0" /* 33337 */
- "GS_WRITE_OPTIMIZE\0" /* 33355 */
- "X_0_OFFCHIP_GS\0" /* 33373 */
- "X_3_ES_AND_GS_ARE_ONCHIP\0" /* 33388, 33406 */
- "RESERVED_3\0" /* 33413 */
- "RESERVED_4\0" /* 33424 */
- "RESERVED_5\0" /* 33435 */
- "ES_VERTS_PER_SUBGRP\0" /* 33446 */
- "GS_PRIMS_PER_SUBGRP\0" /* 33466 */
- "GS_INST_PRIMS_IN_SUBGRP\0" /* 33486 */
- "MSAA_ENABLE\0" /* 33510 */
- "VPORT_SCISSOR_ENABLE\0" /* 33522 */
- "LINE_STIPPLE_ENABLE\0" /* 33543 */
- "SEND_UNLIT_STILES_TO_PKR\0" /* 33563 */
- "SCALE_LINE_WIDTH_PAD\0" /* 33588 */
- "ALTERNATE_RBS_PER_TILE\0" /* 33609 */
- "COARSE_TILE_STARTS_ON_EVEN_RB\0" /* 33632 */
- "WALK_SIZE\0" /* 33662 */
- "WALK_ALIGNMENT\0" /* 33672 */
- "WALK_ALIGN8_PRIM_FITS_ST\0" /* 33687 */
- "WALK_FENCE_ENABLE\0" /* 33712 */
- "WALK_FENCE_SIZE\0" /* 33730 */
- "SUPERTILE_WALK_ORDER_ENABLE\0" /* 33746, 33751 */
- "TILE_COVER_DISABLE\0" /* 33774 */
- "TILE_COVER_NO_SCISSOR\0" /* 33793 */
- "ZMM_LINE_EXTENT\0" /* 33815 */
- "ZMM_LINE_OFFSET\0" /* 33831 */
- "ZMM_RECT_EXTENT\0" /* 33847 */
- "KILL_PIX_POST_HI_Z\0" /* 33863 */
- "KILL_PIX_POST_DETAIL_MASK\0" /* 33882 */
- "PS_ITER_SAMPLE\0" /* 33908 */
- "MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE\0" /* 33923 */
- "MULTI_GPU_SUPERTILE_ENABLE\0" /* 33963 */
- "GPU_ID_OVERRIDE_ENABLE\0" /* 33990 */
- "GPU_ID_OVERRIDE\0" /* 34013 */
- "MULTI_GPU_PRIM_DISCARD_ENABLE\0" /* 34029 */
- "FORCE_EOV_CNTDWN_ENABLE\0" /* 34059 */
- "FORCE_EOV_REZ_ENABLE\0" /* 34083 */
- "OUT_OF_ORDER_PRIMITIVE_ENABLE\0" /* 34104 */
- "OUT_OF_ORDER_WATER_MARK\0" /* 34134 */
- "GS_PER_ES\0" /* 34158 */
- "ES_PER_GS\0" /* 34168 */
- "GS_PER_VS\0" /* 34178 */
- "OUTPRIM_TYPE_POINTLIST\0" /* 34188 */
- "OUTPRIM_TYPE_LINESTRIP\0" /* 34211 */
- "OUTPRIM_TYPE_TRISTRIP\0" /* 34234 */
- "OUTPRIM_TYPE\0" /* 34256 */
- "OUTPRIM_TYPE_1\0" /* 34269 */
- "OUTPRIM_TYPE_2\0" /* 34284 */
- "OUTPRIM_TYPE_3\0" /* 34299 */
- "UNIQUE_TYPE_PER_STREAM\0" /* 34314 */
- "VGT_INDEX_16\0" /* 34337 */
- "VGT_INDEX_32\0" /* 34350 */
- "VGT_INDEX_8\0" /* 34363 */
- "VGT_DMA_SWAP_NONE\0" /* 34375 */
- "VGT_DMA_SWAP_16_BIT\0" /* 34393 */
- "VGT_DMA_SWAP_32_BIT\0" /* 34413 */
- "VGT_DMA_SWAP_WORD\0" /* 34433 */
- "SWAP_MODE\0" /* 34451 */
- "VGT_DMA_BUF_MEM\0" /* 34461 */
- "VGT_DMA_BUF_RING\0" /* 34477 */
- "VGT_DMA_BUF_SETUP\0" /* 34494 */
- "BUF_TYPE\0" /* 34512 */
- "VGT_POLICY_LRU\0" /* 34521 */
- "VGT_POLICY_STREAM\0" /* 34536 */
- "RDREQ_POLICY_CIK\0" /* 34554 */
- "RDREQ_POLICY\0" /* 34571 */
- "REQ_PATH\0" /* 34584 */
- "PRIMITIVEID_EN\0" /* 34593 */
- "DISABLE_RESET_ON_EOI\0" /* 34608 */
- "NGG_DISABLE_PROVOK_REUSE\0" /* 34629 */
- "SAMPLE_STREAMOUTSTATS1\0" /* 34654 */
- "SAMPLE_STREAMOUTSTATS2\0" /* 34677 */
- "SAMPLE_STREAMOUTSTATS3\0" /* 34700 */
- "CACHE_FLUSH_TS\0" /* 34723 */
- "CONTEXT_DONE\0" /* 34738 */
- "CACHE_FLUSH\0" /* 34751 */
- "CS_PARTIAL_FLUSH\0" /* 34763 */
- "VGT_STREAMOUT_SYNC\0" /* 34780 */
- "VGT_STREAMOUT_RESET\0" /* 34799 */
- "END_OF_PIPE_INCR_DE\0" /* 34819 */
- "END_OF_PIPE_IB_END\0" /* 34839 */
- "RST_PIX_CNT\0" /* 34858 */
- "VS_PARTIAL_FLUSH\0" /* 34870 */
- "PS_PARTIAL_FLUSH\0" /* 34887 */
- "FLUSH_HS_OUTPUT\0" /* 34904 */
- "FLUSH_LS_OUTPUT\0" /* 34920 */
- "CACHE_FLUSH_AND_INV_TS_EVENT\0" /* 34936 */
- "ZPASS_DONE\0" /* 34965 */
- "CACHE_FLUSH_AND_INV_EVENT\0" /* 34976 */
- "PERFCOUNTER_START\0" /* 35002 */
- "PERFCOUNTER_STOP\0" /* 35020 */
- "PIPELINESTAT_START\0" /* 35037 */
- "PIPELINESTAT_STOP\0" /* 35056 */
- "PERFCOUNTER_SAMPLE\0" /* 35074 */
- "FLUSH_ES_OUTPUT\0" /* 35093 */
- "FLUSH_GS_OUTPUT\0" /* 35109 */
- "SAMPLE_PIPELINESTAT\0" /* 35125 */
- "SO_VGTSTREAMOUT_FLUSH\0" /* 35145 */
- "SAMPLE_STREAMOUTSTATS\0" /* 35167 */
- "RESET_VTX_CNT\0" /* 35189 */
- "BLOCK_CONTEXT_DONE\0" /* 35203 */
- "CS_CONTEXT_DONE\0" /* 35222 */
- "VGT_FLUSH\0" /* 35238 */
- "SC_SEND_DB_VPZ\0" /* 35248 */
- "BOTTOM_OF_PIPE_TS\0" /* 35263 */
- "DB_CACHE_FLUSH_AND_INV\0" /* 35281 */
- "FLUSH_AND_INV_DB_DATA_TS\0" /* 35304 */
- "FLUSH_AND_INV_DB_META\0" /* 35329 */
- "FLUSH_AND_INV_CB_DATA_TS\0" /* 35351 */
- "FLUSH_AND_INV_CB_META\0" /* 35376 */
- "FLUSH_AND_INV_CB_PIXEL_DATA\0" /* 35398 */
- "THREAD_TRACE_START\0" /* 35426 */
- "THREAD_TRACE_STOP\0" /* 35445 */
- "THREAD_TRACE_MARKER\0" /* 35463 */
- "THREAD_TRACE_FLUSH\0" /* 35483 */
- "THREAD_TRACE_FINISH\0" /* 35502 */
- "PIXEL_PIPE_STAT_CONTROL\0" /* 35522 */
- "PIXEL_PIPE_STAT_DUMP\0" /* 35546 */
- "PIXEL_PIPE_STAT_RESET\0" /* 35567 */
- "BREAK_BATCH\0" /* 35589 */
- "FLUSH_DFSM\0" /* 35601 */
- "RESET_TO_LOWEST_VGT\0" /* 35612 */
- "TGID_ROLLOVER\0" /* 35632 */
- "ENABLE_NGG_PIPELINE\0" /* 35646 */
- "ENABLE_LEGACY_PIPELINE\0" /* 35666 */
- "EVENT_TYPE\0" /* 35689 */
- "ADDRESS_HI_GFX6\0" /* 35700 */
- "EXTENDED_EVENT\0" /* 35716 */
- "ADDRESS_HI_GFX9\0" /* 35731 */
- "RESET_EN\0" /* 35747 */
- "PRIMGROUP_SIZE\0" /* 35756 */
- "PARTIAL_VS_WAVE_ON\0" /* 35771 */
- "SWITCH_ON_EOP\0" /* 35790 */
- "PARTIAL_ES_WAVE_ON\0" /* 35804 */
- "SWITCH_ON_EOI\0" /* 35823 */
- "WD_SWITCH_ON_EOP\0" /* 35837 */
- "MAX_PRIMGRP_IN_WAVE\0" /* 35854 */
- "ITEMSIZE\0" /* 35874 */
- "REUSE_OFF\0" /* 35883 */
- "VTX_CNT_EN\0" /* 35893 */
- "FULL_CACHE\0" /* 35904 */
- "HTILE_USES_PRELOAD_WIN\0" /* 35915 */
- "PRELOAD\0" /* 35938 */
- "PREFETCH_WIDTH\0" /* 35946 */
- "PREFETCH_HEIGHT\0" /* 35961 */
- "DST_OUTSIDE_ZERO_TO_ONE\0" /* 35977 */
- "TC_COMPATIBLE\0" /* 36001 */
- "COMPAREFUNC0\0" /* 36015 */
- "COMPAREVALUE0\0" /* 36028 */
- "COMPAREMASK0\0" /* 36042 */
- "COMPAREFUNC1\0" /* 36055 */
- "COMPAREVALUE1\0" /* 36068 */
- "COMPAREMASK1\0" /* 36082 */
- "START_X\0" /* 36095 */
- "START_Y\0" /* 36103 */
- "MAX_X\0" /* 36111 */
- "MAX_Y\0" /* 36117 */
- "VERTEX_STRIDE\0" /* 36123 */
- "MAX_VERT_OUT\0" /* 36137 */
- "ACCUM_ISOLINE\0" /* 36150 */
- "ACCUM_TRI\0" /* 36164 */
- "ACCUM_QUAD\0" /* 36174 */
- "DONUT_SPLIT\0" /* 36185 */
- "TRAP_SPLIT\0" /* 36197 */
- "LS_STAGE_OFF\0" /* 36208 */
- "LS_STAGE_ON\0" /* 36221 */
- "CS_STAGE_ON\0" /* 36233 */
- "ES_STAGE_OFF\0" /* 36245 */
- "ES_STAGE_DS\0" /* 36258 */
- "ES_STAGE_REAL\0" /* 36270 */
- "VS_STAGE_REAL\0" /* 36284 */
- "VS_STAGE_DS\0" /* 36298 */
- "VS_STAGE_COPY_SHADER\0" /* 36310 */
- "DYNAMIC_HS\0" /* 36331 */
- "DIS_DEALLOC_ACCUM_0\0" /* 36342 */
- "DIS_DEALLOC_ACCUM_1\0" /* 36362 */
- "VS_WAVE_ID_EN\0" /* 36382 */
- "ORDERED_ID_MODE\0" /* 36396 */
- "GS_FAST_LAUNCH\0" /* 36412 */
- "NUM_PATCHES\0" /* 36427 */
- "HS_NUM_INPUT_CP\0" /* 36439 */
- "HS_NUM_OUTPUT_CP\0" /* 36455 */
- "TESS_ISOLINE\0" /* 36472 */
- "TESS_TRIANGLE\0" /* 36485 */
- "TESS_QUAD\0" /* 36499 */
- "PART_INTEGER\0" /* 36509 */
- "PART_POW2\0" /* 36522 */
- "PART_FRAC_ODD\0" /* 36532 */
- "PART_FRAC_EVEN\0" /* 36546 */
- "PARTITIONING\0" /* 36561 */
- "OUTPUT_POINT\0" /* 36574 */
- "OUTPUT_LINE\0" /* 36587 */
- "OUTPUT_TRIANGLE_CW\0" /* 36599 */
- "OUTPUT_TRIANGLE_CCW\0" /* 36618 */
- "TOPOLOGY\0" /* 36638 */
- "RESERVED_REDUC_AXIS\0" /* 36647 */
- "DEPRECATED\0" /* 36667 */
- "NUM_DS_WAVES_PER_SIMD\0" /* 36678 */
- "DISABLE_DONUTS\0" /* 36700 */
- "VGT_POLICY_BYPASS\0" /* 36715 */
- "DISTRIBUTION_MODE_NO_DIST\0" /* 36733 */
- "DISTRIBUTION_MODE_PATCHES\0" /* 36759 */
- "DISTRIBUTION_MODE_DONUTS\0" /* 36785 */
- "DISTRIBUTION_MODE_TRAPEZOIDS\0" /* 36810 */
- "DISTRIBUTION_MODE\0" /* 36839 */
- "ALPHA_TO_MASK_ENABLE\0" /* 36857 */
- "ALPHA_TO_MASK_OFFSET0\0" /* 36878 */
- "ALPHA_TO_MASK_OFFSET1\0" /* 36900 */
- "ALPHA_TO_MASK_OFFSET2\0" /* 36922 */
- "ALPHA_TO_MASK_OFFSET3\0" /* 36944 */
- "OFFSET_ROUND\0" /* 36966 */
- "POLY_OFFSET_NEG_NUM_DB_BITS\0" /* 36979 */
- "POLY_OFFSET_DB_IS_FLOAT_FMT\0" /* 37007 */
- "STREAMOUT_0_EN\0" /* 37035 */
- "STREAMOUT_1_EN\0" /* 37050 */
- "STREAMOUT_2_EN\0" /* 37065 */
- "STREAMOUT_3_EN\0" /* 37080 */
- "RAST_STREAM\0" /* 37095 */
- "RAST_STREAM_MASK\0" /* 37107 */
- "USE_RAST_STREAM_MASK\0" /* 37124 */
- "EN_PRIMS_NEEDED_CNT\0" /* 37145 */
- "STREAM_0_BUFFER_EN\0" /* 37165 */
- "STREAM_1_BUFFER_EN\0" /* 37184 */
- "STREAM_2_BUFFER_EN\0" /* 37203 */
- "STREAM_3_BUFFER_EN\0" /* 37222 */
- "DISTANCE_0\0" /* 37241 */
- "DISTANCE_1\0" /* 37252 */
- "DISTANCE_2\0" /* 37263 */
- "DISTANCE_3\0" /* 37274 */
- "DISTANCE_4\0" /* 37285 */
- "DISTANCE_5\0" /* 37296 */
- "DISTANCE_6\0" /* 37307 */
- "DISTANCE_7\0" /* 37318 */
- "DISTANCE_8\0" /* 37329 */
- "DISTANCE_9\0" /* 37340 */
- "DISTANCE_10\0" /* 37351 */
- "DISTANCE_11\0" /* 37363 */
- "DISTANCE_12\0" /* 37375 */
- "DISTANCE_13\0" /* 37387 */
- "DISTANCE_14\0" /* 37399 */
- "DISTANCE_15\0" /* 37411 */
- "EXPAND_LINE_WIDTH\0" /* 37423 */
- "LAST_PIXEL\0" /* 37441 */
- "PERPENDICULAR_ENDCAP_ENA\0" /* 37452 */
- "DX10_DIAMOND_TEST_ENA\0" /* 37477 */
- "MSAA_NUM_SAMPLES\0" /* 37499 */
- "AA_MASK_CENTROID_DTMN\0" /* 37516 */
- "MAX_SAMPLE_DIST\0" /* 37538 */
- "MSAA_EXPOSED_SAMPLES\0" /* 37554 */
- "DETAIL_TO_EXPOSED_MODE\0" /* 37575 */
- "COVERAGE_TO_SHADER_SELECT\0" /* 37598 */
- "PIX_CENTER\0" /* 37624 */
- "X_TRUNCATE\0" /* 37635 */
- "X_ROUND\0" /* 37646 */
- "X_ROUND_TO_EVEN\0" /* 37654 */
- "X_ROUND_TO_ODD\0" /* 37670 */
- "ROUND_MODE\0" /* 37685 */
- "X_16_8_FIXED_POINT_1_16TH\0" /* 37696 */
- "X_16_8_FIXED_POINT_1_8TH\0" /* 37722 */
- "X_16_8_FIXED_POINT_1_4TH\0" /* 37747 */
- "X_16_8_FIXED_POINT_1_2\0" /* 37772 */
- "X_16_8_FIXED_POINT_1\0" /* 37795 */
- "X_16_8_FIXED_POINT_1_256TH\0" /* 37816 */
- "X_14_10_FIXED_POINT_1_1024TH\0" /* 37843 */
- "X_12_12_FIXED_POINT_1_4096TH\0" /* 37872 */
- "QUANT_MODE\0" /* 37901 */
- "S0_X\0" /* 37912 */
- "S0_Y\0" /* 37917 */
- "S1_X\0" /* 37922 */
- "S1_Y\0" /* 37927 */
- "S2_X\0" /* 37932 */
- "S2_Y\0" /* 37937 */
- "S3_X\0" /* 37942 */
- "S3_Y\0" /* 37947 */
- "S4_X\0" /* 37952 */
- "S4_Y\0" /* 37957 */
- "S5_X\0" /* 37962 */
- "S5_Y\0" /* 37967 */
- "S6_X\0" /* 37972 */
- "S6_Y\0" /* 37977 */
- "S7_X\0" /* 37982 */
- "S7_Y\0" /* 37987 */
- "S8_X\0" /* 37992 */
- "S8_Y\0" /* 37997 */
- "S9_X\0" /* 38002 */
- "S9_Y\0" /* 38007 */
- "S10_X\0" /* 38012 */
- "S10_Y\0" /* 38018 */
- "S11_X\0" /* 38024 */
- "S11_Y\0" /* 38030 */
- "S12_X\0" /* 38036 */
- "S12_Y\0" /* 38042 */
- "S13_X\0" /* 38048 */
- "S13_Y\0" /* 38054 */
- "S14_X\0" /* 38060 */
- "S14_Y\0" /* 38066 */
- "S15_X\0" /* 38072 */
- "S15_Y\0" /* 38078 */
- "AA_MASK_X0Y0\0" /* 38084 */
- "AA_MASK_X1Y0\0" /* 38097 */
- "AA_MASK_X0Y1\0" /* 38110 */
- "AA_MASK_X1Y1\0" /* 38123 */
- "REALIGN_DQUADS_AFTER_N_WAVES\0" /* 38136 */
- "VTX_REUSE_DEPTH\0" /* 38165 */
- "DEALLOC_DIST\0" /* 38181 */
- "FMASK_TILE_MAX\0" /* 38194 */
- "MIP_LEVEL\0" /* 38209 */
- "ENDIAN_NONE\0" /* 38219 */
- "ENDIAN_8IN16\0" /* 38231 */
- "ENDIAN_8IN32\0" /* 38244 */
- "ENDIAN_8IN64\0" /* 38257 */
- "ENDIAN\0" /* 38270 */
- "COLOR_INVALID\0" /* 38277 */
- "COLOR_8\0" /* 38291 */
- "COLOR_16\0" /* 38299 */
- "COLOR_8_8\0" /* 38308 */
- "COLOR_32\0" /* 38318 */
- "COLOR_16_16\0" /* 38327 */
- "COLOR_10_11_11\0" /* 38339 */
- "COLOR_11_11_10\0" /* 38354 */
- "COLOR_10_10_10_2\0" /* 38369 */
- "COLOR_2_10_10_10\0" /* 38386 */
- "COLOR_8_8_8_8\0" /* 38403 */
- "COLOR_32_32\0" /* 38417 */
- "COLOR_16_16_16_16\0" /* 38429 */
- "COLOR_32_32_32_32\0" /* 38447 */
- "COLOR_5_6_5\0" /* 38465 */
- "COLOR_1_5_5_5\0" /* 38477 */
- "COLOR_5_5_5_1\0" /* 38491 */
- "COLOR_4_4_4_4\0" /* 38505 */
- "COLOR_8_24\0" /* 38519 */
- "COLOR_24_8\0" /* 38530 */
- "COLOR_X24_8_32_FLOAT\0" /* 38541 */
- "NUMBER_UNORM\0" /* 38562 */
- "NUMBER_SNORM\0" /* 38575 */
- "NUMBER_UINT\0" /* 38588 */
- "NUMBER_SINT\0" /* 38600 */
- "NUMBER_SRGB\0" /* 38612 */
- "NUMBER_FLOAT\0" /* 38624 */
- "NUMBER_TYPE\0" /* 38637 */
- "SWAP_STD\0" /* 38649 */
- "SWAP_ALT\0" /* 38658 */
- "SWAP_STD_REV\0" /* 38667 */
- "SWAP_ALT_REV\0" /* 38680 */
- "COMP_SWAP\0" /* 38693 */
- "BLEND_CLAMP\0" /* 38703 */
- "BLEND_BYPASS\0" /* 38715 */
- "SIMPLE_FLOAT\0" /* 38728 */
- "CMASK_IS_LINEAR\0" /* 38741 */
- "FORCE_OPT_AUTO\0" /* 38757 */
- "FORCE_OPT_DISABLE\0" /* 38772 */
- "FORCE_OPT_ENABLE_IF_SRC_A_0\0" /* 38790 */
- "FORCE_OPT_ENABLE_IF_SRC_RGB_0\0" /* 38818 */
- "FORCE_OPT_ENABLE_IF_SRC_ARGB_0\0" /* 38848 */
- "FORCE_OPT_ENABLE_IF_SRC_A_1\0" /* 38879 */
- "FORCE_OPT_ENABLE_IF_SRC_RGB_1\0" /* 38907 */
- "FORCE_OPT_ENABLE_IF_SRC_ARGB_1\0" /* 38937 */
- "BLEND_OPT_DONT_RD_DST\0" /* 38968 */
- "BLEND_OPT_DISCARD_PIXEL\0" /* 38990 */
- "FMASK_COMPRESSION_DISABLE\0" /* 39014 */
- "FMASK_COMPRESS_1FRAG_ONLY\0" /* 39040 */
- "DCC_ENABLE\0" /* 39066 */
- "CMASK_ADDR_TYPE\0" /* 39077 */
- "FMASK_TILE_MODE_INDEX\0" /* 39093 */
- "FMASK_BANK_HEIGHT\0" /* 39115 */
- "NUM_FRAGMENTS\0" /* 39133 */
- "FORCE_DST_ALPHA_1\0" /* 39147 */
- "MIP0_DEPTH\0" /* 39165 */
- "COLOR_SW_MODE\0" /* 39176 */
- "FMASK_SW_MODE\0" /* 39190 */
- "RESOURCE_TYPE\0" /* 39204 */
- "KEY_CLEAR_ENABLE\0" /* 39218 */
- "MAX_UNCOMPRESSED_BLOCK_SIZE\0" /* 39235 */
- "MIN_COMPRESSED_BLOCK_SIZE\0" /* 39263 */
- "MAX_COMPRESSED_BLOCK_SIZE\0" /* 39289 */
- "INDEPENDENT_64B_BLOCKS\0" /* 39315 */
- "LOSSY_RGB_PRECISION\0" /* 39338 */
- "LOSSY_ALPHA_PRECISION\0" /* 39358 */
- "MATCH_ALL_BITS\0" /* 39380 */
- "BASE_HI\0" /* 39395 */
- "EN_INST_OPT_BASIC\0" /* 39403 */
- "EN_INST_OPT_ADV\0" /* 39421 */
- "HW_USE_ONLY\0" /* 39437 */
- "DWB\0" /* 39449 */
- "GRAD_ADJ_0\0" /* 39453 */
- "GRAD_ADJ_1\0" /* 39464 */
- "GRAD_ADJ_2\0" /* 39475 */
- "GRAD_ADJ_3\0" /* 39486 */
- "TTRACE_STALL_ALL\0" /* 39497 */
- "ALLOC_ARB_LRU_ENA\0" /* 39514 */
- "EXP_ARB_LRU_ENA\0" /* 39532 */
- "PS_PKR_PRIORITY_CNTL\0" /* 39548 */
- "BATON_RESET_DISABLE\0" /* 39569 */
- "CRC_SIMD_ID_WADDR_DISABLE\0" /* 39589 */
- "LBPW_CU_CHK_MODE\0" /* 39615 */
- "LBPW_CU_CHK_CNT\0" /* 39632 */
- "CSC_PWR_SAVE_DISABLE\0" /* 39648 */
- "CSG_PWR_SAVE_DISABLE\0" /* 39669 */
- "CONTEXT_SAVE_WAIT_GDS_REQUEST_CYCLE_OVHD\0" /* 39690 */
- "CONTEXT_SAVE_WAIT_GDS_GRANT_CYCLE_OVHD\0" /* 39731 */
- "SPI_SHADER_LATE_ALLOC_GS\0" /* 39770 */
- "X_MAX\0" /* 39795 */
- "Y_MAX\0" /* 39801 */
- "PARTIALLY_RESIDENT\0" /* 39807 */
- "FAULT_BEHAVIOR\0" /* 39826 */
- "ITERATE_FLUSH\0" /* 39841 */
- "MAXMIP\0" /* 39855 */
- "FORCE_ON\0" /* 39862 */
- "PUNCHOUT_MODE\0" /* 39871 */
- "POPS_DRAIN_PS_ON_OVERLAP\0" /* 39885 */
- "DISALLOW_OVERFLOW\0" /* 39910 */
- "PS_INVOKE_MASK\0" /* 39928 */
- "EPITCH\0" /* 39943 */
- "NUM_SE\0" /* 39950 */
- "DISABLE_SRBSL_DB_OPTIMIZED_PACKING\0" /* 39957 */
- "PERFMON_ENABLE\0" /* 39992 */
- "LEFT_QTR\0" /* 40007 */
- "LEFT_HALF\0" /* 40016 */
- "RIGHT_HALF\0" /* 40026 */
- "RIGHT_QTR\0" /* 40037 */
- "TOP_QTR\0" /* 40047 */
- "TOP_HALF\0" /* 40055 */
- "BOT_HALF\0" /* 40064 */
- "BOT_QTR\0" /* 40073 */
- "LEFT_EYE_FOV_LEFT\0" /* 40081 */
- "LEFT_EYE_FOV_RIGHT\0" /* 40099 */
- "RIGHT_EYE_FOV_LEFT\0" /* 40118 */
- "RIGHT_EYE_FOV_RIGHT\0" /* 40137 */
- "FOV_TOP\0" /* 40157 */
- "FOV_BOT\0" /* 40165 */
- "OBJ_ID_SEL\0" /* 40173 */
- "ADD_PIPED_PRIM_ID\0" /* 40184 */
- "EN_32BIT_OBJPRIMID\0" /* 40202 */
- "VERTEX_REUSE_OFF\0" /* 40221 */
- "INDEX_BUF_EDGE_FLAG_ENA\0" /* 40238 */
- "DISCARD_0_AREA_TRIANGLES\0" /* 40262 */
- "DISCARD_0_AREA_LINES\0" /* 40287 */
- "DISCARD_0_AREA_POINTS\0" /* 40308 */
- "DISCARD_0_AREA_RECTANGLES\0" /* 40330 */
- "USE_PROVOKING_ZW\0" /* 40356 */
- "MAX_PRIMS_PER_SUBGROUP\0" /* 40373 */
- "OBJPRIM_ID_EN\0" /* 40396 */
- "EN_REG_RT_INDEX\0" /* 40410 */
- "EN_PIPELINE_PRIMID\0" /* 40426 */
- "OBJECT_ID_INST_EN\0" /* 40445 */
- "COMPOUND_INDEX_EN\0" /* 40463 */
- "BINNING_ALLOWED\0" /* 40481 */
- "FORCE_BINNING_ON\0" /* 40497 */
- "DISABLE_BINNING_USE_NEW_SC\0" /* 40514 */
- "DISABLE_BINNING_USE_LEGACY_SC\0" /* 40541 */
- "BINNING_MODE\0" /* 40571 */
- "BIN_SIZE_X\0" /* 40584 */
- "BIN_SIZE_Y\0" /* 40595 */
- "BIN_SIZE_X_EXTEND\0" /* 40606 */
- "BIN_SIZE_Y_EXTEND\0" /* 40624 */
- "CONTEXT_STATES_PER_BIN\0" /* 40642 */
- "PERSISTENT_STATES_PER_BIN\0" /* 40665 */
- "DISABLE_START_OF_PRIM\0" /* 40691 */
- "FPOVS_PER_BATCH\0" /* 40713 */
- "OPTIMAL_BIN_SELECTION\0" /* 40729 */
- "MAX_ALLOC_COUNT\0" /* 40751 */
- "MAX_PRIM_PER_BATCH\0" /* 40767 */
- "OVER_RAST_ENABLE\0" /* 40786 */
- "OVER_RAST_SAMPLE_SELECT\0" /* 40803 */
- "UNDER_RAST_ENABLE\0" /* 40827 */
- "UNDER_RAST_SAMPLE_SELECT\0" /* 40845 */
- "PBB_UNCERTAINTY_REGION_ENABLE\0" /* 40870 */
- "ZMM_TRI_EXTENT\0" /* 40900 */
- "ZMM_TRI_OFFSET\0" /* 40915 */
- "OVERRIDE_OVER_RAST_INNER_TO_NORMAL\0" /* 40930 */
- "OVERRIDE_UNDER_RAST_INNER_TO_NORMAL\0" /* 40965 */
- "DEGENERATE_OVERRIDE_INNER_TO_NORMAL_DISABLE\0" /* 41001 */
- "UNCERTAINTY_REGION_MODE\0" /* 41045 */
- "OUTER_UNCERTAINTY_EDGERULE_OVERRIDE\0" /* 41069 */
- "INNER_UNCERTAINTY_EDGERULE_OVERRIDE\0" /* 41105 */
- "NULL_SQUAD_AA_MASK_ENABLE\0" /* 41141 */
- "COVERAGE_AA_MASK_ENABLE\0" /* 41167 */
- "PREZ_AA_MASK_ENABLE\0" /* 41191 */
- "POSTZ_AA_MASK_ENABLE\0" /* 41211 */
- "CENTROID_SAMPLE_OVERRIDE\0" /* 41232 */
- "MAX_DEALLOCS_IN_WAVE\0" /* 41257 */
- "BASE_256B\0" /* 41278 */
- "MIP0_HEIGHT\0" /* 41288 */
- "MIP0_WIDTH\0" /* 41300 */
- "DRAW_INDEX_LOC\0" /* 41311 */
- "IB_BASE_LO\0" /* 41326 */
- "IB_BASE_HI\0" /* 41337 */
- "CP_DMA_WORD0\0" /* 41348 */
- "CP_DMA_WORD1\0" /* 41361 */
- "CP_DMA_WORD2\0" /* 41374 */
- "CP_DMA_WORD3\0" /* 41387 */
- "COMMAND\0" /* 41400 */
- "DMA_DATA_WORD0\0" /* 41408 */
- "SRBM_STATUS2\0" /* 41423 */
- "SRBM_STATUS\0" /* 41436 */
- "SRBM_STATUS3\0" /* 41448 */
- "SDMA0_STATUS_REG\0" /* 41461 */
- "SDMA1_STATUS_REG\0" /* 41478 */
- "GRBM_STATUS2\0" /* 41495 */
- "GRBM_STATUS\0" /* 41508 */
- "CP_STRMOUT_CNTL\0" /* 41520 */
- "CP_COHER_CNTL\0" /* 41536 */
- "CP_COHER_SIZE\0" /* 41550 */
- "CP_COHER_BASE\0" /* 41564 */
- "GRBM_STATUS_SE0\0" /* 41578 */
- "GRBM_STATUS_SE1\0" /* 41594 */
- "GRBM_STATUS_SE2\0" /* 41610 */
- "GRBM_STATUS_SE3\0" /* 41626 */
- "CP_COHER_BASE_HI\0" /* 41642 */
- "CP_COHER_START_DELAY\0" /* 41659 */
- "CP_COHER_STATUS\0" /* 41680 */
- "CP_CPC_STATUS\0" /* 41696 */
- "CP_CPC_BUSY_STAT\0" /* 41710 */
- "CP_CPC_STALLED_STAT1\0" /* 41727 */
- "CP_CPF_STATUS\0" /* 41748 */
- "CP_CPF_BUSY_STAT\0" /* 41762 */
- "CP_CPF_STALLED_STAT1\0" /* 41779 */
- "CP_COHER_SIZE_HI\0" /* 41800 */
- "VGT_VTX_VECT_EJECT_REG\0" /* 41817 */
- "VGT_CACHE_INVALIDATION\0" /* 41840 */
- "VGT_ESGS_RING_SIZE\0" /* 41863 */
- "VGT_GSVS_RING_SIZE\0" /* 41882 */
- "VGT_GS_VERTEX_REUSE\0" /* 41901 */
- "VGT_PRIMITIVE_TYPE\0" /* 41921 */
- "VGT_INDEX_TYPE\0" /* 41940 */
- "VGT_STRMOUT_BUFFER_FILLED_SIZE_0\0" /* 41955 */
- "VGT_STRMOUT_BUFFER_FILLED_SIZE_1\0" /* 41988 */
- "VGT_STRMOUT_BUFFER_FILLED_SIZE_2\0" /* 42021 */
- "VGT_STRMOUT_BUFFER_FILLED_SIZE_3\0" /* 42054 */
- "VGT_NUM_INDICES\0" /* 42087 */
- "VGT_NUM_INSTANCES\0" /* 42103 */
- "VGT_TF_RING_SIZE\0" /* 42121 */
- "VGT_HS_OFFCHIP_PARAM\0" /* 42138 */
- "VGT_TF_MEMORY_BASE\0" /* 42159 */
- "PA_CL_ENHANCE\0" /* 42178 */
- "PA_SU_LINE_STIPPLE_VALUE\0" /* 42192 */
- "PA_SC_LINE_STIPPLE_STATE\0" /* 42217 */
- "CP_STALLED_STAT3\0" /* 42242 */
- "CP_STALLED_STAT1\0" /* 42259 */
- "CP_STALLED_STAT2\0" /* 42276 */
- "CP_STAT\0" /* 42293 */
- "GRBM_GFX_INDEX\0" /* 42301 */
- "PA_SC_SCREEN_EXTENT_MIN_0\0" /* 42316 */
- "PA_SC_SCREEN_EXTENT_MAX_0\0" /* 42342 */
- "PA_SC_SCREEN_EXTENT_MIN_1\0" /* 42368 */
- "PA_SC_SCREEN_EXTENT_MAX_1\0" /* 42394 */
- "PA_SC_ENHANCE\0" /* 42420 */
- "SQC_CACHES\0" /* 42434 */
- "SQ_RANDOM_WAVE_PRI\0" /* 42445 */
- "SQ_EXP_0\0" /* 42464 */
- "TA_CS_BC_BASE_ADDR\0" /* 42473 */
- "TA_CS_BC_BASE_ADDR_HI\0" /* 42492 */
- "DB_OCCLUSION_COUNT0_LOW\0" /* 42514 */
- "SQ_BUF_RSRC_WORD0\0" /* 42538 */
- "DB_OCCLUSION_COUNT0_HI\0" /* 42556 */
- "SQ_BUF_RSRC_WORD1\0" /* 42579 */
- "DB_OCCLUSION_COUNT1_LOW\0" /* 42597 */
- "SQ_BUF_RSRC_WORD2\0" /* 42621 */
- "DB_OCCLUSION_COUNT1_HI\0" /* 42639 */
- "SQ_BUF_RSRC_WORD3\0" /* 42662 */
- "DB_OCCLUSION_COUNT2_LOW\0" /* 42680 */
- "SQ_IMG_RSRC_WORD0\0" /* 42704 */
- "DB_OCCLUSION_COUNT2_HI\0" /* 42722 */
- "SQ_IMG_RSRC_WORD1\0" /* 42745 */
- "DB_OCCLUSION_COUNT3_LOW\0" /* 42763 */
- "SQ_IMG_RSRC_WORD2\0" /* 42787 */
- "DB_OCCLUSION_COUNT3_HI\0" /* 42805 */
- "SQ_IMG_RSRC_WORD3\0" /* 42828 */
- "SQ_IMG_RSRC_WORD4\0" /* 42846 */
- "SQ_IMG_RSRC_WORD5\0" /* 42864 */
- "SQ_IMG_RSRC_WORD6\0" /* 42882 */
- "SQ_IMG_RSRC_WORD7\0" /* 42900 */
- "SQ_IMG_SAMP_WORD0\0" /* 42918 */
- "SQ_IMG_SAMP_WORD1\0" /* 42936 */
- "SQ_IMG_SAMP_WORD2\0" /* 42954 */
- "SQ_IMG_SAMP_WORD3\0" /* 42972 */
- "SPI_DYN_GPR_LOCK_EN\0" /* 42990 */
- "SPI_STATIC_THREAD_MGMT_1\0" /* 43010 */
- "SPI_STATIC_THREAD_MGMT_2\0" /* 43035 */
- "SPI_STATIC_THREAD_MGMT_3\0" /* 43060 */
- "SPI_PS_MAX_WAVE_ID\0" /* 43085 */
- "SPI_ARB_PRIORITY\0" /* 43104 */
- "SPI_ARB_CYCLES_0\0" /* 43121 */
- "SPI_ARB_CYCLES_1\0" /* 43138 */
- "SQ_FLAT_SCRATCH_WORD0\0" /* 43155 */
- "SQ_FLAT_SCRATCH_WORD1\0" /* 43177 */
- "DB_ZPASS_COUNT_LOW\0" /* 43199 */
- "DB_ZPASS_COUNT_HI\0" /* 43218 */
- "SPI_CONFIG_CNTL\0" /* 43236 */
- "SPI_CONFIG_CNTL_1\0" /* 43252 */
- "SPI_RESOURCE_RESERVE_CU_AB_0\0" /* 43270 */
- "DB_SUBTILE_CONTROL\0" /* 43299 */
- "GB_ADDR_CONFIG\0" /* 43318 */
- "GB_TILE_MODE0\0" /* 43333 */
- "GB_TILE_MODE1\0" /* 43347 */
- "GB_TILE_MODE2\0" /* 43361 */
- "GB_TILE_MODE3\0" /* 43375 */
- "GB_TILE_MODE4\0" /* 43389 */
- "GB_TILE_MODE5\0" /* 43403 */
- "GB_TILE_MODE6\0" /* 43417 */
- "GB_TILE_MODE7\0" /* 43431 */
- "GB_TILE_MODE8\0" /* 43445 */
- "GB_TILE_MODE9\0" /* 43459 */
- "GB_TILE_MODE10\0" /* 43473 */
- "GB_TILE_MODE11\0" /* 43488 */
- "GB_TILE_MODE12\0" /* 43503 */
- "GB_TILE_MODE13\0" /* 43518 */
- "GB_TILE_MODE14\0" /* 43533 */
- "GB_TILE_MODE15\0" /* 43548 */
- "GB_TILE_MODE16\0" /* 43563 */
- "GB_TILE_MODE17\0" /* 43578 */
- "GB_TILE_MODE18\0" /* 43593 */
- "GB_TILE_MODE19\0" /* 43608 */
- "GB_TILE_MODE20\0" /* 43623 */
- "GB_TILE_MODE21\0" /* 43638 */
- "GB_TILE_MODE22\0" /* 43653 */
- "GB_TILE_MODE23\0" /* 43668 */
- "GB_TILE_MODE24\0" /* 43683 */
- "GB_TILE_MODE25\0" /* 43698 */
- "GB_TILE_MODE26\0" /* 43713 */
- "GB_TILE_MODE27\0" /* 43728 */
- "GB_TILE_MODE28\0" /* 43743 */
- "GB_TILE_MODE29\0" /* 43758 */
- "GB_TILE_MODE30\0" /* 43773 */
- "GB_TILE_MODE31\0" /* 43788 */
- "GB_MACROTILE_MODE0\0" /* 43803 */
- "GB_MACROTILE_MODE1\0" /* 43822 */
- "GB_MACROTILE_MODE2\0" /* 43841 */
- "GB_MACROTILE_MODE3\0" /* 43860 */
- "GB_MACROTILE_MODE4\0" /* 43879 */
- "GB_MACROTILE_MODE5\0" /* 43898 */
- "GB_MACROTILE_MODE6\0" /* 43917 */
- "GB_MACROTILE_MODE7\0" /* 43936 */
- "GB_MACROTILE_MODE8\0" /* 43955 */
- "GB_MACROTILE_MODE9\0" /* 43974 */
- "GB_MACROTILE_MODE10\0" /* 43993 */
- "GB_MACROTILE_MODE11\0" /* 44013 */
- "GB_MACROTILE_MODE12\0" /* 44033 */
- "GB_MACROTILE_MODE13\0" /* 44053 */
- "GB_MACROTILE_MODE14\0" /* 44073 */
- "GB_MACROTILE_MODE15\0" /* 44093 */
- "SPI_SHADER_TBA_LO_PS\0" /* 44113 */
- "SPI_SHADER_TBA_HI_PS\0" /* 44134 */
- "SPI_SHADER_TMA_LO_PS\0" /* 44155 */
- "SPI_SHADER_TMA_HI_PS\0" /* 44176 */
- "SPI_SHADER_PGM_RSRC3_PS\0" /* 44197 */
- "SPI_SHADER_PGM_LO_PS\0" /* 44221 */
- "SPI_SHADER_PGM_HI_PS\0" /* 44242 */
- "SPI_SHADER_PGM_RSRC1_PS\0" /* 44263 */
- "SPI_SHADER_PGM_RSRC2_PS\0" /* 44287 */
- "SPI_SHADER_USER_DATA_PS_0\0" /* 44311 */
- "SPI_SHADER_USER_DATA_PS_1\0" /* 44337 */
- "SPI_SHADER_USER_DATA_PS_2\0" /* 44363 */
- "SPI_SHADER_USER_DATA_PS_3\0" /* 44389 */
- "SPI_SHADER_USER_DATA_PS_4\0" /* 44415 */
- "SPI_SHADER_USER_DATA_PS_5\0" /* 44441 */
- "SPI_SHADER_USER_DATA_PS_6\0" /* 44467 */
- "SPI_SHADER_USER_DATA_PS_7\0" /* 44493 */
- "SPI_SHADER_USER_DATA_PS_8\0" /* 44519 */
- "SPI_SHADER_USER_DATA_PS_9\0" /* 44545 */
- "SPI_SHADER_USER_DATA_PS_10\0" /* 44571 */
- "SPI_SHADER_USER_DATA_PS_11\0" /* 44598 */
- "SPI_SHADER_USER_DATA_PS_12\0" /* 44625 */
- "SPI_SHADER_USER_DATA_PS_13\0" /* 44652 */
- "SPI_SHADER_USER_DATA_PS_14\0" /* 44679 */
- "SPI_SHADER_USER_DATA_PS_15\0" /* 44706 */
- "SPI_SHADER_TBA_LO_VS\0" /* 44733 */
- "SPI_SHADER_TBA_HI_VS\0" /* 44754 */
- "SPI_SHADER_TMA_LO_VS\0" /* 44775 */
- "SPI_SHADER_TMA_HI_VS\0" /* 44796 */
- "SPI_SHADER_PGM_RSRC3_VS\0" /* 44817 */
- "SPI_SHADER_LATE_ALLOC_VS\0" /* 44841 */
- "SPI_SHADER_PGM_LO_VS\0" /* 44866 */
- "SPI_SHADER_PGM_HI_VS\0" /* 44887 */
- "SPI_SHADER_PGM_RSRC1_VS\0" /* 44908 */
- "SPI_SHADER_PGM_RSRC2_VS\0" /* 44932 */
- "SPI_SHADER_USER_DATA_VS_0\0" /* 44956 */
- "SPI_SHADER_USER_DATA_VS_1\0" /* 44982 */
- "SPI_SHADER_USER_DATA_VS_2\0" /* 45008 */
- "SPI_SHADER_USER_DATA_VS_3\0" /* 45034 */
- "SPI_SHADER_USER_DATA_VS_4\0" /* 45060 */
- "SPI_SHADER_USER_DATA_VS_5\0" /* 45086 */
- "SPI_SHADER_USER_DATA_VS_6\0" /* 45112 */
- "SPI_SHADER_USER_DATA_VS_7\0" /* 45138 */
- "SPI_SHADER_USER_DATA_VS_8\0" /* 45164 */
- "SPI_SHADER_USER_DATA_VS_9\0" /* 45190 */
- "SPI_SHADER_USER_DATA_VS_10\0" /* 45216 */
- "SPI_SHADER_USER_DATA_VS_11\0" /* 45243 */
- "SPI_SHADER_USER_DATA_VS_12\0" /* 45270 */
- "SPI_SHADER_USER_DATA_VS_13\0" /* 45297 */
- "SPI_SHADER_USER_DATA_VS_14\0" /* 45324 */
- "SPI_SHADER_USER_DATA_VS_15\0" /* 45351 */
- "SPI_SHADER_TBA_LO_GS\0" /* 45378 */
- "SPI_SHADER_TBA_HI_GS\0" /* 45399 */
- "SPI_SHADER_TMA_LO_GS\0" /* 45420 */
- "SPI_SHADER_TMA_HI_GS\0" /* 45441 */
- "SPI_SHADER_PGM_RSRC3_GS\0" /* 45462 */
- "SPI_SHADER_PGM_LO_GS\0" /* 45486 */
- "SPI_SHADER_PGM_HI_GS\0" /* 45507 */
- "SPI_SHADER_PGM_RSRC1_GS\0" /* 45528 */
- "SPI_SHADER_PGM_RSRC2_GS\0" /* 45552 */
- "SPI_SHADER_USER_DATA_GS_0\0" /* 45576 */
- "SPI_SHADER_USER_DATA_GS_1\0" /* 45602 */
- "SPI_SHADER_USER_DATA_GS_2\0" /* 45628 */
- "SPI_SHADER_USER_DATA_GS_3\0" /* 45654 */
- "SPI_SHADER_USER_DATA_GS_4\0" /* 45680 */
- "SPI_SHADER_USER_DATA_GS_5\0" /* 45706 */
- "SPI_SHADER_USER_DATA_GS_6\0" /* 45732 */
- "SPI_SHADER_USER_DATA_GS_7\0" /* 45758 */
- "SPI_SHADER_USER_DATA_GS_8\0" /* 45784 */
- "SPI_SHADER_USER_DATA_GS_9\0" /* 45810 */
- "SPI_SHADER_USER_DATA_GS_10\0" /* 45836 */
- "SPI_SHADER_USER_DATA_GS_11\0" /* 45863 */
- "SPI_SHADER_USER_DATA_GS_12\0" /* 45890 */
- "SPI_SHADER_USER_DATA_GS_13\0" /* 45917 */
- "SPI_SHADER_USER_DATA_GS_14\0" /* 45944 */
- "SPI_SHADER_USER_DATA_GS_15\0" /* 45971 */
- "SPI_SHADER_TBA_LO_ES\0" /* 45998 */
- "SPI_SHADER_TBA_HI_ES\0" /* 46019 */
- "SPI_SHADER_TMA_LO_ES\0" /* 46040 */
- "SPI_SHADER_TMA_HI_ES\0" /* 46061 */
- "SPI_SHADER_PGM_RSRC3_ES\0" /* 46082 */
- "SPI_SHADER_PGM_LO_ES\0" /* 46106 */
- "SPI_SHADER_PGM_HI_ES\0" /* 46127 */
- "SPI_SHADER_PGM_RSRC1_ES\0" /* 46148 */
- "SPI_SHADER_PGM_RSRC2_ES\0" /* 46172 */
- "SPI_SHADER_USER_DATA_ES_0\0" /* 46196 */
- "SPI_SHADER_USER_DATA_ES_1\0" /* 46222 */
- "SPI_SHADER_USER_DATA_ES_2\0" /* 46248 */
- "SPI_SHADER_USER_DATA_ES_3\0" /* 46274 */
- "SPI_SHADER_USER_DATA_ES_4\0" /* 46300 */
- "SPI_SHADER_USER_DATA_ES_5\0" /* 46326 */
- "SPI_SHADER_USER_DATA_ES_6\0" /* 46352 */
- "SPI_SHADER_USER_DATA_ES_7\0" /* 46378 */
- "SPI_SHADER_USER_DATA_ES_8\0" /* 46404 */
- "SPI_SHADER_USER_DATA_ES_9\0" /* 46430 */
- "SPI_SHADER_USER_DATA_ES_10\0" /* 46456 */
- "SPI_SHADER_USER_DATA_ES_11\0" /* 46483 */
- "SPI_SHADER_USER_DATA_ES_12\0" /* 46510 */
- "SPI_SHADER_USER_DATA_ES_13\0" /* 46537 */
- "SPI_SHADER_USER_DATA_ES_14\0" /* 46564 */
- "SPI_SHADER_USER_DATA_ES_15\0" /* 46591 */
- "SPI_SHADER_TBA_LO_HS\0" /* 46618 */
- "SPI_SHADER_TBA_HI_HS\0" /* 46639 */
- "SPI_SHADER_TMA_LO_HS\0" /* 46660 */
- "SPI_SHADER_TMA_HI_HS\0" /* 46681 */
- "SPI_SHADER_PGM_RSRC3_HS\0" /* 46702 */
- "SPI_SHADER_PGM_LO_HS\0" /* 46726 */
- "SPI_SHADER_PGM_HI_HS\0" /* 46747 */
- "SPI_SHADER_PGM_RSRC1_HS\0" /* 46768 */
- "SPI_SHADER_PGM_RSRC2_HS\0" /* 46792 */
- "SPI_SHADER_USER_DATA_HS_0\0" /* 46816 */
- "SPI_SHADER_USER_DATA_HS_1\0" /* 46842 */
- "SPI_SHADER_USER_DATA_HS_2\0" /* 46868 */
- "SPI_SHADER_USER_DATA_HS_3\0" /* 46894 */
- "SPI_SHADER_USER_DATA_HS_4\0" /* 46920 */
- "SPI_SHADER_USER_DATA_HS_5\0" /* 46946 */
- "SPI_SHADER_USER_DATA_HS_6\0" /* 46972 */
- "SPI_SHADER_USER_DATA_HS_7\0" /* 46998 */
- "SPI_SHADER_USER_DATA_HS_8\0" /* 47024 */
- "SPI_SHADER_USER_DATA_HS_9\0" /* 47050 */
- "SPI_SHADER_USER_DATA_HS_10\0" /* 47076 */
- "SPI_SHADER_USER_DATA_HS_11\0" /* 47103 */
- "SPI_SHADER_USER_DATA_HS_12\0" /* 47130 */
- "SPI_SHADER_USER_DATA_HS_13\0" /* 47157 */
- "SPI_SHADER_USER_DATA_HS_14\0" /* 47184 */
- "SPI_SHADER_USER_DATA_HS_15\0" /* 47211 */
- "SPI_SHADER_TBA_LO_LS\0" /* 47238 */
- "SPI_SHADER_TBA_HI_LS\0" /* 47259 */
- "SPI_SHADER_TMA_LO_LS\0" /* 47280 */
- "SPI_SHADER_TMA_HI_LS\0" /* 47301 */
- "SPI_SHADER_PGM_RSRC3_LS\0" /* 47322 */
- "SPI_SHADER_PGM_LO_LS\0" /* 47346 */
- "SPI_SHADER_PGM_HI_LS\0" /* 47367 */
- "SPI_SHADER_PGM_RSRC1_LS\0" /* 47388 */
- "SPI_SHADER_PGM_RSRC2_LS\0" /* 47412 */
- "SPI_SHADER_USER_DATA_LS_0\0" /* 47436 */
- "SPI_SHADER_USER_DATA_LS_1\0" /* 47462 */
- "SPI_SHADER_USER_DATA_LS_2\0" /* 47488 */
- "SPI_SHADER_USER_DATA_LS_3\0" /* 47514 */
- "SPI_SHADER_USER_DATA_LS_4\0" /* 47540 */
- "SPI_SHADER_USER_DATA_LS_5\0" /* 47566 */
- "SPI_SHADER_USER_DATA_LS_6\0" /* 47592 */
- "SPI_SHADER_USER_DATA_LS_7\0" /* 47618 */
- "SPI_SHADER_USER_DATA_LS_8\0" /* 47644 */
- "SPI_SHADER_USER_DATA_LS_9\0" /* 47670 */
- "SPI_SHADER_USER_DATA_LS_10\0" /* 47696 */
- "SPI_SHADER_USER_DATA_LS_11\0" /* 47723 */
- "SPI_SHADER_USER_DATA_LS_12\0" /* 47750 */
- "SPI_SHADER_USER_DATA_LS_13\0" /* 47777 */
- "SPI_SHADER_USER_DATA_LS_14\0" /* 47804 */
- "SPI_SHADER_USER_DATA_LS_15\0" /* 47831 */
- "COMPUTE_DISPATCH_INITIATOR\0" /* 47858 */
- "COMPUTE_DIM_X\0" /* 47885 */
- "COMPUTE_DIM_Y\0" /* 47899 */
- "COMPUTE_DIM_Z\0" /* 47913 */
- "COMPUTE_START_X\0" /* 47927 */
- "COMPUTE_START_Y\0" /* 47943 */
- "COMPUTE_START_Z\0" /* 47959 */
- "COMPUTE_NUM_THREAD_X\0" /* 47975 */
- "COMPUTE_NUM_THREAD_Y\0" /* 47996 */
- "COMPUTE_NUM_THREAD_Z\0" /* 48017 */
- "COMPUTE_MAX_WAVE_ID\0" /* 48038 */
- "COMPUTE_PIPELINESTAT_ENABLE\0" /* 48058 */
- "COMPUTE_PERFCOUNT_ENABLE\0" /* 48086 */
- "COMPUTE_PGM_LO\0" /* 48111 */
- "COMPUTE_PGM_HI\0" /* 48126 */
- "COMPUTE_TBA_LO\0" /* 48141 */
- "COMPUTE_TBA_HI\0" /* 48156 */
- "COMPUTE_TMA_LO\0" /* 48171 */
- "COMPUTE_TMA_HI\0" /* 48186 */
- "COMPUTE_PGM_RSRC1\0" /* 48201 */
- "COMPUTE_PGM_RSRC2\0" /* 48219 */
- "COMPUTE_VMID\0" /* 48237 */
- "COMPUTE_RESOURCE_LIMITS\0" /* 48250 */
- "COMPUTE_STATIC_THREAD_MGMT_SE0\0" /* 48274 */
- "COMPUTE_STATIC_THREAD_MGMT_SE1\0" /* 48305 */
- "COMPUTE_TMPRING_SIZE\0" /* 48336 */
- "COMPUTE_STATIC_THREAD_MGMT_SE2\0" /* 48357 */
- "COMPUTE_STATIC_THREAD_MGMT_SE3\0" /* 48388 */
- "COMPUTE_RESTART_X\0" /* 48419 */
- "COMPUTE_RESTART_Y\0" /* 48437 */
- "COMPUTE_RESTART_Z\0" /* 48455 */
- "COMPUTE_MISC_RESERVED\0" /* 48473 */
- "COMPUTE_DISPATCH_ID\0" /* 48495 */
- "COMPUTE_THREADGROUP_ID\0" /* 48515 */
- "COMPUTE_RELAUNCH\0" /* 48538 */
- "COMPUTE_WAVE_RESTORE_ADDR_LO\0" /* 48555 */
- "COMPUTE_WAVE_RESTORE_ADDR_HI\0" /* 48584 */
- "COMPUTE_WAVE_RESTORE_CONTROL\0" /* 48613 */
- "COMPUTE_USER_DATA_0\0" /* 48642 */
- "COMPUTE_USER_DATA_1\0" /* 48662 */
- "COMPUTE_USER_DATA_2\0" /* 48682 */
- "COMPUTE_USER_DATA_3\0" /* 48702 */
- "COMPUTE_USER_DATA_4\0" /* 48722 */
- "COMPUTE_USER_DATA_5\0" /* 48742 */
- "COMPUTE_USER_DATA_6\0" /* 48762 */
- "COMPUTE_USER_DATA_7\0" /* 48782 */
- "COMPUTE_USER_DATA_8\0" /* 48802 */
- "COMPUTE_USER_DATA_9\0" /* 48822 */
- "COMPUTE_USER_DATA_10\0" /* 48842 */
- "COMPUTE_USER_DATA_11\0" /* 48863 */
- "COMPUTE_USER_DATA_12\0" /* 48884 */
- "COMPUTE_USER_DATA_13\0" /* 48905 */
- "COMPUTE_USER_DATA_14\0" /* 48926 */
- "COMPUTE_USER_DATA_15\0" /* 48947 */
- "COMPUTE_NOWHERE\0" /* 48968 */
- "CPG_PERFCOUNTER1_LO\0" /* 48984 */
- "CPG_PERFCOUNTER1_HI\0" /* 49004 */
- "CPG_PERFCOUNTER0_LO\0" /* 49024 */
- "CPG_PERFCOUNTER0_HI\0" /* 49044 */
- "CPC_PERFCOUNTER1_LO\0" /* 49064 */
- "CPC_PERFCOUNTER1_HI\0" /* 49084 */
- "CPC_PERFCOUNTER0_LO\0" /* 49104 */
- "CPC_PERFCOUNTER0_HI\0" /* 49124 */
- "CPF_PERFCOUNTER1_LO\0" /* 49144 */
- "CPF_PERFCOUNTER1_HI\0" /* 49164 */
- "CPF_PERFCOUNTER0_LO\0" /* 49184 */
- "CPF_PERFCOUNTER0_HI\0" /* 49204 */
- "GRBM_PERFCOUNTER0_LO\0" /* 49224 */
- "GRBM_PERFCOUNTER0_HI\0" /* 49245 */
- "GRBM_PERFCOUNTER1_LO\0" /* 49266 */
- "GRBM_PERFCOUNTER1_HI\0" /* 49287 */
- "GRBM_SE0_PERFCOUNTER_LO\0" /* 49308 */
- "GRBM_SE0_PERFCOUNTER_HI\0" /* 49332 */
- "GRBM_SE1_PERFCOUNTER_LO\0" /* 49356 */
- "GRBM_SE1_PERFCOUNTER_HI\0" /* 49380 */
- "GRBM_SE2_PERFCOUNTER_LO\0" /* 49404 */
- "GRBM_SE2_PERFCOUNTER_HI\0" /* 49428 */
- "GRBM_SE3_PERFCOUNTER_LO\0" /* 49452 */
- "GRBM_SE3_PERFCOUNTER_HI\0" /* 49476 */
- "WD_PERFCOUNTER0_LO\0" /* 49500 */
- "WD_PERFCOUNTER0_HI\0" /* 49519 */
- "WD_PERFCOUNTER1_LO\0" /* 49538 */
- "WD_PERFCOUNTER1_HI\0" /* 49557 */
- "WD_PERFCOUNTER2_LO\0" /* 49576 */
- "WD_PERFCOUNTER2_HI\0" /* 49595 */
- "WD_PERFCOUNTER3_LO\0" /* 49614 */
- "WD_PERFCOUNTER3_HI\0" /* 49633 */
- "IA_PERFCOUNTER0_LO\0" /* 49652 */
- "IA_PERFCOUNTER0_HI\0" /* 49671 */
- "IA_PERFCOUNTER1_LO\0" /* 49690 */
- "IA_PERFCOUNTER1_HI\0" /* 49709 */
- "IA_PERFCOUNTER2_LO\0" /* 49728 */
- "IA_PERFCOUNTER2_HI\0" /* 49747 */
- "IA_PERFCOUNTER3_LO\0" /* 49766 */
- "IA_PERFCOUNTER3_HI\0" /* 49785 */
- "VGT_PERFCOUNTER0_LO\0" /* 49804 */
- "VGT_PERFCOUNTER0_HI\0" /* 49824 */
- "VGT_PERFCOUNTER1_LO\0" /* 49844 */
- "VGT_PERFCOUNTER1_HI\0" /* 49864 */
- "VGT_PERFCOUNTER2_LO\0" /* 49884 */
- "VGT_PERFCOUNTER2_HI\0" /* 49904 */
- "VGT_PERFCOUNTER3_LO\0" /* 49924 */
- "VGT_PERFCOUNTER3_HI\0" /* 49944 */
- "PA_SU_PERFCOUNTER0_LO\0" /* 49964 */
- "PA_SU_PERFCOUNTER0_HI\0" /* 49986 */
- "PA_SU_PERFCOUNTER1_LO\0" /* 50008 */
- "PA_SU_PERFCOUNTER1_HI\0" /* 50030 */
- "PA_SU_PERFCOUNTER2_LO\0" /* 50052 */
- "PA_SU_PERFCOUNTER2_HI\0" /* 50074 */
- "PA_SU_PERFCOUNTER3_LO\0" /* 50096 */
- "PA_SU_PERFCOUNTER3_HI\0" /* 50118 */
- "PA_SC_PERFCOUNTER0_LO\0" /* 50140 */
- "PA_SC_PERFCOUNTER0_HI\0" /* 50162 */
- "PA_SC_PERFCOUNTER1_LO\0" /* 50184 */
- "PA_SC_PERFCOUNTER1_HI\0" /* 50206 */
- "PA_SC_PERFCOUNTER2_LO\0" /* 50228 */
- "PA_SC_PERFCOUNTER2_HI\0" /* 50250 */
- "PA_SC_PERFCOUNTER3_LO\0" /* 50272 */
- "PA_SC_PERFCOUNTER3_HI\0" /* 50294 */
- "PA_SC_PERFCOUNTER4_LO\0" /* 50316 */
- "PA_SC_PERFCOUNTER4_HI\0" /* 50338 */
- "PA_SC_PERFCOUNTER5_LO\0" /* 50360 */
- "PA_SC_PERFCOUNTER5_HI\0" /* 50382 */
- "PA_SC_PERFCOUNTER6_LO\0" /* 50404 */
- "PA_SC_PERFCOUNTER6_HI\0" /* 50426 */
- "PA_SC_PERFCOUNTER7_LO\0" /* 50448 */
- "PA_SC_PERFCOUNTER7_HI\0" /* 50470 */
- "SPI_PERFCOUNTER0_HI\0" /* 50492 */
- "SPI_PERFCOUNTER0_LO\0" /* 50512 */
- "SPI_PERFCOUNTER1_HI\0" /* 50532 */
- "SPI_PERFCOUNTER1_LO\0" /* 50552 */
- "SPI_PERFCOUNTER2_HI\0" /* 50572 */
- "SPI_PERFCOUNTER2_LO\0" /* 50592 */
- "SPI_PERFCOUNTER3_HI\0" /* 50612 */
- "SPI_PERFCOUNTER3_LO\0" /* 50632 */
- "SPI_PERFCOUNTER4_HI\0" /* 50652 */
- "SPI_PERFCOUNTER4_LO\0" /* 50672 */
- "SPI_PERFCOUNTER5_HI\0" /* 50692 */
- "SPI_PERFCOUNTER5_LO\0" /* 50712 */
- "SQ_PERFCOUNTER0_LO\0" /* 50732 */
- "SQ_PERFCOUNTER0_HI\0" /* 50751 */
- "SQ_PERFCOUNTER1_LO\0" /* 50770 */
- "SQ_PERFCOUNTER1_HI\0" /* 50789 */
- "SQ_PERFCOUNTER2_LO\0" /* 50808 */
- "SQ_PERFCOUNTER2_HI\0" /* 50827 */
- "SQ_PERFCOUNTER3_LO\0" /* 50846 */
- "SQ_PERFCOUNTER3_HI\0" /* 50865 */
- "SQ_PERFCOUNTER4_LO\0" /* 50884 */
- "SQ_PERFCOUNTER4_HI\0" /* 50903 */
- "SQ_PERFCOUNTER5_LO\0" /* 50922 */
- "SQ_PERFCOUNTER5_HI\0" /* 50941 */
- "SQ_PERFCOUNTER6_LO\0" /* 50960 */
- "SQ_PERFCOUNTER6_HI\0" /* 50979 */
- "SQ_PERFCOUNTER7_LO\0" /* 50998 */
- "SQ_PERFCOUNTER7_HI\0" /* 51017 */
- "SQ_PERFCOUNTER8_LO\0" /* 51036 */
- "SQ_PERFCOUNTER8_HI\0" /* 51055 */
- "SQ_PERFCOUNTER9_LO\0" /* 51074 */
- "SQ_PERFCOUNTER9_HI\0" /* 51093 */
- "SQ_PERFCOUNTER10_LO\0" /* 51112 */
- "SQ_PERFCOUNTER10_HI\0" /* 51132 */
- "SQ_PERFCOUNTER11_LO\0" /* 51152 */
- "SQ_PERFCOUNTER11_HI\0" /* 51172 */
- "SQ_PERFCOUNTER12_LO\0" /* 51192 */
- "SQ_PERFCOUNTER12_HI\0" /* 51212 */
- "SQ_PERFCOUNTER13_LO\0" /* 51232 */
- "SQ_PERFCOUNTER13_HI\0" /* 51252 */
- "SQ_PERFCOUNTER14_LO\0" /* 51272 */
- "SQ_PERFCOUNTER14_HI\0" /* 51292 */
- "SQ_PERFCOUNTER15_LO\0" /* 51312 */
- "SQ_PERFCOUNTER15_HI\0" /* 51332 */
- "SX_PERFCOUNTER0_LO\0" /* 51352 */
- "SX_PERFCOUNTER0_HI\0" /* 51371 */
- "SX_PERFCOUNTER1_LO\0" /* 51390 */
- "SX_PERFCOUNTER1_HI\0" /* 51409 */
- "SX_PERFCOUNTER2_LO\0" /* 51428 */
- "SX_PERFCOUNTER2_HI\0" /* 51447 */
- "SX_PERFCOUNTER3_LO\0" /* 51466 */
- "SX_PERFCOUNTER3_HI\0" /* 51485 */
- "GDS_PERFCOUNTER0_LO\0" /* 51504 */
- "GDS_PERFCOUNTER0_HI\0" /* 51524 */
- "GDS_PERFCOUNTER1_LO\0" /* 51544 */
- "GDS_PERFCOUNTER1_HI\0" /* 51564 */
- "GDS_PERFCOUNTER2_LO\0" /* 51584 */
- "GDS_PERFCOUNTER2_HI\0" /* 51604 */
- "GDS_PERFCOUNTER3_LO\0" /* 51624 */
- "GDS_PERFCOUNTER3_HI\0" /* 51644 */
- "TA_PERFCOUNTER0_LO\0" /* 51664 */
- "TA_PERFCOUNTER0_HI\0" /* 51683 */
- "TA_PERFCOUNTER1_LO\0" /* 51702 */
- "TA_PERFCOUNTER1_HI\0" /* 51721 */
- "TD_PERFCOUNTER0_LO\0" /* 51740 */
- "TD_PERFCOUNTER0_HI\0" /* 51759 */
- "TD_PERFCOUNTER1_LO\0" /* 51778 */
- "TD_PERFCOUNTER1_HI\0" /* 51797 */
- "TCP_PERFCOUNTER0_LO\0" /* 51816 */
- "TCP_PERFCOUNTER0_HI\0" /* 51836 */
- "TCP_PERFCOUNTER1_LO\0" /* 51856 */
- "TCP_PERFCOUNTER1_HI\0" /* 51876 */
- "TCP_PERFCOUNTER2_LO\0" /* 51896 */
- "TCP_PERFCOUNTER2_HI\0" /* 51916 */
- "TCP_PERFCOUNTER3_LO\0" /* 51936 */
- "TCP_PERFCOUNTER3_HI\0" /* 51956 */
- "TCC_PERFCOUNTER0_LO\0" /* 51976 */
- "TCC_PERFCOUNTER0_HI\0" /* 51996 */
- "TCC_PERFCOUNTER1_LO\0" /* 52016 */
- "TCC_PERFCOUNTER1_HI\0" /* 52036 */
- "TCC_PERFCOUNTER2_LO\0" /* 52056 */
- "TCC_PERFCOUNTER2_HI\0" /* 52076 */
- "TCC_PERFCOUNTER3_LO\0" /* 52096 */
- "TCC_PERFCOUNTER3_HI\0" /* 52116 */
- "TCA_PERFCOUNTER0_LO\0" /* 52136 */
- "TCA_PERFCOUNTER0_HI\0" /* 52156 */
- "TCA_PERFCOUNTER1_LO\0" /* 52176 */
- "TCA_PERFCOUNTER1_HI\0" /* 52196 */
- "TCA_PERFCOUNTER2_LO\0" /* 52216 */
- "TCA_PERFCOUNTER2_HI\0" /* 52236 */
- "TCA_PERFCOUNTER3_LO\0" /* 52256 */
- "TCA_PERFCOUNTER3_HI\0" /* 52276 */
- "CB_PERFCOUNTER0_LO\0" /* 52296 */
- "CB_PERFCOUNTER0_HI\0" /* 52315 */
- "CB_PERFCOUNTER1_LO\0" /* 52334 */
- "CB_PERFCOUNTER1_HI\0" /* 52353 */
- "CB_PERFCOUNTER2_LO\0" /* 52372 */
- "CB_PERFCOUNTER2_HI\0" /* 52391 */
- "CB_PERFCOUNTER3_LO\0" /* 52410 */
- "CB_PERFCOUNTER3_HI\0" /* 52429 */
- "DB_PERFCOUNTER0_LO\0" /* 52448 */
- "DB_PERFCOUNTER0_HI\0" /* 52467 */
- "DB_PERFCOUNTER1_LO\0" /* 52486 */
- "DB_PERFCOUNTER1_HI\0" /* 52505 */
- "DB_PERFCOUNTER2_LO\0" /* 52524 */
- "DB_PERFCOUNTER2_HI\0" /* 52543 */
- "DB_PERFCOUNTER3_LO\0" /* 52562 */
- "DB_PERFCOUNTER3_HI\0" /* 52581 */
- "RLC_PERFCOUNTER0_LO\0" /* 52600 */
- "RLC_PERFCOUNTER0_HI\0" /* 52620 */
- "RLC_PERFCOUNTER1_LO\0" /* 52640 */
- "RLC_PERFCOUNTER1_HI\0" /* 52660 */
- "CPG_PERFCOUNTER1_SELECT\0" /* 52680 */
- "CPG_PERFCOUNTER0_SELECT1\0" /* 52704 */
- "CPG_PERFCOUNTER0_SELECT\0" /* 52729 */
- "CPC_PERFCOUNTER1_SELECT\0" /* 52753 */
- "CPC_PERFCOUNTER0_SELECT1\0" /* 52777 */
- "CPF_PERFCOUNTER1_SELECT\0" /* 52802 */
- "CPF_PERFCOUNTER0_SELECT1\0" /* 52826 */
- "CPF_PERFCOUNTER0_SELECT\0" /* 52851 */
- "CP_PERFMON_CNTL\0" /* 52875 */
- "CPC_PERFCOUNTER0_SELECT\0" /* 52891 */
- "GRBM_PERFCOUNTER0_SELECT\0" /* 52915 */
- "GRBM_PERFCOUNTER1_SELECT\0" /* 52940 */
- "GRBM_SE0_PERFCOUNTER_SELECT\0" /* 52965 */
- "GRBM_SE1_PERFCOUNTER_SELECT\0" /* 52993 */
- "GRBM_SE2_PERFCOUNTER_SELECT\0" /* 53021 */
- "GRBM_SE3_PERFCOUNTER_SELECT\0" /* 53049 */
- "WD_PERFCOUNTER0_SELECT\0" /* 53077 */
- "WD_PERFCOUNTER1_SELECT\0" /* 53100 */
- "WD_PERFCOUNTER2_SELECT\0" /* 53123 */
- "WD_PERFCOUNTER3_SELECT\0" /* 53146 */
- "IA_PERFCOUNTER0_SELECT\0" /* 53169 */
- "IA_PERFCOUNTER1_SELECT\0" /* 53192 */
- "IA_PERFCOUNTER2_SELECT\0" /* 53215 */
- "IA_PERFCOUNTER3_SELECT\0" /* 53238 */
- "IA_PERFCOUNTER0_SELECT1\0" /* 53261 */
- "VGT_PERFCOUNTER0_SELECT\0" /* 53285 */
- "VGT_PERFCOUNTER1_SELECT\0" /* 53309 */
- "VGT_PERFCOUNTER2_SELECT\0" /* 53333 */
- "VGT_PERFCOUNTER3_SELECT\0" /* 53357 */
- "VGT_PERFCOUNTER0_SELECT1\0" /* 53381 */
- "VGT_PERFCOUNTER1_SELECT1\0" /* 53406 */
- "VGT_PERFCOUNTER_SEID_MASK\0" /* 53431 */
- "PA_SU_PERFCOUNTER0_SELECT\0" /* 53457 */
- "PA_SU_PERFCOUNTER0_SELECT1\0" /* 53483 */
- "PA_SU_PERFCOUNTER1_SELECT\0" /* 53510 */
- "PA_SU_PERFCOUNTER1_SELECT1\0" /* 53536 */
- "PA_SU_PERFCOUNTER2_SELECT\0" /* 53563 */
- "PA_SU_PERFCOUNTER3_SELECT\0" /* 53589 */
- "PA_SC_PERFCOUNTER0_SELECT\0" /* 53615 */
- "PA_SC_PERFCOUNTER0_SELECT1\0" /* 53641 */
- "PA_SC_PERFCOUNTER1_SELECT\0" /* 53668 */
- "PA_SC_PERFCOUNTER2_SELECT\0" /* 53694 */
- "PA_SC_PERFCOUNTER3_SELECT\0" /* 53720 */
- "PA_SC_PERFCOUNTER4_SELECT\0" /* 53746 */
- "PA_SC_PERFCOUNTER5_SELECT\0" /* 53772 */
- "PA_SC_PERFCOUNTER6_SELECT\0" /* 53798 */
- "PA_SC_PERFCOUNTER7_SELECT\0" /* 53824 */
- "SPI_PERFCOUNTER0_SELECT\0" /* 53850 */
- "SPI_PERFCOUNTER1_SELECT\0" /* 53874 */
- "SPI_PERFCOUNTER2_SELECT\0" /* 53898 */
- "SPI_PERFCOUNTER3_SELECT\0" /* 53922 */
- "SPI_PERFCOUNTER0_SELECT1\0" /* 53946 */
- "SPI_PERFCOUNTER1_SELECT1\0" /* 53971 */
- "SPI_PERFCOUNTER2_SELECT1\0" /* 53996 */
- "SPI_PERFCOUNTER3_SELECT1\0" /* 54021 */
- "SPI_PERFCOUNTER4_SELECT\0" /* 54046 */
- "SPI_PERFCOUNTER5_SELECT\0" /* 54070 */
- "SPI_PERFCOUNTER_BINS\0" /* 54094 */
- "SQ_PERFCOUNTER0_SELECT\0" /* 54115 */
- "SQ_PERFCOUNTER1_SELECT\0" /* 54138 */
- "SQ_PERFCOUNTER2_SELECT\0" /* 54161 */
- "SQ_PERFCOUNTER3_SELECT\0" /* 54184 */
- "SQ_PERFCOUNTER4_SELECT\0" /* 54207 */
- "SQ_PERFCOUNTER5_SELECT\0" /* 54230 */
- "SQ_PERFCOUNTER6_SELECT\0" /* 54253 */
- "SQ_PERFCOUNTER7_SELECT\0" /* 54276 */
- "SQ_PERFCOUNTER8_SELECT\0" /* 54299 */
- "SQ_PERFCOUNTER9_SELECT\0" /* 54322 */
- "SQ_PERFCOUNTER10_SELECT\0" /* 54345 */
- "SQ_PERFCOUNTER11_SELECT\0" /* 54369 */
- "SQ_PERFCOUNTER12_SELECT\0" /* 54393 */
- "SQ_PERFCOUNTER13_SELECT\0" /* 54417 */
- "SQ_PERFCOUNTER14_SELECT\0" /* 54441 */
- "SQ_PERFCOUNTER15_SELECT\0" /* 54465 */
- "SQ_PERFCOUNTER_CTRL\0" /* 54489 */
- "SQ_PERFCOUNTER_MASK\0" /* 54509 */
- "SQ_PERFCOUNTER_CTRL2\0" /* 54529 */
- "SX_PERFCOUNTER0_SELECT\0" /* 54550 */
- "SX_PERFCOUNTER1_SELECT\0" /* 54573 */
- "SX_PERFCOUNTER2_SELECT\0" /* 54596 */
- "SX_PERFCOUNTER3_SELECT\0" /* 54619 */
- "SX_PERFCOUNTER0_SELECT1\0" /* 54642 */
- "SX_PERFCOUNTER1_SELECT1\0" /* 54666 */
- "GDS_PERFCOUNTER0_SELECT\0" /* 54690 */
- "GDS_PERFCOUNTER1_SELECT\0" /* 54714 */
- "GDS_PERFCOUNTER2_SELECT\0" /* 54738 */
- "GDS_PERFCOUNTER3_SELECT\0" /* 54762 */
- "GDS_PERFCOUNTER0_SELECT1\0" /* 54786 */
- "TA_PERFCOUNTER0_SELECT\0" /* 54811 */
- "TA_PERFCOUNTER0_SELECT1\0" /* 54834 */
- "TA_PERFCOUNTER1_SELECT\0" /* 54858 */
- "TD_PERFCOUNTER0_SELECT\0" /* 54881 */
- "TD_PERFCOUNTER0_SELECT1\0" /* 54904 */
- "TD_PERFCOUNTER1_SELECT\0" /* 54928 */
- "TCP_PERFCOUNTER0_SELECT\0" /* 54951 */
- "TCP_PERFCOUNTER0_SELECT1\0" /* 54975 */
- "TCP_PERFCOUNTER1_SELECT\0" /* 55000 */
- "TCP_PERFCOUNTER1_SELECT1\0" /* 55024 */
- "TCP_PERFCOUNTER2_SELECT\0" /* 55049 */
- "TCP_PERFCOUNTER3_SELECT\0" /* 55073 */
- "TCC_PERFCOUNTER0_SELECT\0" /* 55097 */
- "TCC_PERFCOUNTER0_SELECT1\0" /* 55121 */
- "TCC_PERFCOUNTER1_SELECT\0" /* 55146 */
- "TCC_PERFCOUNTER1_SELECT1\0" /* 55170 */
- "TCC_PERFCOUNTER2_SELECT\0" /* 55195 */
- "TCC_PERFCOUNTER3_SELECT\0" /* 55219 */
- "TCA_PERFCOUNTER0_SELECT\0" /* 55243 */
- "TCA_PERFCOUNTER0_SELECT1\0" /* 55267 */
- "TCA_PERFCOUNTER1_SELECT\0" /* 55292 */
- "TCA_PERFCOUNTER1_SELECT1\0" /* 55316 */
- "TCA_PERFCOUNTER2_SELECT\0" /* 55341 */
- "TCA_PERFCOUNTER3_SELECT\0" /* 55365 */
- "CB_PERFCOUNTER_FILTER\0" /* 55389 */
- "CB_PERFCOUNTER0_SELECT\0" /* 55411 */
- "CB_PERFCOUNTER0_SELECT1\0" /* 55434 */
- "CB_PERFCOUNTER1_SELECT\0" /* 55458 */
- "CB_PERFCOUNTER2_SELECT\0" /* 55481 */
- "CB_PERFCOUNTER3_SELECT\0" /* 55504 */
- "DB_PERFCOUNTER0_SELECT\0" /* 55527 */
- "DB_PERFCOUNTER0_SELECT1\0" /* 55550 */
- "DB_PERFCOUNTER1_SELECT\0" /* 55574 */
- "DB_PERFCOUNTER1_SELECT1\0" /* 55597 */
- "DB_PERFCOUNTER2_SELECT\0" /* 55621 */
- "DB_PERFCOUNTER3_SELECT\0" /* 55644 */
- "DB_RENDER_CONTROL\0" /* 55667 */
- "DB_COUNT_CONTROL\0" /* 55685 */
- "DB_DEPTH_VIEW\0" /* 55702 */
- "DB_RENDER_OVERRIDE\0" /* 55716 */
- "DB_RENDER_OVERRIDE2\0" /* 55735 */
- "DB_HTILE_DATA_BASE\0" /* 55755 */
- "DB_DEPTH_BOUNDS_MIN\0" /* 55774 */
- "DB_DEPTH_BOUNDS_MAX\0" /* 55794 */
- "DB_STENCIL_CLEAR\0" /* 55814 */
- "DB_DEPTH_CLEAR\0" /* 55831 */
- "PA_SC_SCREEN_SCISSOR_TL\0" /* 55846 */
- "PA_SC_SCREEN_SCISSOR_BR\0" /* 55870 */
- "DB_DEPTH_INFO\0" /* 55894 */
- "DB_Z_INFO\0" /* 55908 */
- "DB_STENCIL_INFO\0" /* 55918 */
- "DB_Z_READ_BASE\0" /* 55934 */
- "DB_STENCIL_READ_BASE\0" /* 55949 */
- "DB_Z_WRITE_BASE\0" /* 55970 */
- "DB_STENCIL_WRITE_BASE\0" /* 55986 */
- "DB_DEPTH_SIZE\0" /* 56008 */
- "DB_DEPTH_SLICE\0" /* 56022 */
- "TA_BC_BASE_ADDR\0" /* 56037 */
- "TA_BC_BASE_ADDR_HI\0" /* 56053 */
- "COHER_DEST_BASE_HI_0\0" /* 56072 */
- "COHER_DEST_BASE_HI_1\0" /* 56093 */
- "COHER_DEST_BASE_HI_2\0" /* 56114 */
- "COHER_DEST_BASE_HI_3\0" /* 56135 */
- "COHER_DEST_BASE_2\0" /* 56156 */
- "COHER_DEST_BASE_3\0" /* 56174 */
- "PA_SC_WINDOW_OFFSET\0" /* 56192 */
- "PA_SC_WINDOW_SCISSOR_TL\0" /* 56212 */
- "PA_SC_WINDOW_SCISSOR_BR\0" /* 56236 */
- "PA_SC_CLIPRECT_RULE\0" /* 56260 */
- "PA_SC_CLIPRECT_0_TL\0" /* 56280 */
- "PA_SC_CLIPRECT_0_BR\0" /* 56300 */
- "PA_SC_CLIPRECT_1_TL\0" /* 56320 */
- "PA_SC_CLIPRECT_1_BR\0" /* 56340 */
- "PA_SC_CLIPRECT_2_TL\0" /* 56360 */
- "PA_SC_CLIPRECT_2_BR\0" /* 56380 */
- "PA_SC_CLIPRECT_3_TL\0" /* 56400 */
- "PA_SC_CLIPRECT_3_BR\0" /* 56420 */
- "PA_SC_EDGERULE\0" /* 56440 */
- "PA_SU_HARDWARE_SCREEN_OFFSET\0" /* 56455 */
- "CB_TARGET_MASK\0" /* 56484 */
- "CB_SHADER_MASK\0" /* 56499 */
- "PA_SC_GENERIC_SCISSOR_TL\0" /* 56514 */
- "PA_SC_GENERIC_SCISSOR_BR\0" /* 56539 */
- "COHER_DEST_BASE_0\0" /* 56564 */
- "COHER_DEST_BASE_1\0" /* 56582 */
- "PA_SC_VPORT_SCISSOR_0_TL\0" /* 56600 */
- "PA_SC_VPORT_SCISSOR_0_BR\0" /* 56625 */
- "PA_SC_VPORT_SCISSOR_1_TL\0" /* 56650 */
- "PA_SC_VPORT_SCISSOR_1_BR\0" /* 56675 */
- "PA_SC_VPORT_SCISSOR_2_TL\0" /* 56700 */
- "PA_SC_VPORT_SCISSOR_2_BR\0" /* 56725 */
- "PA_SC_VPORT_SCISSOR_3_TL\0" /* 56750 */
- "PA_SC_VPORT_SCISSOR_3_BR\0" /* 56775 */
- "PA_SC_VPORT_SCISSOR_4_TL\0" /* 56800 */
- "PA_SC_VPORT_SCISSOR_4_BR\0" /* 56825 */
- "PA_SC_VPORT_SCISSOR_5_TL\0" /* 56850 */
- "PA_SC_VPORT_SCISSOR_5_BR\0" /* 56875 */
- "PA_SC_VPORT_SCISSOR_6_TL\0" /* 56900 */
- "PA_SC_VPORT_SCISSOR_6_BR\0" /* 56925 */
- "PA_SC_VPORT_SCISSOR_7_TL\0" /* 56950 */
- "PA_SC_VPORT_SCISSOR_7_BR\0" /* 56975 */
- "PA_SC_VPORT_SCISSOR_8_TL\0" /* 57000 */
- "PA_SC_VPORT_SCISSOR_8_BR\0" /* 57025 */
- "PA_SC_VPORT_SCISSOR_9_TL\0" /* 57050 */
- "PA_SC_VPORT_SCISSOR_9_BR\0" /* 57075 */
- "PA_SC_VPORT_SCISSOR_10_TL\0" /* 57100 */
- "PA_SC_VPORT_SCISSOR_10_BR\0" /* 57126 */
- "PA_SC_VPORT_SCISSOR_11_TL\0" /* 57152 */
- "PA_SC_VPORT_SCISSOR_11_BR\0" /* 57178 */
- "PA_SC_VPORT_SCISSOR_12_TL\0" /* 57204 */
- "PA_SC_VPORT_SCISSOR_12_BR\0" /* 57230 */
- "PA_SC_VPORT_SCISSOR_13_TL\0" /* 57256 */
- "PA_SC_VPORT_SCISSOR_13_BR\0" /* 57282 */
- "PA_SC_VPORT_SCISSOR_14_TL\0" /* 57308 */
- "PA_SC_VPORT_SCISSOR_14_BR\0" /* 57334 */
- "PA_SC_VPORT_SCISSOR_15_TL\0" /* 57360 */
- "PA_SC_VPORT_SCISSOR_15_BR\0" /* 57386 */
- "PA_SC_VPORT_ZMIN_0\0" /* 57412 */
- "PA_SC_VPORT_ZMAX_0\0" /* 57431 */
- "PA_SC_VPORT_ZMIN_1\0" /* 57450 */
- "PA_SC_VPORT_ZMAX_1\0" /* 57469 */
- "PA_SC_VPORT_ZMIN_2\0" /* 57488 */
- "PA_SC_VPORT_ZMAX_2\0" /* 57507 */
- "PA_SC_VPORT_ZMIN_3\0" /* 57526 */
- "PA_SC_VPORT_ZMAX_3\0" /* 57545 */
- "PA_SC_VPORT_ZMIN_4\0" /* 57564 */
- "PA_SC_VPORT_ZMAX_4\0" /* 57583 */
- "PA_SC_VPORT_ZMIN_5\0" /* 57602 */
- "PA_SC_VPORT_ZMAX_5\0" /* 57621 */
- "PA_SC_VPORT_ZMIN_6\0" /* 57640 */
- "PA_SC_VPORT_ZMAX_6\0" /* 57659 */
- "PA_SC_VPORT_ZMIN_7\0" /* 57678 */
- "PA_SC_VPORT_ZMAX_7\0" /* 57697 */
- "PA_SC_VPORT_ZMIN_8\0" /* 57716 */
- "PA_SC_VPORT_ZMAX_8\0" /* 57735 */
- "PA_SC_VPORT_ZMIN_9\0" /* 57754 */
- "PA_SC_VPORT_ZMAX_9\0" /* 57773 */
- "PA_SC_VPORT_ZMIN_10\0" /* 57792 */
- "PA_SC_VPORT_ZMAX_10\0" /* 57812 */
- "PA_SC_VPORT_ZMIN_11\0" /* 57832 */
- "PA_SC_VPORT_ZMAX_11\0" /* 57852 */
- "PA_SC_VPORT_ZMIN_12\0" /* 57872 */
- "PA_SC_VPORT_ZMAX_12\0" /* 57892 */
- "PA_SC_VPORT_ZMIN_13\0" /* 57912 */
- "PA_SC_VPORT_ZMAX_13\0" /* 57932 */
- "PA_SC_VPORT_ZMIN_14\0" /* 57952 */
- "PA_SC_VPORT_ZMAX_14\0" /* 57972 */
- "PA_SC_VPORT_ZMIN_15\0" /* 57992 */
- "PA_SC_VPORT_ZMAX_15\0" /* 58012 */
- "PA_SC_RASTER_CONFIG\0" /* 58032 */
- "PA_SC_RASTER_CONFIG_1\0" /* 58052 */
- "PA_SC_SCREEN_EXTENT_CONTROL\0" /* 58074 */
- "VGT_MAX_VTX_INDX\0" /* 58102 */
- "VGT_MIN_VTX_INDX\0" /* 58119 */
- "VGT_INDX_OFFSET\0" /* 58136 */
- "VGT_MULTI_PRIM_IB_RESET_INDX\0" /* 58152 */
- "CB_BLEND_RED\0" /* 58181 */
- "CB_BLEND_GREEN\0" /* 58194 */
- "CB_BLEND_BLUE\0" /* 58209 */
- "CB_BLEND_ALPHA\0" /* 58223 */
- "CB_DCC_CONTROL\0" /* 58238 */
- "DB_STENCIL_CONTROL\0" /* 58253 */
- "DB_STENCILREFMASK\0" /* 58272 */
- "DB_STENCILREFMASK_BF\0" /* 58290 */
- "PA_CL_VPORT_XSCALE\0" /* 58311 */
- "PA_CL_VPORT_XOFFSET\0" /* 58330 */
- "PA_CL_VPORT_YSCALE\0" /* 58350 */
- "PA_CL_VPORT_YOFFSET\0" /* 58369 */
- "PA_CL_VPORT_ZSCALE\0" /* 58389 */
- "PA_CL_VPORT_ZOFFSET\0" /* 58408 */
- "PA_CL_VPORT_XSCALE_1\0" /* 58428 */
- "PA_CL_VPORT_XOFFSET_1\0" /* 58449 */
- "PA_CL_VPORT_YSCALE_1\0" /* 58471 */
- "PA_CL_VPORT_YOFFSET_1\0" /* 58492 */
- "PA_CL_VPORT_ZSCALE_1\0" /* 58514 */
- "PA_CL_VPORT_ZOFFSET_1\0" /* 58535 */
- "PA_CL_VPORT_XSCALE_2\0" /* 58557 */
- "PA_CL_VPORT_XOFFSET_2\0" /* 58578 */
- "PA_CL_VPORT_YSCALE_2\0" /* 58600 */
- "PA_CL_VPORT_YOFFSET_2\0" /* 58621 */
- "PA_CL_VPORT_ZSCALE_2\0" /* 58643 */
- "PA_CL_VPORT_ZOFFSET_2\0" /* 58664 */
- "PA_CL_VPORT_XSCALE_3\0" /* 58686 */
- "PA_CL_VPORT_XOFFSET_3\0" /* 58707 */
- "PA_CL_VPORT_YSCALE_3\0" /* 58729 */
- "PA_CL_VPORT_YOFFSET_3\0" /* 58750 */
- "PA_CL_VPORT_ZSCALE_3\0" /* 58772 */
- "PA_CL_VPORT_ZOFFSET_3\0" /* 58793 */
- "PA_CL_VPORT_XSCALE_4\0" /* 58815 */
- "PA_CL_VPORT_XOFFSET_4\0" /* 58836 */
- "PA_CL_VPORT_YSCALE_4\0" /* 58858 */
- "PA_CL_VPORT_YOFFSET_4\0" /* 58879 */
- "PA_CL_VPORT_ZSCALE_4\0" /* 58901 */
- "PA_CL_VPORT_ZOFFSET_4\0" /* 58922 */
- "PA_CL_VPORT_XSCALE_5\0" /* 58944 */
- "PA_CL_VPORT_XOFFSET_5\0" /* 58965 */
- "PA_CL_VPORT_YSCALE_5\0" /* 58987 */
- "PA_CL_VPORT_YOFFSET_5\0" /* 59008 */
- "PA_CL_VPORT_ZSCALE_5\0" /* 59030 */
- "PA_CL_VPORT_ZOFFSET_5\0" /* 59051 */
- "PA_CL_VPORT_XSCALE_6\0" /* 59073 */
- "PA_CL_VPORT_XOFFSET_6\0" /* 59094 */
- "PA_CL_VPORT_YSCALE_6\0" /* 59116 */
- "PA_CL_VPORT_YOFFSET_6\0" /* 59137 */
- "PA_CL_VPORT_ZSCALE_6\0" /* 59159 */
- "PA_CL_VPORT_ZOFFSET_6\0" /* 59180 */
- "PA_CL_VPORT_XSCALE_7\0" /* 59202 */
- "PA_CL_VPORT_XOFFSET_7\0" /* 59223 */
- "PA_CL_VPORT_YSCALE_7\0" /* 59245 */
- "PA_CL_VPORT_YOFFSET_7\0" /* 59266 */
- "PA_CL_VPORT_ZSCALE_7\0" /* 59288 */
- "PA_CL_VPORT_ZOFFSET_7\0" /* 59309 */
- "PA_CL_VPORT_XSCALE_8\0" /* 59331 */
- "PA_CL_VPORT_XOFFSET_8\0" /* 59352 */
- "PA_CL_VPORT_YSCALE_8\0" /* 59374 */
- "PA_CL_VPORT_YOFFSET_8\0" /* 59395 */
- "PA_CL_VPORT_ZSCALE_8\0" /* 59417 */
- "PA_CL_VPORT_ZOFFSET_8\0" /* 59438 */
- "PA_CL_VPORT_XSCALE_9\0" /* 59460 */
- "PA_CL_VPORT_XOFFSET_9\0" /* 59481 */
- "PA_CL_VPORT_YSCALE_9\0" /* 59503 */
- "PA_CL_VPORT_YOFFSET_9\0" /* 59524 */
- "PA_CL_VPORT_ZSCALE_9\0" /* 59546 */
- "PA_CL_VPORT_ZOFFSET_9\0" /* 59567 */
- "PA_CL_VPORT_XSCALE_10\0" /* 59589 */
- "PA_CL_VPORT_XOFFSET_10\0" /* 59611 */
- "PA_CL_VPORT_YSCALE_10\0" /* 59634 */
- "PA_CL_VPORT_YOFFSET_10\0" /* 59656 */
- "PA_CL_VPORT_ZSCALE_10\0" /* 59679 */
- "PA_CL_VPORT_ZOFFSET_10\0" /* 59701 */
- "PA_CL_VPORT_XSCALE_11\0" /* 59724 */
- "PA_CL_VPORT_XOFFSET_11\0" /* 59746 */
- "PA_CL_VPORT_YSCALE_11\0" /* 59769 */
- "PA_CL_VPORT_YOFFSET_11\0" /* 59791 */
- "PA_CL_VPORT_ZSCALE_11\0" /* 59814 */
- "PA_CL_VPORT_ZOFFSET_11\0" /* 59836 */
- "PA_CL_VPORT_XSCALE_12\0" /* 59859 */
- "PA_CL_VPORT_XOFFSET_12\0" /* 59881 */
- "PA_CL_VPORT_YSCALE_12\0" /* 59904 */
- "PA_CL_VPORT_YOFFSET_12\0" /* 59926 */
- "PA_CL_VPORT_ZSCALE_12\0" /* 59949 */
- "PA_CL_VPORT_ZOFFSET_12\0" /* 59971 */
- "PA_CL_VPORT_XSCALE_13\0" /* 59994 */
- "PA_CL_VPORT_XOFFSET_13\0" /* 60016 */
- "PA_CL_VPORT_YSCALE_13\0" /* 60039 */
- "PA_CL_VPORT_YOFFSET_13\0" /* 60061 */
- "PA_CL_VPORT_ZSCALE_13\0" /* 60084 */
- "PA_CL_VPORT_ZOFFSET_13\0" /* 60106 */
- "PA_CL_VPORT_XSCALE_14\0" /* 60129 */
- "PA_CL_VPORT_XOFFSET_14\0" /* 60151 */
- "PA_CL_VPORT_YSCALE_14\0" /* 60174 */
- "PA_CL_VPORT_YOFFSET_14\0" /* 60196 */
- "PA_CL_VPORT_ZSCALE_14\0" /* 60219 */
- "PA_CL_VPORT_ZOFFSET_14\0" /* 60241 */
- "PA_CL_VPORT_XSCALE_15\0" /* 60264 */
- "PA_CL_VPORT_XOFFSET_15\0" /* 60286 */
- "PA_CL_VPORT_YSCALE_15\0" /* 60309 */
- "PA_CL_VPORT_YOFFSET_15\0" /* 60331 */
- "PA_CL_VPORT_ZSCALE_15\0" /* 60354 */
- "PA_CL_VPORT_ZOFFSET_15\0" /* 60376 */
- "PA_CL_UCP_0_X\0" /* 60399 */
- "PA_CL_UCP_0_Y\0" /* 60413 */
- "PA_CL_UCP_0_Z\0" /* 60427 */
- "PA_CL_UCP_0_W\0" /* 60441 */
- "PA_CL_UCP_1_X\0" /* 60455 */
- "PA_CL_UCP_1_Y\0" /* 60469 */
- "PA_CL_UCP_1_Z\0" /* 60483 */
- "PA_CL_UCP_1_W\0" /* 60497 */
- "PA_CL_UCP_2_X\0" /* 60511 */
- "PA_CL_UCP_2_Y\0" /* 60525 */
- "PA_CL_UCP_2_Z\0" /* 60539 */
- "PA_CL_UCP_2_W\0" /* 60553 */
- "PA_CL_UCP_3_X\0" /* 60567 */
- "PA_CL_UCP_3_Y\0" /* 60581 */
- "PA_CL_UCP_3_Z\0" /* 60595 */
- "PA_CL_UCP_3_W\0" /* 60609 */
- "PA_CL_UCP_4_X\0" /* 60623 */
- "PA_CL_UCP_4_Y\0" /* 60637 */
- "PA_CL_UCP_4_Z\0" /* 60651 */
- "PA_CL_UCP_4_W\0" /* 60665 */
- "PA_CL_UCP_5_X\0" /* 60679 */
- "PA_CL_UCP_5_Y\0" /* 60693 */
- "PA_CL_UCP_5_Z\0" /* 60707 */
- "PA_CL_UCP_5_W\0" /* 60721 */
- "SPI_PS_INPUT_CNTL_0\0" /* 60735 */
- "SPI_PS_INPUT_CNTL_1\0" /* 60755 */
- "SPI_PS_INPUT_CNTL_2\0" /* 60775 */
- "SPI_PS_INPUT_CNTL_3\0" /* 60795 */
- "SPI_PS_INPUT_CNTL_4\0" /* 60815 */
- "SPI_PS_INPUT_CNTL_5\0" /* 60835 */
- "SPI_PS_INPUT_CNTL_6\0" /* 60855 */
- "SPI_PS_INPUT_CNTL_7\0" /* 60875 */
- "SPI_PS_INPUT_CNTL_8\0" /* 60895 */
- "SPI_PS_INPUT_CNTL_9\0" /* 60915 */
- "SPI_PS_INPUT_CNTL_10\0" /* 60935 */
- "SPI_PS_INPUT_CNTL_11\0" /* 60956 */
- "SPI_PS_INPUT_CNTL_12\0" /* 60977 */
- "SPI_PS_INPUT_CNTL_13\0" /* 60998 */
- "SPI_PS_INPUT_CNTL_14\0" /* 61019 */
- "SPI_PS_INPUT_CNTL_15\0" /* 61040 */
- "SPI_PS_INPUT_CNTL_16\0" /* 61061 */
- "SPI_PS_INPUT_CNTL_17\0" /* 61082 */
- "SPI_PS_INPUT_CNTL_18\0" /* 61103 */
- "SPI_PS_INPUT_CNTL_19\0" /* 61124 */
- "SPI_PS_INPUT_CNTL_20\0" /* 61145 */
- "SPI_PS_INPUT_CNTL_21\0" /* 61166 */
- "SPI_PS_INPUT_CNTL_22\0" /* 61187 */
- "SPI_PS_INPUT_CNTL_23\0" /* 61208 */
- "SPI_PS_INPUT_CNTL_24\0" /* 61229 */
- "SPI_PS_INPUT_CNTL_25\0" /* 61250 */
- "SPI_PS_INPUT_CNTL_26\0" /* 61271 */
- "SPI_PS_INPUT_CNTL_27\0" /* 61292 */
- "SPI_PS_INPUT_CNTL_28\0" /* 61313 */
- "SPI_PS_INPUT_CNTL_29\0" /* 61334 */
- "SPI_PS_INPUT_CNTL_30\0" /* 61355 */
- "SPI_PS_INPUT_CNTL_31\0" /* 61376 */
- "SPI_VS_OUT_CONFIG\0" /* 61397 */
- "SPI_PS_INPUT_ENA\0" /* 61415 */
- "SPI_PS_INPUT_ADDR\0" /* 61432 */
- "SPI_INTERP_CONTROL_0\0" /* 61450 */
- "SPI_PS_IN_CONTROL\0" /* 61471 */
- "SPI_BARYC_CNTL\0" /* 61489 */
- "SPI_TMPRING_SIZE\0" /* 61504 */
- "SPI_WAVE_MGMT_1\0" /* 61521 */
- "SPI_WAVE_MGMT_2\0" /* 61537 */
- "SPI_SHADER_POS_FORMAT\0" /* 61553 */
- "SPI_SHADER_Z_FORMAT\0" /* 61575 */
- "SPI_SHADER_COL_FORMAT\0" /* 61595 */
- "SX_PS_DOWNCONVERT\0" /* 61617 */
- "SX_BLEND_OPT_EPSILON\0" /* 61635 */
- "SX_BLEND_OPT_CONTROL\0" /* 61656 */
- "SX_MRT0_BLEND_OPT\0" /* 61677 */
- "SX_MRT1_BLEND_OPT\0" /* 61695 */
- "SX_MRT2_BLEND_OPT\0" /* 61713 */
- "SX_MRT3_BLEND_OPT\0" /* 61731 */
- "SX_MRT4_BLEND_OPT\0" /* 61749 */
- "SX_MRT5_BLEND_OPT\0" /* 61767 */
- "SX_MRT6_BLEND_OPT\0" /* 61785 */
- "SX_MRT7_BLEND_OPT\0" /* 61803 */
- "CB_BLEND0_CONTROL\0" /* 61821 */
- "CB_BLEND1_CONTROL\0" /* 61839 */
- "CB_BLEND2_CONTROL\0" /* 61857 */
- "CB_BLEND3_CONTROL\0" /* 61875 */
- "CB_BLEND4_CONTROL\0" /* 61893 */
- "CB_BLEND5_CONTROL\0" /* 61911 */
- "CB_BLEND6_CONTROL\0" /* 61929 */
- "CB_BLEND7_CONTROL\0" /* 61947 */
- "CS_COPY_STATE\0" /* 61965 */
- "PA_CL_POINT_X_RAD\0" /* 61979 */
- "PA_CL_POINT_Y_RAD\0" /* 61997 */
- "PA_CL_POINT_SIZE\0" /* 62015 */
- "PA_CL_POINT_CULL_RAD\0" /* 62032 */
- "VGT_DMA_BASE_HI\0" /* 62053 */
- "VGT_DMA_BASE\0" /* 62069 */
- "VGT_DRAW_INITIATOR\0" /* 62082 */
- "VGT_IMMED_DATA\0" /* 62101 */
- "VGT_EVENT_ADDRESS_REG\0" /* 62116 */
- "DB_DEPTH_CONTROL\0" /* 62138 */
- "DB_EQAA\0" /* 62155 */
- "CB_COLOR_CONTROL\0" /* 62163 */
- "DB_SHADER_CONTROL\0" /* 62180 */
- "PA_CL_CLIP_CNTL\0" /* 62198 */
- "PA_SU_SC_MODE_CNTL\0" /* 62214 */
- "PA_CL_VTE_CNTL\0" /* 62233 */
- "PA_CL_VS_OUT_CNTL\0" /* 62248 */
- "PA_CL_NANINF_CNTL\0" /* 62266 */
- "PA_SU_LINE_STIPPLE_CNTL\0" /* 62284 */
- "PA_SU_LINE_STIPPLE_SCALE\0" /* 62308 */
- "PA_SU_PRIM_FILTER_CNTL\0" /* 62333 */
- "PA_SU_SMALL_PRIM_FILTER_CNTL\0" /* 62356 */
- "PA_SU_POINT_SIZE\0" /* 62385 */
- "PA_SU_POINT_MINMAX\0" /* 62402 */
- "PA_SU_LINE_CNTL\0" /* 62421 */
- "PA_SC_LINE_STIPPLE\0" /* 62437 */
- "VGT_OUTPUT_PATH_CNTL\0" /* 62456 */
- "VGT_HOS_CNTL\0" /* 62477 */
- "VGT_HOS_MAX_TESS_LEVEL\0" /* 62490 */
- "VGT_HOS_MIN_TESS_LEVEL\0" /* 62513 */
- "VGT_HOS_REUSE_DEPTH\0" /* 62536 */
- "VGT_GROUP_PRIM_TYPE\0" /* 62556 */
- "VGT_GROUP_FIRST_DECR\0" /* 62576 */
- "VGT_GROUP_DECR\0" /* 62597 */
- "VGT_GROUP_VECT_0_CNTL\0" /* 62612 */
- "VGT_GROUP_VECT_1_CNTL\0" /* 62634 */
- "VGT_GROUP_VECT_0_FMT_CNTL\0" /* 62656 */
- "VGT_GROUP_VECT_1_FMT_CNTL\0" /* 62682 */
- "VGT_GS_MODE\0" /* 62708 */
- "VGT_GS_ONCHIP_CNTL\0" /* 62720 */
- "PA_SC_MODE_CNTL_0\0" /* 62739 */
- "PA_SC_MODE_CNTL_1\0" /* 62757 */
- "VGT_ENHANCE\0" /* 62775 */
- "VGT_GS_PER_ES\0" /* 62787 */
- "VGT_ES_PER_GS\0" /* 62801 */
- "VGT_GS_PER_VS\0" /* 62815 */
- "VGT_GSVS_RING_OFFSET_1\0" /* 62829 */
- "VGT_GSVS_RING_OFFSET_2\0" /* 62852 */
- "VGT_GSVS_RING_OFFSET_3\0" /* 62875 */
- "VGT_GS_OUT_PRIM_TYPE\0" /* 62898 */
- "IA_ENHANCE\0" /* 62919 */
- "VGT_DMA_SIZE\0" /* 62930 */
- "VGT_DMA_MAX_SIZE\0" /* 62943 */
- "VGT_DMA_INDEX_TYPE\0" /* 62960 */
- "WD_ENHANCE\0" /* 62979 */
- "VGT_PRIMITIVEID_EN\0" /* 62990 */
- "VGT_DMA_NUM_INSTANCES\0" /* 63009 */
- "VGT_PRIMITIVEID_RESET\0" /* 63031 */
- "VGT_EVENT_INITIATOR\0" /* 63053 */
- "VGT_MULTI_PRIM_IB_RESET_EN\0" /* 63073 */
- "VGT_INSTANCE_STEP_RATE_0\0" /* 63100 */
- "VGT_INSTANCE_STEP_RATE_1\0" /* 63125 */
- "IA_MULTI_VGT_PARAM\0" /* 63150 */
- "VGT_ESGS_RING_ITEMSIZE\0" /* 63169 */
- "VGT_GSVS_RING_ITEMSIZE\0" /* 63192 */
- "VGT_REUSE_OFF\0" /* 63215 */
- "VGT_VTX_CNT_EN\0" /* 63229 */
- "DB_HTILE_SURFACE\0" /* 63244 */
- "DB_SRESULTS_COMPARE_STATE0\0" /* 63261 */
- "DB_SRESULTS_COMPARE_STATE1\0" /* 63288 */
- "DB_PRELOAD_CONTROL\0" /* 63315 */
- "VGT_STRMOUT_BUFFER_SIZE_0\0" /* 63334 */
- "VGT_STRMOUT_VTX_STRIDE_0\0" /* 63360 */
- "VGT_STRMOUT_BUFFER_OFFSET_0\0" /* 63385 */
- "VGT_STRMOUT_BUFFER_SIZE_1\0" /* 63413 */
- "VGT_STRMOUT_VTX_STRIDE_1\0" /* 63439 */
- "VGT_STRMOUT_BUFFER_OFFSET_1\0" /* 63464 */
- "VGT_STRMOUT_BUFFER_SIZE_2\0" /* 63492 */
- "VGT_STRMOUT_VTX_STRIDE_2\0" /* 63518 */
- "VGT_STRMOUT_BUFFER_OFFSET_2\0" /* 63543 */
- "VGT_STRMOUT_BUFFER_SIZE_3\0" /* 63571 */
- "VGT_STRMOUT_VTX_STRIDE_3\0" /* 63597 */
- "VGT_STRMOUT_BUFFER_OFFSET_3\0" /* 63622 */
- "VGT_STRMOUT_DRAW_OPAQUE_OFFSET\0" /* 63650 */
- "VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE\0" /* 63681 */
- "VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE\0" /* 63724 */
- "VGT_GS_MAX_VERT_OUT\0" /* 63762 */
- "VGT_TESS_DISTRIBUTION\0" /* 63782 */
- "VGT_SHADER_STAGES_EN\0" /* 63804 */
- "VGT_LS_HS_CONFIG\0" /* 63825 */
- "VGT_GS_VERT_ITEMSIZE\0" /* 63842 */
- "VGT_GS_VERT_ITEMSIZE_1\0" /* 63863 */
- "VGT_GS_VERT_ITEMSIZE_2\0" /* 63886 */
- "VGT_GS_VERT_ITEMSIZE_3\0" /* 63909 */
- "VGT_TF_PARAM\0" /* 63932 */
- "DB_ALPHA_TO_MASK\0" /* 63945 */
- "VGT_DISPATCH_DRAW_INDEX\0" /* 63962 */
- "PA_SU_POLY_OFFSET_DB_FMT_CNTL\0" /* 63986 */
- "PA_SU_POLY_OFFSET_CLAMP\0" /* 64016 */
- "PA_SU_POLY_OFFSET_FRONT_SCALE\0" /* 64040 */
- "PA_SU_POLY_OFFSET_FRONT_OFFSET\0" /* 64070 */
- "PA_SU_POLY_OFFSET_BACK_SCALE\0" /* 64101 */
- "PA_SU_POLY_OFFSET_BACK_OFFSET\0" /* 64130 */
- "VGT_GS_INSTANCE_CNT\0" /* 64160 */
- "VGT_STRMOUT_CONFIG\0" /* 64180 */
- "VGT_STRMOUT_BUFFER_CONFIG\0" /* 64199 */
- "PA_SC_CENTROID_PRIORITY_0\0" /* 64225 */
- "PA_SC_CENTROID_PRIORITY_1\0" /* 64251 */
- "PA_SC_LINE_CNTL\0" /* 64277 */
- "PA_SC_AA_CONFIG\0" /* 64293 */
- "PA_SU_VTX_CNTL\0" /* 64309 */
- "PA_CL_GB_VERT_CLIP_ADJ\0" /* 64324 */
- "PA_CL_GB_VERT_DISC_ADJ\0" /* 64347 */
- "PA_CL_GB_HORZ_CLIP_ADJ\0" /* 64370 */
- "PA_CL_GB_HORZ_DISC_ADJ\0" /* 64393 */
- "PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0\0" /* 64416 */
- "PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1\0" /* 64450 */
- "PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2\0" /* 64484 */
- "PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3\0" /* 64518 */
- "PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0\0" /* 64552 */
- "PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1\0" /* 64586 */
- "PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2\0" /* 64620 */
- "PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3\0" /* 64654 */
- "PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0\0" /* 64688 */
- "PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1\0" /* 64722 */
- "PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2\0" /* 64756 */
- "PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3\0" /* 64790 */
- "PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0\0" /* 64824 */
- "PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1\0" /* 64858 */
- "PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2\0" /* 64892 */
- "PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3\0" /* 64926 */
- "PA_SC_AA_MASK_X0Y0_X1Y0\0" /* 64960 */
- "PA_SC_AA_MASK_X0Y1_X1Y1\0" /* 64984 */
- "PA_SC_SHADER_CONTROL\0" /* 65008 */
- "VGT_VERTEX_REUSE_BLOCK_CNTL\0" /* 65029 */
- "VGT_OUT_DEALLOC_CNTL\0" /* 65057 */
- "CB_COLOR0_BASE\0" /* 65078 */
- "CB_COLOR0_PITCH\0" /* 65093 */
- "CB_COLOR0_SLICE\0" /* 65109 */
- "CB_COLOR0_VIEW\0" /* 65125 */
- "CB_COLOR0_INFO\0" /* 65140 */
- "CB_COLOR0_ATTRIB\0" /* 65155 */
- "CB_COLOR0_DCC_CONTROL\0" /* 65172 */
- "CB_COLOR0_CMASK\0" /* 65194 */
- "CB_COLOR0_CMASK_SLICE\0" /* 65210 */
- "CB_COLOR0_FMASK\0" /* 65232 */
- "CB_COLOR0_FMASK_SLICE\0" /* 65248 */
- "CB_COLOR0_CLEAR_WORD0\0" /* 65270 */
- "CB_COLOR0_CLEAR_WORD1\0" /* 65292 */
- "CB_COLOR0_DCC_BASE\0" /* 65314 */
- "CB_COLOR1_BASE\0" /* 65333 */
- "CB_COLOR1_PITCH\0" /* 65348 */
- "CB_COLOR1_SLICE\0" /* 65364 */
- "CB_COLOR1_VIEW\0" /* 65380 */
- "CB_COLOR1_INFO\0" /* 65395 */
- "CB_COLOR1_ATTRIB\0" /* 65410 */
- "CB_COLOR1_DCC_CONTROL\0" /* 65427 */
- "CB_COLOR1_CMASK\0" /* 65449 */
- "CB_COLOR1_CMASK_SLICE\0" /* 65465 */
- "CB_COLOR1_FMASK\0" /* 65487 */
- "CB_COLOR1_FMASK_SLICE\0" /* 65503 */
- "CB_COLOR1_CLEAR_WORD0\0" /* 65525 */
- "CB_COLOR1_CLEAR_WORD1\0" /* 65547 */
- "CB_COLOR1_DCC_BASE\0" /* 65569 */
- "CB_COLOR2_BASE\0" /* 65588 */
- "CB_COLOR2_PITCH\0" /* 65603 */
- "CB_COLOR2_SLICE\0" /* 65619 */
- "CB_COLOR2_VIEW\0" /* 65635 */
- "CB_COLOR2_INFO\0" /* 65650 */
- "CB_COLOR2_ATTRIB\0" /* 65665 */
- "CB_COLOR2_DCC_CONTROL\0" /* 65682 */
- "CB_COLOR2_CMASK\0" /* 65704 */
- "CB_COLOR2_CMASK_SLICE\0" /* 65720 */
- "CB_COLOR2_FMASK\0" /* 65742 */
- "CB_COLOR2_FMASK_SLICE\0" /* 65758 */
- "CB_COLOR2_CLEAR_WORD0\0" /* 65780 */
- "CB_COLOR2_CLEAR_WORD1\0" /* 65802 */
- "CB_COLOR2_DCC_BASE\0" /* 65824 */
- "CB_COLOR3_BASE\0" /* 65843 */
- "CB_COLOR3_PITCH\0" /* 65858 */
- "CB_COLOR3_SLICE\0" /* 65874 */
- "CB_COLOR3_VIEW\0" /* 65890 */
- "CB_COLOR3_INFO\0" /* 65905 */
- "CB_COLOR3_ATTRIB\0" /* 65920 */
- "CB_COLOR3_DCC_CONTROL\0" /* 65937 */
- "CB_COLOR3_CMASK\0" /* 65959 */
- "CB_COLOR3_CMASK_SLICE\0" /* 65975 */
- "CB_COLOR3_FMASK\0" /* 65997 */
- "CB_COLOR3_FMASK_SLICE\0" /* 66013 */
- "CB_COLOR3_CLEAR_WORD0\0" /* 66035 */
- "CB_COLOR3_CLEAR_WORD1\0" /* 66057 */
- "CB_COLOR3_DCC_BASE\0" /* 66079 */
- "CB_COLOR4_BASE\0" /* 66098 */
- "CB_COLOR4_PITCH\0" /* 66113 */
- "CB_COLOR4_SLICE\0" /* 66129 */
- "CB_COLOR4_VIEW\0" /* 66145 */
- "CB_COLOR4_INFO\0" /* 66160 */
- "CB_COLOR4_ATTRIB\0" /* 66175 */
- "CB_COLOR4_DCC_CONTROL\0" /* 66192 */
- "CB_COLOR4_CMASK\0" /* 66214 */
- "CB_COLOR4_CMASK_SLICE\0" /* 66230 */
- "CB_COLOR4_FMASK\0" /* 66252 */
- "CB_COLOR4_FMASK_SLICE\0" /* 66268 */
- "CB_COLOR4_CLEAR_WORD0\0" /* 66290 */
- "CB_COLOR4_CLEAR_WORD1\0" /* 66312 */
- "CB_COLOR4_DCC_BASE\0" /* 66334 */
- "CB_COLOR5_BASE\0" /* 66353 */
- "CB_COLOR5_PITCH\0" /* 66368 */
- "CB_COLOR5_SLICE\0" /* 66384 */
- "CB_COLOR5_VIEW\0" /* 66400 */
- "CB_COLOR5_INFO\0" /* 66415 */
- "CB_COLOR5_ATTRIB\0" /* 66430 */
- "CB_COLOR5_DCC_CONTROL\0" /* 66447 */
- "CB_COLOR5_CMASK\0" /* 66469 */
- "CB_COLOR5_CMASK_SLICE\0" /* 66485 */
- "CB_COLOR5_FMASK\0" /* 66507 */
- "CB_COLOR5_FMASK_SLICE\0" /* 66523 */
- "CB_COLOR5_CLEAR_WORD0\0" /* 66545 */
- "CB_COLOR5_CLEAR_WORD1\0" /* 66567 */
- "CB_COLOR5_DCC_BASE\0" /* 66589 */
- "CB_COLOR6_BASE\0" /* 66608 */
- "CB_COLOR6_PITCH\0" /* 66623 */
- "CB_COLOR6_SLICE\0" /* 66639 */
- "CB_COLOR6_VIEW\0" /* 66655 */
- "CB_COLOR6_INFO\0" /* 66670 */
- "CB_COLOR6_ATTRIB\0" /* 66685 */
- "CB_COLOR6_DCC_CONTROL\0" /* 66702 */
- "CB_COLOR6_CMASK\0" /* 66724 */
- "CB_COLOR6_CMASK_SLICE\0" /* 66740 */
- "CB_COLOR6_FMASK\0" /* 66762 */
- "CB_COLOR6_FMASK_SLICE\0" /* 66778 */
- "CB_COLOR6_CLEAR_WORD0\0" /* 66800 */
- "CB_COLOR6_CLEAR_WORD1\0" /* 66822 */
- "CB_COLOR6_DCC_BASE\0" /* 66844 */
- "CB_COLOR7_BASE\0" /* 66863 */
- "CB_COLOR7_PITCH\0" /* 66878 */
- "CB_COLOR7_SLICE\0" /* 66894 */
- "CB_COLOR7_VIEW\0" /* 66910 */
- "CB_COLOR7_INFO\0" /* 66925 */
- "CB_COLOR7_ATTRIB\0" /* 66940 */
- "CB_COLOR7_DCC_CONTROL\0" /* 66957 */
- "CB_COLOR7_CMASK\0" /* 66979 */
- "CB_COLOR7_CMASK_SLICE\0" /* 66995 */
- "CB_COLOR7_FMASK\0" /* 67017 */
- "CB_COLOR7_FMASK_SLICE\0" /* 67033 */
- "CB_COLOR7_CLEAR_WORD0\0" /* 67055 */
- "CB_COLOR7_CLEAR_WORD1\0" /* 67077 */
- "CB_COLOR7_DCC_BASE\0" /* 67099 */
- "VGT_TF_MEMORY_BASE_HI\0" /* 67118 */
- "WD_POS_BUF_BASE\0" /* 67140 */
- "WD_POS_BUF_BASE_HI\0" /* 67156 */
- "WD_CNTL_SB_BUF_BASE\0" /* 67175 */
- "WD_CNTL_SB_BUF_BASE_HI\0" /* 67195 */
- "WD_INDEX_BUF_BASE\0" /* 67218 */
- "WD_INDEX_BUF_BASE_HI\0" /* 67236 */
- "VGT_OBJECT_ID\0" /* 67257 */
- "VGT_INSTANCE_BASE_ID\0" /* 67271 */
- "SQC_WRITEBACK\0" /* 67292 */
- "TA_GRAD_ADJ_UCONFIG\0" /* 67306 */
- "SPI_CONFIG_CNTL_2\0" /* 67326 */
- "SPI_SHADER_USER_DATA_PS_16\0" /* 67344 */
- "SPI_SHADER_USER_DATA_PS_17\0" /* 67371 */
- "SPI_SHADER_USER_DATA_PS_18\0" /* 67398 */
- "SPI_SHADER_USER_DATA_PS_19\0" /* 67425 */
- "SPI_SHADER_USER_DATA_PS_20\0" /* 67452 */
- "SPI_SHADER_USER_DATA_PS_21\0" /* 67479 */
- "SPI_SHADER_USER_DATA_PS_22\0" /* 67506 */
- "SPI_SHADER_USER_DATA_PS_23\0" /* 67533 */
- "SPI_SHADER_USER_DATA_PS_24\0" /* 67560 */
- "SPI_SHADER_USER_DATA_PS_25\0" /* 67587 */
- "SPI_SHADER_USER_DATA_PS_26\0" /* 67614 */
- "SPI_SHADER_USER_DATA_PS_27\0" /* 67641 */
- "SPI_SHADER_USER_DATA_PS_28\0" /* 67668 */
- "SPI_SHADER_USER_DATA_PS_29\0" /* 67695 */
- "SPI_SHADER_USER_DATA_PS_30\0" /* 67722 */
- "SPI_SHADER_USER_DATA_PS_31\0" /* 67749 */
- "SPI_SHADER_USER_DATA_VS_16\0" /* 67776 */
- "SPI_SHADER_USER_DATA_VS_17\0" /* 67803 */
- "SPI_SHADER_USER_DATA_VS_18\0" /* 67830 */
- "SPI_SHADER_USER_DATA_VS_19\0" /* 67857 */
- "SPI_SHADER_USER_DATA_VS_20\0" /* 67884 */
- "SPI_SHADER_USER_DATA_VS_21\0" /* 67911 */
- "SPI_SHADER_USER_DATA_VS_22\0" /* 67938 */
- "SPI_SHADER_USER_DATA_VS_23\0" /* 67965 */
- "SPI_SHADER_USER_DATA_VS_24\0" /* 67992 */
- "SPI_SHADER_USER_DATA_VS_25\0" /* 68019 */
- "SPI_SHADER_USER_DATA_VS_26\0" /* 68046 */
- "SPI_SHADER_USER_DATA_VS_27\0" /* 68073 */
- "SPI_SHADER_USER_DATA_VS_28\0" /* 68100 */
- "SPI_SHADER_USER_DATA_VS_29\0" /* 68127 */
- "SPI_SHADER_USER_DATA_VS_30\0" /* 68154 */
- "SPI_SHADER_USER_DATA_VS_31\0" /* 68181 */
- "SPI_SHADER_PGM_RSRC2_GS_VS\0" /* 68208 */
- "SPI_SHADER_PGM_RSRC4_GS\0" /* 68235 */
- "SPI_SHADER_USER_DATA_ADDR_LO_GS\0" /* 68259 */
- "SPI_SHADER_USER_DATA_ADDR_HI_GS\0" /* 68291 */
- "SPI_SHADER_USER_DATA_ES_16\0" /* 68323 */
- "SPI_SHADER_USER_DATA_ES_17\0" /* 68350 */
- "SPI_SHADER_USER_DATA_ES_18\0" /* 68377 */
- "SPI_SHADER_USER_DATA_ES_19\0" /* 68404 */
- "SPI_SHADER_USER_DATA_ES_20\0" /* 68431 */
- "SPI_SHADER_USER_DATA_ES_21\0" /* 68458 */
- "SPI_SHADER_USER_DATA_ES_22\0" /* 68485 */
- "SPI_SHADER_USER_DATA_ES_23\0" /* 68512 */
- "SPI_SHADER_USER_DATA_ES_24\0" /* 68539 */
- "SPI_SHADER_USER_DATA_ES_25\0" /* 68566 */
- "SPI_SHADER_USER_DATA_ES_26\0" /* 68593 */
- "SPI_SHADER_USER_DATA_ES_27\0" /* 68620 */
- "SPI_SHADER_USER_DATA_ES_28\0" /* 68647 */
- "SPI_SHADER_USER_DATA_ES_29\0" /* 68674 */
- "SPI_SHADER_USER_DATA_ES_30\0" /* 68701 */
- "SPI_SHADER_USER_DATA_ES_31\0" /* 68728 */
- "SPI_SHADER_PGM_RSRC4_HS\0" /* 68755 */
- "SPI_SHADER_USER_DATA_ADDR_LO_HS\0" /* 68779 */
- "SPI_SHADER_USER_DATA_ADDR_HI_HS\0" /* 68811 */
- "SPI_SHADER_USER_DATA_LS_16\0" /* 68843 */
- "SPI_SHADER_USER_DATA_LS_17\0" /* 68870 */
- "SPI_SHADER_USER_DATA_LS_18\0" /* 68897 */
- "SPI_SHADER_USER_DATA_LS_19\0" /* 68924 */
- "SPI_SHADER_USER_DATA_LS_20\0" /* 68951 */
- "SPI_SHADER_USER_DATA_LS_21\0" /* 68978 */
- "SPI_SHADER_USER_DATA_LS_22\0" /* 69005 */
- "SPI_SHADER_USER_DATA_LS_23\0" /* 69032 */
- "SPI_SHADER_USER_DATA_LS_24\0" /* 69059 */
- "SPI_SHADER_USER_DATA_LS_25\0" /* 69086 */
- "SPI_SHADER_USER_DATA_LS_26\0" /* 69113 */
- "SPI_SHADER_USER_DATA_LS_27\0" /* 69140 */
- "SPI_SHADER_USER_DATA_LS_28\0" /* 69167 */
- "SPI_SHADER_USER_DATA_LS_29\0" /* 69194 */
- "SPI_SHADER_USER_DATA_LS_30\0" /* 69221 */
- "SPI_SHADER_USER_DATA_LS_31\0" /* 69248 */
- "SPI_SHADER_USER_DATA_COMMON_0\0" /* 69275 */
- "SPI_SHADER_USER_DATA_COMMON_1\0" /* 69305 */
- "SPI_SHADER_USER_DATA_COMMON_2\0" /* 69335 */
- "SPI_SHADER_USER_DATA_COMMON_3\0" /* 69365 */
- "SPI_SHADER_USER_DATA_COMMON_4\0" /* 69395 */
- "SPI_SHADER_USER_DATA_COMMON_5\0" /* 69425 */
- "SPI_SHADER_USER_DATA_COMMON_6\0" /* 69455 */
- "SPI_SHADER_USER_DATA_COMMON_7\0" /* 69485 */
- "SPI_SHADER_USER_DATA_COMMON_8\0" /* 69515 */
- "SPI_SHADER_USER_DATA_COMMON_9\0" /* 69545 */
- "SPI_SHADER_USER_DATA_COMMON_10\0" /* 69575 */
- "SPI_SHADER_USER_DATA_COMMON_11\0" /* 69606 */
- "SPI_SHADER_USER_DATA_COMMON_12\0" /* 69637 */
- "SPI_SHADER_USER_DATA_COMMON_13\0" /* 69668 */
- "SPI_SHADER_USER_DATA_COMMON_14\0" /* 69699 */
- "SPI_SHADER_USER_DATA_COMMON_15\0" /* 69730 */
- "SPI_SHADER_USER_DATA_COMMON_16\0" /* 69761 */
- "SPI_SHADER_USER_DATA_COMMON_17\0" /* 69792 */
- "SPI_SHADER_USER_DATA_COMMON_18\0" /* 69823 */
- "SPI_SHADER_USER_DATA_COMMON_19\0" /* 69854 */
- "SPI_SHADER_USER_DATA_COMMON_20\0" /* 69885 */
- "SPI_SHADER_USER_DATA_COMMON_21\0" /* 69916 */
- "SPI_SHADER_USER_DATA_COMMON_22\0" /* 69947 */
- "SPI_SHADER_USER_DATA_COMMON_23\0" /* 69978 */
- "SPI_SHADER_USER_DATA_COMMON_24\0" /* 70009 */
- "SPI_SHADER_USER_DATA_COMMON_25\0" /* 70040 */
- "SPI_SHADER_USER_DATA_COMMON_26\0" /* 70071 */
- "SPI_SHADER_USER_DATA_COMMON_27\0" /* 70102 */
- "SPI_SHADER_USER_DATA_COMMON_28\0" /* 70133 */
- "SPI_SHADER_USER_DATA_COMMON_29\0" /* 70164 */
- "SPI_SHADER_USER_DATA_COMMON_30\0" /* 70195 */
- "SPI_SHADER_USER_DATA_COMMON_31\0" /* 70226 */
- "COMPUTE_DISPATCH_PKT_ADDR_LO\0" /* 70257 */
- "COMPUTE_DISPATCH_PKT_ADDR_HI\0" /* 70286 */
- "COMPUTE_DISPATCH_SCRATCH_BASE_LO\0" /* 70315 */
- "COMPUTE_DISPATCH_SCRATCH_BASE_HI\0" /* 70348 */
- "CPF_LATENCY_STATS_DATA\0" /* 70381 */
- "CPG_LATENCY_STATS_DATA\0" /* 70404 */
- "CPC_LATENCY_STATS_DATA\0" /* 70427 */
- "CPF_TC_PERF_COUNTER_WINDOW_SELECT\0" /* 70450 */
- "CPG_TC_PERF_COUNTER_WINDOW_SELECT\0" /* 70484 */
- "CPF_LATENCY_STATS_SELECT\0" /* 70518 */
- "CPG_LATENCY_STATS_SELECT\0" /* 70543 */
- "CPC_LATENCY_STATS_SELECT\0" /* 70568 */
- "DB_HTILE_DATA_BASE_HI\0" /* 70593 */
- "DB_Z_READ_BASE_HI\0" /* 70615 */
- "DB_STENCIL_READ_BASE_HI\0" /* 70633 */
- "DB_Z_WRITE_BASE_HI\0" /* 70657 */
- "DB_STENCIL_WRITE_BASE_HI\0" /* 70676 */
- "DB_DFSM_CONTROL\0" /* 70701 */
- "DB_RENDER_FILTER\0" /* 70717 */
- "DB_Z_INFO2\0" /* 70734 */
- "DB_STENCIL_INFO2\0" /* 70745 */
- "PA_SC_TILE_STEERING_OVERRIDE\0" /* 70762 */
- "CP_PERFMON_CNTX_CNTL\0" /* 70791 */
- "PA_SC_RIGHT_VERT_GRID\0" /* 70812 */
- "PA_SC_LEFT_VERT_GRID\0" /* 70834 */
- "PA_SC_HORIZ_GRID\0" /* 70855 */
- "PA_SC_FOV_WINDOW_LR\0" /* 70872 */
- "PA_SC_FOV_WINDOW_TB\0" /* 70892 */
- "CB_MRT0_EPITCH\0" /* 70912 */
- "CB_MRT1_EPITCH\0" /* 70927 */
- "CB_MRT2_EPITCH\0" /* 70942 */
- "CB_MRT3_EPITCH\0" /* 70957 */
- "CB_MRT4_EPITCH\0" /* 70972 */
- "CB_MRT5_EPITCH\0" /* 70987 */
- "CB_MRT6_EPITCH\0" /* 71002 */
- "CB_MRT7_EPITCH\0" /* 71017 */
- "PA_CL_OBJPRIM_ID_CNTL\0" /* 71032 */
- "PA_CL_NGG_CNTL\0" /* 71054 */
- "PA_SU_OVER_RASTERIZATION_CNTL\0" /* 71069 */
- "VGT_GS_MAX_PRIMS_PER_SUBGROUP\0" /* 71099 */
- "VGT_DRAW_PAYLOAD_CNTL\0" /* 71129 */
- "VGT_INDEX_PAYLOAD_CNTL\0" /* 71151 */
- "VGT_DMA_EVENT_INITIATOR\0" /* 71174 */
- "PA_SC_BINNER_CNTL_0\0" /* 71198 */
- "PA_SC_BINNER_CNTL_1\0" /* 71218 */
- "PA_SC_CONSERVATIVE_RASTERIZATION_CNTL\0" /* 71238 */
- "PA_SC_NGG_MODE_CNTL\0" /* 71276 */
- "CB_COLOR0_BASE_EXT\0" /* 71296 */
- "CB_COLOR0_ATTRIB2\0" /* 71315 */
- "CB_COLOR0_CMASK_BASE_EXT\0" /* 71333 */
- "CB_COLOR0_FMASK_BASE_EXT\0" /* 71358 */
- "CB_COLOR0_DCC_BASE_EXT\0" /* 71383 */
- "CB_COLOR1_BASE_EXT\0" /* 71406 */
- "CB_COLOR1_ATTRIB2\0" /* 71425 */
- "CB_COLOR1_CMASK_BASE_EXT\0" /* 71443 */
- "CB_COLOR1_FMASK_BASE_EXT\0" /* 71468 */
- "CB_COLOR1_DCC_BASE_EXT\0" /* 71493 */
- "CB_COLOR2_BASE_EXT\0" /* 71516 */
- "CB_COLOR2_ATTRIB2\0" /* 71535 */
- "CB_COLOR2_CMASK_BASE_EXT\0" /* 71553 */
- "CB_COLOR2_FMASK_BASE_EXT\0" /* 71578 */
- "CB_COLOR2_DCC_BASE_EXT\0" /* 71603 */
- "CB_COLOR3_BASE_EXT\0" /* 71626 */
- "CB_COLOR3_ATTRIB2\0" /* 71645 */
- "CB_COLOR3_CMASK_BASE_EXT\0" /* 71663 */
- "CB_COLOR3_FMASK_BASE_EXT\0" /* 71688 */
- "CB_COLOR3_DCC_BASE_EXT\0" /* 71713 */
- "CB_COLOR4_BASE_EXT\0" /* 71736 */
- "CB_COLOR4_ATTRIB2\0" /* 71755 */
- "CB_COLOR4_CMASK_BASE_EXT\0" /* 71773 */
- "CB_COLOR4_FMASK_BASE_EXT\0" /* 71798 */
- "CB_COLOR4_DCC_BASE_EXT\0" /* 71823 */
- "CB_COLOR5_BASE_EXT\0" /* 71846 */
- "CB_COLOR5_ATTRIB2\0" /* 71865 */
- "CB_COLOR5_CMASK_BASE_EXT\0" /* 71883 */
- "CB_COLOR5_FMASK_BASE_EXT\0" /* 71908 */
- "CB_COLOR5_DCC_BASE_EXT\0" /* 71933 */
- "CB_COLOR6_BASE_EXT\0" /* 71956 */
- "CB_COLOR6_ATTRIB2\0" /* 71975 */
- "CB_COLOR6_CMASK_BASE_EXT\0" /* 71993 */
- "CB_COLOR6_FMASK_BASE_EXT\0" /* 72018 */
- "CB_COLOR6_DCC_BASE_EXT\0" /* 72043 */
- "CB_COLOR7_BASE_EXT\0" /* 72066 */
- "CB_COLOR7_ATTRIB2\0" /* 72085 */
- "CB_COLOR7_CMASK_BASE_EXT\0" /* 72103 */
- "CB_COLOR7_FMASK_BASE_EXT\0" /* 72128 */
- "CB_COLOR7_DCC_BASE_EXT\0" /* 72153 */;
+ "DRAW_INDEX_LOC\0" /* 827 */
+ "DST_ADDR_LO\0" /* 842 */
+ "DST_ADDR_HI\0" /* 854 */
+ "IB_BASE_LO\0" /* 866 */
+ "IB_BASE_HI\0" /* 880, 877 */
+ "CP_DMA_WORD0\0" /* 888 */
+ "CP_DMA_WORD1\0" /* 901 */
+ "CP_DMA_WORD2\0" /* 914 */
+ "CP_DMA_WORD3\0" /* 927 */
+ "COMMAND\0" /* 940 */
+ "DMA_DATA_WORD0\0" /* 948 */
+ "SRC_ADDR_LO\0" /* 963 */
+ "SRC_ADDR_HI\0" /* 975 */
+ "SRBM_STATUS2\0" /* 987 */
+ "SRBM_STATUS\0" /* 1000, 1005 */
+ "SRBM_STATUS3\0" /* 1012 */
+ "SDMA0_STATUS_REG\0" /* 1025 */
+ "SDMA1_STATUS_REG\0" /* 1042 */
+ "GRBM_STATUS2\0" /* 1059 */
+ "GRBM_STATUS\0" /* 1072 */
+ "CP_STRMOUT_CNTL\0" /* 1084 */
+ "CP_COHER_CNTL\0" /* 1100 */
+ "CP_COHER_SIZE\0" /* 1114 */
+ "CP_COHER_BASE\0" /* 1128 */
+ "GRBM_STATUS_SE0\0" /* 1142 */
+ "GRBM_STATUS_SE1\0" /* 1158 */
+ "GRBM_STATUS_SE2\0" /* 1174 */
+ "GRBM_STATUS_SE3\0" /* 1190 */
+ "CP_COHER_BASE_HI\0" /* 1206 */
+ "CP_COHER_START_DELAY\0" /* 1223 */
+ "CP_COHER_STATUS\0" /* 1244 */
+ "CP_CPC_STATUS\0" /* 1260 */
+ "CP_CPC_BUSY_STAT\0" /* 1274 */
+ "CP_CPC_STALLED_STAT1\0" /* 1291 */
+ "CP_CPF_STATUS\0" /* 1312 */
+ "CP_CPF_BUSY_STAT\0" /* 1326 */
+ "CP_CPF_STALLED_STAT1\0" /* 1343 */
+ "CP_COHER_SIZE_HI\0" /* 1364 */
+ "VGT_VTX_VECT_EJECT_REG\0" /* 1381 */
+ "VGT_CACHE_INVALIDATION\0" /* 1404 */
+ "VGT_ESGS_RING_SIZE\0" /* 1427 */
+ "VGT_GSVS_RING_SIZE\0" /* 1446 */
+ "VGT_GS_VERTEX_REUSE\0" /* 1465 */
+ "VGT_PRIMITIVE_TYPE\0" /* 1485 */
+ "VGT_INDEX_TYPE\0" /* 1504 */
+ "VGT_STRMOUT_BUFFER_FILLED_SIZE_0\0" /* 1519 */
+ "VGT_STRMOUT_BUFFER_FILLED_SIZE_1\0" /* 1552 */
+ "VGT_STRMOUT_BUFFER_FILLED_SIZE_2\0" /* 1585 */
+ "VGT_STRMOUT_BUFFER_FILLED_SIZE_3\0" /* 1618 */
+ "VGT_NUM_INDICES\0" /* 1651 */
+ "VGT_NUM_INSTANCES\0" /* 1667 */
+ "VGT_TF_RING_SIZE\0" /* 1685 */
+ "VGT_HS_OFFCHIP_PARAM\0" /* 1702 */
+ "VGT_TF_MEMORY_BASE\0" /* 1723 */
+ "PA_CL_ENHANCE\0" /* 1753, 1742 */
+ "PA_SU_LINE_STIPPLE_VALUE\0" /* 1762, 1756 */
+ "PA_SC_LINE_STIPPLE_STATE\0" /* 1781 */
+ "CP_STALLED_STAT3\0" /* 1806 */
+ "CP_STALLED_STAT1\0" /* 1823 */
+ "CP_STALLED_STAT2\0" /* 1840 */
+ "CP_STAT\0" /* 1857 */
+ "GRBM_GFX_INDEX\0" /* 1865 */
+ "PA_SC_SCREEN_EXTENT_MIN_0\0" /* 1880 */
+ "PA_SC_SCREEN_EXTENT_MAX_0\0" /* 1906 */
+ "PA_SC_SCREEN_EXTENT_MIN_1\0" /* 1932 */
+ "PA_SC_SCREEN_EXTENT_MAX_1\0" /* 1958 */
+ "PA_SC_ENHANCE\0" /* 1984 */
+ "SQC_CACHES\0" /* 1998 */
+ "SQ_RANDOM_WAVE_PRI\0" /* 2009 */
+ "SQ_EXP_0\0" /* 2028 */
+ "TA_CS_BC_BASE_ADDR\0" /* 2051, 2037 */
+ "TA_CS_BC_BASE_ADDR_HI\0" /* 2056 */
+ "DB_OCCLUSION_COUNT0_LOW\0" /* 2078 */
+ "SQ_BUF_RSRC_WORD0\0" /* 2102 */
+ "DB_OCCLUSION_COUNT0_HI\0" /* 2120 */
+ "SQ_BUF_RSRC_WORD1\0" /* 2143 */
+ "DB_OCCLUSION_COUNT1_LOW\0" /* 2161 */
+ "SQ_BUF_RSRC_WORD2\0" /* 2185 */
+ "DB_OCCLUSION_COUNT1_HI\0" /* 2203 */
+ "SQ_BUF_RSRC_WORD3\0" /* 2226 */
+ "DB_OCCLUSION_COUNT2_LOW\0" /* 2244 */
+ "SQ_IMG_RSRC_WORD0\0" /* 2268 */
+ "DB_OCCLUSION_COUNT2_HI\0" /* 2286 */
+ "SQ_IMG_RSRC_WORD1\0" /* 2309 */
+ "DB_OCCLUSION_COUNT3_LOW\0" /* 2327 */
+ "SQ_IMG_RSRC_WORD2\0" /* 2351 */
+ "DB_OCCLUSION_COUNT3_HI\0" /* 2369 */
+ "SQ_IMG_RSRC_WORD3\0" /* 2392 */
+ "SQ_IMG_RSRC_WORD4\0" /* 2410 */
+ "SQ_IMG_RSRC_WORD5\0" /* 2428 */
+ "SQ_IMG_RSRC_WORD6\0" /* 2446 */
+ "SQ_IMG_RSRC_WORD7\0" /* 2464 */
+ "SQ_IMG_SAMP_WORD0\0" /* 2482 */
+ "SQ_IMG_SAMP_WORD1\0" /* 2500 */
+ "SQ_IMG_SAMP_WORD2\0" /* 2518 */
+ "SQ_IMG_SAMP_WORD3\0" /* 2536 */
+ "SPI_DYN_GPR_LOCK_EN\0" /* 2554, 2571 */
+ "SPI_STATIC_THREAD_MGMT_1\0" /* 2574 */
+ "SPI_STATIC_THREAD_MGMT_2\0" /* 2599 */
+ "SPI_STATIC_THREAD_MGMT_3\0" /* 2624 */
+ "SPI_PS_MAX_WAVE_ID\0" /* 2656, 2649 */
+ "SPI_ARB_PRIORITY\0" /* 2676, 2668 */
+ "SPI_ARB_CYCLES_0\0" /* 2685 */
+ "SPI_ARB_CYCLES_1\0" /* 2702 */
+ "SQ_FLAT_SCRATCH_WORD0\0" /* 2719 */
+ "SQ_FLAT_SCRATCH_WORD1\0" /* 2741 */
+ "DB_ZPASS_COUNT_LOW\0" /* 2763 */
+ "DB_ZPASS_COUNT_HI\0" /* 2782, 2791 */
+ "SPI_CONFIG_CNTL\0" /* 2800 */
+ "SPI_CONFIG_CNTL_1\0" /* 2816 */
+ "SPI_RESOURCE_RESERVE_CU_AB_0\0" /* 2834 */
+ "DB_SUBTILE_CONTROL\0" /* 2863 */
+ "GB_ADDR_CONFIG\0" /* 2882 */
+ "GB_TILE_MODE0\0" /* 2897 */
+ "GB_TILE_MODE1\0" /* 2911 */
+ "GB_TILE_MODE2\0" /* 2925 */
+ "GB_TILE_MODE3\0" /* 2939 */
+ "GB_TILE_MODE4\0" /* 2953 */
+ "GB_TILE_MODE5\0" /* 2967 */
+ "GB_TILE_MODE6\0" /* 2981 */
+ "GB_TILE_MODE7\0" /* 2995 */
+ "GB_TILE_MODE8\0" /* 3009 */
+ "GB_TILE_MODE9\0" /* 3023 */
+ "GB_TILE_MODE10\0" /* 3037 */
+ "GB_TILE_MODE11\0" /* 3052 */
+ "GB_TILE_MODE12\0" /* 3067 */
+ "GB_TILE_MODE13\0" /* 3082 */
+ "GB_TILE_MODE14\0" /* 3097 */
+ "GB_TILE_MODE15\0" /* 3112 */
+ "GB_TILE_MODE16\0" /* 3127 */
+ "GB_TILE_MODE17\0" /* 3142 */
+ "GB_TILE_MODE18\0" /* 3157 */
+ "GB_TILE_MODE19\0" /* 3172 */
+ "GB_TILE_MODE20\0" /* 3187 */
+ "GB_TILE_MODE21\0" /* 3202 */
+ "GB_TILE_MODE22\0" /* 3217 */
+ "GB_TILE_MODE23\0" /* 3232 */
+ "GB_TILE_MODE24\0" /* 3247 */
+ "GB_TILE_MODE25\0" /* 3262 */
+ "GB_TILE_MODE26\0" /* 3277 */
+ "GB_TILE_MODE27\0" /* 3292 */
+ "GB_TILE_MODE28\0" /* 3307 */
+ "GB_TILE_MODE29\0" /* 3322 */
+ "GB_TILE_MODE30\0" /* 3337 */
+ "GB_TILE_MODE31\0" /* 3352 */
+ "GB_MACROTILE_MODE0\0" /* 3367 */
+ "GB_MACROTILE_MODE1\0" /* 3386 */
+ "GB_MACROTILE_MODE2\0" /* 3405 */
+ "GB_MACROTILE_MODE3\0" /* 3424 */
+ "GB_MACROTILE_MODE4\0" /* 3443 */
+ "GB_MACROTILE_MODE5\0" /* 3462 */
+ "GB_MACROTILE_MODE6\0" /* 3481 */
+ "GB_MACROTILE_MODE7\0" /* 3500 */
+ "GB_MACROTILE_MODE8\0" /* 3519 */
+ "GB_MACROTILE_MODE9\0" /* 3538 */
+ "GB_MACROTILE_MODE10\0" /* 3557 */
+ "GB_MACROTILE_MODE11\0" /* 3577 */
+ "GB_MACROTILE_MODE12\0" /* 3597 */
+ "GB_MACROTILE_MODE13\0" /* 3617 */
+ "GB_MACROTILE_MODE14\0" /* 3637 */
+ "GB_MACROTILE_MODE15\0" /* 3657 */
+ "SPI_SHADER_TBA_LO_PS\0" /* 3677 */
+ "SPI_SHADER_TBA_HI_PS\0" /* 3698 */
+ "SPI_SHADER_TMA_LO_PS\0" /* 3719 */
+ "SPI_SHADER_TMA_HI_PS\0" /* 3740 */
+ "SPI_SHADER_PGM_RSRC3_PS\0" /* 3761 */
+ "SPI_SHADER_PGM_LO_PS\0" /* 3785 */
+ "SPI_SHADER_PGM_HI_PS\0" /* 3806 */
+ "SPI_SHADER_PGM_RSRC1_PS\0" /* 3827 */
+ "SPI_SHADER_PGM_RSRC2_PS\0" /* 3851 */
+ "SPI_SHADER_USER_DATA_PS_0\0" /* 3875 */
+ "SPI_SHADER_USER_DATA_PS_1\0" /* 3901 */
+ "SPI_SHADER_USER_DATA_PS_2\0" /* 3927 */
+ "SPI_SHADER_USER_DATA_PS_3\0" /* 3953 */
+ "SPI_SHADER_USER_DATA_PS_4\0" /* 3979 */
+ "SPI_SHADER_USER_DATA_PS_5\0" /* 4005 */
+ "SPI_SHADER_USER_DATA_PS_6\0" /* 4031 */
+ "SPI_SHADER_USER_DATA_PS_7\0" /* 4057 */
+ "SPI_SHADER_USER_DATA_PS_8\0" /* 4083 */
+ "SPI_SHADER_USER_DATA_PS_9\0" /* 4109 */
+ "SPI_SHADER_USER_DATA_PS_10\0" /* 4135 */
+ "SPI_SHADER_USER_DATA_PS_11\0" /* 4162 */
+ "SPI_SHADER_USER_DATA_PS_12\0" /* 4189 */
+ "SPI_SHADER_USER_DATA_PS_13\0" /* 4216 */
+ "SPI_SHADER_USER_DATA_PS_14\0" /* 4243 */
+ "SPI_SHADER_USER_DATA_PS_15\0" /* 4270 */
+ "SPI_SHADER_TBA_LO_VS\0" /* 4297 */
+ "SPI_SHADER_TBA_HI_VS\0" /* 4318 */
+ "SPI_SHADER_TMA_LO_VS\0" /* 4339 */
+ "SPI_SHADER_TMA_HI_VS\0" /* 4360 */
+ "SPI_SHADER_PGM_RSRC3_VS\0" /* 4381 */
+ "SPI_SHADER_LATE_ALLOC_VS\0" /* 4405 */
+ "SPI_SHADER_PGM_LO_VS\0" /* 4430 */
+ "SPI_SHADER_PGM_HI_VS\0" /* 4451 */
+ "SPI_SHADER_PGM_RSRC1_VS\0" /* 4472 */
+ "SPI_SHADER_PGM_RSRC2_VS\0" /* 4496 */
+ "SPI_SHADER_USER_DATA_VS_0\0" /* 4520 */
+ "SPI_SHADER_USER_DATA_VS_1\0" /* 4546 */
+ "SPI_SHADER_USER_DATA_VS_2\0" /* 4572 */
+ "SPI_SHADER_USER_DATA_VS_3\0" /* 4598 */
+ "SPI_SHADER_USER_DATA_VS_4\0" /* 4624 */
+ "SPI_SHADER_USER_DATA_VS_5\0" /* 4650 */
+ "SPI_SHADER_USER_DATA_VS_6\0" /* 4676 */
+ "SPI_SHADER_USER_DATA_VS_7\0" /* 4702 */
+ "SPI_SHADER_USER_DATA_VS_8\0" /* 4728 */
+ "SPI_SHADER_USER_DATA_VS_9\0" /* 4754 */
+ "SPI_SHADER_USER_DATA_VS_10\0" /* 4780 */
+ "SPI_SHADER_USER_DATA_VS_11\0" /* 4807 */
+ "SPI_SHADER_USER_DATA_VS_12\0" /* 4834 */
+ "SPI_SHADER_USER_DATA_VS_13\0" /* 4861 */
+ "SPI_SHADER_USER_DATA_VS_14\0" /* 4888 */
+ "SPI_SHADER_USER_DATA_VS_15\0" /* 4915 */
+ "SPI_SHADER_TBA_LO_GS\0" /* 4942 */
+ "SPI_SHADER_TBA_HI_GS\0" /* 4963 */
+ "SPI_SHADER_TMA_LO_GS\0" /* 4984 */
+ "SPI_SHADER_TMA_HI_GS\0" /* 5005 */
+ "SPI_SHADER_PGM_RSRC3_GS\0" /* 5026 */
+ "SPI_SHADER_PGM_LO_GS\0" /* 5050 */
+ "SPI_SHADER_PGM_HI_GS\0" /* 5071 */
+ "SPI_SHADER_PGM_RSRC1_GS\0" /* 5092 */
+ "SPI_SHADER_PGM_RSRC2_GS\0" /* 5116 */
+ "SPI_SHADER_USER_DATA_GS_0\0" /* 5140 */
+ "SPI_SHADER_USER_DATA_GS_1\0" /* 5166 */
+ "SPI_SHADER_USER_DATA_GS_2\0" /* 5192 */
+ "SPI_SHADER_USER_DATA_GS_3\0" /* 5218 */
+ "SPI_SHADER_USER_DATA_GS_4\0" /* 5244 */
+ "SPI_SHADER_USER_DATA_GS_5\0" /* 5270 */
+ "SPI_SHADER_USER_DATA_GS_6\0" /* 5296 */
+ "SPI_SHADER_USER_DATA_GS_7\0" /* 5322 */
+ "SPI_SHADER_USER_DATA_GS_8\0" /* 5348 */
+ "SPI_SHADER_USER_DATA_GS_9\0" /* 5374 */
+ "SPI_SHADER_USER_DATA_GS_10\0" /* 5400 */
+ "SPI_SHADER_USER_DATA_GS_11\0" /* 5427 */
+ "SPI_SHADER_USER_DATA_GS_12\0" /* 5454 */
+ "SPI_SHADER_USER_DATA_GS_13\0" /* 5481 */
+ "SPI_SHADER_USER_DATA_GS_14\0" /* 5508 */
+ "SPI_SHADER_USER_DATA_GS_15\0" /* 5535 */
+ "SPI_SHADER_TBA_LO_ES\0" /* 5562 */
+ "SPI_SHADER_TBA_HI_ES\0" /* 5583 */
+ "SPI_SHADER_TMA_LO_ES\0" /* 5604 */
+ "SPI_SHADER_TMA_HI_ES\0" /* 5625 */
+ "SPI_SHADER_PGM_RSRC3_ES\0" /* 5646 */
+ "SPI_SHADER_PGM_LO_ES\0" /* 5670 */
+ "SPI_SHADER_PGM_HI_ES\0" /* 5691 */
+ "SPI_SHADER_PGM_RSRC1_ES\0" /* 5712 */
+ "SPI_SHADER_PGM_RSRC2_ES\0" /* 5736 */
+ "SPI_SHADER_USER_DATA_ES_0\0" /* 5760 */
+ "SPI_SHADER_USER_DATA_ES_1\0" /* 5786 */
+ "SPI_SHADER_USER_DATA_ES_2\0" /* 5812 */
+ "SPI_SHADER_USER_DATA_ES_3\0" /* 5838 */
+ "SPI_SHADER_USER_DATA_ES_4\0" /* 5864 */
+ "SPI_SHADER_USER_DATA_ES_5\0" /* 5890 */
+ "SPI_SHADER_USER_DATA_ES_6\0" /* 5916 */
+ "SPI_SHADER_USER_DATA_ES_7\0" /* 5942 */
+ "SPI_SHADER_USER_DATA_ES_8\0" /* 5968 */
+ "SPI_SHADER_USER_DATA_ES_9\0" /* 5994 */
+ "SPI_SHADER_USER_DATA_ES_10\0" /* 6020 */
+ "SPI_SHADER_USER_DATA_ES_11\0" /* 6047 */
+ "SPI_SHADER_USER_DATA_ES_12\0" /* 6074 */
+ "SPI_SHADER_USER_DATA_ES_13\0" /* 6101 */
+ "SPI_SHADER_USER_DATA_ES_14\0" /* 6128 */
+ "SPI_SHADER_USER_DATA_ES_15\0" /* 6155 */
+ "SPI_SHADER_TBA_LO_HS\0" /* 6182 */
+ "SPI_SHADER_TBA_HI_HS\0" /* 6203 */
+ "SPI_SHADER_TMA_LO_HS\0" /* 6224 */
+ "SPI_SHADER_TMA_HI_HS\0" /* 6245 */
+ "SPI_SHADER_PGM_RSRC3_HS\0" /* 6266 */
+ "SPI_SHADER_PGM_LO_HS\0" /* 6290 */
+ "SPI_SHADER_PGM_HI_HS\0" /* 6311 */
+ "SPI_SHADER_PGM_RSRC1_HS\0" /* 6332 */
+ "SPI_SHADER_PGM_RSRC2_HS\0" /* 6356 */
+ "SPI_SHADER_USER_DATA_HS_0\0" /* 6380 */
+ "SPI_SHADER_USER_DATA_HS_1\0" /* 6406 */
+ "SPI_SHADER_USER_DATA_HS_2\0" /* 6432 */
+ "SPI_SHADER_USER_DATA_HS_3\0" /* 6458 */
+ "SPI_SHADER_USER_DATA_HS_4\0" /* 6484 */
+ "SPI_SHADER_USER_DATA_HS_5\0" /* 6510 */
+ "SPI_SHADER_USER_DATA_HS_6\0" /* 6536 */
+ "SPI_SHADER_USER_DATA_HS_7\0" /* 6562 */
+ "SPI_SHADER_USER_DATA_HS_8\0" /* 6588 */
+ "SPI_SHADER_USER_DATA_HS_9\0" /* 6614 */
+ "SPI_SHADER_USER_DATA_HS_10\0" /* 6640 */
+ "SPI_SHADER_USER_DATA_HS_11\0" /* 6667 */
+ "SPI_SHADER_USER_DATA_HS_12\0" /* 6694 */
+ "SPI_SHADER_USER_DATA_HS_13\0" /* 6721 */
+ "SPI_SHADER_USER_DATA_HS_14\0" /* 6748 */
+ "SPI_SHADER_USER_DATA_HS_15\0" /* 6775 */
+ "SPI_SHADER_TBA_LO_LS\0" /* 6802 */
+ "SPI_SHADER_TBA_HI_LS\0" /* 6823 */
+ "SPI_SHADER_TMA_LO_LS\0" /* 6844 */
+ "SPI_SHADER_TMA_HI_LS\0" /* 6865 */
+ "SPI_SHADER_PGM_RSRC3_LS\0" /* 6886 */
+ "SPI_SHADER_PGM_LO_LS\0" /* 6910 */
+ "SPI_SHADER_PGM_HI_LS\0" /* 6931 */
+ "SPI_SHADER_PGM_RSRC1_LS\0" /* 6952 */
+ "SPI_SHADER_PGM_RSRC2_LS\0" /* 6976 */
+ "SPI_SHADER_USER_DATA_LS_0\0" /* 7000 */
+ "SPI_SHADER_USER_DATA_LS_1\0" /* 7026 */
+ "SPI_SHADER_USER_DATA_LS_2\0" /* 7052 */
+ "SPI_SHADER_USER_DATA_LS_3\0" /* 7078 */
+ "SPI_SHADER_USER_DATA_LS_4\0" /* 7104 */
+ "SPI_SHADER_USER_DATA_LS_5\0" /* 7130 */
+ "SPI_SHADER_USER_DATA_LS_6\0" /* 7156 */
+ "SPI_SHADER_USER_DATA_LS_7\0" /* 7182 */
+ "SPI_SHADER_USER_DATA_LS_8\0" /* 7208 */
+ "SPI_SHADER_USER_DATA_LS_9\0" /* 7234 */
+ "SPI_SHADER_USER_DATA_LS_10\0" /* 7260 */
+ "SPI_SHADER_USER_DATA_LS_11\0" /* 7287 */
+ "SPI_SHADER_USER_DATA_LS_12\0" /* 7314 */
+ "SPI_SHADER_USER_DATA_LS_13\0" /* 7341 */
+ "SPI_SHADER_USER_DATA_LS_14\0" /* 7368 */
+ "SPI_SHADER_USER_DATA_LS_15\0" /* 7395 */
+ "COMPUTE_DISPATCH_INITIATOR\0" /* 7422 */
+ "COMPUTE_DIM_X\0" /* 7449 */
+ "COMPUTE_DIM_Y\0" /* 7463 */
+ "COMPUTE_DIM_Z\0" /* 7477 */
+ "COMPUTE_START_X\0" /* 7499, 7491 */
+ "COMPUTE_START_Y\0" /* 7515, 7507 */
+ "COMPUTE_START_Z\0" /* 7523 */
+ "COMPUTE_NUM_THREAD_X\0" /* 7539 */
+ "COMPUTE_NUM_THREAD_Y\0" /* 7560 */
+ "COMPUTE_NUM_THREAD_Z\0" /* 7581 */
+ "COMPUTE_MAX_WAVE_ID\0" /* 7602 */
+ "COMPUTE_PIPELINESTAT_ENABLE\0" /* 7643, 7630, 7622 */
+ "COMPUTE_PERFCOUNT_ENABLE\0" /* 7650, 7658 */
+ "COMPUTE_PGM_LO\0" /* 7675 */
+ "COMPUTE_PGM_HI\0" /* 7690 */
+ "COMPUTE_TBA_LO\0" /* 7705 */
+ "COMPUTE_TBA_HI\0" /* 7720 */
+ "COMPUTE_TMA_LO\0" /* 7735 */
+ "COMPUTE_TMA_HI\0" /* 7750 */
+ "COMPUTE_PGM_RSRC1\0" /* 7765 */
+ "COMPUTE_PGM_RSRC2\0" /* 7783 */
+ "COMPUTE_VMID\0" /* 7801 */
+ "COMPUTE_RESOURCE_LIMITS\0" /* 7814 */
+ "COMPUTE_STATIC_THREAD_MGMT_SE0\0" /* 7838 */
+ "COMPUTE_STATIC_THREAD_MGMT_SE1\0" /* 7869 */
+ "COMPUTE_TMPRING_SIZE\0" /* 7900 */
+ "COMPUTE_STATIC_THREAD_MGMT_SE2\0" /* 7921 */
+ "COMPUTE_STATIC_THREAD_MGMT_SE3\0" /* 7952 */
+ "COMPUTE_RESTART_X\0" /* 7983 */
+ "COMPUTE_RESTART_Y\0" /* 8001 */
+ "COMPUTE_RESTART_Z\0" /* 8019 */
+ "COMPUTE_MISC_RESERVED\0" /* 8050, 8037 */
+ "COMPUTE_DISPATCH_ID\0" /* 8059 */
+ "COMPUTE_THREADGROUP_ID\0" /* 8079 */
+ "COMPUTE_RELAUNCH\0" /* 8102 */
+ "COMPUTE_WAVE_RESTORE_ADDR_LO\0" /* 8119 */
+ "COMPUTE_WAVE_RESTORE_ADDR_HI\0" /* 8148 */
+ "COMPUTE_WAVE_RESTORE_CONTROL\0" /* 8177 */
+ "COMPUTE_USER_DATA_0\0" /* 8206 */
+ "COMPUTE_USER_DATA_1\0" /* 8226 */
+ "COMPUTE_USER_DATA_2\0" /* 8246 */
+ "COMPUTE_USER_DATA_3\0" /* 8266 */
+ "COMPUTE_USER_DATA_4\0" /* 8286 */
+ "COMPUTE_USER_DATA_5\0" /* 8306 */
+ "COMPUTE_USER_DATA_6\0" /* 8326 */
+ "COMPUTE_USER_DATA_7\0" /* 8346 */
+ "COMPUTE_USER_DATA_8\0" /* 8366 */
+ "COMPUTE_USER_DATA_9\0" /* 8386 */
+ "COMPUTE_USER_DATA_10\0" /* 8406 */
+ "COMPUTE_USER_DATA_11\0" /* 8427 */
+ "COMPUTE_USER_DATA_12\0" /* 8448 */
+ "COMPUTE_USER_DATA_13\0" /* 8469 */
+ "COMPUTE_USER_DATA_14\0" /* 8490 */
+ "COMPUTE_USER_DATA_15\0" /* 8511 */
+ "COMPUTE_NOWHERE\0" /* 8540, 8532 */
+ "CPG_PERFCOUNTER1_LO\0" /* 8548 */
+ "CPG_PERFCOUNTER1_HI\0" /* 8568 */
+ "CPG_PERFCOUNTER0_LO\0" /* 8588 */
+ "CPG_PERFCOUNTER0_HI\0" /* 8608 */
+ "CPC_PERFCOUNTER1_LO\0" /* 8628 */
+ "CPC_PERFCOUNTER1_HI\0" /* 8648 */
+ "CPC_PERFCOUNTER0_LO\0" /* 8668 */
+ "CPC_PERFCOUNTER0_HI\0" /* 8688 */
+ "CPF_PERFCOUNTER1_LO\0" /* 8708 */
+ "CPF_PERFCOUNTER1_HI\0" /* 8728 */
+ "CPF_PERFCOUNTER0_LO\0" /* 8748 */
+ "CPF_PERFCOUNTER0_HI\0" /* 8768 */
+ "GRBM_PERFCOUNTER0_LO\0" /* 8788 */
+ "GRBM_PERFCOUNTER0_HI\0" /* 8809 */
+ "GRBM_PERFCOUNTER1_LO\0" /* 8830 */
+ "GRBM_PERFCOUNTER1_HI\0" /* 8851 */
+ "GRBM_SE0_PERFCOUNTER_LO\0" /* 8872 */
+ "GRBM_SE0_PERFCOUNTER_HI\0" /* 8896, 8905 */
+ "GRBM_SE1_PERFCOUNTER_LO\0" /* 8920 */
+ "GRBM_SE1_PERFCOUNTER_HI\0" /* 8944 */
+ "GRBM_SE2_PERFCOUNTER_LO\0" /* 8968 */
+ "GRBM_SE2_PERFCOUNTER_HI\0" /* 8992 */
+ "GRBM_SE3_PERFCOUNTER_LO\0" /* 9016 */
+ "GRBM_SE3_PERFCOUNTER_HI\0" /* 9040 */
+ "WD_PERFCOUNTER0_LO\0" /* 9064 */
+ "WD_PERFCOUNTER0_HI\0" /* 9083 */
+ "WD_PERFCOUNTER1_LO\0" /* 9102 */
+ "WD_PERFCOUNTER1_HI\0" /* 9121 */
+ "WD_PERFCOUNTER2_LO\0" /* 9140 */
+ "WD_PERFCOUNTER2_HI\0" /* 9159 */
+ "WD_PERFCOUNTER3_LO\0" /* 9178 */
+ "WD_PERFCOUNTER3_HI\0" /* 9197 */
+ "IA_PERFCOUNTER0_LO\0" /* 9216 */
+ "IA_PERFCOUNTER0_HI\0" /* 9235 */
+ "IA_PERFCOUNTER1_LO\0" /* 9254 */
+ "IA_PERFCOUNTER1_HI\0" /* 9273 */
+ "IA_PERFCOUNTER2_LO\0" /* 9292 */
+ "IA_PERFCOUNTER2_HI\0" /* 9311 */
+ "IA_PERFCOUNTER3_LO\0" /* 9330 */
+ "IA_PERFCOUNTER3_HI\0" /* 9349 */
+ "VGT_PERFCOUNTER0_LO\0" /* 9368 */
+ "VGT_PERFCOUNTER0_HI\0" /* 9388 */
+ "VGT_PERFCOUNTER1_LO\0" /* 9408 */
+ "VGT_PERFCOUNTER1_HI\0" /* 9428 */
+ "VGT_PERFCOUNTER2_LO\0" /* 9448 */
+ "VGT_PERFCOUNTER2_HI\0" /* 9468 */
+ "VGT_PERFCOUNTER3_LO\0" /* 9488 */
+ "VGT_PERFCOUNTER3_HI\0" /* 9508 */
+ "PA_SU_PERFCOUNTER0_LO\0" /* 9528 */
+ "PA_SU_PERFCOUNTER0_HI\0" /* 9550 */
+ "PA_SU_PERFCOUNTER1_LO\0" /* 9572 */
+ "PA_SU_PERFCOUNTER1_HI\0" /* 9594 */
+ "PA_SU_PERFCOUNTER2_LO\0" /* 9616 */
+ "PA_SU_PERFCOUNTER2_HI\0" /* 9638 */
+ "PA_SU_PERFCOUNTER3_LO\0" /* 9660 */
+ "PA_SU_PERFCOUNTER3_HI\0" /* 9682 */
+ "PA_SC_PERFCOUNTER0_LO\0" /* 9704 */
+ "PA_SC_PERFCOUNTER0_HI\0" /* 9726 */
+ "PA_SC_PERFCOUNTER1_LO\0" /* 9748 */
+ "PA_SC_PERFCOUNTER1_HI\0" /* 9770 */
+ "PA_SC_PERFCOUNTER2_LO\0" /* 9792 */
+ "PA_SC_PERFCOUNTER2_HI\0" /* 9814 */
+ "PA_SC_PERFCOUNTER3_LO\0" /* 9836 */
+ "PA_SC_PERFCOUNTER3_HI\0" /* 9858 */
+ "PA_SC_PERFCOUNTER4_LO\0" /* 9880 */
+ "PA_SC_PERFCOUNTER4_HI\0" /* 9902 */
+ "PA_SC_PERFCOUNTER5_LO\0" /* 9924 */
+ "PA_SC_PERFCOUNTER5_HI\0" /* 9946 */
+ "PA_SC_PERFCOUNTER6_LO\0" /* 9968 */
+ "PA_SC_PERFCOUNTER6_HI\0" /* 9990 */
+ "PA_SC_PERFCOUNTER7_LO\0" /* 10012 */
+ "PA_SC_PERFCOUNTER7_HI\0" /* 10034 */
+ "SPI_PERFCOUNTER0_HI\0" /* 10056 */
+ "SPI_PERFCOUNTER0_LO\0" /* 10076 */
+ "SPI_PERFCOUNTER1_HI\0" /* 10096 */
+ "SPI_PERFCOUNTER1_LO\0" /* 10116 */
+ "SPI_PERFCOUNTER2_HI\0" /* 10136 */
+ "SPI_PERFCOUNTER2_LO\0" /* 10156 */
+ "SPI_PERFCOUNTER3_HI\0" /* 10176 */
+ "SPI_PERFCOUNTER3_LO\0" /* 10196 */
+ "SPI_PERFCOUNTER4_HI\0" /* 10216 */
+ "SPI_PERFCOUNTER4_LO\0" /* 10236 */
+ "SPI_PERFCOUNTER5_HI\0" /* 10256 */
+ "SPI_PERFCOUNTER5_LO\0" /* 10276 */
+ "SQ_PERFCOUNTER0_LO\0" /* 10296 */
+ "SQ_PERFCOUNTER0_HI\0" /* 10315 */
+ "SQ_PERFCOUNTER1_LO\0" /* 10334 */
+ "SQ_PERFCOUNTER1_HI\0" /* 10353 */
+ "SQ_PERFCOUNTER2_LO\0" /* 10372 */
+ "SQ_PERFCOUNTER2_HI\0" /* 10391 */
+ "SQ_PERFCOUNTER3_LO\0" /* 10410 */
+ "SQ_PERFCOUNTER3_HI\0" /* 10429 */
+ "SQ_PERFCOUNTER4_LO\0" /* 10448 */
+ "SQ_PERFCOUNTER4_HI\0" /* 10467 */
+ "SQ_PERFCOUNTER5_LO\0" /* 10486 */
+ "SQ_PERFCOUNTER5_HI\0" /* 10505 */
+ "SQ_PERFCOUNTER6_LO\0" /* 10524 */
+ "SQ_PERFCOUNTER6_HI\0" /* 10543 */
+ "SQ_PERFCOUNTER7_LO\0" /* 10562 */
+ "SQ_PERFCOUNTER7_HI\0" /* 10581 */
+ "SQ_PERFCOUNTER8_LO\0" /* 10600 */
+ "SQ_PERFCOUNTER8_HI\0" /* 10619 */
+ "SQ_PERFCOUNTER9_LO\0" /* 10638 */
+ "SQ_PERFCOUNTER9_HI\0" /* 10657 */
+ "SQ_PERFCOUNTER10_LO\0" /* 10676 */
+ "SQ_PERFCOUNTER10_HI\0" /* 10696 */
+ "SQ_PERFCOUNTER11_LO\0" /* 10716 */
+ "SQ_PERFCOUNTER11_HI\0" /* 10736 */
+ "SQ_PERFCOUNTER12_LO\0" /* 10756 */
+ "SQ_PERFCOUNTER12_HI\0" /* 10776 */
+ "SQ_PERFCOUNTER13_LO\0" /* 10796 */
+ "SQ_PERFCOUNTER13_HI\0" /* 10816 */
+ "SQ_PERFCOUNTER14_LO\0" /* 10836 */
+ "SQ_PERFCOUNTER14_HI\0" /* 10856 */
+ "SQ_PERFCOUNTER15_LO\0" /* 10876 */
+ "SQ_PERFCOUNTER15_HI\0" /* 10896 */
+ "SX_PERFCOUNTER0_LO\0" /* 10916 */
+ "SX_PERFCOUNTER0_HI\0" /* 10935 */
+ "SX_PERFCOUNTER1_LO\0" /* 10954 */
+ "SX_PERFCOUNTER1_HI\0" /* 10973 */
+ "SX_PERFCOUNTER2_LO\0" /* 10992 */
+ "SX_PERFCOUNTER2_HI\0" /* 11011 */
+ "SX_PERFCOUNTER3_LO\0" /* 11030 */
+ "SX_PERFCOUNTER3_HI\0" /* 11049 */
+ "GDS_PERFCOUNTER0_LO\0" /* 11068 */
+ "GDS_PERFCOUNTER0_HI\0" /* 11088 */
+ "GDS_PERFCOUNTER1_LO\0" /* 11108 */
+ "GDS_PERFCOUNTER1_HI\0" /* 11128 */
+ "GDS_PERFCOUNTER2_LO\0" /* 11148 */
+ "GDS_PERFCOUNTER2_HI\0" /* 11168 */
+ "GDS_PERFCOUNTER3_LO\0" /* 11188 */
+ "GDS_PERFCOUNTER3_HI\0" /* 11208 */
+ "TA_PERFCOUNTER0_LO\0" /* 11228 */
+ "TA_PERFCOUNTER0_HI\0" /* 11247 */
+ "TA_PERFCOUNTER1_LO\0" /* 11266 */
+ "TA_PERFCOUNTER1_HI\0" /* 11285 */
+ "TD_PERFCOUNTER0_LO\0" /* 11304 */
+ "TD_PERFCOUNTER0_HI\0" /* 11323 */
+ "TD_PERFCOUNTER1_LO\0" /* 11342 */
+ "TD_PERFCOUNTER1_HI\0" /* 11361 */
+ "TCP_PERFCOUNTER0_LO\0" /* 11380 */
+ "TCP_PERFCOUNTER0_HI\0" /* 11400 */
+ "TCP_PERFCOUNTER1_LO\0" /* 11420 */
+ "TCP_PERFCOUNTER1_HI\0" /* 11440 */
+ "TCP_PERFCOUNTER2_LO\0" /* 11460 */
+ "TCP_PERFCOUNTER2_HI\0" /* 11480 */
+ "TCP_PERFCOUNTER3_LO\0" /* 11500 */
+ "TCP_PERFCOUNTER3_HI\0" /* 11520 */
+ "TCC_PERFCOUNTER0_LO\0" /* 11540 */
+ "TCC_PERFCOUNTER0_HI\0" /* 11560 */
+ "TCC_PERFCOUNTER1_LO\0" /* 11580 */
+ "TCC_PERFCOUNTER1_HI\0" /* 11600 */
+ "TCC_PERFCOUNTER2_LO\0" /* 11620 */
+ "TCC_PERFCOUNTER2_HI\0" /* 11640 */
+ "TCC_PERFCOUNTER3_LO\0" /* 11660 */
+ "TCC_PERFCOUNTER3_HI\0" /* 11680 */
+ "TCA_PERFCOUNTER0_LO\0" /* 11700 */
+ "TCA_PERFCOUNTER0_HI\0" /* 11720 */
+ "TCA_PERFCOUNTER1_LO\0" /* 11740 */
+ "TCA_PERFCOUNTER1_HI\0" /* 11760 */
+ "TCA_PERFCOUNTER2_LO\0" /* 11780 */
+ "TCA_PERFCOUNTER2_HI\0" /* 11800 */
+ "TCA_PERFCOUNTER3_LO\0" /* 11820 */
+ "TCA_PERFCOUNTER3_HI\0" /* 11840 */
+ "CB_PERFCOUNTER0_LO\0" /* 11860 */
+ "CB_PERFCOUNTER0_HI\0" /* 11879 */
+ "CB_PERFCOUNTER1_LO\0" /* 11898 */
+ "CB_PERFCOUNTER1_HI\0" /* 11917 */
+ "CB_PERFCOUNTER2_LO\0" /* 11936 */
+ "CB_PERFCOUNTER2_HI\0" /* 11955 */
+ "CB_PERFCOUNTER3_LO\0" /* 11974 */
+ "CB_PERFCOUNTER3_HI\0" /* 11993 */
+ "DB_PERFCOUNTER0_LO\0" /* 12012 */
+ "DB_PERFCOUNTER0_HI\0" /* 12031 */
+ "DB_PERFCOUNTER1_LO\0" /* 12050 */
+ "DB_PERFCOUNTER1_HI\0" /* 12069 */
+ "DB_PERFCOUNTER2_LO\0" /* 12088 */
+ "DB_PERFCOUNTER2_HI\0" /* 12107 */
+ "DB_PERFCOUNTER3_LO\0" /* 12126 */
+ "DB_PERFCOUNTER3_HI\0" /* 12145 */
+ "RLC_PERFCOUNTER0_LO\0" /* 12164 */
+ "RLC_PERFCOUNTER0_HI\0" /* 12184 */
+ "RLC_PERFCOUNTER1_LO\0" /* 12204 */
+ "RLC_PERFCOUNTER1_HI\0" /* 12224 */
+ "CPG_PERFCOUNTER1_SELECT\0" /* 12244 */
+ "CPG_PERFCOUNTER0_SELECT1\0" /* 12268 */
+ "CPG_PERFCOUNTER0_SELECT\0" /* 12293 */
+ "CPC_PERFCOUNTER1_SELECT\0" /* 12317 */
+ "CPC_PERFCOUNTER0_SELECT1\0" /* 12341 */
+ "CPF_PERFCOUNTER1_SELECT\0" /* 12366 */
+ "CPF_PERFCOUNTER0_SELECT1\0" /* 12390 */
+ "CPF_PERFCOUNTER0_SELECT\0" /* 12415 */
+ "CP_PERFMON_CNTL\0" /* 12439 */
+ "CPC_PERFCOUNTER0_SELECT\0" /* 12455 */
+ "GRBM_PERFCOUNTER0_SELECT\0" /* 12479 */
+ "GRBM_PERFCOUNTER1_SELECT\0" /* 12504 */
+ "GRBM_SE0_PERFCOUNTER_SELECT\0" /* 12529, 12538 */
+ "GRBM_SE1_PERFCOUNTER_SELECT\0" /* 12557 */
+ "GRBM_SE2_PERFCOUNTER_SELECT\0" /* 12585 */
+ "GRBM_SE3_PERFCOUNTER_SELECT\0" /* 12613 */
+ "WD_PERFCOUNTER0_SELECT\0" /* 12641 */
+ "WD_PERFCOUNTER1_SELECT\0" /* 12664 */
+ "WD_PERFCOUNTER2_SELECT\0" /* 12687 */
+ "WD_PERFCOUNTER3_SELECT\0" /* 12710 */
+ "IA_PERFCOUNTER0_SELECT\0" /* 12733 */
+ "IA_PERFCOUNTER1_SELECT\0" /* 12756 */
+ "IA_PERFCOUNTER2_SELECT\0" /* 12779 */
+ "IA_PERFCOUNTER3_SELECT\0" /* 12802 */
+ "IA_PERFCOUNTER0_SELECT1\0" /* 12825 */
+ "VGT_PERFCOUNTER0_SELECT\0" /* 12849 */
+ "VGT_PERFCOUNTER1_SELECT\0" /* 12873 */
+ "VGT_PERFCOUNTER2_SELECT\0" /* 12897 */
+ "VGT_PERFCOUNTER3_SELECT\0" /* 12921 */
+ "VGT_PERFCOUNTER0_SELECT1\0" /* 12945 */
+ "VGT_PERFCOUNTER1_SELECT1\0" /* 12970 */
+ "VGT_PERFCOUNTER_SEID_MASK\0" /* 12995 */
+ "PA_SU_PERFCOUNTER0_SELECT\0" /* 13021 */
+ "PA_SU_PERFCOUNTER0_SELECT1\0" /* 13047 */
+ "PA_SU_PERFCOUNTER1_SELECT\0" /* 13074 */
+ "PA_SU_PERFCOUNTER1_SELECT1\0" /* 13100 */
+ "PA_SU_PERFCOUNTER2_SELECT\0" /* 13127 */
+ "PA_SU_PERFCOUNTER3_SELECT\0" /* 13153 */
+ "PA_SC_PERFCOUNTER0_SELECT\0" /* 13179 */
+ "PA_SC_PERFCOUNTER0_SELECT1\0" /* 13205 */
+ "PA_SC_PERFCOUNTER1_SELECT\0" /* 13232 */
+ "PA_SC_PERFCOUNTER2_SELECT\0" /* 13258 */
+ "PA_SC_PERFCOUNTER3_SELECT\0" /* 13284 */
+ "PA_SC_PERFCOUNTER4_SELECT\0" /* 13310 */
+ "PA_SC_PERFCOUNTER5_SELECT\0" /* 13336 */
+ "PA_SC_PERFCOUNTER6_SELECT\0" /* 13362 */
+ "PA_SC_PERFCOUNTER7_SELECT\0" /* 13388 */
+ "SPI_PERFCOUNTER0_SELECT\0" /* 13414 */
+ "SPI_PERFCOUNTER1_SELECT\0" /* 13438 */
+ "SPI_PERFCOUNTER2_SELECT\0" /* 13462 */
+ "SPI_PERFCOUNTER3_SELECT\0" /* 13486 */
+ "SPI_PERFCOUNTER0_SELECT1\0" /* 13510 */
+ "SPI_PERFCOUNTER1_SELECT1\0" /* 13535 */
+ "SPI_PERFCOUNTER2_SELECT1\0" /* 13560 */
+ "SPI_PERFCOUNTER3_SELECT1\0" /* 13585 */
+ "SPI_PERFCOUNTER4_SELECT\0" /* 13610 */
+ "SPI_PERFCOUNTER5_SELECT\0" /* 13634 */
+ "SPI_PERFCOUNTER_BINS\0" /* 13658 */
+ "SQ_PERFCOUNTER0_SELECT\0" /* 13679 */
+ "SQ_PERFCOUNTER1_SELECT\0" /* 13702 */
+ "SQ_PERFCOUNTER2_SELECT\0" /* 13725 */
+ "SQ_PERFCOUNTER3_SELECT\0" /* 13748 */
+ "SQ_PERFCOUNTER4_SELECT\0" /* 13771 */
+ "SQ_PERFCOUNTER5_SELECT\0" /* 13794 */
+ "SQ_PERFCOUNTER6_SELECT\0" /* 13817 */
+ "SQ_PERFCOUNTER7_SELECT\0" /* 13840 */
+ "SQ_PERFCOUNTER8_SELECT\0" /* 13863 */
+ "SQ_PERFCOUNTER9_SELECT\0" /* 13886 */
+ "SQ_PERFCOUNTER10_SELECT\0" /* 13909 */
+ "SQ_PERFCOUNTER11_SELECT\0" /* 13933 */
+ "SQ_PERFCOUNTER12_SELECT\0" /* 13957 */
+ "SQ_PERFCOUNTER13_SELECT\0" /* 13981 */
+ "SQ_PERFCOUNTER14_SELECT\0" /* 14005 */
+ "SQ_PERFCOUNTER15_SELECT\0" /* 14029 */
+ "SQ_PERFCOUNTER_CTRL\0" /* 14053 */
+ "SQ_PERFCOUNTER_MASK\0" /* 14073 */
+ "SQ_PERFCOUNTER_CTRL2\0" /* 14093 */
+ "SX_PERFCOUNTER0_SELECT\0" /* 14114 */
+ "SX_PERFCOUNTER1_SELECT\0" /* 14137 */
+ "SX_PERFCOUNTER2_SELECT\0" /* 14160 */
+ "SX_PERFCOUNTER3_SELECT\0" /* 14183 */
+ "SX_PERFCOUNTER0_SELECT1\0" /* 14206 */
+ "SX_PERFCOUNTER1_SELECT1\0" /* 14230 */
+ "GDS_PERFCOUNTER0_SELECT\0" /* 14254 */
+ "GDS_PERFCOUNTER1_SELECT\0" /* 14278 */
+ "GDS_PERFCOUNTER2_SELECT\0" /* 14302 */
+ "GDS_PERFCOUNTER3_SELECT\0" /* 14326 */
+ "GDS_PERFCOUNTER0_SELECT1\0" /* 14350 */
+ "TA_PERFCOUNTER0_SELECT\0" /* 14375 */
+ "TA_PERFCOUNTER0_SELECT1\0" /* 14398 */
+ "TA_PERFCOUNTER1_SELECT\0" /* 14422 */
+ "TD_PERFCOUNTER0_SELECT\0" /* 14445 */
+ "TD_PERFCOUNTER0_SELECT1\0" /* 14468 */
+ "TD_PERFCOUNTER1_SELECT\0" /* 14492 */
+ "TCP_PERFCOUNTER0_SELECT\0" /* 14515 */
+ "TCP_PERFCOUNTER0_SELECT1\0" /* 14539 */
+ "TCP_PERFCOUNTER1_SELECT\0" /* 14564 */
+ "TCP_PERFCOUNTER1_SELECT1\0" /* 14588 */
+ "TCP_PERFCOUNTER2_SELECT\0" /* 14613 */
+ "TCP_PERFCOUNTER3_SELECT\0" /* 14637 */
+ "TCC_PERFCOUNTER0_SELECT\0" /* 14661 */
+ "TCC_PERFCOUNTER0_SELECT1\0" /* 14685 */
+ "TCC_PERFCOUNTER1_SELECT\0" /* 14710 */
+ "TCC_PERFCOUNTER1_SELECT1\0" /* 14734 */
+ "TCC_PERFCOUNTER2_SELECT\0" /* 14759 */
+ "TCC_PERFCOUNTER3_SELECT\0" /* 14783 */
+ "TCA_PERFCOUNTER0_SELECT\0" /* 14807 */
+ "TCA_PERFCOUNTER0_SELECT1\0" /* 14831 */
+ "TCA_PERFCOUNTER1_SELECT\0" /* 14856 */
+ "TCA_PERFCOUNTER1_SELECT1\0" /* 14880 */
+ "TCA_PERFCOUNTER2_SELECT\0" /* 14905 */
+ "TCA_PERFCOUNTER3_SELECT\0" /* 14929 */
+ "CB_PERFCOUNTER_FILTER\0" /* 14953 */
+ "CB_PERFCOUNTER0_SELECT\0" /* 14975 */
+ "CB_PERFCOUNTER0_SELECT1\0" /* 14998 */
+ "CB_PERFCOUNTER1_SELECT\0" /* 15022 */
+ "CB_PERFCOUNTER2_SELECT\0" /* 15045 */
+ "CB_PERFCOUNTER3_SELECT\0" /* 15068 */
+ "DB_PERFCOUNTER0_SELECT\0" /* 15091 */
+ "DB_PERFCOUNTER0_SELECT1\0" /* 15114 */
+ "DB_PERFCOUNTER1_SELECT\0" /* 15138 */
+ "DB_PERFCOUNTER1_SELECT1\0" /* 15161 */
+ "DB_PERFCOUNTER2_SELECT\0" /* 15185 */
+ "DB_PERFCOUNTER3_SELECT\0" /* 15208 */
+ "DB_RENDER_CONTROL\0" /* 15231 */
+ "DB_COUNT_CONTROL\0" /* 15249 */
+ "DB_DEPTH_VIEW\0" /* 15266 */
+ "DB_RENDER_OVERRIDE\0" /* 15280, 15296 */
+ "DB_RENDER_OVERRIDE2\0" /* 15299 */
+ "DB_HTILE_DATA_BASE\0" /* 15319 */
+ "DB_DEPTH_BOUNDS_MIN\0" /* 15338 */
+ "DB_DEPTH_BOUNDS_MAX\0" /* 15358 */
+ "DB_STENCIL_CLEAR\0" /* 15378, 15389 */
+ "DB_DEPTH_CLEAR\0" /* 15395 */
+ "PA_SC_SCREEN_SCISSOR_TL\0" /* 15410 */
+ "PA_SC_SCREEN_SCISSOR_BR\0" /* 15434 */
+ "DB_DEPTH_INFO\0" /* 15458 */
+ "DB_Z_INFO\0" /* 15472 */
+ "DB_STENCIL_INFO\0" /* 15482 */
+ "DB_Z_READ_BASE\0" /* 15498 */
+ "DB_STENCIL_READ_BASE\0" /* 15513 */
+ "DB_Z_WRITE_BASE\0" /* 15534 */
+ "DB_STENCIL_WRITE_BASE\0" /* 15550 */
+ "DB_DEPTH_SIZE\0" /* 15572 */
+ "DB_DEPTH_SLICE\0" /* 15586 */
+ "TA_BC_BASE_ADDR\0" /* 15601 */
+ "TA_BC_BASE_ADDR_HI\0" /* 15617 */
+ "COHER_DEST_BASE_HI_0\0" /* 15636 */
+ "COHER_DEST_BASE_HI_1\0" /* 15657 */
+ "COHER_DEST_BASE_HI_2\0" /* 15678 */
+ "COHER_DEST_BASE_HI_3\0" /* 15699 */
+ "COHER_DEST_BASE_2\0" /* 15720 */
+ "COHER_DEST_BASE_3\0" /* 15738 */
+ "PA_SC_WINDOW_OFFSET\0" /* 15756, 15767 */
+ "PA_SC_WINDOW_SCISSOR_TL\0" /* 15776 */
+ "PA_SC_WINDOW_SCISSOR_BR\0" /* 15800 */
+ "PA_SC_CLIPRECT_RULE\0" /* 15824 */
+ "PA_SC_CLIPRECT_0_TL\0" /* 15844 */
+ "PA_SC_CLIPRECT_0_BR\0" /* 15864 */
+ "PA_SC_CLIPRECT_1_TL\0" /* 15884 */
+ "PA_SC_CLIPRECT_1_BR\0" /* 15904 */
+ "PA_SC_CLIPRECT_2_TL\0" /* 15924 */
+ "PA_SC_CLIPRECT_2_BR\0" /* 15944 */
+ "PA_SC_CLIPRECT_3_TL\0" /* 15964 */
+ "PA_SC_CLIPRECT_3_BR\0" /* 15984 */
+ "PA_SC_EDGERULE\0" /* 16004 */
+ "PA_SU_HARDWARE_SCREEN_OFFSET\0" /* 16019 */
+ "CB_TARGET_MASK\0" /* 16048 */
+ "CB_SHADER_MASK\0" /* 16063 */
+ "PA_SC_GENERIC_SCISSOR_TL\0" /* 16078 */
+ "PA_SC_GENERIC_SCISSOR_BR\0" /* 16103 */
+ "COHER_DEST_BASE_0\0" /* 16128 */
+ "COHER_DEST_BASE_1\0" /* 16146 */
+ "PA_SC_VPORT_SCISSOR_0_TL\0" /* 16164 */
+ "PA_SC_VPORT_SCISSOR_0_BR\0" /* 16189 */
+ "PA_SC_VPORT_SCISSOR_1_TL\0" /* 16214 */
+ "PA_SC_VPORT_SCISSOR_1_BR\0" /* 16239 */
+ "PA_SC_VPORT_SCISSOR_2_TL\0" /* 16264 */
+ "PA_SC_VPORT_SCISSOR_2_BR\0" /* 16289 */
+ "PA_SC_VPORT_SCISSOR_3_TL\0" /* 16314 */
+ "PA_SC_VPORT_SCISSOR_3_BR\0" /* 16339 */
+ "PA_SC_VPORT_SCISSOR_4_TL\0" /* 16364 */
+ "PA_SC_VPORT_SCISSOR_4_BR\0" /* 16389 */
+ "PA_SC_VPORT_SCISSOR_5_TL\0" /* 16414 */
+ "PA_SC_VPORT_SCISSOR_5_BR\0" /* 16439 */
+ "PA_SC_VPORT_SCISSOR_6_TL\0" /* 16464 */
+ "PA_SC_VPORT_SCISSOR_6_BR\0" /* 16489 */
+ "PA_SC_VPORT_SCISSOR_7_TL\0" /* 16514 */
+ "PA_SC_VPORT_SCISSOR_7_BR\0" /* 16539 */
+ "PA_SC_VPORT_SCISSOR_8_TL\0" /* 16564 */
+ "PA_SC_VPORT_SCISSOR_8_BR\0" /* 16589 */
+ "PA_SC_VPORT_SCISSOR_9_TL\0" /* 16614 */
+ "PA_SC_VPORT_SCISSOR_9_BR\0" /* 16639 */
+ "PA_SC_VPORT_SCISSOR_10_TL\0" /* 16664 */
+ "PA_SC_VPORT_SCISSOR_10_BR\0" /* 16690 */
+ "PA_SC_VPORT_SCISSOR_11_TL\0" /* 16716 */
+ "PA_SC_VPORT_SCISSOR_11_BR\0" /* 16742 */
+ "PA_SC_VPORT_SCISSOR_12_TL\0" /* 16768 */
+ "PA_SC_VPORT_SCISSOR_12_BR\0" /* 16794 */
+ "PA_SC_VPORT_SCISSOR_13_TL\0" /* 16820 */
+ "PA_SC_VPORT_SCISSOR_13_BR\0" /* 16846 */
+ "PA_SC_VPORT_SCISSOR_14_TL\0" /* 16872 */
+ "PA_SC_VPORT_SCISSOR_14_BR\0" /* 16898 */
+ "PA_SC_VPORT_SCISSOR_15_TL\0" /* 16924 */
+ "PA_SC_VPORT_SCISSOR_15_BR\0" /* 16950 */
+ "PA_SC_VPORT_ZMIN_0\0" /* 16976 */
+ "PA_SC_VPORT_ZMAX_0\0" /* 16995 */
+ "PA_SC_VPORT_ZMIN_1\0" /* 17014 */
+ "PA_SC_VPORT_ZMAX_1\0" /* 17033 */
+ "PA_SC_VPORT_ZMIN_2\0" /* 17052 */
+ "PA_SC_VPORT_ZMAX_2\0" /* 17071 */
+ "PA_SC_VPORT_ZMIN_3\0" /* 17090 */
+ "PA_SC_VPORT_ZMAX_3\0" /* 17109 */
+ "PA_SC_VPORT_ZMIN_4\0" /* 17128 */
+ "PA_SC_VPORT_ZMAX_4\0" /* 17147 */
+ "PA_SC_VPORT_ZMIN_5\0" /* 17166 */
+ "PA_SC_VPORT_ZMAX_5\0" /* 17185 */
+ "PA_SC_VPORT_ZMIN_6\0" /* 17204 */
+ "PA_SC_VPORT_ZMAX_6\0" /* 17223 */
+ "PA_SC_VPORT_ZMIN_7\0" /* 17242 */
+ "PA_SC_VPORT_ZMAX_7\0" /* 17261 */
+ "PA_SC_VPORT_ZMIN_8\0" /* 17280 */
+ "PA_SC_VPORT_ZMAX_8\0" /* 17299 */
+ "PA_SC_VPORT_ZMIN_9\0" /* 17318 */
+ "PA_SC_VPORT_ZMAX_9\0" /* 17337 */
+ "PA_SC_VPORT_ZMIN_10\0" /* 17356 */
+ "PA_SC_VPORT_ZMAX_10\0" /* 17376 */
+ "PA_SC_VPORT_ZMIN_11\0" /* 17396 */
+ "PA_SC_VPORT_ZMAX_11\0" /* 17416 */
+ "PA_SC_VPORT_ZMIN_12\0" /* 17436 */
+ "PA_SC_VPORT_ZMAX_12\0" /* 17456 */
+ "PA_SC_VPORT_ZMIN_13\0" /* 17476 */
+ "PA_SC_VPORT_ZMAX_13\0" /* 17496 */
+ "PA_SC_VPORT_ZMIN_14\0" /* 17516 */
+ "PA_SC_VPORT_ZMAX_14\0" /* 17536 */
+ "PA_SC_VPORT_ZMIN_15\0" /* 17556 */
+ "PA_SC_VPORT_ZMAX_15\0" /* 17576 */
+ "PA_SC_RASTER_CONFIG\0" /* 17596 */
+ "PA_SC_RASTER_CONFIG_1\0" /* 17616 */
+ "PA_SC_SCREEN_EXTENT_CONTROL\0" /* 17638 */
+ "VGT_MAX_VTX_INDX\0" /* 17666 */
+ "VGT_MIN_VTX_INDX\0" /* 17683 */
+ "VGT_INDX_OFFSET\0" /* 17707, 17700 */
+ "VGT_MULTI_PRIM_IB_RESET_INDX\0" /* 17716 */
+ "CB_BLEND_RED\0" /* 17745 */
+ "CB_BLEND_GREEN\0" /* 17758 */
+ "CB_BLEND_BLUE\0" /* 17773 */
+ "CB_BLEND_ALPHA\0" /* 17787 */
+ "CB_DCC_CONTROL\0" /* 17802 */
+ "DB_STENCIL_CONTROL\0" /* 17817 */
+ "DB_STENCILREFMASK\0" /* 17836 */
+ "DB_STENCILREFMASK_BF\0" /* 17854 */
+ "PA_CL_VPORT_XSCALE\0" /* 17875 */
+ "PA_CL_VPORT_XOFFSET\0" /* 17894 */
+ "PA_CL_VPORT_YSCALE\0" /* 17914 */
+ "PA_CL_VPORT_YOFFSET\0" /* 17933 */
+ "PA_CL_VPORT_ZSCALE\0" /* 17953 */
+ "PA_CL_VPORT_ZOFFSET\0" /* 17972 */
+ "PA_CL_VPORT_XSCALE_1\0" /* 17992 */
+ "PA_CL_VPORT_XOFFSET_1\0" /* 18013 */
+ "PA_CL_VPORT_YSCALE_1\0" /* 18035 */
+ "PA_CL_VPORT_YOFFSET_1\0" /* 18056 */
+ "PA_CL_VPORT_ZSCALE_1\0" /* 18078 */
+ "PA_CL_VPORT_ZOFFSET_1\0" /* 18099 */
+ "PA_CL_VPORT_XSCALE_2\0" /* 18121 */
+ "PA_CL_VPORT_XOFFSET_2\0" /* 18142 */
+ "PA_CL_VPORT_YSCALE_2\0" /* 18164 */
+ "PA_CL_VPORT_YOFFSET_2\0" /* 18185 */
+ "PA_CL_VPORT_ZSCALE_2\0" /* 18207 */
+ "PA_CL_VPORT_ZOFFSET_2\0" /* 18228 */
+ "PA_CL_VPORT_XSCALE_3\0" /* 18250 */
+ "PA_CL_VPORT_XOFFSET_3\0" /* 18271 */
+ "PA_CL_VPORT_YSCALE_3\0" /* 18293 */
+ "PA_CL_VPORT_YOFFSET_3\0" /* 18314 */
+ "PA_CL_VPORT_ZSCALE_3\0" /* 18336 */
+ "PA_CL_VPORT_ZOFFSET_3\0" /* 18357 */
+ "PA_CL_VPORT_XSCALE_4\0" /* 18379 */
+ "PA_CL_VPORT_XOFFSET_4\0" /* 18400 */
+ "PA_CL_VPORT_YSCALE_4\0" /* 18422 */
+ "PA_CL_VPORT_YOFFSET_4\0" /* 18443 */
+ "PA_CL_VPORT_ZSCALE_4\0" /* 18465 */
+ "PA_CL_VPORT_ZOFFSET_4\0" /* 18486 */
+ "PA_CL_VPORT_XSCALE_5\0" /* 18508 */
+ "PA_CL_VPORT_XOFFSET_5\0" /* 18529 */
+ "PA_CL_VPORT_YSCALE_5\0" /* 18551 */
+ "PA_CL_VPORT_YOFFSET_5\0" /* 18572 */
+ "PA_CL_VPORT_ZSCALE_5\0" /* 18594 */
+ "PA_CL_VPORT_ZOFFSET_5\0" /* 18615 */
+ "PA_CL_VPORT_XSCALE_6\0" /* 18637 */
+ "PA_CL_VPORT_XOFFSET_6\0" /* 18658 */
+ "PA_CL_VPORT_YSCALE_6\0" /* 18680 */
+ "PA_CL_VPORT_YOFFSET_6\0" /* 18701 */
+ "PA_CL_VPORT_ZSCALE_6\0" /* 18723 */
+ "PA_CL_VPORT_ZOFFSET_6\0" /* 18744 */
+ "PA_CL_VPORT_XSCALE_7\0" /* 18766 */
+ "PA_CL_VPORT_XOFFSET_7\0" /* 18787 */
+ "PA_CL_VPORT_YSCALE_7\0" /* 18809 */
+ "PA_CL_VPORT_YOFFSET_7\0" /* 18830 */
+ "PA_CL_VPORT_ZSCALE_7\0" /* 18852 */
+ "PA_CL_VPORT_ZOFFSET_7\0" /* 18873 */
+ "PA_CL_VPORT_XSCALE_8\0" /* 18895 */
+ "PA_CL_VPORT_XOFFSET_8\0" /* 18916 */
+ "PA_CL_VPORT_YSCALE_8\0" /* 18938 */
+ "PA_CL_VPORT_YOFFSET_8\0" /* 18959 */
+ "PA_CL_VPORT_ZSCALE_8\0" /* 18981 */
+ "PA_CL_VPORT_ZOFFSET_8\0" /* 19002 */
+ "PA_CL_VPORT_XSCALE_9\0" /* 19024 */
+ "PA_CL_VPORT_XOFFSET_9\0" /* 19045 */
+ "PA_CL_VPORT_YSCALE_9\0" /* 19067 */
+ "PA_CL_VPORT_YOFFSET_9\0" /* 19088 */
+ "PA_CL_VPORT_ZSCALE_9\0" /* 19110 */
+ "PA_CL_VPORT_ZOFFSET_9\0" /* 19131 */
+ "PA_CL_VPORT_XSCALE_10\0" /* 19153 */
+ "PA_CL_VPORT_XOFFSET_10\0" /* 19175 */
+ "PA_CL_VPORT_YSCALE_10\0" /* 19198 */
+ "PA_CL_VPORT_YOFFSET_10\0" /* 19220 */
+ "PA_CL_VPORT_ZSCALE_10\0" /* 19243 */
+ "PA_CL_VPORT_ZOFFSET_10\0" /* 19265 */
+ "PA_CL_VPORT_XSCALE_11\0" /* 19288 */
+ "PA_CL_VPORT_XOFFSET_11\0" /* 19310 */
+ "PA_CL_VPORT_YSCALE_11\0" /* 19333 */
+ "PA_CL_VPORT_YOFFSET_11\0" /* 19355 */
+ "PA_CL_VPORT_ZSCALE_11\0" /* 19378 */
+ "PA_CL_VPORT_ZOFFSET_11\0" /* 19400 */
+ "PA_CL_VPORT_XSCALE_12\0" /* 19423 */
+ "PA_CL_VPORT_XOFFSET_12\0" /* 19445 */
+ "PA_CL_VPORT_YSCALE_12\0" /* 19468 */
+ "PA_CL_VPORT_YOFFSET_12\0" /* 19490 */
+ "PA_CL_VPORT_ZSCALE_12\0" /* 19513 */
+ "PA_CL_VPORT_ZOFFSET_12\0" /* 19535 */
+ "PA_CL_VPORT_XSCALE_13\0" /* 19558 */
+ "PA_CL_VPORT_XOFFSET_13\0" /* 19580 */
+ "PA_CL_VPORT_YSCALE_13\0" /* 19603 */
+ "PA_CL_VPORT_YOFFSET_13\0" /* 19625 */
+ "PA_CL_VPORT_ZSCALE_13\0" /* 19648 */
+ "PA_CL_VPORT_ZOFFSET_13\0" /* 19670 */
+ "PA_CL_VPORT_XSCALE_14\0" /* 19693 */
+ "PA_CL_VPORT_XOFFSET_14\0" /* 19715 */
+ "PA_CL_VPORT_YSCALE_14\0" /* 19738 */
+ "PA_CL_VPORT_YOFFSET_14\0" /* 19760 */
+ "PA_CL_VPORT_ZSCALE_14\0" /* 19783 */
+ "PA_CL_VPORT_ZOFFSET_14\0" /* 19805 */
+ "PA_CL_VPORT_XSCALE_15\0" /* 19828 */
+ "PA_CL_VPORT_XOFFSET_15\0" /* 19850 */
+ "PA_CL_VPORT_YSCALE_15\0" /* 19873 */
+ "PA_CL_VPORT_YOFFSET_15\0" /* 19895 */
+ "PA_CL_VPORT_ZSCALE_15\0" /* 19918 */
+ "PA_CL_VPORT_ZOFFSET_15\0" /* 19940 */
+ "PA_CL_UCP_0_X\0" /* 19963 */
+ "PA_CL_UCP_0_Y\0" /* 19977 */
+ "PA_CL_UCP_0_Z\0" /* 19991 */
+ "PA_CL_UCP_0_W\0" /* 20005 */
+ "PA_CL_UCP_1_X\0" /* 20019 */
+ "PA_CL_UCP_1_Y\0" /* 20033 */
+ "PA_CL_UCP_1_Z\0" /* 20047 */
+ "PA_CL_UCP_1_W\0" /* 20061 */
+ "PA_CL_UCP_2_X\0" /* 20075 */
+ "PA_CL_UCP_2_Y\0" /* 20089 */
+ "PA_CL_UCP_2_Z\0" /* 20103 */
+ "PA_CL_UCP_2_W\0" /* 20117 */
+ "PA_CL_UCP_3_X\0" /* 20131 */
+ "PA_CL_UCP_3_Y\0" /* 20145 */
+ "PA_CL_UCP_3_Z\0" /* 20159 */
+ "PA_CL_UCP_3_W\0" /* 20173 */
+ "PA_CL_UCP_4_X\0" /* 20187 */
+ "PA_CL_UCP_4_Y\0" /* 20201 */
+ "PA_CL_UCP_4_Z\0" /* 20215 */
+ "PA_CL_UCP_4_W\0" /* 20229 */
+ "PA_CL_UCP_5_X\0" /* 20243 */
+ "PA_CL_UCP_5_Y\0" /* 20257 */
+ "PA_CL_UCP_5_Z\0" /* 20271 */
+ "PA_CL_UCP_5_W\0" /* 20285 */
+ "SPI_PS_INPUT_CNTL_0\0" /* 20299 */
+ "SPI_PS_INPUT_CNTL_1\0" /* 20319 */
+ "SPI_PS_INPUT_CNTL_2\0" /* 20339 */
+ "SPI_PS_INPUT_CNTL_3\0" /* 20359 */
+ "SPI_PS_INPUT_CNTL_4\0" /* 20379 */
+ "SPI_PS_INPUT_CNTL_5\0" /* 20399 */
+ "SPI_PS_INPUT_CNTL_6\0" /* 20419 */
+ "SPI_PS_INPUT_CNTL_7\0" /* 20439 */
+ "SPI_PS_INPUT_CNTL_8\0" /* 20459 */
+ "SPI_PS_INPUT_CNTL_9\0" /* 20479 */
+ "SPI_PS_INPUT_CNTL_10\0" /* 20499 */
+ "SPI_PS_INPUT_CNTL_11\0" /* 20520 */
+ "SPI_PS_INPUT_CNTL_12\0" /* 20541 */
+ "SPI_PS_INPUT_CNTL_13\0" /* 20562 */
+ "SPI_PS_INPUT_CNTL_14\0" /* 20583 */
+ "SPI_PS_INPUT_CNTL_15\0" /* 20604 */
+ "SPI_PS_INPUT_CNTL_16\0" /* 20625 */
+ "SPI_PS_INPUT_CNTL_17\0" /* 20646 */
+ "SPI_PS_INPUT_CNTL_18\0" /* 20667 */
+ "SPI_PS_INPUT_CNTL_19\0" /* 20688 */
+ "SPI_PS_INPUT_CNTL_20\0" /* 20709 */
+ "SPI_PS_INPUT_CNTL_21\0" /* 20730 */
+ "SPI_PS_INPUT_CNTL_22\0" /* 20751 */
+ "SPI_PS_INPUT_CNTL_23\0" /* 20772 */
+ "SPI_PS_INPUT_CNTL_24\0" /* 20793 */
+ "SPI_PS_INPUT_CNTL_25\0" /* 20814 */
+ "SPI_PS_INPUT_CNTL_26\0" /* 20835 */
+ "SPI_PS_INPUT_CNTL_27\0" /* 20856 */
+ "SPI_PS_INPUT_CNTL_28\0" /* 20877 */
+ "SPI_PS_INPUT_CNTL_29\0" /* 20898 */
+ "SPI_PS_INPUT_CNTL_30\0" /* 20919 */
+ "SPI_PS_INPUT_CNTL_31\0" /* 20940 */
+ "SPI_VS_OUT_CONFIG\0" /* 20961 */
+ "SPI_PS_INPUT_ENA\0" /* 20979 */
+ "SPI_PS_INPUT_ADDR\0" /* 20996 */
+ "SPI_INTERP_CONTROL_0\0" /* 21014 */
+ "SPI_PS_IN_CONTROL\0" /* 21035 */
+ "SPI_BARYC_CNTL\0" /* 21053 */
+ "SPI_TMPRING_SIZE\0" /* 21068 */
+ "SPI_WAVE_MGMT_1\0" /* 21085 */
+ "SPI_WAVE_MGMT_2\0" /* 21101 */
+ "SPI_SHADER_POS_FORMAT\0" /* 21132, 21117 */
+ "SPI_SHADER_Z_FORMAT\0" /* 21139 */
+ "SPI_SHADER_COL_FORMAT\0" /* 21159 */
+ "SX_PS_DOWNCONVERT\0" /* 21181 */
+ "SX_BLEND_OPT_EPSILON\0" /* 21199 */
+ "SX_BLEND_OPT_CONTROL\0" /* 21220 */
+ "SX_MRT0_BLEND_OPT\0" /* 21241 */
+ "SX_MRT1_BLEND_OPT\0" /* 21259 */
+ "SX_MRT2_BLEND_OPT\0" /* 21277 */
+ "SX_MRT3_BLEND_OPT\0" /* 21295 */
+ "SX_MRT4_BLEND_OPT\0" /* 21313 */
+ "SX_MRT5_BLEND_OPT\0" /* 21331 */
+ "SX_MRT6_BLEND_OPT\0" /* 21349 */
+ "SX_MRT7_BLEND_OPT\0" /* 21367 */
+ "CB_BLEND0_CONTROL\0" /* 21385 */
+ "CB_BLEND1_CONTROL\0" /* 21403 */
+ "CB_BLEND2_CONTROL\0" /* 21421 */
+ "CB_BLEND3_CONTROL\0" /* 21439 */
+ "CB_BLEND4_CONTROL\0" /* 21457 */
+ "CB_BLEND5_CONTROL\0" /* 21475 */
+ "CB_BLEND6_CONTROL\0" /* 21493 */
+ "CB_BLEND7_CONTROL\0" /* 21511 */
+ "CS_COPY_STATE\0" /* 21529 */
+ "PA_CL_POINT_X_RAD\0" /* 21543 */
+ "PA_CL_POINT_Y_RAD\0" /* 21561 */
+ "PA_CL_POINT_SIZE\0" /* 21579 */
+ "PA_CL_POINT_CULL_RAD\0" /* 21596 */
+ "VGT_DMA_BASE_HI\0" /* 21617 */
+ "VGT_DMA_BASE\0" /* 21633 */
+ "VGT_DRAW_INITIATOR\0" /* 21646 */
+ "VGT_IMMED_DATA\0" /* 21665 */
+ "VGT_EVENT_ADDRESS_REG\0" /* 21680 */
+ "DB_DEPTH_CONTROL\0" /* 21702 */
+ "DB_EQAA\0" /* 21719 */
+ "CB_COLOR_CONTROL\0" /* 21727 */
+ "DB_SHADER_CONTROL\0" /* 21744 */
+ "PA_CL_CLIP_CNTL\0" /* 21762 */
+ "PA_SU_SC_MODE_CNTL\0" /* 21778 */
+ "PA_CL_VTE_CNTL\0" /* 21797 */
+ "PA_CL_VS_OUT_CNTL\0" /* 21812 */
+ "PA_CL_NANINF_CNTL\0" /* 21830 */
+ "PA_SU_LINE_STIPPLE_CNTL\0" /* 21848 */
+ "PA_SU_LINE_STIPPLE_SCALE\0" /* 21872 */
+ "PA_SU_PRIM_FILTER_CNTL\0" /* 21897 */
+ "PA_SU_SMALL_PRIM_FILTER_CNTL\0" /* 21920 */
+ "PA_SU_POINT_SIZE\0" /* 21949 */
+ "PA_SU_POINT_MINMAX\0" /* 21966 */
+ "PA_SU_LINE_CNTL\0" /* 21985 */
+ "PA_SC_LINE_STIPPLE\0" /* 22001 */
+ "VGT_OUTPUT_PATH_CNTL\0" /* 22020 */
+ "VGT_HOS_CNTL\0" /* 22041 */
+ "VGT_HOS_MAX_TESS_LEVEL\0" /* 22054 */
+ "VGT_HOS_MIN_TESS_LEVEL\0" /* 22077 */
+ "VGT_HOS_REUSE_DEPTH\0" /* 22108, 22114, 22100 */
+ "VGT_GROUP_PRIM_TYPE\0" /* 22120, 22130 */
+ "VGT_GROUP_FIRST_DECR\0" /* 22156, 22140, 22150 */
+ "VGT_GROUP_DECR\0" /* 22161 */
+ "VGT_GROUP_VECT_0_CNTL\0" /* 22176 */
+ "VGT_GROUP_VECT_1_CNTL\0" /* 22198 */
+ "VGT_GROUP_VECT_0_FMT_CNTL\0" /* 22220 */
+ "VGT_GROUP_VECT_1_FMT_CNTL\0" /* 22246 */
+ "VGT_GS_MODE\0" /* 22272, 22279 */
+ "VGT_GS_ONCHIP_CNTL\0" /* 22284 */
+ "PA_SC_MODE_CNTL_0\0" /* 22303 */
+ "PA_SC_MODE_CNTL_1\0" /* 22321 */
+ "VGT_ENHANCE\0" /* 22339 */
+ "VGT_GS_PER_ES\0" /* 22355, 22351 */
+ "VGT_ES_PER_GS\0" /* 22369, 22365 */
+ "VGT_GS_PER_VS\0" /* 22379, 22383 */
+ "VGT_GSVS_RING_OFFSET_1\0" /* 22393 */
+ "VGT_GSVS_RING_OFFSET_2\0" /* 22416 */
+ "VGT_GSVS_RING_OFFSET_3\0" /* 22439 */
+ "VGT_GS_OUT_PRIM_TYPE\0" /* 22462 */
+ "IA_ENHANCE\0" /* 22483 */
+ "VGT_DMA_SIZE\0" /* 22494 */
+ "VGT_DMA_MAX_SIZE\0" /* 22507, 22515 */
+ "VGT_DMA_INDEX_TYPE\0" /* 22524 */
+ "WD_ENHANCE\0" /* 22543 */
+ "VGT_PRIMITIVEID_EN\0" /* 22554, 22558 */
+ "VGT_DMA_NUM_INSTANCES\0" /* 22573 */
+ "VGT_PRIMITIVEID_RESET\0" /* 22595 */
+ "VGT_EVENT_INITIATOR\0" /* 22617 */
+ "VGT_MULTI_PRIM_IB_RESET_EN\0" /* 22637, 22655 */
+ "VGT_INSTANCE_STEP_RATE_0\0" /* 22664 */
+ "VGT_INSTANCE_STEP_RATE_1\0" /* 22689 */
+ "IA_MULTI_VGT_PARAM\0" /* 22714 */
+ "VGT_ESGS_RING_ITEMSIZE\0" /* 22747, 22733 */
+ "VGT_GSVS_RING_ITEMSIZE\0" /* 22756 */
+ "VGT_REUSE_OFF\0" /* 22779, 22783 */
+ "VGT_VTX_CNT_EN\0" /* 22793, 22797 */
+ "DB_HTILE_SURFACE\0" /* 22808, 22820 */
+ "DB_SRESULTS_COMPARE_STATE0\0" /* 22825 */
+ "DB_SRESULTS_COMPARE_STATE1\0" /* 22852 */
+ "DB_PRELOAD_CONTROL\0" /* 22879 */
+ "VGT_STRMOUT_BUFFER_SIZE_0\0" /* 22898 */
+ "VGT_STRMOUT_VTX_STRIDE_0\0" /* 22924 */
+ "VGT_STRMOUT_BUFFER_OFFSET_0\0" /* 22949 */
+ "VGT_STRMOUT_BUFFER_SIZE_1\0" /* 22977 */
+ "VGT_STRMOUT_VTX_STRIDE_1\0" /* 23003 */
+ "VGT_STRMOUT_BUFFER_OFFSET_1\0" /* 23028 */
+ "VGT_STRMOUT_BUFFER_SIZE_2\0" /* 23056 */
+ "VGT_STRMOUT_VTX_STRIDE_2\0" /* 23082 */
+ "VGT_STRMOUT_BUFFER_OFFSET_2\0" /* 23107 */
+ "VGT_STRMOUT_BUFFER_SIZE_3\0" /* 23135 */
+ "VGT_STRMOUT_VTX_STRIDE_3\0" /* 23161 */
+ "VGT_STRMOUT_BUFFER_OFFSET_3\0" /* 23186 */
+ "VGT_STRMOUT_DRAW_OPAQUE_OFFSET\0" /* 23214 */
+ "VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE\0" /* 23245 */
+ "VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE\0" /* 23288, 23312, 23319 */
+ "VGT_GS_MAX_VERT_OUT\0" /* 23333, 23326 */
+ "VGT_TESS_DISTRIBUTION\0" /* 23346 */
+ "VGT_SHADER_STAGES_EN\0" /* 23368, 23383 */
+ "VGT_LS_HS_CONFIG\0" /* 23389 */
+ "VGT_GS_VERT_ITEMSIZE\0" /* 23406 */
+ "VGT_GS_VERT_ITEMSIZE_1\0" /* 23427 */
+ "VGT_GS_VERT_ITEMSIZE_2\0" /* 23450 */
+ "VGT_GS_VERT_ITEMSIZE_3\0" /* 23473 */
+ "VGT_TF_PARAM\0" /* 23496 */
+ "DB_ALPHA_TO_MASK\0" /* 23509 */
+ "VGT_DISPATCH_DRAW_INDEX\0" /* 23526 */
+ "PA_SU_POLY_OFFSET_DB_FMT_CNTL\0" /* 23550 */
+ "PA_SU_POLY_OFFSET_CLAMP\0" /* 23580 */
+ "PA_SU_POLY_OFFSET_FRONT_SCALE\0" /* 23604 */
+ "PA_SU_POLY_OFFSET_FRONT_OFFSET\0" /* 23634 */
+ "PA_SU_POLY_OFFSET_BACK_SCALE\0" /* 23665 */
+ "PA_SU_POLY_OFFSET_BACK_OFFSET\0" /* 23694 */
+ "VGT_GS_INSTANCE_CNT\0" /* 23740, 23724 */
+ "VGT_STRMOUT_CONFIG\0" /* 23744 */
+ "VGT_STRMOUT_BUFFER_CONFIG\0" /* 23763 */
+ "PA_SC_CENTROID_PRIORITY_0\0" /* 23789 */
+ "PA_SC_CENTROID_PRIORITY_1\0" /* 23815 */
+ "PA_SC_LINE_CNTL\0" /* 23841 */
+ "PA_SC_AA_CONFIG\0" /* 23857 */
+ "PA_SU_VTX_CNTL\0" /* 23873 */
+ "PA_CL_GB_VERT_CLIP_ADJ\0" /* 23888 */
+ "PA_CL_GB_VERT_DISC_ADJ\0" /* 23911 */
+ "PA_CL_GB_HORZ_CLIP_ADJ\0" /* 23934 */
+ "PA_CL_GB_HORZ_DISC_ADJ\0" /* 23957 */
+ "PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0\0" /* 23980 */
+ "PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1\0" /* 24014 */
+ "PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2\0" /* 24048 */
+ "PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3\0" /* 24082 */
+ "PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0\0" /* 24116 */
+ "PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1\0" /* 24150 */
+ "PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2\0" /* 24184 */
+ "PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3\0" /* 24218 */
+ "PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0\0" /* 24252 */
+ "PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1\0" /* 24286 */
+ "PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2\0" /* 24320 */
+ "PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3\0" /* 24354 */
+ "PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0\0" /* 24388 */
+ "PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1\0" /* 24422 */
+ "PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2\0" /* 24456 */
+ "PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3\0" /* 24490 */
+ "PA_SC_AA_MASK_X0Y0_X1Y0\0" /* 24524 */
+ "PA_SC_AA_MASK_X0Y1_X1Y1\0" /* 24548 */
+ "PA_SC_SHADER_CONTROL\0" /* 24572 */
+ "VGT_VERTEX_REUSE_BLOCK_CNTL\0" /* 24593 */
+ "VGT_OUT_DEALLOC_CNTL\0" /* 24621 */
+ "CB_COLOR0_BASE\0" /* 24642 */
+ "CB_COLOR0_PITCH\0" /* 24657 */
+ "CB_COLOR0_SLICE\0" /* 24673 */
+ "CB_COLOR0_VIEW\0" /* 24689 */
+ "CB_COLOR0_INFO\0" /* 24704 */
+ "CB_COLOR0_ATTRIB\0" /* 24719 */
+ "CB_COLOR0_DCC_CONTROL\0" /* 24736 */
+ "CB_COLOR0_CMASK\0" /* 24758 */
+ "CB_COLOR0_CMASK_SLICE\0" /* 24774 */
+ "CB_COLOR0_FMASK\0" /* 24796 */
+ "CB_COLOR0_FMASK_SLICE\0" /* 24812 */
+ "CB_COLOR0_CLEAR_WORD0\0" /* 24834 */
+ "CB_COLOR0_CLEAR_WORD1\0" /* 24856 */
+ "CB_COLOR0_DCC_BASE\0" /* 24878 */
+ "CB_COLOR1_BASE\0" /* 24897 */
+ "CB_COLOR1_PITCH\0" /* 24912 */
+ "CB_COLOR1_SLICE\0" /* 24928 */
+ "CB_COLOR1_VIEW\0" /* 24944 */
+ "CB_COLOR1_INFO\0" /* 24959 */
+ "CB_COLOR1_ATTRIB\0" /* 24974 */
+ "CB_COLOR1_DCC_CONTROL\0" /* 24991 */
+ "CB_COLOR1_CMASK\0" /* 25013 */
+ "CB_COLOR1_CMASK_SLICE\0" /* 25029 */
+ "CB_COLOR1_FMASK\0" /* 25051 */
+ "CB_COLOR1_FMASK_SLICE\0" /* 25067 */
+ "CB_COLOR1_CLEAR_WORD0\0" /* 25089 */
+ "CB_COLOR1_CLEAR_WORD1\0" /* 25111 */
+ "CB_COLOR1_DCC_BASE\0" /* 25133 */
+ "CB_COLOR2_BASE\0" /* 25152 */
+ "CB_COLOR2_PITCH\0" /* 25167 */
+ "CB_COLOR2_SLICE\0" /* 25183 */
+ "CB_COLOR2_VIEW\0" /* 25199 */
+ "CB_COLOR2_INFO\0" /* 25214 */
+ "CB_COLOR2_ATTRIB\0" /* 25229 */
+ "CB_COLOR2_DCC_CONTROL\0" /* 25246 */
+ "CB_COLOR2_CMASK\0" /* 25268 */
+ "CB_COLOR2_CMASK_SLICE\0" /* 25284 */
+ "CB_COLOR2_FMASK\0" /* 25306 */
+ "CB_COLOR2_FMASK_SLICE\0" /* 25322 */
+ "CB_COLOR2_CLEAR_WORD0\0" /* 25344 */
+ "CB_COLOR2_CLEAR_WORD1\0" /* 25366 */
+ "CB_COLOR2_DCC_BASE\0" /* 25388 */
+ "CB_COLOR3_BASE\0" /* 25407 */
+ "CB_COLOR3_PITCH\0" /* 25422 */
+ "CB_COLOR3_SLICE\0" /* 25438 */
+ "CB_COLOR3_VIEW\0" /* 25454 */
+ "CB_COLOR3_INFO\0" /* 25469 */
+ "CB_COLOR3_ATTRIB\0" /* 25484 */
+ "CB_COLOR3_DCC_CONTROL\0" /* 25501 */
+ "CB_COLOR3_CMASK\0" /* 25523 */
+ "CB_COLOR3_CMASK_SLICE\0" /* 25539 */
+ "CB_COLOR3_FMASK\0" /* 25561 */
+ "CB_COLOR3_FMASK_SLICE\0" /* 25577 */
+ "CB_COLOR3_CLEAR_WORD0\0" /* 25599 */
+ "CB_COLOR3_CLEAR_WORD1\0" /* 25621 */
+ "CB_COLOR3_DCC_BASE\0" /* 25643 */
+ "CB_COLOR4_BASE\0" /* 25662 */
+ "CB_COLOR4_PITCH\0" /* 25677 */
+ "CB_COLOR4_SLICE\0" /* 25693 */
+ "CB_COLOR4_VIEW\0" /* 25709 */
+ "CB_COLOR4_INFO\0" /* 25724 */
+ "CB_COLOR4_ATTRIB\0" /* 25739 */
+ "CB_COLOR4_DCC_CONTROL\0" /* 25756 */
+ "CB_COLOR4_CMASK\0" /* 25778 */
+ "CB_COLOR4_CMASK_SLICE\0" /* 25794 */
+ "CB_COLOR4_FMASK\0" /* 25816 */
+ "CB_COLOR4_FMASK_SLICE\0" /* 25832 */
+ "CB_COLOR4_CLEAR_WORD0\0" /* 25854 */
+ "CB_COLOR4_CLEAR_WORD1\0" /* 25876 */
+ "CB_COLOR4_DCC_BASE\0" /* 25898 */
+ "CB_COLOR5_BASE\0" /* 25917 */
+ "CB_COLOR5_PITCH\0" /* 25932 */
+ "CB_COLOR5_SLICE\0" /* 25948 */
+ "CB_COLOR5_VIEW\0" /* 25964 */
+ "CB_COLOR5_INFO\0" /* 25979 */
+ "CB_COLOR5_ATTRIB\0" /* 25994 */
+ "CB_COLOR5_DCC_CONTROL\0" /* 26011 */
+ "CB_COLOR5_CMASK\0" /* 26033 */
+ "CB_COLOR5_CMASK_SLICE\0" /* 26049 */
+ "CB_COLOR5_FMASK\0" /* 26071 */
+ "CB_COLOR5_FMASK_SLICE\0" /* 26087 */
+ "CB_COLOR5_CLEAR_WORD0\0" /* 26109 */
+ "CB_COLOR5_CLEAR_WORD1\0" /* 26131 */
+ "CB_COLOR5_DCC_BASE\0" /* 26153 */
+ "CB_COLOR6_BASE\0" /* 26172 */
+ "CB_COLOR6_PITCH\0" /* 26187 */
+ "CB_COLOR6_SLICE\0" /* 26203 */
+ "CB_COLOR6_VIEW\0" /* 26219 */
+ "CB_COLOR6_INFO\0" /* 26234 */
+ "CB_COLOR6_ATTRIB\0" /* 26249 */
+ "CB_COLOR6_DCC_CONTROL\0" /* 26266 */
+ "CB_COLOR6_CMASK\0" /* 26288 */
+ "CB_COLOR6_CMASK_SLICE\0" /* 26304 */
+ "CB_COLOR6_FMASK\0" /* 26326 */
+ "CB_COLOR6_FMASK_SLICE\0" /* 26342 */
+ "CB_COLOR6_CLEAR_WORD0\0" /* 26364 */
+ "CB_COLOR6_CLEAR_WORD1\0" /* 26386 */
+ "CB_COLOR6_DCC_BASE\0" /* 26408 */
+ "CB_COLOR7_BASE\0" /* 26427 */
+ "CB_COLOR7_PITCH\0" /* 26442 */
+ "CB_COLOR7_SLICE\0" /* 26458 */
+ "CB_COLOR7_VIEW\0" /* 26474 */
+ "CB_COLOR7_INFO\0" /* 26489 */
+ "CB_COLOR7_ATTRIB\0" /* 26504 */
+ "CB_COLOR7_DCC_CONTROL\0" /* 26521 */
+ "CB_COLOR7_CMASK\0" /* 26543 */
+ "CB_COLOR7_CMASK_SLICE\0" /* 26559 */
+ "CB_COLOR7_FMASK\0" /* 26581 */
+ "CB_COLOR7_FMASK_SLICE\0" /* 26597 */
+ "CB_COLOR7_CLEAR_WORD0\0" /* 26619 */
+ "CB_COLOR7_CLEAR_WORD1\0" /* 26641 */
+ "CB_COLOR7_DCC_BASE\0" /* 26663 */
+ "VGT_TF_MEMORY_BASE_HI\0" /* 26682 */
+ "WD_POS_BUF_BASE\0" /* 26704 */
+ "WD_POS_BUF_BASE_HI\0" /* 26720 */
+ "WD_CNTL_SB_BUF_BASE\0" /* 26739 */
+ "WD_CNTL_SB_BUF_BASE_HI\0" /* 26759 */
+ "WD_INDEX_BUF_BASE\0" /* 26782 */
+ "WD_INDEX_BUF_BASE_HI\0" /* 26800 */
+ "VGT_OBJECT_ID\0" /* 26821 */
+ "VGT_INSTANCE_BASE_ID\0" /* 26835 */
+ "SQC_WRITEBACK\0" /* 26856, 26860 */
+ "TA_GRAD_ADJ_UCONFIG\0" /* 26870 */
+ "SPI_CONFIG_CNTL_2\0" /* 26890 */
+ "SPI_SHADER_USER_DATA_PS_16\0" /* 26908 */
+ "SPI_SHADER_USER_DATA_PS_17\0" /* 26935 */
+ "SPI_SHADER_USER_DATA_PS_18\0" /* 26962 */
+ "SPI_SHADER_USER_DATA_PS_19\0" /* 26989 */
+ "SPI_SHADER_USER_DATA_PS_20\0" /* 27016 */
+ "SPI_SHADER_USER_DATA_PS_21\0" /* 27043 */
+ "SPI_SHADER_USER_DATA_PS_22\0" /* 27070 */
+ "SPI_SHADER_USER_DATA_PS_23\0" /* 27097 */
+ "SPI_SHADER_USER_DATA_PS_24\0" /* 27124 */
+ "SPI_SHADER_USER_DATA_PS_25\0" /* 27151 */
+ "SPI_SHADER_USER_DATA_PS_26\0" /* 27178 */
+ "SPI_SHADER_USER_DATA_PS_27\0" /* 27205 */
+ "SPI_SHADER_USER_DATA_PS_28\0" /* 27232 */
+ "SPI_SHADER_USER_DATA_PS_29\0" /* 27259 */
+ "SPI_SHADER_USER_DATA_PS_30\0" /* 27286 */
+ "SPI_SHADER_USER_DATA_PS_31\0" /* 27313 */
+ "SPI_SHADER_USER_DATA_VS_16\0" /* 27340 */
+ "SPI_SHADER_USER_DATA_VS_17\0" /* 27367 */
+ "SPI_SHADER_USER_DATA_VS_18\0" /* 27394 */
+ "SPI_SHADER_USER_DATA_VS_19\0" /* 27421 */
+ "SPI_SHADER_USER_DATA_VS_20\0" /* 27448 */
+ "SPI_SHADER_USER_DATA_VS_21\0" /* 27475 */
+ "SPI_SHADER_USER_DATA_VS_22\0" /* 27502 */
+ "SPI_SHADER_USER_DATA_VS_23\0" /* 27529 */
+ "SPI_SHADER_USER_DATA_VS_24\0" /* 27556 */
+ "SPI_SHADER_USER_DATA_VS_25\0" /* 27583 */
+ "SPI_SHADER_USER_DATA_VS_26\0" /* 27610 */
+ "SPI_SHADER_USER_DATA_VS_27\0" /* 27637 */
+ "SPI_SHADER_USER_DATA_VS_28\0" /* 27664 */
+ "SPI_SHADER_USER_DATA_VS_29\0" /* 27691 */
+ "SPI_SHADER_USER_DATA_VS_30\0" /* 27718 */
+ "SPI_SHADER_USER_DATA_VS_31\0" /* 27745 */
+ "SPI_SHADER_PGM_RSRC2_GS_VS\0" /* 27772 */
+ "SPI_SHADER_PGM_RSRC4_GS\0" /* 27799 */
+ "SPI_SHADER_USER_DATA_ADDR_LO_GS\0" /* 27823 */
+ "SPI_SHADER_USER_DATA_ADDR_HI_GS\0" /* 27855 */
+ "SPI_SHADER_USER_DATA_ES_16\0" /* 27887 */
+ "SPI_SHADER_USER_DATA_ES_17\0" /* 27914 */
+ "SPI_SHADER_USER_DATA_ES_18\0" /* 27941 */
+ "SPI_SHADER_USER_DATA_ES_19\0" /* 27968 */
+ "SPI_SHADER_USER_DATA_ES_20\0" /* 27995 */
+ "SPI_SHADER_USER_DATA_ES_21\0" /* 28022 */
+ "SPI_SHADER_USER_DATA_ES_22\0" /* 28049 */
+ "SPI_SHADER_USER_DATA_ES_23\0" /* 28076 */
+ "SPI_SHADER_USER_DATA_ES_24\0" /* 28103 */
+ "SPI_SHADER_USER_DATA_ES_25\0" /* 28130 */
+ "SPI_SHADER_USER_DATA_ES_26\0" /* 28157 */
+ "SPI_SHADER_USER_DATA_ES_27\0" /* 28184 */
+ "SPI_SHADER_USER_DATA_ES_28\0" /* 28211 */
+ "SPI_SHADER_USER_DATA_ES_29\0" /* 28238 */
+ "SPI_SHADER_USER_DATA_ES_30\0" /* 28265 */
+ "SPI_SHADER_USER_DATA_ES_31\0" /* 28292 */
+ "SPI_SHADER_PGM_RSRC4_HS\0" /* 28319 */
+ "SPI_SHADER_USER_DATA_ADDR_LO_HS\0" /* 28343 */
+ "SPI_SHADER_USER_DATA_ADDR_HI_HS\0" /* 28375 */
+ "SPI_SHADER_USER_DATA_LS_16\0" /* 28407 */
+ "SPI_SHADER_USER_DATA_LS_17\0" /* 28434 */
+ "SPI_SHADER_USER_DATA_LS_18\0" /* 28461 */
+ "SPI_SHADER_USER_DATA_LS_19\0" /* 28488 */
+ "SPI_SHADER_USER_DATA_LS_20\0" /* 28515 */
+ "SPI_SHADER_USER_DATA_LS_21\0" /* 28542 */
+ "SPI_SHADER_USER_DATA_LS_22\0" /* 28569 */
+ "SPI_SHADER_USER_DATA_LS_23\0" /* 28596 */
+ "SPI_SHADER_USER_DATA_LS_24\0" /* 28623 */
+ "SPI_SHADER_USER_DATA_LS_25\0" /* 28650 */
+ "SPI_SHADER_USER_DATA_LS_26\0" /* 28677 */
+ "SPI_SHADER_USER_DATA_LS_27\0" /* 28704 */
+ "SPI_SHADER_USER_DATA_LS_28\0" /* 28731 */
+ "SPI_SHADER_USER_DATA_LS_29\0" /* 28758 */
+ "SPI_SHADER_USER_DATA_LS_30\0" /* 28785 */
+ "SPI_SHADER_USER_DATA_LS_31\0" /* 28812 */
+ "SPI_SHADER_USER_DATA_COMMON_0\0" /* 28839 */
+ "SPI_SHADER_USER_DATA_COMMON_1\0" /* 28869 */
+ "SPI_SHADER_USER_DATA_COMMON_2\0" /* 28899 */
+ "SPI_SHADER_USER_DATA_COMMON_3\0" /* 28929 */
+ "SPI_SHADER_USER_DATA_COMMON_4\0" /* 28959 */
+ "SPI_SHADER_USER_DATA_COMMON_5\0" /* 28989 */
+ "SPI_SHADER_USER_DATA_COMMON_6\0" /* 29019 */
+ "SPI_SHADER_USER_DATA_COMMON_7\0" /* 29049 */
+ "SPI_SHADER_USER_DATA_COMMON_8\0" /* 29079 */
+ "SPI_SHADER_USER_DATA_COMMON_9\0" /* 29109 */
+ "SPI_SHADER_USER_DATA_COMMON_10\0" /* 29139 */
+ "SPI_SHADER_USER_DATA_COMMON_11\0" /* 29170 */
+ "SPI_SHADER_USER_DATA_COMMON_12\0" /* 29201 */
+ "SPI_SHADER_USER_DATA_COMMON_13\0" /* 29232 */
+ "SPI_SHADER_USER_DATA_COMMON_14\0" /* 29263 */
+ "SPI_SHADER_USER_DATA_COMMON_15\0" /* 29294 */
+ "SPI_SHADER_USER_DATA_COMMON_16\0" /* 29325 */
+ "SPI_SHADER_USER_DATA_COMMON_17\0" /* 29356 */
+ "SPI_SHADER_USER_DATA_COMMON_18\0" /* 29387 */
+ "SPI_SHADER_USER_DATA_COMMON_19\0" /* 29418 */
+ "SPI_SHADER_USER_DATA_COMMON_20\0" /* 29449 */
+ "SPI_SHADER_USER_DATA_COMMON_21\0" /* 29480 */
+ "SPI_SHADER_USER_DATA_COMMON_22\0" /* 29511 */
+ "SPI_SHADER_USER_DATA_COMMON_23\0" /* 29542 */
+ "SPI_SHADER_USER_DATA_COMMON_24\0" /* 29573 */
+ "SPI_SHADER_USER_DATA_COMMON_25\0" /* 29604 */
+ "SPI_SHADER_USER_DATA_COMMON_26\0" /* 29635 */
+ "SPI_SHADER_USER_DATA_COMMON_27\0" /* 29666 */
+ "SPI_SHADER_USER_DATA_COMMON_28\0" /* 29697 */
+ "SPI_SHADER_USER_DATA_COMMON_29\0" /* 29728 */
+ "SPI_SHADER_USER_DATA_COMMON_30\0" /* 29759 */
+ "SPI_SHADER_USER_DATA_COMMON_31\0" /* 29790 */
+ "COMPUTE_DISPATCH_PKT_ADDR_LO\0" /* 29821 */
+ "COMPUTE_DISPATCH_PKT_ADDR_HI\0" /* 29850 */
+ "COMPUTE_DISPATCH_SCRATCH_BASE_LO\0" /* 29879 */
+ "COMPUTE_DISPATCH_SCRATCH_BASE_HI\0" /* 29912 */
+ "CPF_LATENCY_STATS_DATA\0" /* 29945 */
+ "CPG_LATENCY_STATS_DATA\0" /* 29968 */
+ "CPC_LATENCY_STATS_DATA\0" /* 29991 */
+ "CPF_TC_PERF_COUNTER_WINDOW_SELECT\0" /* 30014 */
+ "CPG_TC_PERF_COUNTER_WINDOW_SELECT\0" /* 30048 */
+ "CPF_LATENCY_STATS_SELECT\0" /* 30082 */
+ "CPG_LATENCY_STATS_SELECT\0" /* 30107 */
+ "CPC_LATENCY_STATS_SELECT\0" /* 30132 */
+ "DB_HTILE_DATA_BASE_HI\0" /* 30157 */
+ "DB_Z_READ_BASE_HI\0" /* 30179 */
+ "DB_STENCIL_READ_BASE_HI\0" /* 30197 */
+ "DB_Z_WRITE_BASE_HI\0" /* 30221 */
+ "DB_STENCIL_WRITE_BASE_HI\0" /* 30240 */
+ "DB_DFSM_CONTROL\0" /* 30265 */
+ "DB_RENDER_FILTER\0" /* 30281 */
+ "DB_Z_INFO2\0" /* 30298 */
+ "DB_STENCIL_INFO2\0" /* 30309 */
+ "PA_SC_TILE_STEERING_OVERRIDE\0" /* 30326 */
+ "CP_PERFMON_CNTX_CNTL\0" /* 30355 */
+ "PA_SC_RIGHT_VERT_GRID\0" /* 30376 */
+ "PA_SC_LEFT_VERT_GRID\0" /* 30398 */
+ "PA_SC_HORIZ_GRID\0" /* 30419 */
+ "PA_SC_FOV_WINDOW_LR\0" /* 30436 */
+ "PA_SC_FOV_WINDOW_TB\0" /* 30456 */
+ "CB_MRT0_EPITCH\0" /* 30484, 30476 */
+ "CB_MRT1_EPITCH\0" /* 30491 */
+ "CB_MRT2_EPITCH\0" /* 30506 */
+ "CB_MRT3_EPITCH\0" /* 30521 */
+ "CB_MRT4_EPITCH\0" /* 30536 */
+ "CB_MRT5_EPITCH\0" /* 30551 */
+ "CB_MRT6_EPITCH\0" /* 30566 */
+ "CB_MRT7_EPITCH\0" /* 30581 */
+ "PA_CL_OBJPRIM_ID_CNTL\0" /* 30596 */
+ "PA_CL_NGG_CNTL\0" /* 30618 */
+ "PA_SU_OVER_RASTERIZATION_CNTL\0" /* 30633 */
+ "VGT_GS_MAX_PRIMS_PER_SUBGROUP\0" /* 30670, 30663 */
+ "VGT_DRAW_PAYLOAD_CNTL\0" /* 30693 */
+ "VGT_INDEX_PAYLOAD_CNTL\0" /* 30715 */
+ "VGT_DMA_EVENT_INITIATOR\0" /* 30738 */
+ "PA_SC_BINNER_CNTL_0\0" /* 30762 */
+ "PA_SC_BINNER_CNTL_1\0" /* 30782 */
+ "PA_SC_CONSERVATIVE_RASTERIZATION_CNTL\0" /* 30802 */
+ "PA_SC_NGG_MODE_CNTL\0" /* 30840 */
+ "CB_COLOR0_BASE_EXT\0" /* 30860 */
+ "CB_COLOR0_ATTRIB2\0" /* 30879 */
+ "CB_COLOR0_CMASK_BASE_EXT\0" /* 30897 */
+ "CB_COLOR0_FMASK_BASE_EXT\0" /* 30922 */
+ "CB_COLOR0_DCC_BASE_EXT\0" /* 30947 */
+ "CB_COLOR1_BASE_EXT\0" /* 30970 */
+ "CB_COLOR1_ATTRIB2\0" /* 30989 */
+ "CB_COLOR1_CMASK_BASE_EXT\0" /* 31007 */
+ "CB_COLOR1_FMASK_BASE_EXT\0" /* 31032 */
+ "CB_COLOR1_DCC_BASE_EXT\0" /* 31057 */
+ "CB_COLOR2_BASE_EXT\0" /* 31080 */
+ "CB_COLOR2_ATTRIB2\0" /* 31099 */
+ "CB_COLOR2_CMASK_BASE_EXT\0" /* 31117 */
+ "CB_COLOR2_FMASK_BASE_EXT\0" /* 31142 */
+ "CB_COLOR2_DCC_BASE_EXT\0" /* 31167 */
+ "CB_COLOR3_BASE_EXT\0" /* 31190 */
+ "CB_COLOR3_ATTRIB2\0" /* 31209 */
+ "CB_COLOR3_CMASK_BASE_EXT\0" /* 31227 */
+ "CB_COLOR3_FMASK_BASE_EXT\0" /* 31252 */
+ "CB_COLOR3_DCC_BASE_EXT\0" /* 31277 */
+ "CB_COLOR4_BASE_EXT\0" /* 31300 */
+ "CB_COLOR4_ATTRIB2\0" /* 31319 */
+ "CB_COLOR4_CMASK_BASE_EXT\0" /* 31337 */
+ "CB_COLOR4_FMASK_BASE_EXT\0" /* 31362 */
+ "CB_COLOR4_DCC_BASE_EXT\0" /* 31387 */
+ "CB_COLOR5_BASE_EXT\0" /* 31410 */
+ "CB_COLOR5_ATTRIB2\0" /* 31429 */
+ "CB_COLOR5_CMASK_BASE_EXT\0" /* 31447 */
+ "CB_COLOR5_FMASK_BASE_EXT\0" /* 31472 */
+ "CB_COLOR5_DCC_BASE_EXT\0" /* 31497 */
+ "CB_COLOR6_BASE_EXT\0" /* 31520 */
+ "CB_COLOR6_ATTRIB2\0" /* 31539 */
+ "CB_COLOR6_CMASK_BASE_EXT\0" /* 31557 */
+ "CB_COLOR6_FMASK_BASE_EXT\0" /* 31582 */
+ "CB_COLOR6_DCC_BASE_EXT\0" /* 31607 */
+ "CB_COLOR7_BASE_EXT\0" /* 31630 */
+ "CB_COLOR7_ATTRIB2\0" /* 31649 */
+ "CB_COLOR7_CMASK_BASE_EXT\0" /* 31667 */
+ "CB_COLOR7_FMASK_BASE_EXT\0" /* 31692 */
+ "CB_COLOR7_DCC_BASE_EXT\0" /* 31717 */
+ "COUNT_INDIRECT_ENABLE\0" /* 31740 */
+ "DRAW_INDEX_ENABLE\0" /* 31762 */
+ "PFP\0" /* 31780 */
+ "ENGINE_SEL\0" /* 31784 */
+ "WR_CONFIRM\0" /* 31795 */
+ "WR_ONE_ADDR\0" /* 31806 */
+ "MEM_MAPPED_REGISTER\0" /* 31818, 31829 */
+ "MEMORY_SYNC\0" /* 31838 */
+ "TC_L2\0" /* 31850 */
+ "GDS\0" /* 31856 */
+ "MEM_ASYNC\0" /* 31860 */
+ "DST_SEL\0" /* 31870 */
+ "IB_SIZE\0" /* 31878 */
+ "CHAIN\0" /* 31886 */
+ "VALID\0" /* 31892 */
+ "CP_SYNC\0" /* 31898 */
+ "SRC_ADDR\0" /* 31906 */
+ "SRC_ADDR_TC_L2\0" /* 31915 */
+ "SRC_SEL\0" /* 31930 */
+ "ENGINE\0" /* 31938 */
+ "DST_ADDR\0" /* 31945 */
+ "DST_ADDR_TC_L2\0" /* 31954 */
+ "DSL_SEL\0" /* 31969 */
+ "BYTE_COUNT_GFX6\0" /* 31977 */
+ "BYTE_COUNT_GFX9\0" /* 31993 */
+ "DISABLE_WR_CONFIRM_GFX6\0" /* 32009 */
+ "NONE\0" /* 32033 */
+ "8_IN_16\0" /* 32038 */
+ "8_IN_32\0" /* 32046 */
+ "8_IN_64\0" /* 32054 */
+ "SRC_SWAP\0" /* 32062 */
+ "DST_SWAP\0" /* 32071 */
+ "MEMORY\0" /* 32080 */
+ "SAS\0" /* 32087 */
+ "DAS\0" /* 32091 */
+ "INCREMENT\0" /* 32095 */
+ "NO_INCREMENT\0" /* 32105 */
+ "SAIC\0" /* 32118 */
+ "DAIC\0" /* 32123 */
+ "RAW_WAIT\0" /* 32128 */
+ "DISABLE_WR_CONFIRM_GFX9\0" /* 32137 */
+ "SDMA_RQ_PENDING\0" /* 32161 */
+ "TST_RQ_PENDING\0" /* 32177 */
+ "SDMA1_RQ_PENDING\0" /* 32192 */
+ "VCE0_RQ_PENDING\0" /* 32209 */
+ "VP8_BUSY\0" /* 32225 */
+ "SDMA_BUSY\0" /* 32234, 32235 */
+ "SDMA1_BUSY\0" /* 32244 */
+ "VCE0_BUSY\0" /* 32255 */
+ "XDMA_BUSY\0" /* 32265 */
+ "CHUB_BUSY\0" /* 32275 */
+ "SDMA2_BUSY\0" /* 32285 */
+ "SDMA3_BUSY\0" /* 32296 */
+ "SAMSCP_BUSY\0" /* 32307, 32311 */
+ "ISP_BUSY\0" /* 32319 */
+ "VCE1_BUSY\0" /* 32328 */
+ "ODE_BUSY\0" /* 32338 */
+ "SDMA2_RQ_PENDING\0" /* 32347 */
+ "SDMA3_RQ_PENDING\0" /* 32364 */
+ "SAMSCP_RQ_PENDING\0" /* 32381 */
+ "ISP_RQ_PENDING\0" /* 32399 */
+ "VCE1_RQ_PENDING\0" /* 32414 */
+ "UVD_RQ_PENDING\0" /* 32430 */
+ "SAMMSP_RQ_PENDING\0" /* 32445 */
+ "ACP_RQ_PENDING\0" /* 32463 */
+ "SMU_RQ_PENDING\0" /* 32478 */
+ "GRBM_RQ_PENDING\0" /* 32493 */
+ "HI_RQ_PENDING\0" /* 32509 */
+ "VMC_BUSY\0" /* 32523 */
+ "MCB_BUSY\0" /* 32532, 32533 */
+ "MCB_NON_DISPLAY_BUSY\0" /* 32541 */
+ "MCC_BUSY\0" /* 32562 */
+ "MCD_BUSY\0" /* 32571 */
+ "VMC1_BUSY\0" /* 32580 */
+ "SEM_BUSY\0" /* 32590 */
+ "ACP_BUSY\0" /* 32599 */
+ "IH_BUSY\0" /* 32608 */
+ "UVD_BUSY\0" /* 32616 */
+ "SAMMSP_BUSY\0" /* 32625 */
+ "GCATCL2_BUSY\0" /* 32637 */
+ "OSATCL2_BUSY\0" /* 32650 */
+ "BIF_BUSY\0" /* 32663 */
+ "MCC0_BUSY\0" /* 32672 */
+ "MCC1_BUSY\0" /* 32682 */
+ "MCC2_BUSY\0" /* 32692 */
+ "MCC3_BUSY\0" /* 32702 */
+ "MCC4_BUSY\0" /* 32712 */
+ "MCC5_BUSY\0" /* 32722 */
+ "MCC6_BUSY\0" /* 32732 */
+ "MCC7_BUSY\0" /* 32742 */
+ "MCD0_BUSY\0" /* 32752 */
+ "MCD1_BUSY\0" /* 32762 */
+ "MCD2_BUSY\0" /* 32772 */
+ "MCD3_BUSY\0" /* 32782 */
+ "MCD4_BUSY\0" /* 32792 */
+ "MCD5_BUSY\0" /* 32802 */
+ "MCD6_BUSY\0" /* 32812 */
+ "MCD7_BUSY\0" /* 32822 */
+ "IDLE\0" /* 32832 */
+ "REG_IDLE\0" /* 32837 */
+ "RB_EMPTY\0" /* 32846 */
+ "RB_FULL\0" /* 32855 */
+ "RB_CMD_IDLE\0" /* 32863 */
+ "RB_CMD_FULL\0" /* 32875 */
+ "IB_CMD_IDLE\0" /* 32887 */
+ "IB_CMD_FULL\0" /* 32899 */
+ "BLOCK_IDLE\0" /* 32911 */
+ "INSIDE_IB\0" /* 32922 */
+ "EX_IDLE\0" /* 32932 */
+ "EX_IDLE_POLL_TIMER_EXPIRE\0" /* 32940 */
+ "PACKET_READY\0" /* 32966 */
+ "MC_WR_IDLE\0" /* 32979 */
+ "SRBM_IDLE\0" /* 32990 */
+ "CONTEXT_EMPTY\0" /* 33000 */
+ "DELTA_RPTR_FULL\0" /* 33014 */
+ "RB_MC_RREQ_IDLE\0" /* 33030 */
+ "IB_MC_RREQ_IDLE\0" /* 33046 */
+ "MC_RD_IDLE\0" /* 33062 */
+ "DELTA_RPTR_EMPTY\0" /* 33073 */
+ "MC_RD_RET_STALL\0" /* 33090 */
+ "MC_RD_NO_POLL_IDLE\0" /* 33106 */
+ "PREV_CMD_IDLE\0" /* 33125 */
+ "SEM_IDLE\0" /* 33139 */
+ "SEM_REQ_STALL\0" /* 33148 */
+ "SEM_RESP_STATE\0" /* 33162 */
+ "INT_IDLE\0" /* 33177 */
+ "INT_REQ_STALL\0" /* 33186 */
+ "ME0PIPE1_CMDFIFO_AVAIL\0" /* 33200 */
+ "ME0PIPE1_CF_RQ_PENDING\0" /* 33223 */
+ "ME0PIPE1_PF_RQ_PENDING\0" /* 33246 */
+ "ME1PIPE0_RQ_PENDING\0" /* 33269 */
+ "ME1PIPE1_RQ_PENDING\0" /* 33289 */
+ "ME1PIPE2_RQ_PENDING\0" /* 33309 */
+ "ME1PIPE3_RQ_PENDING\0" /* 33329 */
+ "ME2PIPE0_RQ_PENDING\0" /* 33349 */
+ "ME2PIPE1_RQ_PENDING\0" /* 33369 */
+ "ME2PIPE2_RQ_PENDING\0" /* 33389 */
+ "ME2PIPE3_RQ_PENDING\0" /* 33409 */
+ "RLC_RQ_PENDING\0" /* 33429 */
+ "RLC_BUSY\0" /* 33444 */
+ "TC_BUSY\0" /* 33453 */
+ "TCC_CC_RESIDENT\0" /* 33461 */
+ "CPF_BUSY\0" /* 33477 */
+ "CPC_BUSY\0" /* 33486 */
+ "CPG_BUSY\0" /* 33495 */
+ "ME0PIPE0_CMDFIFO_AVAIL\0" /* 33504 */
+ "SRBM_RQ_PENDING\0" /* 33527 */
+ "ME0PIPE0_CF_RQ_PENDING\0" /* 33543 */
+ "ME0PIPE0_PF_RQ_PENDING\0" /* 33566 */
+ "GDS_DMA_RQ_PENDING\0" /* 33589 */
+ "DB_CLEAN\0" /* 33608 */
+ "CB_CLEAN\0" /* 33617 */
+ "TA_BUSY\0" /* 33626 */
+ "GDS_BUSY\0" /* 33634 */
+ "WD_BUSY_NO_DMA\0" /* 33643 */
+ "VGT_BUSY\0" /* 33658 */
+ "IA_BUSY_NO_DMA\0" /* 33667 */
+ "IA_BUSY\0" /* 33682 */
+ "SX_BUSY\0" /* 33690 */
+ "WD_BUSY\0" /* 33698 */
+ "SPI_BUSY\0" /* 33706 */
+ "BCI_BUSY\0" /* 33715 */
+ "SC_BUSY\0" /* 33724 */
+ "PA_BUSY\0" /* 33732 */
+ "DB_BUSY\0" /* 33740 */
+ "CP_COHERENCY_BUSY\0" /* 33748 */
+ "GUI_ACTIVE\0" /* 33766 */
+ "OFFSET_UPDATE_DONE\0" /* 33777, 33791 */
+ "DEST_BASE_0_ENA\0" /* 33796 */
+ "DEST_BASE_1_ENA\0" /* 33812 */
+ "CB0_DEST_BASE_ENA\0" /* 33828 */
+ "CB1_DEST_BASE_ENA\0" /* 33846 */
+ "CB2_DEST_BASE_ENA\0" /* 33864 */
+ "CB3_DEST_BASE_ENA\0" /* 33882 */
+ "CB4_DEST_BASE_ENA\0" /* 33900 */
+ "CB5_DEST_BASE_ENA\0" /* 33918 */
+ "CB6_DEST_BASE_ENA\0" /* 33936 */
+ "CB7_DEST_BASE_ENA\0" /* 33954 */
+ "DB_DEST_BASE_ENA\0" /* 33972 */
+ "DEST_BASE_2_ENA\0" /* 33989 */
+ "DEST_BASE_3_ENA\0" /* 34005 */
+ "TCL1_ACTION_ENA\0" /* 34021 */
+ "TC_ACTION_ENA\0" /* 34037 */
+ "CB_ACTION_ENA\0" /* 34051 */
+ "DB_ACTION_ENA\0" /* 34065 */
+ "SH_KCACHE_ACTION_ENA\0" /* 34079 */
+ "SH_ICACHE_ACTION_ENA\0" /* 34100 */
+ "COHER_BASE_HI_256B\0" /* 34121 */
+ "START_DELAY_COUNT\0" /* 34140 */
+ "TC_SD_ACTION_ENA\0" /* 34158 */
+ "TC_NC_ACTION_ENA\0" /* 34175 */
+ "TCL1_VOL_ACTION_ENA\0" /* 34192 */
+ "TC_VOL_ACTION_ENA\0" /* 34212 */
+ "TC_WB_ACTION_ENA\0" /* 34230 */
+ "SH_KCACHE_VOL_ACTION_ENA\0" /* 34247 */
+ "SH_KCACHE_WB_ACTION_ENA\0" /* 34272 */
+ "SH_SD_ACTION_ENA\0" /* 34296 */
+ "MATCHING_GFX_CNTX\0" /* 34313 */
+ "MEID\0" /* 34331 */
+ "PHASE1_STATUS\0" /* 34336 */
+ "MEC1_BUSY\0" /* 34350 */
+ "MEC2_BUSY\0" /* 34360 */
+ "DC0_BUSY\0" /* 34370 */
+ "DC1_BUSY\0" /* 34379 */
+ "RCIU1_BUSY\0" /* 34388 */
+ "RCIU2_BUSY\0" /* 34399 */
+ "ROQ1_BUSY\0" /* 34410 */
+ "ROQ2_BUSY\0" /* 34420 */
+ "TCIU_BUSY\0" /* 34430 */
+ "SCRATCH_RAM_BUSY\0" /* 34440 */
+ "QU_BUSY\0" /* 34457 */
+ "ATCL2IU_BUSY\0" /* 34465 */
+ "CPG_CPC_BUSY\0" /* 34478 */
+ "CPF_CPC_BUSY\0" /* 34491 */
+ "MEC1_LOAD_BUSY\0" /* 34504 */
+ "MEC1_SEMAPOHRE_BUSY\0" /* 34519 */
+ "MEC1_MUTEX_BUSY\0" /* 34539 */
+ "MEC1_MESSAGE_BUSY\0" /* 34555 */
+ "MEC1_EOP_QUEUE_BUSY\0" /* 34573 */
+ "MEC1_IQ_QUEUE_BUSY\0" /* 34593 */
+ "MEC1_IB_QUEUE_BUSY\0" /* 34612 */
+ "MEC1_TC_BUSY\0" /* 34631 */
+ "MEC1_DMA_BUSY\0" /* 34644 */
+ "MEC1_PARTIAL_FLUSH_BUSY\0" /* 34658 */
+ "MEC1_PIPE0_BUSY\0" /* 34682 */
+ "MEC1_PIPE1_BUSY\0" /* 34698 */
+ "MEC1_PIPE2_BUSY\0" /* 34714 */
+ "MEC1_PIPE3_BUSY\0" /* 34730 */
+ "MEC2_LOAD_BUSY\0" /* 34746 */
+ "MEC2_SEMAPOHRE_BUSY\0" /* 34761 */
+ "MEC2_MUTEX_BUSY\0" /* 34781 */
+ "MEC2_MESSAGE_BUSY\0" /* 34797 */
+ "MEC2_EOP_QUEUE_BUSY\0" /* 34815 */
+ "MEC2_IQ_QUEUE_BUSY\0" /* 34835 */
+ "MEC2_IB_QUEUE_BUSY\0" /* 34854 */
+ "MEC2_TC_BUSY\0" /* 34873 */
+ "MEC2_DMA_BUSY\0" /* 34886 */
+ "MEC2_PARTIAL_FLUSH_BUSY\0" /* 34900 */
+ "MEC2_PIPE0_BUSY\0" /* 34924 */
+ "MEC2_PIPE1_BUSY\0" /* 34940 */
+ "MEC2_PIPE2_BUSY\0" /* 34956 */
+ "MEC2_PIPE3_BUSY\0" /* 34972 */
+ "RCIU_TX_FREE_STALL\0" /* 34988 */
+ "RCIU_PRIV_VIOLATION\0" /* 35007 */
+ "TCIU_TX_FREE_STALL\0" /* 35027 */
+ "MEC1_DECODING_PACKET\0" /* 35046 */
+ "MEC1_WAIT_ON_RCIU\0" /* 35067 */
+ "MEC1_WAIT_ON_RCIU_READ\0" /* 35085 */
+ "MEC1_WAIT_ON_ROQ_DATA\0" /* 35108 */
+ "MEC2_DECODING_PACKET\0" /* 35130 */
+ "MEC2_WAIT_ON_RCIU\0" /* 35151 */
+ "MEC2_WAIT_ON_RCIU_READ\0" /* 35169 */
+ "MEC2_WAIT_ON_ROQ_DATA\0" /* 35192 */
+ "ATCL2IU_WAITING_ON_FREE\0" /* 35214 */
+ "ATCL2IU_WAITING_ON_TAGS\0" /* 35238 */
+ "ATCL1_WAITING_ON_TRANS\0" /* 35262 */
+ "POST_WPTR_GFX_BUSY\0" /* 35285 */
+ "CSF_BUSY\0" /* 35304 */
+ "ROQ_ALIGN_BUSY\0" /* 35313 */
+ "ROQ_RING_BUSY\0" /* 35328 */
+ "ROQ_INDIRECT1_BUSY\0" /* 35342 */
+ "ROQ_INDIRECT2_BUSY\0" /* 35361 */
+ "ROQ_STATE_BUSY\0" /* 35380 */
+ "ROQ_CE_RING_BUSY\0" /* 35395 */
+ "ROQ_CE_INDIRECT1_BUSY\0" /* 35412 */
+ "ROQ_CE_INDIRECT2_BUSY\0" /* 35434 */
+ "SEMAPHORE_BUSY\0" /* 35456 */
+ "INTERRUPT_BUSY\0" /* 35471 */
+ "HQD_BUSY\0" /* 35486 */
+ "PRT_BUSY\0" /* 35495 */
+ "CPF_GFX_BUSY\0" /* 35504 */
+ "CPF_CMP_BUSY\0" /* 35517 */
+ "GRBM_CPF_STAT_BUSY\0" /* 35530 */
+ "CPC_CPF_BUSY\0" /* 35549 */
+ "REG_BUS_FIFO_BUSY\0" /* 35562 */
+ "CSF_RING_BUSY\0" /* 35580 */
+ "CSF_INDIRECT1_BUSY\0" /* 35594 */
+ "CSF_INDIRECT2_BUSY\0" /* 35613 */
+ "CSF_STATE_BUSY\0" /* 35632 */
+ "CSF_CE_INDR1_BUSY\0" /* 35647 */
+ "CSF_CE_INDR2_BUSY\0" /* 35665 */
+ "CSF_ARBITER_BUSY\0" /* 35683 */
+ "CSF_INPUT_BUSY\0" /* 35700 */
+ "OUTSTANDING_READ_TAGS\0" /* 35715 */
+ "HPD_PROCESSING_EOP_BUSY\0" /* 35737 */
+ "HQD_DISPATCH_BUSY\0" /* 35761 */
+ "HQD_IQ_TIMER_BUSY\0" /* 35779 */
+ "HQD_DMA_OFFLOAD_BUSY\0" /* 35797 */
+ "HQD_WAIT_SEMAPHORE_BUSY\0" /* 35818 */
+ "HQD_SIGNAL_SEMAPHORE_BUSY\0" /* 35842 */
+ "HQD_MESSAGE_BUSY\0" /* 35868 */
+ "HQD_PQ_FETCHER_BUSY\0" /* 35885 */
+ "HQD_IB_FETCHER_BUSY\0" /* 35905 */
+ "HQD_IQ_FETCHER_BUSY\0" /* 35925 */
+ "HQD_EOP_FETCHER_BUSY\0" /* 35945 */
+ "HQD_CONSUMED_RPTR_BUSY\0" /* 35966 */
+ "HQD_FETCHER_ARB_BUSY\0" /* 35989 */
+ "HQD_ROQ_ALIGN_BUSY\0" /* 36010 */
+ "HQD_ROQ_EOP_BUSY\0" /* 36029 */
+ "HQD_ROQ_IQ_BUSY\0" /* 36046 */
+ "HQD_ROQ_PQ_BUSY\0" /* 36062 */
+ "HQD_ROQ_IB_BUSY\0" /* 36078 */
+ "HQD_WPTR_POLL_BUSY\0" /* 36094 */
+ "HQD_PQ_BUSY\0" /* 36113 */
+ "HQD_IB_BUSY\0" /* 36125 */
+ "RING_FETCHING_DATA\0" /* 36137 */
+ "INDR1_FETCHING_DATA\0" /* 36156 */
+ "INDR2_FETCHING_DATA\0" /* 36176 */
+ "STATE_FETCHING_DATA\0" /* 36196 */
+ "TCIU_WAITING_ON_FREE\0" /* 36216 */
+ "TCIU_WAITING_ON_TAGS\0" /* 36237 */
+ "COHER_SIZE_HI_256B\0" /* 36258 */
+ "PRIM_COUNT\0" /* 36277 */
+ "VS_NO_EXTRA_BUFFER\0" /* 36288 */
+ "STREAMOUT_FULL_FLUSH\0" /* 36307 */
+ "ES_LIMIT\0" /* 36328, 36331 */
+ "VERT_REUSE\0" /* 36337 */
+ "DI_PT_NONE\0" /* 36348 */
+ "DI_PT_POINTLIST\0" /* 36359 */
+ "DI_PT_LINELIST\0" /* 36375 */
+ "DI_PT_LINESTRIP\0" /* 36390 */
+ "DI_PT_TRILIST\0" /* 36406 */
+ "DI_PT_TRIFAN\0" /* 36420 */
+ "DI_PT_TRISTRIP\0" /* 36433 */
+ "DI_PT_UNUSED_0\0" /* 36448 */
+ "DI_PT_UNUSED_1\0" /* 36463 */
+ "DI_PT_PATCH\0" /* 36478 */
+ "DI_PT_LINELIST_ADJ\0" /* 36490 */
+ "DI_PT_LINESTRIP_ADJ\0" /* 36509 */
+ "DI_PT_TRILIST_ADJ\0" /* 36529 */
+ "DI_PT_TRISTRIP_ADJ\0" /* 36547 */
+ "DI_PT_UNUSED_3\0" /* 36566 */
+ "DI_PT_UNUSED_4\0" /* 36581 */
+ "DI_PT_TRI_WITH_WFLAGS\0" /* 36596 */
+ "DI_PT_RECTLIST\0" /* 36618 */
+ "DI_PT_LINELOOP\0" /* 36633 */
+ "DI_PT_QUADLIST\0" /* 36648 */
+ "DI_PT_QUADSTRIP\0" /* 36663 */
+ "DI_PT_POLYGON\0" /* 36679 */
+ "DI_PT_2D_COPY_RECT_LIST_V0\0" /* 36693 */
+ "DI_PT_2D_COPY_RECT_LIST_V1\0" /* 36720 */
+ "DI_PT_2D_COPY_RECT_LIST_V2\0" /* 36747 */
+ "DI_PT_2D_COPY_RECT_LIST_V3\0" /* 36774 */
+ "DI_PT_2D_FILL_RECT_LIST\0" /* 36801 */
+ "DI_PT_2D_LINE_STRIP\0" /* 36825 */
+ "DI_PT_2D_TRI_STRIP\0" /* 36845 */
+ "DI_INDEX_SIZE_16_BIT\0" /* 36864 */
+ "DI_INDEX_SIZE_32_BIT\0" /* 36885 */
+ "OFFCHIP_BUFFERING\0" /* 36906 */
+ "CLIP_VTX_REORDER_ENA\0" /* 36924 */
+ "NUM_CLIP_SEQ\0" /* 36945 */
+ "CLIPPED_PRIM_SEQ_STALL\0" /* 36958 */
+ "VE_NAN_PROC_DISABLE\0" /* 36981 */
+ "CURRENT_PTR\0" /* 37001 */
+ "CURRENT_COUNT\0" /* 37013 */
+ "CE_TO_CSF_NOT_RDY_TO_RCV\0" /* 37027 */
+ "CE_TO_RAM_INIT_FETCHER_NOT_RDY_TO_RCV\0" /* 37052 */
+ "CE_WAITING_ON_DATA_FROM_RAM_INIT_FETCHER\0" /* 37090 */
+ "CE_TO_RAM_INIT_NOT_RDY\0" /* 37131 */
+ "CE_TO_RAM_DUMP_NOT_RDY\0" /* 37154 */
+ "CE_TO_RAM_WRITE_NOT_RDY\0" /* 37177 */
+ "CE_TO_INC_FIFO_NOT_RDY_TO_RCV\0" /* 37201 */
+ "CE_TO_WR_FIFO_NOT_RDY_TO_RCV\0" /* 37231 */
+ "CE_WAITING_ON_BUFFER_DATA\0" /* 37260 */
+ "CE_WAITING_ON_CE_BUFFER_FLAG\0" /* 37286 */
+ "CE_WAITING_ON_DE_COUNTER\0" /* 37315 */
+ "CE_WAITING_ON_DE_COUNTER_UNDERFLOW\0" /* 37340 */
+ "CE_STALLED_ON_TC_WR_CONFIRM\0" /* 37375 */
+ "CE_STALLED_ON_ATOMIC_RTN_DATA\0" /* 37403 */
+ "RBIU_TO_DMA_NOT_RDY_TO_RCV\0" /* 37433 */
+ "RBIU_TO_SEM_NOT_RDY_TO_RCV\0" /* 37460 */
+ "RBIU_TO_MEMWR_NOT_RDY_TO_RCV\0" /* 37487 */
+ "ME_HAS_ACTIVE_CE_BUFFER_FLAG\0" /* 37516 */
+ "ME_HAS_ACTIVE_DE_BUFFER_FLAG\0" /* 37545 */
+ "ME_STALLED_ON_TC_WR_CONFIRM\0" /* 37574 */
+ "ME_STALLED_ON_ATOMIC_RTN_DATA\0" /* 37602 */
+ "ME_WAITING_ON_TC_READ_DATA\0" /* 37632 */
+ "ME_WAITING_ON_REG_READ_DATA\0" /* 37659 */
+ "RCIU_WAITING_ON_GDS_FREE\0" /* 37687 */
+ "RCIU_WAITING_ON_GRBM_FREE\0" /* 37712 */
+ "RCIU_WAITING_ON_VGT_FREE\0" /* 37738 */
+ "RCIU_STALLED_ON_ME_READ\0" /* 37763 */
+ "RCIU_STALLED_ON_DMA_READ\0" /* 37787 */
+ "RCIU_STALLED_ON_APPEND_READ\0" /* 37812 */
+ "RCIU_HALTED_BY_REG_VIOLATION\0" /* 37840 */
+ "PFP_TO_CSF_NOT_RDY_TO_RCV\0" /* 37869 */
+ "PFP_TO_MEQ_NOT_RDY_TO_RCV\0" /* 37895 */
+ "PFP_TO_RCIU_NOT_RDY_TO_RCV\0" /* 37921 */
+ "PFP_TO_VGT_WRITES_PENDING\0" /* 37948 */
+ "PFP_RCIU_READ_PENDING\0" /* 37974 */
+ "PFP_WAITING_ON_BUFFER_DATA\0" /* 37996 */
+ "ME_WAIT_ON_CE_COUNTER\0" /* 38023 */
+ "ME_WAIT_ON_AVAIL_BUFFER\0" /* 38045 */
+ "GFX_CNTX_NOT_AVAIL_TO_ME\0" /* 38069 */
+ "ME_RCIU_NOT_RDY_TO_RCV\0" /* 38094 */
+ "ME_TO_CONST_NOT_RDY_TO_RCV\0" /* 38117 */
+ "ME_WAITING_DATA_FROM_PFP\0" /* 38144 */
+ "ME_WAITING_ON_PARTIAL_FLUSH\0" /* 38169 */
+ "MEQ_TO_ME_NOT_RDY_TO_RCV\0" /* 38197 */
+ "STQ_TO_ME_NOT_RDY_TO_RCV\0" /* 38222 */
+ "ME_WAITING_DATA_FROM_STQ\0" /* 38247 */
+ "PFP_STALLED_ON_TC_WR_CONFIRM\0" /* 38272 */
+ "PFP_STALLED_ON_ATOMIC_RTN_DATA\0" /* 38301 */
+ "EOPD_FIFO_NEEDS_SC_EOP_DONE\0" /* 38332 */
+ "EOPD_FIFO_NEEDS_WR_CONFIRM\0" /* 38360 */
+ "STRMO_WR_OF_PRIM_DATA_PENDING\0" /* 38387 */
+ "PIPE_STATS_WR_DATA_PENDING\0" /* 38417 */
+ "APPEND_RDY_WAIT_ON_CS_DONE\0" /* 38444, 38463 */
+ "APPEND_RDY_WAIT_ON_PS_DONE\0" /* 38490, 38471 */
+ "APPEND_WAIT_ON_WR_CONFIRM\0" /* 38498 */
+ "APPEND_ACTIVE_PARTITION\0" /* 38524 */
+ "APPEND_WAITING_TO_SEND_MEMWRITE\0" /* 38548 */
+ "SURF_SYNC_NEEDS_IDLE_CNTXS\0" /* 38580 */
+ "SURF_SYNC_NEEDS_ALL_CLEAN\0" /* 38607 */
+ "DC_BUSY\0" /* 38633 */
+ "PFP_BUSY\0" /* 38641 */
+ "MEQ_BUSY\0" /* 38650 */
+ "ME_BUSY\0" /* 38659 */
+ "QUERY_BUSY\0" /* 38667 */
+ "SURFACE_SYNC_BUSY\0" /* 38678 */
+ "RCIU_BUSY\0" /* 38696 */
+ "CPC_CPG_BUSY\0" /* 38706 */
+ "CE_BUSY\0" /* 38719 */
+ "INSTANCE_INDEX\0" /* 38727 */
+ "SH_INDEX\0" /* 38742 */
+ "SE_INDEX\0" /* 38751 */
+ "SH_BROADCAST_WRITES\0" /* 38760 */
+ "INSTANCE_BROADCAST_WRITES\0" /* 38780 */
+ "SE_BROADCAST_WRITES\0" /* 38806 */
+ "X_8K_DWORDS\0" /* 38826 */
+ "X_4K_DWORDS\0" /* 38838 */
+ "X_2K_DWORDS\0" /* 38850 */
+ "X_1K_DWORDS\0" /* 38862 */
+ "OFFCHIP_GRANULARITY\0" /* 38874 */
+ "ENABLE_PA_SC_OUT_OF_ORDER\0" /* 38894 */
+ "DISABLE_SC_DB_TILE_FIX\0" /* 38920 */
+ "DISABLE_AA_MASK_FULL_FIX\0" /* 38943 */
+ "ENABLE_1XMSAA_SAMPLE_LOCATIONS\0" /* 38968 */
+ "ENABLE_1XMSAA_SAMPLE_LOC_CENTROID\0" /* 38999 */
+ "DISABLE_SCISSOR_FIX\0" /* 39033 */
+ "DISABLE_PW_BUBBLE_COLLAPSE\0" /* 39053 */
+ "SEND_UNLIT_STILES_TO_PACKER\0" /* 39080 */
+ "DISABLE_DUALGRAD_PERF_OPTIMIZATION\0" /* 39108 */
+ "INST_INVALIDATE\0" /* 39148, 39143 */
+ "DATA_INVALIDATE\0" /* 39159 */
+ "INVALIDATE_VOLATILE\0" /* 39175 */
+ "RET\0" /* 39195 */
+ "RUI\0" /* 39199 */
+ "RNG\0" /* 39203 */
+ "SQ_EXP_MRT\0" /* 39207 */
+ "SQ_EXP_MRTZ\0" /* 39218 */
+ "SQ_EXP_NULL\0" /* 39230 */
+ "SQ_EXP_POS\0" /* 39242 */
+ "SQ_EXP_PARAM\0" /* 39253 */
+ "TGT\0" /* 39266 */
+ "COMPR\0" /* 39270 */
+ "VM\0" /* 39276 */
+ "SQ_ENC_EXP_FIELD\0" /* 39279 */
+ "ENCODING\0" /* 39296 */
+ "ADDRESS\0" /* 39305 */
+ "BASE_ADDRESS_HI\0" /* 39313, 39318 */
+ "CACHE_SWIZZLE\0" /* 39329 */
+ "SWIZZLE_ENABLE\0" /* 39343 */
+ "SQ_SEL_0\0" /* 39358 */
+ "SQ_SEL_1\0" /* 39367 */
+ "SQ_SEL_RESERVED_0\0" /* 39376, 39383 */
+ "SQ_SEL_RESERVED_1\0" /* 39401, 39394 */
+ "SQ_SEL_X\0" /* 39412 */
+ "SQ_SEL_Y\0" /* 39421 */
+ "SQ_SEL_Z\0" /* 39430 */
+ "SQ_SEL_W\0" /* 39439 */
+ "DST_SEL_X\0" /* 39448 */
+ "DST_SEL_Y\0" /* 39458 */
+ "DST_SEL_Z\0" /* 39468 */
+ "DST_SEL_W\0" /* 39478 */
+ "BUF_NUM_FORMAT_UNORM\0" /* 39488 */
+ "BUF_NUM_FORMAT_SNORM\0" /* 39509 */
+ "BUF_NUM_FORMAT_USCALED\0" /* 39530 */
+ "BUF_NUM_FORMAT_SSCALED\0" /* 39553 */
+ "BUF_NUM_FORMAT_UINT\0" /* 39576 */
+ "BUF_NUM_FORMAT_SINT\0" /* 39596 */
+ "BUF_NUM_FORMAT_SNORM_OGL\0" /* 39616 */
+ "BUF_NUM_FORMAT_FLOAT\0" /* 39641 */
+ "NUM_FORMAT\0" /* 39662 */
+ "BUF_DATA_FORMAT_INVALID\0" /* 39673 */
+ "BUF_DATA_FORMAT_8\0" /* 39697 */
+ "BUF_DATA_FORMAT_16\0" /* 39715 */
+ "BUF_DATA_FORMAT_8_8\0" /* 39734 */
+ "BUF_DATA_FORMAT_32\0" /* 39754 */
+ "BUF_DATA_FORMAT_16_16\0" /* 39773 */
+ "BUF_DATA_FORMAT_10_11_11\0" /* 39795 */
+ "BUF_DATA_FORMAT_11_11_10\0" /* 39820 */
+ "BUF_DATA_FORMAT_10_10_10_2\0" /* 39845 */
+ "BUF_DATA_FORMAT_2_10_10_10\0" /* 39872 */
+ "BUF_DATA_FORMAT_8_8_8_8\0" /* 39899 */
+ "BUF_DATA_FORMAT_32_32\0" /* 39923 */
+ "BUF_DATA_FORMAT_16_16_16_16\0" /* 39945 */
+ "BUF_DATA_FORMAT_32_32_32\0" /* 39973 */
+ "BUF_DATA_FORMAT_32_32_32_32\0" /* 39998 */
+ "BUF_DATA_FORMAT_RESERVED_15\0" /* 40026 */
+ "DATA_FORMAT\0" /* 40054 */
+ "ELEMENT_SIZE\0" /* 40066 */
+ "INDEX_STRIDE\0" /* 40079 */
+ "ADD_TID_ENABLE\0" /* 40092 */
+ "ATC\0" /* 40107 */
+ "HASH_ENABLE\0" /* 40111 */
+ "HEAP\0" /* 40123 */
+ "MTYPE\0" /* 40128 */
+ "SQ_RSRC_BUF\0" /* 40134 */
+ "SQ_RSRC_BUF_RSVD_1\0" /* 40146 */
+ "SQ_RSRC_BUF_RSVD_2\0" /* 40165 */
+ "SQ_RSRC_BUF_RSVD_3\0" /* 40184 */
+ "MIN_LOD\0" /* 40203 */
+ "IMG_DATA_FORMAT_INVALID\0" /* 40211 */
+ "IMG_DATA_FORMAT_8\0" /* 40235 */
+ "IMG_DATA_FORMAT_16\0" /* 40253 */
+ "IMG_DATA_FORMAT_8_8\0" /* 40272 */
+ "IMG_DATA_FORMAT_32\0" /* 40292 */
+ "IMG_DATA_FORMAT_16_16\0" /* 40311 */
+ "IMG_DATA_FORMAT_10_11_11\0" /* 40333 */
+ "IMG_DATA_FORMAT_11_11_10\0" /* 40358 */
+ "IMG_DATA_FORMAT_10_10_10_2\0" /* 40383 */
+ "IMG_DATA_FORMAT_2_10_10_10\0" /* 40410 */
+ "IMG_DATA_FORMAT_8_8_8_8\0" /* 40437 */
+ "IMG_DATA_FORMAT_32_32\0" /* 40461 */
+ "IMG_DATA_FORMAT_16_16_16_16\0" /* 40483 */
+ "IMG_DATA_FORMAT_32_32_32\0" /* 40511 */
+ "IMG_DATA_FORMAT_32_32_32_32\0" /* 40536 */
+ "IMG_DATA_FORMAT_RESERVED_15\0" /* 40564 */
+ "IMG_DATA_FORMAT_5_6_5\0" /* 40592 */
+ "IMG_DATA_FORMAT_1_5_5_5\0" /* 40614 */
+ "IMG_DATA_FORMAT_5_5_5_1\0" /* 40638 */
+ "IMG_DATA_FORMAT_4_4_4_4\0" /* 40662 */
+ "IMG_DATA_FORMAT_8_24\0" /* 40686 */
+ "IMG_DATA_FORMAT_24_8\0" /* 40707 */
+ "IMG_DATA_FORMAT_X24_8_32\0" /* 40728 */
+ "IMG_DATA_FORMAT_8_AS_8_8_8_8\0" /* 40753 */
+ "IMG_DATA_FORMAT_ETC2_RGB\0" /* 40782 */
+ "IMG_DATA_FORMAT_ETC2_RGBA\0" /* 40807 */
+ "IMG_DATA_FORMAT_ETC2_R\0" /* 40833 */
+ "IMG_DATA_FORMAT_ETC2_RG\0" /* 40856 */
+ "IMG_DATA_FORMAT_ETC2_RGBA1\0" /* 40880 */
+ "IMG_DATA_FORMAT_RESERVED_29\0" /* 40907 */
+ "IMG_DATA_FORMAT_RESERVED_30\0" /* 40935 */
+ "IMG_DATA_FORMAT_RESERVED_31\0" /* 40963 */
+ "IMG_DATA_FORMAT_GB_GR\0" /* 40991 */
+ "IMG_DATA_FORMAT_BG_RG\0" /* 41013 */
+ "IMG_DATA_FORMAT_5_9_9_9\0" /* 41035 */
+ "IMG_DATA_FORMAT_BC1\0" /* 41059 */
+ "IMG_DATA_FORMAT_BC2\0" /* 41079 */
+ "IMG_DATA_FORMAT_BC3\0" /* 41099 */
+ "IMG_DATA_FORMAT_BC4\0" /* 41119 */
+ "IMG_DATA_FORMAT_BC5\0" /* 41139 */
+ "IMG_DATA_FORMAT_BC6\0" /* 41159 */
+ "IMG_DATA_FORMAT_BC7\0" /* 41179 */
+ "IMG_DATA_FORMAT_16_AS_16_16_16_16_GFX6\0" /* 41199 */
+ "IMG_DATA_FORMAT_16_AS_32_32_32_32_GFX6\0" /* 41238 */
+ "IMG_DATA_FORMAT_FMASK8_S2_F1\0" /* 41277 */
+ "IMG_DATA_FORMAT_FMASK8_S4_F1\0" /* 41306 */
+ "IMG_DATA_FORMAT_FMASK8_S8_F1\0" /* 41335 */
+ "IMG_DATA_FORMAT_FMASK8_S2_F2\0" /* 41364 */
+ "IMG_DATA_FORMAT_FMASK8_S4_F2\0" /* 41393 */
+ "IMG_DATA_FORMAT_FMASK8_S4_F4\0" /* 41422 */
+ "IMG_DATA_FORMAT_FMASK16_S16_F1\0" /* 41451 */
+ "IMG_DATA_FORMAT_FMASK16_S8_F2\0" /* 41482 */
+ "IMG_DATA_FORMAT_FMASK32_S16_F2\0" /* 41512 */
+ "IMG_DATA_FORMAT_FMASK32_S8_F4\0" /* 41543 */
+ "IMG_DATA_FORMAT_FMASK32_S8_F8\0" /* 41573 */
+ "IMG_DATA_FORMAT_FMASK64_S16_F4\0" /* 41603 */
+ "IMG_DATA_FORMAT_FMASK64_S16_F8\0" /* 41634 */
+ "IMG_DATA_FORMAT_4_4\0" /* 41665 */
+ "IMG_DATA_FORMAT_6_5_5\0" /* 41685 */
+ "IMG_DATA_FORMAT_1\0" /* 41707 */
+ "IMG_DATA_FORMAT_1_REVERSED\0" /* 41725 */
+ "IMG_DATA_FORMAT_32_AS_8\0" /* 41752 */
+ "IMG_DATA_FORMAT_32_AS_8_8\0" /* 41776 */
+ "IMG_DATA_FORMAT_32_AS_32_32_32_32\0" /* 41802 */
+ "DATA_FORMAT_GFX6\0" /* 41836 */
+ "IMG_NUM_FORMAT_UNORM\0" /* 41853 */
+ "IMG_NUM_FORMAT_SNORM\0" /* 41874 */
+ "IMG_NUM_FORMAT_USCALED\0" /* 41895 */
+ "IMG_NUM_FORMAT_SSCALED\0" /* 41918 */
+ "IMG_NUM_FORMAT_UINT\0" /* 41941 */
+ "IMG_NUM_FORMAT_SINT\0" /* 41961 */
+ "IMG_NUM_FORMAT_SNORM_OGL\0" /* 41981 */
+ "IMG_NUM_FORMAT_FLOAT\0" /* 42006 */
+ "IMG_NUM_FORMAT_RESERVED_8\0" /* 42027 */
+ "IMG_NUM_FORMAT_SRGB\0" /* 42053 */
+ "IMG_NUM_FORMAT_UBNORM\0" /* 42073 */
+ "IMG_NUM_FORMAT_UBNORM_OGL\0" /* 42095 */
+ "IMG_NUM_FORMAT_UBINT\0" /* 42121 */
+ "IMG_NUM_FORMAT_UBSCALED\0" /* 42142 */
+ "IMG_NUM_FORMAT_RESERVED_14\0" /* 42166 */
+ "IMG_NUM_FORMAT_RESERVED_15\0" /* 42193 */
+ "NUM_FORMAT_GFX6\0" /* 42220 */
+ "WIDTH\0" /* 42236 */
+ "HEIGHT\0" /* 42242 */
+ "PERF_MOD\0" /* 42249 */
+ "INTERLACED\0" /* 42258 */
+ "BASE_LEVEL\0" /* 42269 */
+ "LAST_LEVEL\0" /* 42280 */
+ "TILING_INDEX\0" /* 42291 */
+ "POW2_PAD\0" /* 42304 */
+ "SQ_RSRC_IMG_RSVD_0\0" /* 42313 */
+ "SQ_RSRC_IMG_RSVD_1\0" /* 42332 */
+ "SQ_RSRC_IMG_RSVD_2\0" /* 42351 */
+ "SQ_RSRC_IMG_RSVD_3\0" /* 42370 */
+ "SQ_RSRC_IMG_RSVD_4\0" /* 42389 */
+ "SQ_RSRC_IMG_RSVD_5\0" /* 42408 */
+ "SQ_RSRC_IMG_RSVD_6\0" /* 42427 */
+ "SQ_RSRC_IMG_RSVD_7\0" /* 42446 */
+ "SQ_RSRC_IMG_1D\0" /* 42465, 42477 */
+ "SQ_RSRC_IMG_2D\0" /* 42480, 42492 */
+ "SQ_RSRC_IMG_3D\0" /* 42507, 42495 */
+ "SQ_RSRC_IMG_CUBE\0" /* 42510 */
+ "SQ_RSRC_IMG_1D_ARRAY\0" /* 42527 */
+ "SQ_RSRC_IMG_2D_ARRAY\0" /* 42548 */
+ "SQ_RSRC_IMG_2D_MSAA\0" /* 42569 */
+ "SQ_RSRC_IMG_2D_MSAA_ARRAY\0" /* 42589 */
+ "PITCH_GFX6\0" /* 42615 */
+ "BASE_ARRAY\0" /* 42626 */
+ "LAST_ARRAY\0" /* 42637 */
+ "MIN_LOD_WARN\0" /* 42648 */
+ "COUNTER_BANK_ID\0" /* 42661 */
+ "LOD_HDW_CNT_EN\0" /* 42677 */
+ "COMPRESSION_EN\0" /* 42692 */
+ "ALPHA_IS_ON_MSB\0" /* 42707 */
+ "COLOR_TRANSFORM\0" /* 42723 */
+ "LOST_ALPHA_BITS\0" /* 42739 */
+ "LOST_COLOR_BITS\0" /* 42755 */
+ "SQ_TEX_WRAP\0" /* 42771 */
+ "SQ_TEX_MIRROR\0" /* 42783 */
+ "SQ_TEX_CLAMP_LAST_TEXEL\0" /* 42797 */
+ "SQ_TEX_MIRROR_ONCE_LAST_TEXEL\0" /* 42821 */
+ "SQ_TEX_CLAMP_HALF_BORDER\0" /* 42851 */
+ "SQ_TEX_MIRROR_ONCE_HALF_BORDER\0" /* 42876 */
+ "SQ_TEX_CLAMP_BORDER\0" /* 42907 */
+ "SQ_TEX_MIRROR_ONCE_BORDER\0" /* 42927 */
+ "CLAMP_X\0" /* 42953 */
+ "CLAMP_Y\0" /* 42961 */
+ "CLAMP_Z\0" /* 42969 */
+ "MAX_ANISO_RATIO\0" /* 42977 */
+ "SQ_TEX_DEPTH_COMPARE_NEVER\0" /* 42993 */
+ "SQ_TEX_DEPTH_COMPARE_LESS\0" /* 43020 */
+ "SQ_TEX_DEPTH_COMPARE_EQUAL\0" /* 43046 */
+ "SQ_TEX_DEPTH_COMPARE_LESSEQUAL\0" /* 43073 */
+ "SQ_TEX_DEPTH_COMPARE_GREATER\0" /* 43104 */
+ "SQ_TEX_DEPTH_COMPARE_NOTEQUAL\0" /* 43133 */
+ "SQ_TEX_DEPTH_COMPARE_GREATEREQUAL\0" /* 43163 */
+ "SQ_TEX_DEPTH_COMPARE_ALWAYS\0" /* 43218, 43197 */
+ "DEPTH_COMPARE_FUNC\0" /* 43225 */
+ "FORCE_UNNORMALIZED\0" /* 43244 */
+ "ANISO_THRESHOLD\0" /* 43263 */
+ "MC_COORD_TRUNC\0" /* 43279 */
+ "FORCE_DEGAMMA\0" /* 43294 */
+ "ANISO_BIAS\0" /* 43308 */
+ "TRUNC_COORD\0" /* 43319 */
+ "DISABLE_CUBE_WRAP\0" /* 43331 */
+ "FILTER_MODE\0" /* 43349 */
+ "COMPAT_MODE\0" /* 43361 */
+ "MAX_LOD\0" /* 43373 */
+ "PERF_MIP\0" /* 43381 */
+ "PERF_Z\0" /* 43390 */
+ "LOD_BIAS\0" /* 43397 */
+ "LOD_BIAS_SEC\0" /* 43406 */
+ "SQ_TEX_XY_FILTER_POINT\0" /* 43433, 43419 */
+ "SQ_TEX_XY_FILTER_BILINEAR\0" /* 43442, 43461 */
+ "XY_MAG_FILTER\0" /* 43468 */
+ "SQ_TEX_XY_FILTER_ANISO_POINT\0" /* 43482 */
+ "SQ_TEX_XY_FILTER_ANISO_BILINEAR\0" /* 43511 */
+ "XY_MIN_FILTER\0" /* 43543 */
+ "SQ_TEX_Z_FILTER_NONE\0" /* 43557 */
+ "SQ_TEX_Z_FILTER_POINT\0" /* 43578 */
+ "SQ_TEX_Z_FILTER_LINEAR\0" /* 43600 */
+ "Z_FILTER\0" /* 43623 */
+ "MIP_FILTER\0" /* 43632 */
+ "MIP_POINT_PRECLAMP\0" /* 43643 */
+ "DISABLE_LSB_CEIL\0" /* 43662 */
+ "FILTER_PREC_FIX\0" /* 43679 */
+ "ANISO_OVERRIDE\0" /* 43695 */
+ "BORDER_COLOR_PTR\0" /* 43710 */
+ "UPGRADED_DEPTH\0" /* 43727 */
+ "SQ_TEX_BORDER_COLOR_TRANS_BLACK\0" /* 43742 */
+ "SQ_TEX_BORDER_COLOR_OPAQUE_BLACK\0" /* 43774 */
+ "SQ_TEX_BORDER_COLOR_OPAQUE_WHITE\0" /* 43807 */
+ "SQ_TEX_BORDER_COLOR_REGISTER\0" /* 43840 */
+ "BORDER_COLOR_TYPE\0" /* 43869 */
+ "VS_LOW_THRESHOLD\0" /* 43887 */
+ "GS_LOW_THRESHOLD\0" /* 43904 */
+ "ES_LOW_THRESHOLD\0" /* 43921 */
+ "HS_LOW_THRESHOLD\0" /* 43938 */
+ "LS_LOW_THRESHOLD\0" /* 43955 */
+ "PS_CU_EN\0" /* 43972, 43975 */
+ "VS_CU_EN\0" /* 43981 */
+ "GS_CU_EN\0" /* 43990 */
+ "ES_CU_EN\0" /* 43999 */
+ "LSHS_CU_EN\0" /* 44008 */
+ "X_R0\0" /* 44019 */
+ "RING_ORDER_TS0\0" /* 44024 */
+ "RING_ORDER_TS1\0" /* 44039 */
+ "RING_ORDER_TS2\0" /* 44054 */
+ "PIPE_ORDER_TS0\0" /* 44069 */
+ "PIPE_ORDER_TS1\0" /* 44084 */
+ "PIPE_ORDER_TS2\0" /* 44099 */
+ "PIPE_ORDER_TS3\0" /* 44114 */
+ "TS0_DUR_MULT\0" /* 44129 */
+ "TS1_DUR_MULT\0" /* 44142 */
+ "TS2_DUR_MULT\0" /* 44155 */
+ "TS3_DUR_MULT\0" /* 44168 */
+ "TS0_DURATION\0" /* 44181 */
+ "TS1_DURATION\0" /* 44194 */
+ "TS2_DURATION\0" /* 44207 */
+ "GPR_WRITE_PRIORITY\0" /* 44220 */
+ "EXP_PRIORITY_ORDER\0" /* 44239 */
+ "ENABLE_SQG_TOP_EVENTS\0" /* 44258 */
+ "ENABLE_SQG_BOP_EVENTS\0" /* 44280 */
+ "RSRC_MGMT_RESET\0" /* 44302 */
+ "X_DELAY_14_CLKS\0" /* 44318 */
+ "X_DELAY_16_CLKS\0" /* 44334 */
+ "X_DELAY_18_CLKS\0" /* 44350 */
+ "X_DELAY_20_CLKS\0" /* 44366 */
+ "X_DELAY_22_CLKS\0" /* 44382 */
+ "X_DELAY_24_CLKS\0" /* 44398 */
+ "X_DELAY_26_CLKS\0" /* 44414 */
+ "X_DELAY_28_CLKS\0" /* 44430 */
+ "X_DELAY_30_CLKS\0" /* 44446 */
+ "X_DELAY_32_CLKS\0" /* 44462 */
+ "X_DELAY_34_CLKS\0" /* 44478 */
+ "X_DELAY_4_CLKS\0" /* 44494 */
+ "X_DELAY_6_CLKS\0" /* 44509 */
+ "X_DELAY_8_CLKS\0" /* 44524 */
+ "X_DELAY_10_CLKS\0" /* 44539 */
+ "X_DELAY_12_CLKS\0" /* 44555 */
+ "VTX_DONE_DELAY\0" /* 44571 */
+ "INTERP_ONE_PRIM_PER_ROW\0" /* 44586 */
+ "PC_LIMIT_ENABLE\0" /* 44610 */
+ "PC_LIMIT_STRICT\0" /* 44626 */
+ "PC_LIMIT_SIZE\0" /* 44642 */
+ "TYPE_A\0" /* 44656 */
+ "VGPR_A\0" /* 44663 */
+ "SGPR_A\0" /* 44670 */
+ "LDS_A\0" /* 44677 */
+ "WAVES_A\0" /* 44683 */
+ "EN_A\0" /* 44691 */
+ "TYPE_B\0" /* 44696 */
+ "VGPR_B\0" /* 44703 */
+ "SGPR_B\0" /* 44710 */
+ "LDS_B\0" /* 44717 */
+ "WAVES_B\0" /* 44723 */
+ "EN_B\0" /* 44731 */
+ "MSAA1_X\0" /* 44736 */
+ "MSAA1_Y\0" /* 44744 */
+ "MSAA2_X\0" /* 44752 */
+ "MSAA2_Y\0" /* 44760 */
+ "MSAA4_X\0" /* 44768 */
+ "MSAA4_Y\0" /* 44776 */
+ "MSAA8_X\0" /* 44784 */
+ "MSAA8_Y\0" /* 44792 */
+ "MSAA16_X\0" /* 44800 */
+ "MSAA16_Y\0" /* 44809 */
+ "NUM_PIPES\0" /* 44818 */
+ "PIPE_INTERLEAVE_SIZE_GFX6\0" /* 44828 */
+ "BANK_INTERLEAVE_SIZE\0" /* 44854 */
+ "NUM_SHADER_ENGINES_GFX6\0" /* 44875 */
+ "SHADER_ENGINE_TILE_SIZE\0" /* 44899 */
+ "NUM_GPUS_GFX6\0" /* 44923 */
+ "MULTI_GPU_TILE_SIZE\0" /* 44937 */
+ "ROW_SIZE\0" /* 44957 */
+ "NUM_LOWER_PIPES\0" /* 44966 */
+ "ADDR_SURF_DISPLAY_MICRO_TILING\0" /* 44982 */
+ "ADDR_SURF_THIN_MICRO_TILING\0" /* 45013 */
+ "ADDR_SURF_DEPTH_MICRO_TILING\0" /* 45041 */
+ "ADDR_SURF_THICK_MICRO_TILING\0" /* 45070 */
+ "MICRO_TILE_MODE\0" /* 45099 */
+ "ARRAY_LINEAR_GENERAL\0" /* 45121, 45115 */
+ "ARRAY_LINEAR_ALIGNED\0" /* 45136 */
+ "ARRAY_1D_TILED_THIN1\0" /* 45157 */
+ "ARRAY_1D_TILED_THICK\0" /* 45178 */
+ "ARRAY_2D_TILED_THIN1\0" /* 45199 */
+ "ARRAY_2D_TILED_THICK\0" /* 45220 */
+ "ARRAY_2D_TILED_XTHICK\0" /* 45241 */
+ "ARRAY_3D_TILED_THIN1\0" /* 45263 */
+ "ARRAY_3D_TILED_THICK\0" /* 45284 */
+ "ARRAY_3D_TILED_XTHICK\0" /* 45305 */
+ "ARRAY_POWER_SAVE\0" /* 45327 */
+ "ARRAY_MODE\0" /* 45344 */
+ "ADDR_SURF_P2\0" /* 45355 */
+ "ADDR_SURF_P2_RESERVED0\0" /* 45368 */
+ "ADDR_SURF_P2_RESERVED1\0" /* 45391 */
+ "ADDR_SURF_P2_RESERVED2\0" /* 45427, 45414 */
+ "X_ADDR_SURF_P4_8X16\0" /* 45437 */
+ "X_ADDR_SURF_P4_16X16\0" /* 45457 */
+ "X_ADDR_SURF_P4_16X32\0" /* 45478 */
+ "X_ADDR_SURF_P4_32X32\0" /* 45499 */
+ "X_ADDR_SURF_P8_16X16_8X16\0" /* 45520 */
+ "X_ADDR_SURF_P8_16X32_8X16\0" /* 45546 */
+ "X_ADDR_SURF_P8_32X32_8X16\0" /* 45572 */
+ "X_ADDR_SURF_P8_16X32_16X16\0" /* 45598 */
+ "X_ADDR_SURF_P8_32X32_16X16\0" /* 45625 */
+ "X_ADDR_SURF_P8_32X32_16X32\0" /* 45652 */
+ "X_ADDR_SURF_P8_32X64_32X32\0" /* 45679 */
+ "PIPE_CONFIG\0" /* 45706 */
+ "ADDR_SURF_TILE_SPLIT_64B\0" /* 45718 */
+ "ADDR_SURF_TILE_SPLIT_128B\0" /* 45743 */
+ "ADDR_SURF_TILE_SPLIT_256B\0" /* 45769 */
+ "ADDR_SURF_TILE_SPLIT_512B\0" /* 45795 */
+ "ADDR_SURF_TILE_SPLIT_1KB\0" /* 45821 */
+ "ADDR_SURF_TILE_SPLIT_2KB\0" /* 45846 */
+ "ADDR_SURF_TILE_SPLIT_4KB\0" /* 45871 */
+ "TILE_SPLIT\0" /* 45896 */
+ "ADDR_SURF_BANK_WIDTH_1\0" /* 45907 */
+ "ADDR_SURF_BANK_WIDTH_2\0" /* 45930 */
+ "ADDR_SURF_BANK_WIDTH_4\0" /* 45953 */
+ "ADDR_SURF_BANK_WIDTH_8\0" /* 45976 */
+ "BANK_WIDTH\0" /* 45999 */
+ "ADDR_SURF_BANK_HEIGHT_1\0" /* 46010 */
+ "ADDR_SURF_BANK_HEIGHT_2\0" /* 46034 */
+ "ADDR_SURF_BANK_HEIGHT_4\0" /* 46058 */
+ "ADDR_SURF_BANK_HEIGHT_8\0" /* 46082 */
+ "BANK_HEIGHT\0" /* 46106 */
+ "ADDR_SURF_MACRO_ASPECT_1\0" /* 46118 */
+ "ADDR_SURF_MACRO_ASPECT_2\0" /* 46143 */
+ "ADDR_SURF_MACRO_ASPECT_4\0" /* 46168 */
+ "ADDR_SURF_MACRO_ASPECT_8\0" /* 46193 */
+ "MACRO_TILE_ASPECT\0" /* 46218 */
+ "ADDR_SURF_2_BANK\0" /* 46236 */
+ "ADDR_SURF_4_BANK\0" /* 46253 */
+ "ADDR_SURF_8_BANK\0" /* 46270 */
+ "ADDR_SURF_16_BANK\0" /* 46287 */
+ "NUM_BANKS\0" /* 46305 */
+ "ADDR_SURF_ROTATED_MICRO_TILING\0" /* 46315 */
+ "MICRO_TILE_MODE_NEW\0" /* 46346 */
+ "SAMPLE_SPLIT\0" /* 46366 */
+ "MEM_BASE\0" /* 46379 */
+ "WAVE_LIMIT\0" /* 46388 */
+ "LOCK_LOW_THRESHOLD\0" /* 46399 */
+ "VGPRS\0" /* 46418 */
+ "SGPRS\0" /* 46424 */
+ "FP_32_DENORMS\0" /* 46430 */
+ "FP_64_DENORMS\0" /* 46444 */
+ "FP_ALL_DENORMS\0" /* 46458 */
+ "FLOAT_MODE\0" /* 46473 */
+ "PRIV\0" /* 46484 */
+ "DX10_CLAMP\0" /* 46489 */
+ "DEBUG_MODE\0" /* 46500 */
+ "IEEE_MODE\0" /* 46511 */
+ "CU_GROUP_DISABLE\0" /* 46521 */
+ "CACHE_CTL\0" /* 46538 */
+ "CDBG_USER\0" /* 46548 */
+ "SCRATCH_EN\0" /* 46558 */
+ "USER_SGPR\0" /* 46569 */
+ "TRAP_PRESENT\0" /* 46579 */
+ "WAVE_CNT_EN\0" /* 46592 */
+ "EXTRA_LDS_SIZE\0" /* 46610, 46604 */
+ "EXCP_EN_SI\0" /* 46619 */
+ "EXCP_EN\0" /* 46630 */
+ "VGPR_COMP_CNT\0" /* 46638 */
+ "CU_GROUP_ENABLE\0" /* 46652 */
+ "OC_LDS_EN\0" /* 46668 */
+ "SO_BASE0_EN\0" /* 46678 */
+ "SO_BASE1_EN\0" /* 46690 */
+ "SO_BASE2_EN\0" /* 46702 */
+ "SO_BASE3_EN\0" /* 46714 */
+ "SO_EN\0" /* 46726 */
+ "DISPATCH_DRAW_EN\0" /* 46732 */
+ "GROUP_FIFO_DEPTH\0" /* 46749 */
+ "TG_SIZE_EN\0" /* 46766 */
+ "EXCP_EN_CIK_VI\0" /* 46777 */
+ "COMPUTE_SHADER_EN\0" /* 46792 */
+ "PARTIAL_TG_EN\0" /* 46810 */
+ "FORCE_START_AT_000\0" /* 46824 */
+ "ORDERED_APPEND_ENBL\0" /* 46843 */
+ "ORDERED_APPEND_MODE\0" /* 46863 */
+ "USE_THREAD_DIMENSIONS\0" /* 46883 */
+ "ORDER_MODE\0" /* 46905 */
+ "DISPATCH_CACHE_CNTL\0" /* 46916 */
+ "SCALAR_L1_INV_VOL\0" /* 46936, 46950 */
+ "VECTOR_L1_INV_VOL\0" /* 46954 */
+ "DATA_ATC\0" /* 46972 */
+ "RESTORE\0" /* 46981 */
+ "NUM_THREAD_FULL\0" /* 46989 */
+ "NUM_THREAD_PARTIAL\0" /* 47005 */
+ "INST_ATC\0" /* 47024 */
+ "BULKY\0" /* 47033 */
+ "TGID_X_EN\0" /* 47039 */
+ "TGID_Y_EN\0" /* 47049 */
+ "TGID_Z_EN\0" /* 47059 */
+ "TIDIG_COMP_CNT\0" /* 47069 */
+ "EXCP_EN_MSB\0" /* 47084 */
+ "WAVES_PER_SH_SI\0" /* 47096 */
+ "WAVES_PER_SH\0" /* 47112 */
+ "TG_PER_CU\0" /* 47125 */
+ "LOCK_THRESHOLD\0" /* 47135 */
+ "SIMD_DEST_CNTL\0" /* 47150 */
+ "FORCE_SIMD_DIST\0" /* 47165 */
+ "CU_GROUP_COUNT\0" /* 47181 */
+ "SH0_CU_EN\0" /* 47196 */
+ "SH1_CU_EN\0" /* 47206 */
+ "WAVES\0" /* 47216 */
+ "WAVESIZE\0" /* 47222 */
+ "SEND_SEID\0" /* 47231 */
+ "RESERVED3\0" /* 47241 */
+ "RESERVED4\0" /* 47251 */
+ "WAVE_ID_BASE\0" /* 47261 */
+ "PAYLOAD\0" /* 47274 */
+ "IS_EVENT\0" /* 47282 */
+ "IS_STATE\0" /* 47291 */
+ "PERF_SEL\0" /* 47300 */
+ "PERF_SEL1\0" /* 47309 */
+ "CNTR_MODE\0" /* 47319 */
+ "PERF_SEL2\0" /* 47329 */
+ "PERF_SEL3\0" /* 47339 */
+ "DISABLE_AND_RESET\0" /* 47349 */
+ "START_COUNTING\0" /* 47367 */
+ "STOP_COUNTING\0" /* 47382 */
+ "PERFMON_STATE\0" /* 47396 */
+ "SPM_PERFMON_STATE\0" /* 47410 */
+ "PERFMON_ENABLE_MODE\0" /* 47428 */
+ "PERFMON_SAMPLE_ENABLE\0" /* 47448 */
+ "DB_CLEAN_USER_DEFINED_MASK\0" /* 47470 */
+ "CB_CLEAN_USER_DEFINED_MASK\0" /* 47497 */
+ "VGT_BUSY_USER_DEFINED_MASK\0" /* 47524 */
+ "TA_BUSY_USER_DEFINED_MASK\0" /* 47551 */
+ "SX_BUSY_USER_DEFINED_MASK\0" /* 47577 */
+ "SPI_BUSY_USER_DEFINED_MASK\0" /* 47603 */
+ "SC_BUSY_USER_DEFINED_MASK\0" /* 47630 */
+ "PA_BUSY_USER_DEFINED_MASK\0" /* 47656 */
+ "GRBM_BUSY_USER_DEFINED_MASK\0" /* 47682 */
+ "DB_BUSY_USER_DEFINED_MASK\0" /* 47710 */
+ "CB_BUSY_USER_DEFINED_MASK\0" /* 47736 */
+ "CP_BUSY_USER_DEFINED_MASK\0" /* 47762 */
+ "IA_BUSY_USER_DEFINED_MASK\0" /* 47788 */
+ "GDS_BUSY_USER_DEFINED_MASK\0" /* 47814 */
+ "BCI_BUSY_USER_DEFINED_MASK\0" /* 47841 */
+ "RLC_BUSY_USER_DEFINED_MASK\0" /* 47868 */
+ "TC_BUSY_USER_DEFINED_MASK\0" /* 47895 */
+ "WD_BUSY_USER_DEFINED_MASK\0" /* 47921 */
+ "PERF_MODE\0" /* 47947 */
+ "PERF_MODE1\0" /* 47957 */
+ "PERF_MODE3\0" /* 47968 */
+ "PERF_MODE2\0" /* 47979 */
+ "PERF_SEID_IGNORE_MASK\0" /* 47990 */
+ "BIN0_MIN\0" /* 48012 */
+ "BIN0_MAX\0" /* 48021 */
+ "BIN1_MIN\0" /* 48030 */
+ "BIN1_MAX\0" /* 48039 */
+ "BIN2_MIN\0" /* 48048 */
+ "BIN2_MAX\0" /* 48057 */
+ "BIN3_MIN\0" /* 48066 */
+ "BIN3_MAX\0" /* 48075 */
+ "SQC_BANK_MASK\0" /* 48084 */
+ "SQC_CLIENT_MASK\0" /* 48098 */
+ "SPM_MODE\0" /* 48114 */
+ "SIMD_MASK\0" /* 48123 */
+ "PS_EN\0" /* 48133 */
+ "VS_EN\0" /* 48139 */
+ "GS_EN\0" /* 48145 */
+ "HS_EN\0" /* 48151 */
+ "LS_EN\0" /* 48157 */
+ "CS_EN\0" /* 48163 */
+ "CNTR_RATE\0" /* 48169 */
+ "DISABLE_FLUSH\0" /* 48179 */
+ "SH0_MASK\0" /* 48193 */
+ "SH1_MASK\0" /* 48202 */
+ "FORCE_EN\0" /* 48211 */
+ "PERFCOUNTER_SELECT1\0" /* 48220 */
+ "PERFCOUNTER_SELECT2\0" /* 48240 */
+ "PERFCOUNTER_SELECT3\0" /* 48260 */
+ "OP_FILTER_ENABLE\0" /* 48280 */
+ "OP_FILTER_SEL\0" /* 48297 */
+ "FORMAT_FILTER_ENABLE\0" /* 48311 */
+ "FORMAT_FILTER_SEL\0" /* 48332 */
+ "CLEAR_FILTER_ENABLE\0" /* 48350 */
+ "CLEAR_FILTER_SEL\0" /* 48370 */
+ "MRT_FILTER_ENABLE\0" /* 48387 */
+ "MRT_FILTER_SEL\0" /* 48405 */
+ "NUM_SAMPLES_FILTER_ENABLE\0" /* 48420 */
+ "NUM_SAMPLES_FILTER_SEL\0" /* 48446 */
+ "NUM_FRAGMENTS_FILTER_ENABLE\0" /* 48469 */
+ "NUM_FRAGMENTS_FILTER_SEL\0" /* 48497 */
+ "DEPTH_CLEAR_ENABLE\0" /* 48522 */
+ "STENCIL_CLEAR_ENABLE\0" /* 48541 */
+ "DEPTH_COPY\0" /* 48562 */
+ "STENCIL_COPY\0" /* 48573 */
+ "RESUMMARIZE_ENABLE\0" /* 48586 */
+ "STENCIL_COMPRESS_DISABLE\0" /* 48605 */
+ "DEPTH_COMPRESS_DISABLE\0" /* 48630 */
+ "COPY_CENTROID\0" /* 48653 */
+ "COPY_SAMPLE\0" /* 48667 */
+ "DECOMPRESS_ENABLE\0" /* 48679 */
+ "ZPASS_INCREMENT_DISABLE\0" /* 48697 */
+ "PERFECT_ZPASS_COUNTS\0" /* 48721 */
+ "SAMPLE_RATE\0" /* 48742 */
+ "ZPASS_ENABLE\0" /* 48754 */
+ "ZFAIL_ENABLE\0" /* 48767 */
+ "SFAIL_ENABLE\0" /* 48780 */
+ "DBFAIL_ENABLE\0" /* 48793 */
+ "SLICE_EVEN_ENABLE\0" /* 48807 */
+ "SLICE_ODD_ENABLE\0" /* 48825 */
+ "SLICE_START\0" /* 48842 */
+ "SLICE_MAX\0" /* 48854 */
+ "Z_READ_ONLY\0" /* 48864 */
+ "STENCIL_READ_ONLY\0" /* 48876 */
+ "FORCE_OFF\0" /* 48894 */
+ "FORCE_ENABLE\0" /* 48904 */
+ "FORCE_DISABLE\0" /* 48917 */
+ "FORCE_RESERVED\0" /* 48931 */
+ "FORCE_HIZ_ENABLE\0" /* 48946, 48954 */
+ "FORCE_HIS_ENABLE0\0" /* 48963, 48973 */
+ "FORCE_HIS_ENABLE1\0" /* 48981, 48991 */
+ "FORCE_SHADER_Z_ORDER\0" /* 49012, 48999 */
+ "FAST_Z_DISABLE\0" /* 49020 */
+ "FAST_STENCIL_DISABLE\0" /* 49035 */
+ "NOOP_CULL_DISABLE\0" /* 49056 */
+ "FORCE_COLOR_KILL\0" /* 49074 */
+ "FORCE_Z_READ\0" /* 49091 */
+ "FORCE_STENCIL_READ\0" /* 49104 */
+ "FORCE_FULL_Z_RANGE\0" /* 49123 */
+ "FORCE_QC_SMASK_CONFLICT\0" /* 49142 */
+ "DISABLE_VIEWPORT_CLAMP\0" /* 49166 */
+ "IGNORE_SC_ZRANGE\0" /* 49189 */
+ "DISABLE_FULLY_COVERED\0" /* 49206 */
+ "FORCE_SUMM_OFF\0" /* 49228 */
+ "FORCE_SUMM_MINZ\0" /* 49243 */
+ "FORCE_SUMM_MAXZ\0" /* 49259 */
+ "FORCE_SUMM_BOTH\0" /* 49275 */
+ "FORCE_Z_LIMIT_SUMM\0" /* 49291 */
+ "MAX_TILES_IN_DTT\0" /* 49310 */
+ "DISABLE_TILE_RATE_TILES\0" /* 49327 */
+ "FORCE_Z_DIRTY\0" /* 49359, 49351 */
+ "FORCE_STENCIL_DIRTY\0" /* 49365 */
+ "FORCE_Z_VALID\0" /* 49385 */
+ "FORCE_STENCIL_VALID\0" /* 49399 */
+ "PRESERVE_COMPRESSION\0" /* 49419, 49428 */
+ "PSLC_AUTO\0" /* 49440 */
+ "PSLC_ON_HANG_ONLY\0" /* 49450 */
+ "PSLC_ASAP\0" /* 49468 */
+ "PSLC_COUNTDOWN\0" /* 49478 */
+ "PARTIAL_SQUAD_LAUNCH_CONTROL\0" /* 49493 */
+ "PARTIAL_SQUAD_LAUNCH_COUNTDOWN\0" /* 49522 */
+ "DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION\0" /* 49553 */
+ "DISABLE_SMEM_EXPCLEAR_OPTIMIZATION\0" /* 49589 */
+ "DISABLE_COLOR_ON_VALIDATION\0" /* 49624 */
+ "DECOMPRESS_Z_ON_FLUSH\0" /* 49652 */
+ "DISABLE_REG_SNOOP\0" /* 49674 */
+ "DEPTH_BOUNDS_HIER_DEPTH_DISABLE\0" /* 49692 */
+ "SEPARATE_HIZS_FUNC_ENABLE\0" /* 49724 */
+ "HIZ_ZFUNC\0" /* 49754, 49750 */
+ "HIS_SFUNC_FF\0" /* 49760 */
+ "HIS_SFUNC_BF\0" /* 49773 */
+ "PRESERVE_ZRANGE\0" /* 49786 */
+ "PRESERVE_SRESULTS\0" /* 49802 */
+ "DISABLE_FAST_PASS\0" /* 49820 */
+ "TL_X\0" /* 49838 */
+ "TL_Y\0" /* 49843 */
+ "BR_X\0" /* 49848 */
+ "BR_Y\0" /* 49853 */
+ "ADDR5_SWIZZLE_MASK\0" /* 49858 */
+ "ARRAY_PRT_TILED_THIN1\0" /* 49877 */
+ "ARRAY_PRT_2D_TILED_THIN1\0" /* 49899 */
+ "X_ADDR_SURF_P16_32X32_8X16\0" /* 49924 */
+ "X_ADDR_SURF_P16_32X32_16X16\0" /* 49951 */
+ "Z_INVALID\0" /* 49979 */
+ "Z_16\0" /* 49989 */
+ "Z_24\0" /* 49994 */
+ "Z_32_FLOAT\0" /* 49999 */
+ "NUM_SAMPLES\0" /* 50010 */
+ "TILE_MODE_INDEX\0" /* 50022 */
+ "DECOMPRESS_ON_N_ZPLANES\0" /* 50038 */
+ "ALLOW_EXPCLEAR\0" /* 50062 */
+ "READ_SIZE\0" /* 50077 */
+ "TILE_SURFACE_ENABLE\0" /* 50087 */
+ "CLEAR_DISALLOWED\0" /* 50107 */
+ "ZRANGE_PRECISION\0" /* 50124 */
+ "STENCIL_INVALID\0" /* 50141 */
+ "STENCIL_8\0" /* 50157 */
+ "TILE_STENCIL_DISABLE\0" /* 50167 */
+ "PITCH_TILE_MAX\0" /* 50194, 50188 */
+ "HEIGHT_TILE_MAX\0" /* 50203 */
+ "SLICE_TILE_MAX\0" /* 50219 */
+ "WINDOW_X_OFFSET\0" /* 50234 */
+ "WINDOW_Y_OFFSET\0" /* 50257, 50250 */
+ "WINDOW_OFFSET_DISABLE\0" /* 50266 */
+ "CLIP_RULE\0" /* 50288 */
+ "ER_TRI\0" /* 50298 */
+ "ER_RECT\0" /* 50305 */
+ "ER_LINE_LR\0" /* 50313 */
+ "ER_LINE_RL\0" /* 50324 */
+ "ER_LINE_TB\0" /* 50335 */
+ "ER_LINE_BT\0" /* 50346 */
+ "HW_SCREEN_OFFSET_X\0" /* 50357 */
+ "HW_SCREEN_OFFSET_Y\0" /* 50376 */
+ "TARGET0_ENABLE\0" /* 50395 */
+ "TARGET1_ENABLE\0" /* 50410 */
+ "TARGET2_ENABLE\0" /* 50425 */
+ "TARGET3_ENABLE\0" /* 50440 */
+ "TARGET4_ENABLE\0" /* 50455 */
+ "TARGET5_ENABLE\0" /* 50470 */
+ "TARGET6_ENABLE\0" /* 50485 */
+ "TARGET7_ENABLE\0" /* 50500 */
+ "OUTPUT0_ENABLE\0" /* 50515 */
+ "OUTPUT1_ENABLE\0" /* 50530 */
+ "OUTPUT2_ENABLE\0" /* 50545 */
+ "OUTPUT3_ENABLE\0" /* 50560 */
+ "OUTPUT4_ENABLE\0" /* 50575 */
+ "OUTPUT5_ENABLE\0" /* 50590 */
+ "OUTPUT6_ENABLE\0" /* 50605 */
+ "OUTPUT7_ENABLE\0" /* 50620 */
+ "RASTER_CONFIG_RB_MAP_0\0" /* 50635 */
+ "RASTER_CONFIG_RB_MAP_1\0" /* 50658 */
+ "RASTER_CONFIG_RB_MAP_2\0" /* 50681 */
+ "RASTER_CONFIG_RB_MAP_3\0" /* 50704 */
+ "RB_MAP_PKR0\0" /* 50727 */
+ "RB_MAP_PKR1\0" /* 50739 */
+ "RASTER_CONFIG_RB_XSEL2_0\0" /* 50751 */
+ "RASTER_CONFIG_RB_XSEL2_1\0" /* 50776 */
+ "RASTER_CONFIG_RB_XSEL2_2\0" /* 50801 */
+ "RASTER_CONFIG_RB_XSEL2_3\0" /* 50826 */
+ "RB_XSEL2\0" /* 50851 */
+ "RB_XSEL\0" /* 50860 */
+ "RB_YSEL\0" /* 50868 */
+ "RASTER_CONFIG_PKR_MAP_0\0" /* 50876 */
+ "RASTER_CONFIG_PKR_MAP_1\0" /* 50900 */
+ "RASTER_CONFIG_PKR_MAP_2\0" /* 50924 */
+ "RASTER_CONFIG_PKR_MAP_3\0" /* 50948 */
+ "PKR_MAP\0" /* 50972 */
+ "RASTER_CONFIG_PKR_XSEL_0\0" /* 50980 */
+ "RASTER_CONFIG_PKR_XSEL_1\0" /* 51005 */
+ "RASTER_CONFIG_PKR_XSEL_2\0" /* 51030 */
+ "RASTER_CONFIG_PKR_XSEL_3\0" /* 51055 */
+ "PKR_XSEL\0" /* 51080 */
+ "RASTER_CONFIG_PKR_YSEL_0\0" /* 51089 */
+ "RASTER_CONFIG_PKR_YSEL_1\0" /* 51114 */
+ "RASTER_CONFIG_PKR_YSEL_2\0" /* 51139 */
+ "RASTER_CONFIG_PKR_YSEL_3\0" /* 51164 */
+ "PKR_YSEL\0" /* 51189 */
+ "RASTER_CONFIG_PKR_XSEL2_0\0" /* 51198 */
+ "RASTER_CONFIG_PKR_XSEL2_1\0" /* 51224 */
+ "RASTER_CONFIG_PKR_XSEL2_2\0" /* 51250 */
+ "RASTER_CONFIG_PKR_XSEL2_3\0" /* 51276 */
+ "PKR_XSEL2\0" /* 51302 */
+ "RASTER_CONFIG_SC_MAP_0\0" /* 51312 */
+ "RASTER_CONFIG_SC_MAP_1\0" /* 51335 */
+ "RASTER_CONFIG_SC_MAP_2\0" /* 51358 */
+ "RASTER_CONFIG_SC_MAP_3\0" /* 51381 */
+ "SC_MAP\0" /* 51404 */
+ "RASTER_CONFIG_SC_XSEL_8_WIDE_TILE\0" /* 51411 */
+ "RASTER_CONFIG_SC_XSEL_16_WIDE_TILE\0" /* 51445 */
+ "RASTER_CONFIG_SC_XSEL_32_WIDE_TILE\0" /* 51480 */
+ "RASTER_CONFIG_SC_XSEL_64_WIDE_TILE\0" /* 51515 */
+ "SC_XSEL\0" /* 51550 */
+ "RASTER_CONFIG_SC_YSEL_8_WIDE_TILE\0" /* 51558 */
+ "RASTER_CONFIG_SC_YSEL_16_WIDE_TILE\0" /* 51592 */
+ "RASTER_CONFIG_SC_YSEL_32_WIDE_TILE\0" /* 51627 */
+ "RASTER_CONFIG_SC_YSEL_64_WIDE_TILE\0" /* 51662 */
+ "SC_YSEL\0" /* 51697 */
+ "RASTER_CONFIG_SE_MAP_0\0" /* 51705 */
+ "RASTER_CONFIG_SE_MAP_1\0" /* 51728 */
+ "RASTER_CONFIG_SE_MAP_2\0" /* 51751 */
+ "RASTER_CONFIG_SE_MAP_3\0" /* 51774 */
+ "SE_MAP\0" /* 51797 */
+ "RASTER_CONFIG_SE_XSEL_8_WIDE_TILE\0" /* 51804 */
+ "RASTER_CONFIG_SE_XSEL_16_WIDE_TILE\0" /* 51838 */
+ "RASTER_CONFIG_SE_XSEL_32_WIDE_TILE\0" /* 51873 */
+ "RASTER_CONFIG_SE_XSEL_64_WIDE_TILE\0" /* 51908 */
+ "SE_XSEL_GFX6\0" /* 51943 */
+ "RASTER_CONFIG_SE_YSEL_8_WIDE_TILE\0" /* 51956 */
+ "RASTER_CONFIG_SE_YSEL_16_WIDE_TILE\0" /* 51990 */
+ "RASTER_CONFIG_SE_YSEL_32_WIDE_TILE\0" /* 52025 */
+ "RASTER_CONFIG_SE_YSEL_64_WIDE_TILE\0" /* 52060 */
+ "SE_YSEL_GFX6\0" /* 52095 */
+ "RASTER_CONFIG_SE_PAIR_MAP_0\0" /* 52108 */
+ "RASTER_CONFIG_SE_PAIR_MAP_1\0" /* 52136 */
+ "RASTER_CONFIG_SE_PAIR_MAP_2\0" /* 52164 */
+ "RASTER_CONFIG_SE_PAIR_MAP_3\0" /* 52192 */
+ "SE_PAIR_MAP\0" /* 52220 */
+ "RASTER_CONFIG_SE_PAIR_XSEL_8_WIDE_TILE\0" /* 52232 */
+ "RASTER_CONFIG_SE_PAIR_XSEL_16_WIDE_TILE\0" /* 52271 */
+ "RASTER_CONFIG_SE_PAIR_XSEL_32_WIDE_TILE\0" /* 52311 */
+ "RASTER_CONFIG_SE_PAIR_XSEL_64_WIDE_TILE\0" /* 52351 */
+ "SE_PAIR_XSEL_GFX6\0" /* 52391 */
+ "RASTER_CONFIG_SE_PAIR_YSEL_8_WIDE_TILE\0" /* 52409 */
+ "RASTER_CONFIG_SE_PAIR_YSEL_16_WIDE_TILE\0" /* 52448 */
+ "RASTER_CONFIG_SE_PAIR_YSEL_32_WIDE_TILE\0" /* 52488 */
+ "RASTER_CONFIG_SE_PAIR_YSEL_64_WIDE_TILE\0" /* 52528 */
+ "SE_PAIR_YSEL_GFX6\0" /* 52568 */
+ "OVERWRITE_COMBINER_DISABLE\0" /* 52586 */
+ "OVERWRITE_COMBINER_MRT_SHARING_DISABLE\0" /* 52613 */
+ "OVERWRITE_COMBINER_WATERMARK\0" /* 52652 */
+ "STENCIL_KEEP\0" /* 52681 */
+ "STENCIL_ZERO\0" /* 52694 */
+ "STENCIL_ONES\0" /* 52707 */
+ "STENCIL_REPLACE_TEST\0" /* 52720 */
+ "STENCIL_REPLACE_OP\0" /* 52741 */
+ "STENCIL_ADD_CLAMP\0" /* 52760 */
+ "STENCIL_SUB_CLAMP\0" /* 52778 */
+ "STENCIL_INVERT\0" /* 52796 */
+ "STENCIL_ADD_WRAP\0" /* 52811 */
+ "STENCIL_SUB_WRAP\0" /* 52828 */
+ "STENCIL_AND\0" /* 52845 */
+ "STENCIL_OR\0" /* 52857 */
+ "STENCIL_XOR\0" /* 52868 */
+ "STENCIL_NAND\0" /* 52880 */
+ "STENCIL_NOR\0" /* 52893 */
+ "STENCIL_XNOR\0" /* 52905 */
+ "STENCILFAIL\0" /* 52918 */
+ "STENCILZPASS\0" /* 52930 */
+ "STENCILZFAIL\0" /* 52943 */
+ "STENCILFAIL_BF\0" /* 52956 */
+ "STENCILZPASS_BF\0" /* 52971 */
+ "STENCILZFAIL_BF\0" /* 52987 */
+ "STENCILTESTVAL\0" /* 53003 */
+ "STENCILMASK\0" /* 53018 */
+ "STENCILWRITEMASK\0" /* 53030 */
+ "STENCILOPVAL\0" /* 53047 */
+ "STENCILTESTVAL_BF\0" /* 53060 */
+ "STENCILMASK_BF\0" /* 53078 */
+ "STENCILWRITEMASK_BF\0" /* 53093 */
+ "STENCILOPVAL_BF\0" /* 53113 */
+ "X_0_0F\0" /* 53129 */
+ "DEFAULT_VAL\0" /* 53136 */
+ "FLAT_SHADE\0" /* 53148 */
+ "CYL_WRAP\0" /* 53159 */
+ "PT_SPRITE_TEX\0" /* 53168 */
+ "DUP\0" /* 53182 */
+ "FP16_INTERP_MODE\0" /* 53186 */
+ "USE_DEFAULT_ATTR1\0" /* 53203 */
+ "DEFAULT_VAL_ATTR1\0" /* 53221 */
+ "PT_SPRITE_TEX_ATTR1\0" /* 53239 */
+ "ATTR0_VALID\0" /* 53259 */
+ "ATTR1_VALID\0" /* 53271 */
+ "VS_EXPORT_COUNT\0" /* 53283 */
+ "VS_HALF_PACK\0" /* 53299 */
+ "VS_EXPORTS_FOG\0" /* 53312 */
+ "VS_OUT_FOG_VEC_ADDR\0" /* 53327 */
+ "PERSP_SAMPLE_ENA\0" /* 53347 */
+ "PERSP_CENTER_ENA\0" /* 53364 */
+ "PERSP_CENTROID_ENA\0" /* 53381 */
+ "PERSP_PULL_MODEL_ENA\0" /* 53400 */
+ "LINEAR_SAMPLE_ENA\0" /* 53421 */
+ "LINEAR_CENTER_ENA\0" /* 53439 */
+ "LINEAR_CENTROID_ENA\0" /* 53457 */
+ "LINE_STIPPLE_TEX_ENA\0" /* 53477 */
+ "POS_X_FLOAT_ENA\0" /* 53498 */
+ "POS_Y_FLOAT_ENA\0" /* 53514 */
+ "POS_Z_FLOAT_ENA\0" /* 53530 */
+ "POS_W_FLOAT_ENA\0" /* 53546 */
+ "FRONT_FACE_ENA\0" /* 53562 */
+ "ANCILLARY_ENA\0" /* 53577 */
+ "SAMPLE_COVERAGE_ENA\0" /* 53591 */
+ "POS_FIXED_PT_ENA\0" /* 53611 */
+ "FLAT_SHADE_ENA\0" /* 53628 */
+ "PNT_SPRITE_ENA\0" /* 53643 */
+ "SPI_PNT_SPRITE_SEL_0\0" /* 53658 */
+ "SPI_PNT_SPRITE_SEL_1\0" /* 53679 */
+ "SPI_PNT_SPRITE_SEL_S\0" /* 53700 */
+ "SPI_PNT_SPRITE_SEL_T\0" /* 53721 */
+ "SPI_PNT_SPRITE_SEL_NONE\0" /* 53742 */
+ "PNT_SPRITE_OVRD_X\0" /* 53766 */
+ "PNT_SPRITE_OVRD_Y\0" /* 53784 */
+ "PNT_SPRITE_OVRD_Z\0" /* 53802 */
+ "PNT_SPRITE_OVRD_W\0" /* 53820 */
+ "PNT_SPRITE_TOP_1\0" /* 53838 */
+ "NUM_INTERP\0" /* 53855 */
+ "PARAM_GEN\0" /* 53866 */
+ "FOG_ADDR\0" /* 53876 */
+ "BC_OPTIMIZE_DISABLE\0" /* 53885 */
+ "PASS_FOG_THROUGH_PS\0" /* 53905 */
+ "PERSP_CENTER_CNTL\0" /* 53925 */
+ "PERSP_CENTROID_CNTL\0" /* 53943 */
+ "LINEAR_CENTER_CNTL\0" /* 53963 */
+ "LINEAR_CENTROID_CNTL\0" /* 53982 */
+ "X_CALCULATE_PER_PIXEL_FLOATING_POINT_POSITION_AT\0" /* 54003 */
+ "POS_FLOAT_LOCATION\0" /* 54052 */
+ "POS_FLOAT_ULC\0" /* 54071 */
+ "FRONT_FACE_ALL_BITS\0" /* 54085 */
+ "NUM_PS_WAVES\0" /* 54105 */
+ "NUM_VS_WAVES\0" /* 54118 */
+ "NUM_GS_WAVES\0" /* 54131 */
+ "NUM_ES_WAVES\0" /* 54144 */
+ "NUM_HS_WAVES\0" /* 54157 */
+ "NUM_LS_WAVES\0" /* 54170 */
+ "SPI_SHADER_NONE\0" /* 54183 */
+ "SPI_SHADER_1COMP\0" /* 54199 */
+ "SPI_SHADER_2COMP\0" /* 54216 */
+ "SPI_SHADER_4COMPRESS\0" /* 54233 */
+ "SPI_SHADER_4COMP\0" /* 54254 */
+ "POS0_EXPORT_FORMAT\0" /* 54271 */
+ "POS1_EXPORT_FORMAT\0" /* 54290 */
+ "POS2_EXPORT_FORMAT\0" /* 54309 */
+ "POS3_EXPORT_FORMAT\0" /* 54328 */
+ "SPI_SHADER_ZERO\0" /* 54347 */
+ "SPI_SHADER_32_R\0" /* 54363 */
+ "SPI_SHADER_32_GR\0" /* 54379 */
+ "SPI_SHADER_32_AR\0" /* 54396 */
+ "SPI_SHADER_FP16_ABGR\0" /* 54413 */
+ "SPI_SHADER_UNORM16_ABGR\0" /* 54434 */
+ "SPI_SHADER_SNORM16_ABGR\0" /* 54458 */
+ "SPI_SHADER_UINT16_ABGR\0" /* 54482 */
+ "SPI_SHADER_SINT16_ABGR\0" /* 54505 */
+ "SPI_SHADER_32_ABGR\0" /* 54528 */
+ "Z_EXPORT_FORMAT\0" /* 54547 */
+ "COL0_EXPORT_FORMAT\0" /* 54563 */
+ "COL1_EXPORT_FORMAT\0" /* 54582 */
+ "COL2_EXPORT_FORMAT\0" /* 54601 */
+ "COL3_EXPORT_FORMAT\0" /* 54620 */
+ "COL4_EXPORT_FORMAT\0" /* 54639 */
+ "COL5_EXPORT_FORMAT\0" /* 54658 */
+ "COL6_EXPORT_FORMAT\0" /* 54677 */
+ "COL7_EXPORT_FORMAT\0" /* 54696 */
+ "SX_RT_EXPORT_NO_CONVERSION\0" /* 54715 */
+ "SX_RT_EXPORT_32_R\0" /* 54742 */
+ "SX_RT_EXPORT_32_A\0" /* 54760 */
+ "SX_RT_EXPORT_10_11_11\0" /* 54778 */
+ "SX_RT_EXPORT_2_10_10_10\0" /* 54800 */
+ "SX_RT_EXPORT_8_8_8_8\0" /* 54824 */
+ "SX_RT_EXPORT_5_6_5\0" /* 54845 */
+ "SX_RT_EXPORT_1_5_5_5\0" /* 54864 */
+ "SX_RT_EXPORT_4_4_4_4\0" /* 54885 */
+ "SX_RT_EXPORT_16_16_GR\0" /* 54906 */
+ "SX_RT_EXPORT_16_16_AR\0" /* 54928 */
+ "MRT0\0" /* 54950 */
+ "MRT1\0" /* 54955 */
+ "MRT2\0" /* 54960 */
+ "MRT3\0" /* 54965 */
+ "MRT4\0" /* 54970 */
+ "MRT5\0" /* 54975 */
+ "MRT6\0" /* 54980 */
+ "MRT7\0" /* 54985 */
+ "EXACT\0" /* 54990 */
+ "11BIT_FORMAT\0" /* 54996 */
+ "10BIT_FORMAT\0" /* 55009 */
+ "8BIT_FORMAT\0" /* 55022 */
+ "6BIT_FORMAT\0" /* 55034 */
+ "5BIT_FORMAT\0" /* 55046 */
+ "4BIT_FORMAT\0" /* 55058 */
+ "MRT0_EPSILON\0" /* 55070 */
+ "MRT1_EPSILON\0" /* 55083 */
+ "MRT2_EPSILON\0" /* 55096 */
+ "MRT3_EPSILON\0" /* 55109 */
+ "MRT4_EPSILON\0" /* 55122 */
+ "MRT5_EPSILON\0" /* 55135 */
+ "MRT6_EPSILON\0" /* 55148 */
+ "MRT7_EPSILON\0" /* 55161 */
+ "MRT0_COLOR_OPT_DISABLE\0" /* 55174 */
+ "MRT0_ALPHA_OPT_DISABLE\0" /* 55197 */
+ "MRT1_COLOR_OPT_DISABLE\0" /* 55220 */
+ "MRT1_ALPHA_OPT_DISABLE\0" /* 55243 */
+ "MRT2_COLOR_OPT_DISABLE\0" /* 55266 */
+ "MRT2_ALPHA_OPT_DISABLE\0" /* 55289 */
+ "MRT3_COLOR_OPT_DISABLE\0" /* 55312 */
+ "MRT3_ALPHA_OPT_DISABLE\0" /* 55335 */
+ "MRT4_COLOR_OPT_DISABLE\0" /* 55358 */
+ "MRT4_ALPHA_OPT_DISABLE\0" /* 55381 */
+ "MRT5_COLOR_OPT_DISABLE\0" /* 55404 */
+ "MRT5_ALPHA_OPT_DISABLE\0" /* 55427 */
+ "MRT6_COLOR_OPT_DISABLE\0" /* 55450 */
+ "MRT6_ALPHA_OPT_DISABLE\0" /* 55473 */
+ "MRT7_COLOR_OPT_DISABLE\0" /* 55496 */
+ "MRT7_ALPHA_OPT_DISABLE\0" /* 55519 */
+ "PIXEN_ZERO_OPT_DISABLE\0" /* 55542 */
+ "BLEND_OPT_PRESERVE_NONE_IGNORE_ALL\0" /* 55565 */
+ "BLEND_OPT_PRESERVE_ALL_IGNORE_NONE\0" /* 55600 */
+ "BLEND_OPT_PRESERVE_C1_IGNORE_C0\0" /* 55635 */
+ "BLEND_OPT_PRESERVE_C0_IGNORE_C1\0" /* 55667 */
+ "BLEND_OPT_PRESERVE_A1_IGNORE_A0\0" /* 55699 */
+ "BLEND_OPT_PRESERVE_A0_IGNORE_A1\0" /* 55731 */
+ "BLEND_OPT_PRESERVE_NONE_IGNORE_A0\0" /* 55763 */
+ "BLEND_OPT_PRESERVE_NONE_IGNORE_NONE\0" /* 55797 */
+ "COLOR_SRC_OPT\0" /* 55833 */
+ "COLOR_DST_OPT\0" /* 55847 */
+ "OPT_COMB_NONE\0" /* 55861 */
+ "OPT_COMB_ADD\0" /* 55875 */
+ "OPT_COMB_SUBTRACT\0" /* 55888 */
+ "OPT_COMB_MIN\0" /* 55906 */
+ "OPT_COMB_MAX\0" /* 55919 */
+ "OPT_COMB_REVSUBTRACT\0" /* 55932 */
+ "OPT_COMB_BLEND_DISABLED\0" /* 55953 */
+ "OPT_COMB_SAFE_ADD\0" /* 55977 */
+ "COLOR_COMB_FCN\0" /* 55995 */
+ "ALPHA_SRC_OPT\0" /* 56010 */
+ "ALPHA_DST_OPT\0" /* 56024 */
+ "ALPHA_COMB_FCN\0" /* 56038 */
+ "BLEND_ZERO\0" /* 56053 */
+ "BLEND_ONE\0" /* 56064 */
+ "BLEND_SRC_COLOR\0" /* 56074 */
+ "BLEND_ONE_MINUS_SRC_COLOR\0" /* 56090 */
+ "BLEND_SRC_ALPHA\0" /* 56116 */
+ "BLEND_ONE_MINUS_SRC_ALPHA\0" /* 56132 */
+ "BLEND_DST_ALPHA\0" /* 56158 */
+ "BLEND_ONE_MINUS_DST_ALPHA\0" /* 56174 */
+ "BLEND_DST_COLOR\0" /* 56200 */
+ "BLEND_ONE_MINUS_DST_COLOR\0" /* 56216 */
+ "BLEND_SRC_ALPHA_SATURATE\0" /* 56242 */
+ "BLEND_CONSTANT_COLOR\0" /* 56267 */
+ "BLEND_ONE_MINUS_CONSTANT_COLOR\0" /* 56288 */
+ "BLEND_SRC1_COLOR\0" /* 56319 */
+ "BLEND_INV_SRC1_COLOR\0" /* 56336 */
+ "BLEND_SRC1_ALPHA\0" /* 56357 */
+ "BLEND_INV_SRC1_ALPHA\0" /* 56374 */
+ "BLEND_CONSTANT_ALPHA\0" /* 56395 */
+ "BLEND_ONE_MINUS_CONSTANT_ALPHA\0" /* 56416 */
+ "COLOR_SRCBLEND\0" /* 56447 */
+ "COMB_DST_PLUS_SRC\0" /* 56462 */
+ "COMB_SRC_MINUS_DST\0" /* 56480 */
+ "COMB_MIN_DST_SRC\0" /* 56499 */
+ "COMB_MAX_DST_SRC\0" /* 56516 */
+ "COMB_DST_MINUS_SRC\0" /* 56533 */
+ "COLOR_DESTBLEND\0" /* 56552 */
+ "ALPHA_SRCBLEND\0" /* 56568 */
+ "ALPHA_DESTBLEND\0" /* 56583 */
+ "SEPARATE_ALPHA_BLEND\0" /* 56599 */
+ "DISABLE_ROP3\0" /* 56628, 56620 */
+ "SRC_STATE_ID\0" /* 56633 */
+ "BASE_ADDR_GFX6\0" /* 56646 */
+ "DI_SRC_SEL_DMA\0" /* 56661 */
+ "DI_SRC_SEL_IMMEDIATE\0" /* 56676 */
+ "DI_SRC_SEL_AUTO_INDEX\0" /* 56697 */
+ "DI_SRC_SEL_RESERVED\0" /* 56719 */
+ "SOURCE_SELECT\0" /* 56739 */
+ "DI_MAJOR_MODE_0\0" /* 56753 */
+ "DI_MAJOR_MODE_1\0" /* 56769 */
+ "MAJOR_MODE\0" /* 56785 */
+ "NOT_EOP\0" /* 56796 */
+ "USE_OPAQUE\0" /* 56804 */
+ "ADDRESS_LOW\0" /* 56815 */
+ "STENCIL_ENABLE\0" /* 56827 */
+ "Z_WRITE_ENABLE\0" /* 56842 */
+ "DEPTH_BOUNDS_ENABLE\0" /* 56857 */
+ "FRAG_NEVER\0" /* 56877 */
+ "FRAG_LESS\0" /* 56888 */
+ "FRAG_EQUAL\0" /* 56898 */
+ "FRAG_LEQUAL\0" /* 56909 */
+ "FRAG_GREATER\0" /* 56921 */
+ "FRAG_NOTEQUAL\0" /* 56934 */
+ "FRAG_GEQUAL\0" /* 56948 */
+ "FRAG_ALWAYS\0" /* 56960 */
+ "BACKFACE_ENABLE\0" /* 56972 */
+ "REF_NEVER\0" /* 56988 */
+ "REF_LESS\0" /* 56998 */
+ "REF_EQUAL\0" /* 57007 */
+ "REF_LEQUAL\0" /* 57017 */
+ "REF_GREATER\0" /* 57028 */
+ "REF_NOTEQUAL\0" /* 57040 */
+ "REF_GEQUAL\0" /* 57053 */
+ "REF_ALWAYS\0" /* 57064 */
+ "STENCILFUNC\0" /* 57075 */
+ "STENCILFUNC_BF\0" /* 57087 */
+ "ENABLE_COLOR_WRITES_ON_DEPTH_FAIL\0" /* 57102 */
+ "DISABLE_COLOR_WRITES_ON_DEPTH_PASS\0" /* 57136 */
+ "MAX_ANCHOR_SAMPLES\0" /* 57171 */
+ "PS_ITER_SAMPLES\0" /* 57190 */
+ "MASK_EXPORT_NUM_SAMPLES\0" /* 57206 */
+ "ALPHA_TO_MASK_NUM_SAMPLES\0" /* 57230 */
+ "HIGH_QUALITY_INTERSECTIONS\0" /* 57256 */
+ "INCOHERENT_EQAA_READS\0" /* 57283 */
+ "INTERPOLATE_COMP_Z\0" /* 57305 */
+ "INTERPOLATE_SRC_Z\0" /* 57324 */
+ "STATIC_ANCHOR_ASSOCIATIONS\0" /* 57342 */
+ "ALPHA_TO_MASK_EQAA_DISABLE\0" /* 57369 */
+ "OVERRASTERIZATION_AMOUNT\0" /* 57396 */
+ "ENABLE_POSTZ_OVERRASTERIZATION\0" /* 57421 */
+ "DISABLE_DUAL_QUAD\0" /* 57452 */
+ "DEGAMMA_ENABLE\0" /* 57470 */
+ "CB_DISABLE\0" /* 57485 */
+ "CB_NORMAL\0" /* 57496 */
+ "CB_ELIMINATE_FAST_CLEAR\0" /* 57506, 57519 */
+ "CB_RESOLVE\0" /* 57530 */
+ "CB_FMASK_DECOMPRESS\0" /* 57541 */
+ "CB_DCC_DECOMPRESS\0" /* 57561 */
+ "X_0X00\0" /* 57579 */
+ "X_0X05\0" /* 57586 */
+ "X_0X0A\0" /* 57593 */
+ "X_0X0F\0" /* 57600 */
+ "X_0X11\0" /* 57607 */
+ "X_0X22\0" /* 57614 */
+ "X_0X33\0" /* 57621 */
+ "X_0X44\0" /* 57628 */
+ "X_0X50\0" /* 57635 */
+ "X_0X55\0" /* 57642 */
+ "X_0X5A\0" /* 57649 */
+ "X_0X5F\0" /* 57656 */
+ "X_0X66\0" /* 57663 */
+ "X_0X77\0" /* 57670 */
+ "X_0X88\0" /* 57677 */
+ "X_0X99\0" /* 57684 */
+ "X_0XA0\0" /* 57691 */
+ "X_0XA5\0" /* 57698 */
+ "X_0XAA\0" /* 57705 */
+ "X_0XAF\0" /* 57712 */
+ "X_0XBB\0" /* 57719 */
+ "X_0XCC\0" /* 57726 */
+ "X_0XDD\0" /* 57733 */
+ "X_0XEE\0" /* 57740 */
+ "X_0XF0\0" /* 57747 */
+ "X_0XF5\0" /* 57754 */
+ "X_0XFA\0" /* 57761 */
+ "X_0XFF\0" /* 57768 */
+ "Z_EXPORT_ENABLE\0" /* 57775 */
+ "STENCIL_TEST_VAL_EXPORT_ENABLE\0" /* 57791 */
+ "STENCIL_OP_VAL_EXPORT_ENABLE\0" /* 57822 */
+ "LATE_Z\0" /* 57851 */
+ "EARLY_Z_THEN_LATE_Z\0" /* 57858 */
+ "RE_Z\0" /* 57878 */
+ "EARLY_Z_THEN_RE_Z\0" /* 57883 */
+ "KILL_ENABLE\0" /* 57901 */
+ "COVERAGE_TO_MASK_ENABLE\0" /* 57913 */
+ "MASK_EXPORT_ENABLE\0" /* 57937 */
+ "EXEC_ON_HIER_FAIL\0" /* 57956 */
+ "EXEC_ON_NOOP\0" /* 57974 */
+ "ALPHA_TO_MASK_DISABLE\0" /* 57987 */
+ "DEPTH_BEFORE_SHADER\0" /* 58009 */
+ "EXPORT_ANY_Z\0" /* 58029 */
+ "EXPORT_LESS_THAN_Z\0" /* 58042 */
+ "EXPORT_GREATER_THAN_Z\0" /* 58061 */
+ "EXPORT_RESERVED\0" /* 58083 */
+ "CONSERVATIVE_Z_EXPORT\0" /* 58099 */
+ "DUAL_QUAD_DISABLE\0" /* 58121 */
+ "UCP_ENA_0\0" /* 58139 */
+ "UCP_ENA_1\0" /* 58149 */
+ "UCP_ENA_2\0" /* 58159 */
+ "UCP_ENA_3\0" /* 58169 */
+ "UCP_ENA_4\0" /* 58179 */
+ "UCP_ENA_5\0" /* 58189 */
+ "PS_UCP_Y_SCALE_NEG\0" /* 58199 */
+ "PS_UCP_MODE\0" /* 58218 */
+ "CLIP_DISABLE\0" /* 58230 */
+ "UCP_CULL_ONLY_ENA\0" /* 58243 */
+ "BOUNDARY_EDGE_FLAG_ENA\0" /* 58261 */
+ "DX_CLIP_SPACE_DEF\0" /* 58284 */
+ "DIS_CLIP_ERR_DETECT\0" /* 58302 */
+ "VTX_KILL_OR\0" /* 58322 */
+ "DX_RASTERIZATION_KILL\0" /* 58334 */
+ "DX_LINEAR_ATTR_CLIP_ENA\0" /* 58356 */
+ "VTE_VPORT_PROVOKE_DISABLE\0" /* 58380 */
+ "ZCLIP_NEAR_DISABLE\0" /* 58406 */
+ "ZCLIP_FAR_DISABLE\0" /* 58425 */
+ "CULL_FRONT\0" /* 58443 */
+ "CULL_BACK\0" /* 58454 */
+ "X_DISABLE_POLY_MODE\0" /* 58464, 58474 */
+ "X_DUAL_MODE\0" /* 58484 */
+ "X_DRAW_POINTS\0" /* 58496 */
+ "X_DRAW_LINES\0" /* 58510 */
+ "X_DRAW_TRIANGLES\0" /* 58523 */
+ "POLYMODE_FRONT_PTYPE\0" /* 58540 */
+ "POLYMODE_BACK_PTYPE\0" /* 58561 */
+ "POLY_OFFSET_FRONT_ENABLE\0" /* 58581 */
+ "POLY_OFFSET_BACK_ENABLE\0" /* 58606 */
+ "POLY_OFFSET_PARA_ENABLE\0" /* 58630 */
+ "VTX_WINDOW_OFFSET_ENABLE\0" /* 58654 */
+ "PROVOKING_VTX_LAST\0" /* 58679 */
+ "PERSP_CORR_DIS\0" /* 58698 */
+ "MULTI_PRIM_IB_ENA\0" /* 58713 */
+ "VPORT_X_SCALE_ENA\0" /* 58731 */
+ "VPORT_X_OFFSET_ENA\0" /* 58749 */
+ "VPORT_Y_SCALE_ENA\0" /* 58768 */
+ "VPORT_Y_OFFSET_ENA\0" /* 58786 */
+ "VPORT_Z_SCALE_ENA\0" /* 58805 */
+ "VPORT_Z_OFFSET_ENA\0" /* 58823 */
+ "VTX_XY_FMT\0" /* 58842 */
+ "VTX_Z_FMT\0" /* 58853 */
+ "VTX_W0_FMT\0" /* 58863 */
+ "CLIP_DIST_ENA_0\0" /* 58874 */
+ "CLIP_DIST_ENA_1\0" /* 58890 */
+ "CLIP_DIST_ENA_2\0" /* 58906 */
+ "CLIP_DIST_ENA_3\0" /* 58922 */
+ "CLIP_DIST_ENA_4\0" /* 58938 */
+ "CLIP_DIST_ENA_5\0" /* 58954 */
+ "CLIP_DIST_ENA_6\0" /* 58970 */
+ "CLIP_DIST_ENA_7\0" /* 58986 */
+ "CULL_DIST_ENA_0\0" /* 59002 */
+ "CULL_DIST_ENA_1\0" /* 59018 */
+ "CULL_DIST_ENA_2\0" /* 59034 */
+ "CULL_DIST_ENA_3\0" /* 59050 */
+ "CULL_DIST_ENA_4\0" /* 59066 */
+ "CULL_DIST_ENA_5\0" /* 59082 */
+ "CULL_DIST_ENA_6\0" /* 59098 */
+ "CULL_DIST_ENA_7\0" /* 59114 */
+ "USE_VTX_POINT_SIZE\0" /* 59130 */
+ "USE_VTX_EDGE_FLAG\0" /* 59149 */
+ "USE_VTX_RENDER_TARGET_INDX\0" /* 59167 */
+ "USE_VTX_VIEWPORT_INDX\0" /* 59194 */
+ "USE_VTX_KILL_FLAG\0" /* 59216 */
+ "VS_OUT_MISC_VEC_ENA\0" /* 59234 */
+ "VS_OUT_CCDIST0_VEC_ENA\0" /* 59254 */
+ "VS_OUT_CCDIST1_VEC_ENA\0" /* 59277 */
+ "VS_OUT_MISC_SIDE_BUS_ENA\0" /* 59300 */
+ "USE_VTX_GS_CUT_FLAG\0" /* 59325 */
+ "USE_VTX_LINE_WIDTH\0" /* 59345 */
+ "VTE_XY_INF_DISCARD\0" /* 59364 */
+ "VTE_Z_INF_DISCARD\0" /* 59383 */
+ "VTE_W_INF_DISCARD\0" /* 59401 */
+ "VTE_0XNANINF_IS_0\0" /* 59419 */
+ "VTE_XY_NAN_RETAIN\0" /* 59437 */
+ "VTE_Z_NAN_RETAIN\0" /* 59455 */
+ "VTE_W_NAN_RETAIN\0" /* 59472 */
+ "VTE_W_RECIP_NAN_IS_0\0" /* 59489 */
+ "VS_XY_NAN_TO_INF\0" /* 59510 */
+ "VS_XY_INF_RETAIN\0" /* 59527 */
+ "VS_Z_NAN_TO_INF\0" /* 59544 */
+ "VS_Z_INF_RETAIN\0" /* 59560 */
+ "VS_W_NAN_TO_INF\0" /* 59576 */
+ "VS_W_INF_RETAIN\0" /* 59592 */
+ "VS_CLIP_DIST_INF_DISCARD\0" /* 59608 */
+ "VTE_NO_OUTPUT_NEG_0\0" /* 59633 */
+ "LINE_STIPPLE_RESET\0" /* 59653 */
+ "EXPAND_FULL_LENGTH\0" /* 59672 */
+ "FRACTIONAL_ACCUM\0" /* 59691 */
+ "DIAMOND_ADJUST\0" /* 59708 */
+ "TRIANGLE_FILTER_DISABLE\0" /* 59723 */
+ "LINE_FILTER_DISABLE\0" /* 59747 */
+ "POINT_FILTER_DISABLE\0" /* 59767 */
+ "RECTANGLE_FILTER_DISABLE\0" /* 59788 */
+ "TRIANGLE_EXPAND_ENA\0" /* 59813 */
+ "LINE_EXPAND_ENA\0" /* 59833 */
+ "POINT_EXPAND_ENA\0" /* 59849 */
+ "RECTANGLE_EXPAND_ENA\0" /* 59866 */
+ "PRIM_EXPAND_CONSTANT\0" /* 59887 */
+ "XMAX_RIGHT_EXCLUSION\0" /* 59908 */
+ "YMAX_BOTTOM_EXCLUSION\0" /* 59929 */
+ "SMALL_PRIM_FILTER_ENABLE\0" /* 59951 */
+ "MIN_SIZE\0" /* 59976 */
+ "LINE_PATTERN\0" /* 59985 */
+ "REPEAT_COUNT\0" /* 59998 */
+ "PATTERN_BIT_ORDER\0" /* 60011 */
+ "AUTO_RESET_CNTL\0" /* 60029 */
+ "VGT_OUTPATH_VTX_REUSE\0" /* 60045 */
+ "VGT_OUTPATH_TESS_EN\0" /* 60067 */
+ "VGT_OUTPATH_PASSTHRU\0" /* 60087 */
+ "VGT_OUTPATH_GS_BLOCK\0" /* 60108 */
+ "VGT_OUTPATH_HS_BLOCK\0" /* 60129 */
+ "PATH_SELECT\0" /* 60150 */
+ "TESS_MODE\0" /* 60162 */
+ "VGT_GRP_3D_POINT\0" /* 60172 */
+ "VGT_GRP_3D_LINE\0" /* 60189 */
+ "VGT_GRP_3D_TRI\0" /* 60205 */
+ "VGT_GRP_3D_RECT\0" /* 60220 */
+ "VGT_GRP_3D_QUAD\0" /* 60236 */
+ "VGT_GRP_2D_COPY_RECT_V0\0" /* 60252 */
+ "VGT_GRP_2D_COPY_RECT_V1\0" /* 60276 */
+ "VGT_GRP_2D_COPY_RECT_V2\0" /* 60300 */
+ "VGT_GRP_2D_COPY_RECT_V3\0" /* 60324 */
+ "VGT_GRP_2D_FILL_RECT\0" /* 60348 */
+ "VGT_GRP_2D_LINE\0" /* 60369 */
+ "VGT_GRP_2D_TRI\0" /* 60385 */
+ "VGT_GRP_PRIM_INDEX_LINE\0" /* 60400 */
+ "VGT_GRP_PRIM_INDEX_TRI\0" /* 60424 */
+ "VGT_GRP_PRIM_INDEX_QUAD\0" /* 60447 */
+ "VGT_GRP_3D_LINE_ADJ\0" /* 60471 */
+ "VGT_GRP_3D_TRI_ADJ\0" /* 60491 */
+ "VGT_GRP_3D_PATCH\0" /* 60510 */
+ "RETAIN_ORDER\0" /* 60527 */
+ "RETAIN_QUADS\0" /* 60540 */
+ "VGT_GRP_LIST\0" /* 60553 */
+ "VGT_GRP_STRIP\0" /* 60566 */
+ "VGT_GRP_FAN\0" /* 60580 */
+ "VGT_GRP_LOOP\0" /* 60592 */
+ "VGT_GRP_POLYGON\0" /* 60605 */
+ "PRIM_ORDER\0" /* 60621 */
+ "COMP_X_EN\0" /* 60632 */
+ "COMP_Y_EN\0" /* 60642 */
+ "COMP_Z_EN\0" /* 60652 */
+ "COMP_W_EN\0" /* 60662 */
+ "SHIFT\0" /* 60672 */
+ "VGT_GRP_INDEX_16\0" /* 60678 */
+ "VGT_GRP_INDEX_32\0" /* 60695 */
+ "VGT_GRP_UINT_16\0" /* 60712 */
+ "VGT_GRP_UINT_32\0" /* 60728 */
+ "VGT_GRP_SINT_16\0" /* 60744 */
+ "VGT_GRP_SINT_32\0" /* 60760 */
+ "VGT_GRP_FLOAT_32\0" /* 60776 */
+ "VGT_GRP_AUTO_PRIM\0" /* 60793 */
+ "VGT_GRP_FIX_1_23_TO_FLOAT\0" /* 60811 */
+ "X_CONV\0" /* 60841, 60837 */
+ "Y_CONV\0" /* 60844 */
+ "Z_CONV\0" /* 60851 */
+ "Z_OFFSET\0" /* 60858 */
+ "W_CONV\0" /* 60867 */
+ "GS_OFF\0" /* 60874 */
+ "GS_SCENARIO_A\0" /* 60881 */
+ "GS_SCENARIO_B\0" /* 60895 */
+ "GS_SCENARIO_G\0" /* 60909 */
+ "GS_SCENARIO_C\0" /* 60923 */
+ "SPRITE_EN\0" /* 60937 */
+ "GS_CUT_1024\0" /* 60947 */
+ "GS_CUT_512\0" /* 60959 */
+ "GS_CUT_256\0" /* 60970 */
+ "GS_CUT_128\0" /* 60981 */
+ "CUT_MODE\0" /* 60992 */
+ "GS_C_PACK_EN\0" /* 61001 */
+ "RESERVED_2\0" /* 61014 */
+ "ES_PASSTHRU\0" /* 61025 */
+ "COMPUTE_MODE\0" /* 61037 */
+ "FAST_COMPUTE_MODE\0" /* 61050 */
+ "ELEMENT_INFO_EN\0" /* 61068 */
+ "PARTIAL_THD_AT_EOI\0" /* 61084 */
+ "SUPPRESS_CUTS\0" /* 61103 */
+ "ES_WRITE_OPTIMIZE\0" /* 61117 */
+ "GS_WRITE_OPTIMIZE\0" /* 61135 */
+ "X_0_OFFCHIP_GS\0" /* 61153 */
+ "X_3_ES_AND_GS_ARE_ONCHIP\0" /* 61168, 61186 */
+ "ES_VERTS_PER_SUBGRP\0" /* 61193 */
+ "GS_PRIMS_PER_SUBGRP\0" /* 61213 */
+ "MSAA_ENABLE\0" /* 61233 */
+ "VPORT_SCISSOR_ENABLE\0" /* 61245 */
+ "LINE_STIPPLE_ENABLE\0" /* 61266 */
+ "SEND_UNLIT_STILES_TO_PKR\0" /* 61286 */
+ "WALK_SIZE\0" /* 61311 */
+ "WALK_ALIGNMENT\0" /* 61321 */
+ "WALK_ALIGN8_PRIM_FITS_ST\0" /* 61336 */
+ "WALK_FENCE_ENABLE\0" /* 61361 */
+ "WALK_FENCE_SIZE\0" /* 61379 */
+ "SUPERTILE_WALK_ORDER_ENABLE\0" /* 61400, 61395 */
+ "TILE_COVER_DISABLE\0" /* 61423 */
+ "TILE_COVER_NO_SCISSOR\0" /* 61442 */
+ "ZMM_LINE_EXTENT\0" /* 61464 */
+ "ZMM_LINE_OFFSET\0" /* 61480 */
+ "ZMM_RECT_EXTENT\0" /* 61496 */
+ "KILL_PIX_POST_HI_Z\0" /* 61512 */
+ "KILL_PIX_POST_DETAIL_MASK\0" /* 61531 */
+ "PS_ITER_SAMPLE\0" /* 61557 */
+ "MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE\0" /* 61572 */
+ "MULTI_GPU_SUPERTILE_ENABLE\0" /* 61612 */
+ "GPU_ID_OVERRIDE_ENABLE\0" /* 61639 */
+ "GPU_ID_OVERRIDE\0" /* 61662 */
+ "MULTI_GPU_PRIM_DISCARD_ENABLE\0" /* 61678 */
+ "FORCE_EOV_CNTDWN_ENABLE\0" /* 61708 */
+ "FORCE_EOV_REZ_ENABLE\0" /* 61732 */
+ "OUT_OF_ORDER_PRIMITIVE_ENABLE\0" /* 61753 */
+ "OUT_OF_ORDER_WATER_MARK\0" /* 61783 */
+ "OUTPRIM_TYPE_POINTLIST\0" /* 61807 */
+ "OUTPRIM_TYPE_LINESTRIP\0" /* 61830 */
+ "OUTPRIM_TYPE_TRISTRIP\0" /* 61853 */
+ "OUTPRIM_TYPE\0" /* 61875 */
+ "OUTPRIM_TYPE_1\0" /* 61888 */
+ "OUTPRIM_TYPE_2\0" /* 61903 */
+ "OUTPRIM_TYPE_3\0" /* 61918 */
+ "UNIQUE_TYPE_PER_STREAM\0" /* 61933 */
+ "VGT_INDEX_16\0" /* 61956 */
+ "VGT_INDEX_32\0" /* 61969 */
+ "VGT_INDEX_8\0" /* 61982 */
+ "VGT_DMA_SWAP_NONE\0" /* 61994 */
+ "VGT_DMA_SWAP_16_BIT\0" /* 62012 */
+ "VGT_DMA_SWAP_32_BIT\0" /* 62032 */
+ "VGT_DMA_SWAP_WORD\0" /* 62052 */
+ "SWAP_MODE\0" /* 62070 */
+ "VGT_DMA_BUF_MEM\0" /* 62080 */
+ "VGT_DMA_BUF_RING\0" /* 62096 */
+ "VGT_DMA_BUF_SETUP\0" /* 62113 */
+ "BUF_TYPE\0" /* 62131 */
+ "VGT_POLICY_LRU\0" /* 62140 */
+ "VGT_POLICY_STREAM\0" /* 62155 */
+ "RDREQ_POLICY_CIK\0" /* 62173 */
+ "RDREQ_POLICY\0" /* 62190 */
+ "REQ_PATH\0" /* 62203 */
+ "DISABLE_RESET_ON_EOI\0" /* 62212 */
+ "SAMPLE_STREAMOUTSTATS1\0" /* 62233 */
+ "SAMPLE_STREAMOUTSTATS2\0" /* 62256 */
+ "SAMPLE_STREAMOUTSTATS3\0" /* 62279 */
+ "CACHE_FLUSH_TS\0" /* 62302 */
+ "CONTEXT_DONE\0" /* 62317 */
+ "CACHE_FLUSH\0" /* 62330 */
+ "CS_PARTIAL_FLUSH\0" /* 62342 */
+ "VGT_STREAMOUT_SYNC\0" /* 62359 */
+ "VGT_STREAMOUT_RESET\0" /* 62378 */
+ "END_OF_PIPE_INCR_DE\0" /* 62398 */
+ "END_OF_PIPE_IB_END\0" /* 62418 */
+ "RST_PIX_CNT\0" /* 62437 */
+ "VS_PARTIAL_FLUSH\0" /* 62449 */
+ "PS_PARTIAL_FLUSH\0" /* 62466 */
+ "FLUSH_HS_OUTPUT\0" /* 62483 */
+ "FLUSH_LS_OUTPUT\0" /* 62499 */
+ "CACHE_FLUSH_AND_INV_TS_EVENT\0" /* 62515 */
+ "ZPASS_DONE\0" /* 62544 */
+ "CACHE_FLUSH_AND_INV_EVENT\0" /* 62555 */
+ "PERFCOUNTER_START\0" /* 62581 */
+ "PERFCOUNTER_STOP\0" /* 62599 */
+ "PIPELINESTAT_START\0" /* 62616 */
+ "PIPELINESTAT_STOP\0" /* 62635 */
+ "PERFCOUNTER_SAMPLE\0" /* 62653 */
+ "FLUSH_ES_OUTPUT\0" /* 62672 */
+ "FLUSH_GS_OUTPUT\0" /* 62688 */
+ "SAMPLE_PIPELINESTAT\0" /* 62704 */
+ "SO_VGTSTREAMOUT_FLUSH\0" /* 62724 */
+ "SAMPLE_STREAMOUTSTATS\0" /* 62746 */
+ "RESET_VTX_CNT\0" /* 62768 */
+ "BLOCK_CONTEXT_DONE\0" /* 62782 */
+ "CS_CONTEXT_DONE\0" /* 62801 */
+ "VGT_FLUSH\0" /* 62817 */
+ "SC_SEND_DB_VPZ\0" /* 62827 */
+ "BOTTOM_OF_PIPE_TS\0" /* 62842 */
+ "DB_CACHE_FLUSH_AND_INV\0" /* 62860 */
+ "FLUSH_AND_INV_DB_DATA_TS\0" /* 62883 */
+ "FLUSH_AND_INV_DB_META\0" /* 62908 */
+ "FLUSH_AND_INV_CB_DATA_TS\0" /* 62930 */
+ "FLUSH_AND_INV_CB_META\0" /* 62955 */
+ "FLUSH_AND_INV_CB_PIXEL_DATA\0" /* 62977 */
+ "THREAD_TRACE_START\0" /* 63005 */
+ "THREAD_TRACE_STOP\0" /* 63024 */
+ "THREAD_TRACE_MARKER\0" /* 63042 */
+ "THREAD_TRACE_FLUSH\0" /* 63062 */
+ "THREAD_TRACE_FINISH\0" /* 63081 */
+ "PIXEL_PIPE_STAT_CONTROL\0" /* 63101 */
+ "PIXEL_PIPE_STAT_DUMP\0" /* 63125 */
+ "PIXEL_PIPE_STAT_RESET\0" /* 63146 */
+ "EVENT_TYPE\0" /* 63168 */
+ "ADDRESS_HI_GFX6\0" /* 63179 */
+ "EXTENDED_EVENT\0" /* 63195 */
+ "PRIMGROUP_SIZE\0" /* 63210 */
+ "PARTIAL_VS_WAVE_ON\0" /* 63225 */
+ "SWITCH_ON_EOP\0" /* 63244 */
+ "PARTIAL_ES_WAVE_ON\0" /* 63258 */
+ "SWITCH_ON_EOI\0" /* 63277 */
+ "WD_SWITCH_ON_EOP\0" /* 63291 */
+ "MAX_PRIMGRP_IN_WAVE\0" /* 63308 */
+ "FULL_CACHE\0" /* 63328 */
+ "HTILE_USES_PRELOAD_WIN\0" /* 63339 */
+ "PRELOAD\0" /* 63362 */
+ "PREFETCH_WIDTH\0" /* 63370 */
+ "PREFETCH_HEIGHT\0" /* 63385 */
+ "DST_OUTSIDE_ZERO_TO_ONE\0" /* 63401 */
+ "TC_COMPATIBLE\0" /* 63425 */
+ "COMPAREFUNC0\0" /* 63439 */
+ "COMPAREVALUE0\0" /* 63452 */
+ "COMPAREMASK0\0" /* 63466 */
+ "COMPAREFUNC1\0" /* 63479 */
+ "COMPAREVALUE1\0" /* 63492 */
+ "COMPAREMASK1\0" /* 63506 */
+ "MAX_X\0" /* 63519 */
+ "MAX_Y\0" /* 63525 */
+ "ACCUM_ISOLINE\0" /* 63531 */
+ "ACCUM_TRI\0" /* 63545 */
+ "ACCUM_QUAD\0" /* 63555 */
+ "DONUT_SPLIT\0" /* 63566 */
+ "TRAP_SPLIT\0" /* 63578 */
+ "LS_STAGE_OFF\0" /* 63589 */
+ "LS_STAGE_ON\0" /* 63602 */
+ "CS_STAGE_ON\0" /* 63614 */
+ "ES_STAGE_OFF\0" /* 63626 */
+ "ES_STAGE_DS\0" /* 63639 */
+ "ES_STAGE_REAL\0" /* 63651 */
+ "VS_STAGE_REAL\0" /* 63665 */
+ "VS_STAGE_DS\0" /* 63679 */
+ "VS_STAGE_COPY_SHADER\0" /* 63691 */
+ "DYNAMIC_HS\0" /* 63712 */
+ "DIS_DEALLOC_ACCUM_0\0" /* 63723 */
+ "DIS_DEALLOC_ACCUM_1\0" /* 63743 */
+ "VS_WAVE_ID_EN\0" /* 63763 */
+ "NUM_PATCHES\0" /* 63777 */
+ "HS_NUM_INPUT_CP\0" /* 63789 */
+ "HS_NUM_OUTPUT_CP\0" /* 63805 */
+ "TESS_ISOLINE\0" /* 63822 */
+ "TESS_TRIANGLE\0" /* 63835 */
+ "TESS_QUAD\0" /* 63849 */
+ "PART_INTEGER\0" /* 63859 */
+ "PART_POW2\0" /* 63872 */
+ "PART_FRAC_ODD\0" /* 63882 */
+ "PART_FRAC_EVEN\0" /* 63896 */
+ "PARTITIONING\0" /* 63911 */
+ "OUTPUT_POINT\0" /* 63924 */
+ "OUTPUT_LINE\0" /* 63937 */
+ "OUTPUT_TRIANGLE_CW\0" /* 63949 */
+ "OUTPUT_TRIANGLE_CCW\0" /* 63968 */
+ "TOPOLOGY\0" /* 63988 */
+ "RESERVED_REDUC_AXIS\0" /* 63997 */
+ "DEPRECATED\0" /* 64017 */
+ "NUM_DS_WAVES_PER_SIMD\0" /* 64028 */
+ "DISABLE_DONUTS\0" /* 64050 */
+ "VGT_POLICY_BYPASS\0" /* 64065 */
+ "DISTRIBUTION_MODE_NO_DIST\0" /* 64083 */
+ "DISTRIBUTION_MODE_PATCHES\0" /* 64109 */
+ "DISTRIBUTION_MODE_DONUTS\0" /* 64135 */
+ "DISTRIBUTION_MODE_TRAPEZOIDS\0" /* 64160 */
+ "DISTRIBUTION_MODE\0" /* 64189 */
+ "ALPHA_TO_MASK_ENABLE\0" /* 64207 */
+ "ALPHA_TO_MASK_OFFSET0\0" /* 64228 */
+ "ALPHA_TO_MASK_OFFSET1\0" /* 64250 */
+ "ALPHA_TO_MASK_OFFSET2\0" /* 64272 */
+ "ALPHA_TO_MASK_OFFSET3\0" /* 64294 */
+ "OFFSET_ROUND\0" /* 64316 */
+ "POLY_OFFSET_NEG_NUM_DB_BITS\0" /* 64329 */
+ "POLY_OFFSET_DB_IS_FLOAT_FMT\0" /* 64357 */
+ "STREAMOUT_0_EN\0" /* 64385 */
+ "STREAMOUT_1_EN\0" /* 64400 */
+ "STREAMOUT_2_EN\0" /* 64415 */
+ "STREAMOUT_3_EN\0" /* 64430 */
+ "RAST_STREAM\0" /* 64445 */
+ "RAST_STREAM_MASK\0" /* 64457 */
+ "USE_RAST_STREAM_MASK\0" /* 64474 */
+ "STREAM_0_BUFFER_EN\0" /* 64495 */
+ "STREAM_1_BUFFER_EN\0" /* 64514 */
+ "STREAM_2_BUFFER_EN\0" /* 64533 */
+ "STREAM_3_BUFFER_EN\0" /* 64552 */
+ "DISTANCE_0\0" /* 64571 */
+ "DISTANCE_1\0" /* 64582 */
+ "DISTANCE_2\0" /* 64593 */
+ "DISTANCE_3\0" /* 64604 */
+ "DISTANCE_4\0" /* 64615 */
+ "DISTANCE_5\0" /* 64626 */
+ "DISTANCE_6\0" /* 64637 */
+ "DISTANCE_7\0" /* 64648 */
+ "DISTANCE_8\0" /* 64659 */
+ "DISTANCE_9\0" /* 64670 */
+ "DISTANCE_10\0" /* 64681 */
+ "DISTANCE_11\0" /* 64693 */
+ "DISTANCE_12\0" /* 64705 */
+ "DISTANCE_13\0" /* 64717 */
+ "DISTANCE_14\0" /* 64729 */
+ "DISTANCE_15\0" /* 64741 */
+ "EXPAND_LINE_WIDTH\0" /* 64753 */
+ "LAST_PIXEL\0" /* 64771 */
+ "PERPENDICULAR_ENDCAP_ENA\0" /* 64782 */
+ "DX10_DIAMOND_TEST_ENA\0" /* 64807 */
+ "MSAA_NUM_SAMPLES\0" /* 64829 */
+ "AA_MASK_CENTROID_DTMN\0" /* 64846 */
+ "MAX_SAMPLE_DIST\0" /* 64868 */
+ "MSAA_EXPOSED_SAMPLES\0" /* 64884 */
+ "DETAIL_TO_EXPOSED_MODE\0" /* 64905 */
+ "PIX_CENTER\0" /* 64928 */
+ "X_TRUNCATE\0" /* 64939 */
+ "X_ROUND\0" /* 64950 */
+ "X_ROUND_TO_EVEN\0" /* 64958 */
+ "X_ROUND_TO_ODD\0" /* 64974 */
+ "ROUND_MODE\0" /* 64989 */
+ "X_16_8_FIXED_POINT_1_16TH\0" /* 65000 */
+ "X_16_8_FIXED_POINT_1_8TH\0" /* 65026 */
+ "X_16_8_FIXED_POINT_1_4TH\0" /* 65051 */
+ "X_16_8_FIXED_POINT_1_2\0" /* 65076 */
+ "X_16_8_FIXED_POINT_1\0" /* 65099 */
+ "X_16_8_FIXED_POINT_1_256TH\0" /* 65120 */
+ "X_14_10_FIXED_POINT_1_1024TH\0" /* 65147 */
+ "X_12_12_FIXED_POINT_1_4096TH\0" /* 65176 */
+ "QUANT_MODE\0" /* 65205 */
+ "S0_X\0" /* 65216 */
+ "S0_Y\0" /* 65221 */
+ "S1_X\0" /* 65226 */
+ "S1_Y\0" /* 65231 */
+ "S2_X\0" /* 65236 */
+ "S2_Y\0" /* 65241 */
+ "S3_X\0" /* 65246 */
+ "S3_Y\0" /* 65251 */
+ "S4_X\0" /* 65256 */
+ "S4_Y\0" /* 65261 */
+ "S5_X\0" /* 65266 */
+ "S5_Y\0" /* 65271 */
+ "S6_X\0" /* 65276 */
+ "S6_Y\0" /* 65281 */
+ "S7_X\0" /* 65286 */
+ "S7_Y\0" /* 65291 */
+ "S8_X\0" /* 65296 */
+ "S8_Y\0" /* 65301 */
+ "S9_X\0" /* 65306 */
+ "S9_Y\0" /* 65311 */
+ "S10_X\0" /* 65316 */
+ "S10_Y\0" /* 65322 */
+ "S11_X\0" /* 65328 */
+ "S11_Y\0" /* 65334 */
+ "S12_X\0" /* 65340 */
+ "S12_Y\0" /* 65346 */
+ "S13_X\0" /* 65352 */
+ "S13_Y\0" /* 65358 */
+ "S14_X\0" /* 65364 */
+ "S14_Y\0" /* 65370 */
+ "S15_X\0" /* 65376 */
+ "S15_Y\0" /* 65382 */
+ "AA_MASK_X0Y0\0" /* 65388 */
+ "AA_MASK_X1Y0\0" /* 65401 */
+ "AA_MASK_X0Y1\0" /* 65414 */
+ "AA_MASK_X1Y1\0" /* 65427 */
+ "REALIGN_DQUADS_AFTER_N_WAVES\0" /* 65440 */
+ "VTX_REUSE_DEPTH\0" /* 65469 */
+ "DEALLOC_DIST\0" /* 65485 */
+ "FMASK_TILE_MAX\0" /* 65498 */
+ "ENDIAN_NONE\0" /* 65513 */
+ "ENDIAN_8IN16\0" /* 65525 */
+ "ENDIAN_8IN32\0" /* 65538 */
+ "ENDIAN_8IN64\0" /* 65551 */
+ "ENDIAN\0" /* 65564 */
+ "COLOR_INVALID\0" /* 65571 */
+ "COLOR_8\0" /* 65585 */
+ "COLOR_16\0" /* 65593 */
+ "COLOR_8_8\0" /* 65602 */
+ "COLOR_32\0" /* 65612 */
+ "COLOR_16_16\0" /* 65621 */
+ "COLOR_10_11_11\0" /* 65633 */
+ "COLOR_11_11_10\0" /* 65648 */
+ "COLOR_10_10_10_2\0" /* 65663 */
+ "COLOR_2_10_10_10\0" /* 65680 */
+ "COLOR_8_8_8_8\0" /* 65697 */
+ "COLOR_32_32\0" /* 65711 */
+ "COLOR_16_16_16_16\0" /* 65723 */
+ "COLOR_32_32_32_32\0" /* 65741 */
+ "COLOR_5_6_5\0" /* 65759 */
+ "COLOR_1_5_5_5\0" /* 65771 */
+ "COLOR_5_5_5_1\0" /* 65785 */
+ "COLOR_4_4_4_4\0" /* 65799 */
+ "COLOR_8_24\0" /* 65813 */
+ "COLOR_24_8\0" /* 65824 */
+ "COLOR_X24_8_32_FLOAT\0" /* 65835 */
+ "NUMBER_UNORM\0" /* 65856 */
+ "NUMBER_SNORM\0" /* 65869 */
+ "NUMBER_UINT\0" /* 65882 */
+ "NUMBER_SINT\0" /* 65894 */
+ "NUMBER_SRGB\0" /* 65906 */
+ "NUMBER_FLOAT\0" /* 65918 */
+ "NUMBER_TYPE\0" /* 65931 */
+ "SWAP_STD\0" /* 65943 */
+ "SWAP_ALT\0" /* 65952 */
+ "SWAP_STD_REV\0" /* 65961 */
+ "SWAP_ALT_REV\0" /* 65974 */
+ "COMP_SWAP\0" /* 65987 */
+ "BLEND_CLAMP\0" /* 65997 */
+ "BLEND_BYPASS\0" /* 66009 */
+ "SIMPLE_FLOAT\0" /* 66022 */
+ "CMASK_IS_LINEAR\0" /* 66035 */
+ "FORCE_OPT_AUTO\0" /* 66051 */
+ "FORCE_OPT_DISABLE\0" /* 66066 */
+ "FORCE_OPT_ENABLE_IF_SRC_A_0\0" /* 66084 */
+ "FORCE_OPT_ENABLE_IF_SRC_RGB_0\0" /* 66112 */
+ "FORCE_OPT_ENABLE_IF_SRC_ARGB_0\0" /* 66142 */
+ "FORCE_OPT_ENABLE_IF_SRC_A_1\0" /* 66173 */
+ "FORCE_OPT_ENABLE_IF_SRC_RGB_1\0" /* 66201 */
+ "FORCE_OPT_ENABLE_IF_SRC_ARGB_1\0" /* 66231 */
+ "BLEND_OPT_DONT_RD_DST\0" /* 66262 */
+ "BLEND_OPT_DISCARD_PIXEL\0" /* 66284 */
+ "FMASK_COMPRESSION_DISABLE\0" /* 66308 */
+ "FMASK_COMPRESS_1FRAG_ONLY\0" /* 66334 */
+ "DCC_ENABLE\0" /* 66360 */
+ "CMASK_ADDR_TYPE\0" /* 66371 */
+ "FMASK_TILE_MODE_INDEX\0" /* 66387 */
+ "FMASK_BANK_HEIGHT\0" /* 66409 */
+ "NUM_FRAGMENTS\0" /* 66427 */
+ "FORCE_DST_ALPHA_1\0" /* 66441 */
+ "KEY_CLEAR_ENABLE\0" /* 66459 */
+ "MAX_UNCOMPRESSED_BLOCK_SIZE\0" /* 66476 */
+ "MIN_COMPRESSED_BLOCK_SIZE\0" /* 66504 */
+ "MAX_COMPRESSED_BLOCK_SIZE\0" /* 66530 */
+ "INDEPENDENT_64B_BLOCKS\0" /* 66556 */
+ "LOSSY_RGB_PRECISION\0" /* 66579 */
+ "LOSSY_ALPHA_PRECISION\0" /* 66599 */
+ "UTCL2_BUSY\0" /* 66621 */
+ "EA_BUSY\0" /* 66632 */
+ "RMI_BUSY\0" /* 66640 */
+ "UTCL2_RQ_PENDING\0" /* 66649 */
+ "CPF_RQ_PENDING\0" /* 66666 */
+ "EA_LINK_BUSY\0" /* 66681 */
+ "CPAXI_BUSY\0" /* 66694 */
+ "RSMU_RQ_PENDING\0" /* 66705 */
+ "TC_WC_ACTION_ENA\0" /* 66721 */
+ "TC_INV_METADATA_ACTION_ENA\0" /* 66738 */
+ "UTCL2IU_BUSY\0" /* 66765 */
+ "SAVE_RESTORE_BUSY\0" /* 66778 */
+ "UTCL2IU_WAITING_ON_FREE\0" /* 66796 */
+ "UTCL2IU_WAITING_ON_TAGS\0" /* 66820 */
+ "UTCL1_WAITING_ON_TRANS\0" /* 66844 */
+ "GFX_UTCL1_WAITING_ON_TRANS\0" /* 66867 */
+ "CMP_UTCL1_WAITING_ON_TRANS\0" /* 66894 */
+ "RCIU_WAITING_ON_FREE\0" /* 66921 */
+ "PRIMGEN_EN\0" /* 66942 */
+ "MATCH_ALL_BITS\0" /* 66953 */
+ "EN_INST_OPT_BASIC\0" /* 66968 */
+ "EN_INST_OPT_ADV\0" /* 66986 */
+ "HW_USE_ONLY\0" /* 67002 */
+ "TARGET_INST\0" /* 67014 */
+ "TARGET_DATA\0" /* 67026 */
+ "COMPLETE\0" /* 67038 */
+ "DWB\0" /* 67047 */
+ "GRAD_ADJ_0\0" /* 67051 */
+ "GRAD_ADJ_1\0" /* 67062 */
+ "GRAD_ADJ_2\0" /* 67073 */
+ "GRAD_ADJ_3\0" /* 67084 */
+ "USER_VM_ENABLE\0" /* 67095 */
+ "USER_VM_MODE\0" /* 67110 */
+ "IMG_DATA_FORMAT_6E4\0" /* 67123 */
+ "IMG_DATA_FORMAT_16_AS_32_32\0" /* 67143 */
+ "IMG_DATA_FORMAT_16_AS_16_16_16_16_GFX9\0" /* 67171 */
+ "IMG_DATA_FORMAT_16_AS_32_32_32_32_GFX9\0" /* 67210 */
+ "IMG_DATA_FORMAT_FMASK\0" /* 67249 */
+ "IMG_DATA_FORMAT_ASTC_2D_LDR\0" /* 67271 */
+ "IMG_DATA_FORMAT_ASTC_2D_HDR\0" /* 67299 */
+ "IMG_DATA_FORMAT_ASTC_2D_LDR_SRGB\0" /* 67327 */
+ "IMG_DATA_FORMAT_ASTC_3D_LDR\0" /* 67360 */
+ "IMG_DATA_FORMAT_ASTC_3D_HDR\0" /* 67388 */
+ "IMG_DATA_FORMAT_ASTC_3D_LDR_SRGB\0" /* 67416 */
+ "IMG_DATA_FORMAT_N_IN_16\0" /* 67449 */
+ "IMG_DATA_FORMAT_N_IN_16_16\0" /* 67473 */
+ "IMG_DATA_FORMAT_N_IN_16_16_16_16\0" /* 67500 */
+ "IMG_DATA_FORMAT_N_IN_16_AS_16_16_16_16\0" /* 67533 */
+ "IMG_DATA_FORMAT_RESERVED_56\0" /* 67572 */
+ "IMG_DATA_FORMAT_S8_16\0" /* 67600 */
+ "IMG_DATA_FORMAT_S8_32\0" /* 67622 */
+ "IMG_DATA_FORMAT_8_AS_32\0" /* 67644 */
+ "IMG_DATA_FORMAT_8_AS_32_32\0" /* 67668 */
+ "DATA_FORMAT_GFX9\0" /* 67695 */
+ "IMG_NUM_FORMAT_RESERVED_6\0" /* 67712 */
+ "IMG_NUM_FORMAT_METADATA\0" /* 67738 */
+ "IMG_NUM_FORMAT_UNORM_UINT\0" /* 67762 */
+ "NUM_FORMAT_GFX9\0" /* 67788 */
+ "IMG_FMASK_8_2_1\0" /* 67804 */
+ "IMG_FMASK_8_4_1\0" /* 67820 */
+ "IMG_FMASK_8_8_1\0" /* 67836 */
+ "IMG_FMASK_8_2_2\0" /* 67852 */
+ "IMG_FMASK_8_4_2\0" /* 67868 */
+ "IMG_FMASK_8_4_4\0" /* 67884 */
+ "IMG_FMASK_16_16_1\0" /* 67900 */
+ "IMG_FMASK_16_8_2\0" /* 67918 */
+ "IMG_FMASK_32_16_2\0" /* 67935 */
+ "IMG_FMASK_32_8_4\0" /* 67953 */
+ "IMG_FMASK_32_8_8\0" /* 67970 */
+ "IMG_FMASK_64_16_4\0" /* 67987 */
+ "IMG_FMASK_64_16_8\0" /* 68005 */
+ "NUM_FORMAT_FMASK\0" /* 68023 */
+ "IMG_ASTC_2D_4x4\0" /* 68040 */
+ "IMG_ASTC_2D_5x4\0" /* 68056 */
+ "IMG_ASTC_2D_5x5\0" /* 68072 */
+ "IMG_ASTC_2D_6x5\0" /* 68088 */
+ "IMG_ASTC_2D_6x6\0" /* 68104 */
+ "IMG_ASTC_2D_8x5\0" /* 68120 */
+ "IMG_ASTC_2D_8x6\0" /* 68136 */
+ "IMG_ASTC_2D_8x8\0" /* 68152 */
+ "IMG_ASTC_2D_10x5\0" /* 68168 */
+ "IMG_ASTC_2D_10x6\0" /* 68185 */
+ "IMG_ASTC_2D_10x8\0" /* 68202 */
+ "IMG_ASTC_2D_10x10\0" /* 68219 */
+ "IMG_ASTC_2D_12x10\0" /* 68237 */
+ "IMG_ASTC_2D_12x12\0" /* 68255 */
+ "NUM_FORMAT_ASTC_2D\0" /* 68273 */
+ "IMG_ASTC_3D_3x3x3\0" /* 68292 */
+ "IMG_ASTC_3D_4x3x3\0" /* 68310 */
+ "IMG_ASTC_3D_4x4x3\0" /* 68328 */
+ "IMG_ASTC_3D_4x4x4\0" /* 68346 */
+ "IMG_ASTC_3D_5x4x4\0" /* 68364 */
+ "IMG_ASTC_3D_5x5x4\0" /* 68382 */
+ "IMG_ASTC_3D_5x5x5\0" /* 68400 */
+ "IMG_ASTC_3D_6x5x5\0" /* 68418 */
+ "IMG_ASTC_3D_6x6x5\0" /* 68436 */
+ "IMG_ASTC_3D_6x6x6\0" /* 68454 */
+ "NUM_FORMAT_ASTC_3D\0" /* 68472 */
+ "META_DIRECT\0" /* 68491 */
+ "SW_MODE\0" /* 68503 */
+ "PITCH_GFX9\0" /* 68511 */
+ "BC_SWIZZLE_XYZW\0" /* 68522 */
+ "BC_SWIZZLE_XWYZ\0" /* 68538 */
+ "BC_SWIZZLE_WZYX\0" /* 68554 */
+ "BC_SWIZZLE_WXYZ\0" /* 68570 */
+ "BC_SWIZZLE_ZYXW\0" /* 68586 */
+ "BC_SWIZZLE_YXWZ\0" /* 68602 */
+ "BC_SWIZZLE\0" /* 68618 */
+ "ARRAY_PITCH\0" /* 68629 */
+ "META_DATA_ADDRESS\0" /* 68641 */
+ "META_LINEAR\0" /* 68659 */
+ "META_PIPE_ALIGNED\0" /* 68676, 68671 */
+ "META_RB_ALIGNED\0" /* 68689, 68694 */
+ "MAX_MIP\0" /* 68705 */
+ "BLEND_ZERO_PRT\0" /* 68713 */
+ "SKIP_DEGAMMA\0" /* 68728 */
+ "TTRACE_STALL_ALL\0" /* 68741 */
+ "ALLOC_ARB_LRU_ENA\0" /* 68758 */
+ "EXP_ARB_LRU_ENA\0" /* 68776 */
+ "PS_PKR_PRIORITY_CNTL\0" /* 68792 */
+ "BATON_RESET_DISABLE\0" /* 68813 */
+ "CRC_SIMD_ID_WADDR_DISABLE\0" /* 68833 */
+ "LBPW_CU_CHK_MODE\0" /* 68859 */
+ "LBPW_CU_CHK_CNT\0" /* 68876 */
+ "CSC_PWR_SAVE_DISABLE\0" /* 68892 */
+ "CSG_PWR_SAVE_DISABLE\0" /* 68913 */
+ "CONTEXT_SAVE_WAIT_GDS_REQUEST_CYCLE_OVHD\0" /* 68934 */
+ "CONTEXT_SAVE_WAIT_GDS_GRANT_CYCLE_OVHD\0" /* 68975 */
+ "PIPE_INTERLEAVE_SIZE_GFX9\0" /* 69014 */
+ "MAX_COMPRESSED_FRAGS\0" /* 69040 */
+ "NUM_SHADER_ENGINES_GFX9\0" /* 69061 */
+ "NUM_GPUS_GFX9\0" /* 69085 */
+ "NUM_RB_PER_SE\0" /* 69099 */
+ "SE_ENABLE\0" /* 69113 */
+ "SIMD_DISABLE\0" /* 69123 */
+ "FP16_OVFL\0" /* 69136 */
+ "LOAD_COLLISION_WAVEID\0" /* 69146 */
+ "LOAD_INTRAWAVE_COLLISION\0" /* 69168 */
+ "SKIP_USGPR0\0" /* 69193 */
+ "USER_SGPR_MSB\0" /* 69205 */
+ "PC_BASE_EN\0" /* 69219 */
+ "SPI_SHADER_LATE_ALLOC_GS\0" /* 69230 */
+ "GS_VGPR_COMP_CNT\0" /* 69255 */
+ "ES_VGPR_COMP_CNT\0" /* 69272 */
+ "LS_VGPR_COMP_CNT\0" /* 69289 */
+ "CNTR_SEL0\0" /* 69306 */
+ "CNTR_SEL1\0" /* 69316 */
+ "CNTR_MODE1\0" /* 69326 */
+ "CNTR_MODE0\0" /* 69337 */
+ "CNTR_SEL2\0" /* 69348 */
+ "CNTR_SEL3\0" /* 69358 */
+ "CNTR_MODE3\0" /* 69368 */
+ "CNTR_MODE2\0" /* 69379 */
+ "UTCL2_BUSY_USER_DEFINED_MASK\0" /* 69390 */
+ "EA_BUSY_USER_DEFINED_MASK\0" /* 69419 */
+ "RMI_BUSY_USER_DEFINED_MASK\0" /* 69445 */
+ "MIPID\0" /* 69472 */
+ "ALLOW_PARTIAL_RES_HIER_KILL\0" /* 69478 */
+ "X_MAX\0" /* 69506 */
+ "Y_MAX\0" /* 69512 */
+ "PARTIALLY_RESIDENT\0" /* 69518 */
+ "FAULT_BEHAVIOR\0" /* 69537 */
+ "ITERATE_FLUSH\0" /* 69552 */
+ "MAXMIP\0" /* 69566 */
+ "FORCE_ON\0" /* 69573 */
+ "PUNCHOUT_MODE\0" /* 69582 */
+ "POPS_DRAIN_PS_ON_OVERLAP\0" /* 69596 */
+ "DISALLOW_OVERFLOW\0" /* 69621 */
+ "PS_INVOKE_MASK\0" /* 69639 */
+ "DEST_BASE_HI_256B\0" /* 69654 */
+ "SE_XSEL_GFX9\0" /* 69672 */
+ "SE_YSEL_GFX9\0" /* 69685 */
+ "SE_PAIR_XSEL_GFX9\0" /* 69698 */
+ "SE_PAIR_YSEL_GFX9\0" /* 69716 */
+ "NUM_SE\0" /* 69734 */
+ "DISABLE_SRBSL_DB_OPTIMIZED_PACKING\0" /* 69741 */
+ "PERFMON_ENABLE\0" /* 69776 */
+ "LEFT_QTR\0" /* 69791 */
+ "LEFT_HALF\0" /* 69800 */
+ "RIGHT_HALF\0" /* 69810 */
+ "RIGHT_QTR\0" /* 69821 */
+ "TOP_QTR\0" /* 69831 */
+ "TOP_HALF\0" /* 69839 */
+ "BOT_HALF\0" /* 69848 */
+ "BOT_QTR\0" /* 69857 */
+ "LEFT_EYE_FOV_LEFT\0" /* 69865 */
+ "LEFT_EYE_FOV_RIGHT\0" /* 69883 */
+ "RIGHT_EYE_FOV_LEFT\0" /* 69902 */
+ "RIGHT_EYE_FOV_RIGHT\0" /* 69921 */
+ "FOV_TOP\0" /* 69941 */
+ "FOV_BOT\0" /* 69949 */
+ "OFFCHIP_PARAM_EN\0" /* 69957 */
+ "LATE_PC_DEALLOC\0" /* 69974 */
+ "BASE_ADDR_GFX9\0" /* 69990 */
+ "SPRITE_EN_R6XX\0" /* 70005 */
+ "UNROLLED_INST\0" /* 70020 */
+ "GRBM_SKEW_NO_DEC\0" /* 70034 */
+ "REG_RT_INDEX\0" /* 70051 */
+ "PRIMITIVE_ORDERED_PIXEL_SHADER\0" /* 70064 */
+ "EXEC_IF_OVERLAPPED\0" /* 70095 */
+ "POPS_OVERLAP_NUM_SAMPLES\0" /* 70114 */
+ "RIGHT_TRIANGLE_ALTERNATE_GRADIENT_REF\0" /* 70139 */
+ "NEW_QUAD_DECOMPOSITION\0" /* 70177 */
+ "PERFCOUNTER_REF\0" /* 70200 */
+ "USE_VTX_SHD_OBJPRIM_ID\0" /* 70216 */
+ "SRBSL_ENABLE\0" /* 70239 */
+ "OBJ_ID_SEL\0" /* 70252 */
+ "ADD_PIPED_PRIM_ID\0" /* 70263 */
+ "EN_32BIT_OBJPRIMID\0" /* 70281 */
+ "VERTEX_REUSE_OFF\0" /* 70300 */
+ "INDEX_BUF_EDGE_FLAG_ENA\0" /* 70317 */
+ "DISCARD_0_AREA_TRIANGLES\0" /* 70341 */
+ "DISCARD_0_AREA_LINES\0" /* 70366 */
+ "DISCARD_0_AREA_POINTS\0" /* 70387 */
+ "DISCARD_0_AREA_RECTANGLES\0" /* 70409 */
+ "USE_PROVOKING_ZW\0" /* 70435 */
+ "RESERVED_3\0" /* 70452 */
+ "RESERVED_4\0" /* 70463 */
+ "RESERVED_5\0" /* 70474 */
+ "GS_INST_PRIMS_IN_SUBGRP\0" /* 70485 */
+ "SCALE_LINE_WIDTH_PAD\0" /* 70509 */
+ "ALTERNATE_RBS_PER_TILE\0" /* 70530 */
+ "COARSE_TILE_STARTS_ON_EVEN_RB\0" /* 70553 */
+ "NGG_DISABLE_PROVOK_REUSE\0" /* 70583 */
+ "BREAK_BATCH\0" /* 70608 */
+ "FLUSH_DFSM\0" /* 70620 */
+ "RESET_TO_LOWEST_VGT\0" /* 70631 */
+ "TGID_ROLLOVER\0" /* 70651 */
+ "ENABLE_NGG_PIPELINE\0" /* 70665 */
+ "ENABLE_LEGACY_PIPELINE\0" /* 70685 */
+ "ADDRESS_HI_GFX9\0" /* 70708 */
+ "OBJPRIM_ID_EN\0" /* 70724 */
+ "EN_REG_RT_INDEX\0" /* 70738 */
+ "EN_PIPELINE_PRIMID\0" /* 70754 */
+ "OBJECT_ID_INST_EN\0" /* 70773 */
+ "COMPOUND_INDEX_EN\0" /* 70791 */
+ "ORDERED_ID_MODE\0" /* 70809 */
+ "GS_FAST_LAUNCH\0" /* 70825 */
+ "EN_PRIMS_NEEDED_CNT\0" /* 70840 */
+ "COVERAGE_TO_SHADER_SELECT\0" /* 70860 */
+ "BINNING_ALLOWED\0" /* 70886 */
+ "FORCE_BINNING_ON\0" /* 70902 */
+ "DISABLE_BINNING_USE_NEW_SC\0" /* 70919 */
+ "DISABLE_BINNING_USE_LEGACY_SC\0" /* 70946 */
+ "BINNING_MODE\0" /* 70976 */
+ "BIN_SIZE_X\0" /* 70989 */
+ "BIN_SIZE_Y\0" /* 71000 */
+ "BIN_SIZE_X_EXTEND\0" /* 71011 */
+ "BIN_SIZE_Y_EXTEND\0" /* 71029 */
+ "CONTEXT_STATES_PER_BIN\0" /* 71047 */
+ "PERSISTENT_STATES_PER_BIN\0" /* 71070 */
+ "DISABLE_START_OF_PRIM\0" /* 71096 */
+ "FPOVS_PER_BATCH\0" /* 71118 */
+ "OPTIMAL_BIN_SELECTION\0" /* 71134 */
+ "MAX_ALLOC_COUNT\0" /* 71156 */
+ "MAX_PRIM_PER_BATCH\0" /* 71172 */
+ "OVER_RAST_ENABLE\0" /* 71191 */
+ "OVER_RAST_SAMPLE_SELECT\0" /* 71208 */
+ "UNDER_RAST_ENABLE\0" /* 71232 */
+ "UNDER_RAST_SAMPLE_SELECT\0" /* 71250 */
+ "PBB_UNCERTAINTY_REGION_ENABLE\0" /* 71275 */
+ "ZMM_TRI_EXTENT\0" /* 71305 */
+ "ZMM_TRI_OFFSET\0" /* 71320 */
+ "OVERRIDE_OVER_RAST_INNER_TO_NORMAL\0" /* 71335 */
+ "OVERRIDE_UNDER_RAST_INNER_TO_NORMAL\0" /* 71370 */
+ "DEGENERATE_OVERRIDE_INNER_TO_NORMAL_DISABLE\0" /* 71406 */
+ "UNCERTAINTY_REGION_MODE\0" /* 71450 */
+ "OUTER_UNCERTAINTY_EDGERULE_OVERRIDE\0" /* 71474 */
+ "INNER_UNCERTAINTY_EDGERULE_OVERRIDE\0" /* 71510 */
+ "NULL_SQUAD_AA_MASK_ENABLE\0" /* 71546 */
+ "COVERAGE_AA_MASK_ENABLE\0" /* 71572 */
+ "PREZ_AA_MASK_ENABLE\0" /* 71596 */
+ "POSTZ_AA_MASK_ENABLE\0" /* 71616 */
+ "CENTROID_SAMPLE_OVERRIDE\0" /* 71637 */
+ "MAX_DEALLOCS_IN_WAVE\0" /* 71662 */
+ "BASE_256B\0" /* 71683 */
+ "MIP0_HEIGHT\0" /* 71693 */
+ "MIP0_WIDTH\0" /* 71705 */
+ "MIP_LEVEL\0" /* 71716 */
+ "MIP0_DEPTH\0" /* 71726 */
+ "COLOR_SW_MODE\0" /* 71737 */
+ "FMASK_SW_MODE\0" /* 71751 */
+ "RESOURCE_TYPE\0" /* 71765 */;
static const int sid_strings_offsets[] = {
- /* 0 */ 509, 867, 871, 874,
- /* 4 */ 911, 931, 943, 949, 953, 962,
- /* 10 */ 1020, 949, 391, 1029,
- /* 14 */ 1059, 949, 1068, 1076,
- /* 18 */ 1191, 1196, 1204, 1212,
- /* 22 */ 1238, 922,
- /* 24 */ 1253, 1263,
- /* 26 */ 5827, 5838, 5854, 5869, 5885, 5899, 5912, 5927, 5942, 5957, 5969, 5988, 6008, 6026, 6045, 6060, 6075, 6097, 6112, 6127, 6142, 6158, 6172, 6199, 6226, 6253, 6280, 6304, 6324,
- /* 55 */ 6353, 6374,
- /* 57 */ 5827, 5838, 5854, 5869, 5885, 5899, 5912, -1, -1, 5957, 5969, 5988, 6008, 6026, -1, -1, 6075, 6097, 6112, 6127, 6142, 6158, 6172, 6199, 6226, 6253, 6280, 6304, 6324,
- /* 86 */ 8345, 8357, 8369, 8381,
- /* 90 */ 8773, -1, -1, -1, -1, -1, -1, -1, 8784, 8796, -1, -1, 8808, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 8819,
- /* 123 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 8845,
- /* 186 */ 8940, 8949, 8958, 8976, 8994, 9003, 9012, 9021,
- /* 194 */ 9070, 9091, 9112, 9135, 9158, 9178, 9198, 9223,
- /* 202 */ 9255, 9279, 9297, 9316, 9336, 9355, 9377, 9402, 9427, 9454, 9481, 9505, 9527, 9555, 9580, 9608,
- /* 218 */ 9716, 9728, 9747, 9766,
- /* 222 */ 9824, 9848, 9866, 9885, 9905, 9924, 9946, 9971, 9996, 10023, 10050, 10074, 10096, 10124, 10149, 10177, 10205, 10227, 10251, 10275, 10299, 10320, 10341, 10366, 10395, 10420, 10446, 10469, 10493, 10520, 10548, 10576, 10604, 10626, 10648, 10672, 10692, 10712, 10732, 10752, 10772, 10792, 10812, 10851, 10890, 10919, 10948, 10977, 11006, 11035, 11064, 11095, 11125, 11156, 11186, 11216, 11247, 11278, 11298, 11320, 11338, 11365, 11389, 11415,
- /* 286 */ 11466, 11487, 11508, 11531, 11554, 11574, 11594, 11619, 11640, 11666, 11686, 11708, 11734, 11755, 11779, 11806,
- /* 302 */ 9824, 9848, 9866, 9885, 9905, 9924, 9946, 9971, 9996, 10023, 10050, 10074, 10096, 10124, 10149, 10177, 10205, 10227, 10251, 10275, 10299, 10320, 10341, 10366, 10395, 10420, 10446, 10469, 10493, 10520, 10548, 11849, 10604, 10626, 10648, 10672, 10692, 10712, 10732, 10752, 10772, 10792, 11869, 11897, 11936, 11975, 11997, 12025, 12053, 12086, 12114, 12142, 12175, 12199, 12226, 12259, 12298, 11278, 11298, 12326, 12348, 12370, 12394, 11415,
- /* 366 */ 11466, 11487, 11508, 11531, 11554, 11574, 12438, 11619, 12464, 11666, 12488,
- /* 377 */ 12530, 12546, 12562, 12578, 12594, 12610, 12626, 12644, 12661, 12679, 12696, 12713, 12731,
- /* 390 */ 12766, 12782, 12798, 12814, 12830, 12846, 12862, 12878, 12894, 12911, 12928, 12945, 12963, 12981,
- /* 404 */ 13018, 13036, 13054, 13072, 13090, 13108, 13126, 13144, 13162, 13180,
- /* 414 */ 13306, 13325, 13344, 13363, 13382, 13401, 13420, 13439, 13458, 13473, 13488, 13503, 13520, 13541, 13562, 13582,
- /* 430 */ 13644, 13660, 13676, 13692, 13708, 13724,
- /* 436 */ 13980, 13992, 14006, 14030, 14060, 14085, 14116, 14136,
- /* 444 */ 14202, 14229, 14255, 14282, 14313, 14342, 14372, 14406,
- /* 452 */ 14628, 14651,
- /* 454 */ 14628, 14651, 14691, 14720,
- /* 458 */ 14766, 14787, 14809,
- /* 461 */ 14966, 14998, 15031, 15064,
- /* 465 */ 15268,
- /* 466 */ 15567, 15583, 15599, 15615, 15631, 15647, 15663, 15679, 15695, 15711, 15727, 15743, 15758, 15773, 15788, 15804,
- /* 482 */ 16350, 16381, 16409, 16438,
- /* 486 */ 16483, 16504, 16525, 16546, 16567, -1, -1, 16588, 16609, -1, -1, -1, 16631, 16652, 16673, 16695,
- /* 502 */ 16723, 16736, 16759, 16782, 16805, 16825, 16846, 16867, 16888, 16914, 16940, 16966, 16993, 17020, 17047,
- /* 517 */ 17086, 17111, 17137, 17163, 17189, 17214, 17239,
- /* 524 */ 17275, 17298, 17321, 17344,
- /* 528 */ 17378, 17402, 17426, 17450,
- /* 532 */ 17486, 17511, 17536, 17561,
- /* 536 */ 17604, 17621, 17638, 17655,
- /* 540 */ 16350, 16381, 16409, 17673,
- /* 544 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 17801, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 17815, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 17829,
- /* 785 */ 19010, 19028, 19043,
- /* 788 */ 20659, 20669, 20682, 20696,
- /* 792 */ 20993, 21008, 21024, 21040,
- /* 796 */ 21205, 21215, 21233, 21243,
- /* 800 */ 16483, 16504, 16525, -1, 16567, 21676, 21698,
- /* 807 */ 16723, -1, -1, -1, 16805, 16825, 16846, 16867, 16888, 16914, 16940, 16966, 16993, 17020, 17047, -1, 21723, 21750,
- /* 825 */ 21778, 21788, 21793, 21798,
- /* 829 */ 21940, 21956,
- /* 831 */ 22452, 22475, 22498, 22521,
- /* 835 */ 22568, 22593, 22618, 22643,
- /* 839 */ 22693, 22717, 22741, 22765,
- /* 843 */ 22797, 22822, 22847, 22872,
- /* 847 */ 22906, 22931, 22956, 22981,
- /* 851 */ 23015, 23041, 23067, 23093,
- /* 855 */ 23129, 23152, 23175, 23198,
- /* 859 */ 23228, 23262, 23297, 23332,
- /* 863 */ 23375, 23409, 23444, 23479,
- /* 867 */ 23522, 23545, 23568, 23591,
- /* 871 */ 23621, 23655, 23690, 23725,
- /* 875 */ 23773, 23807, 23842, 23877,
- /* 879 */ 23951, 23979, 24007, 24035,
- /* 883 */ 24075, 24114, 24154, 24194,
- /* 887 */ 24252, 24291, 24331, 24371,
- /* 891 */ 24560, 24573, 24586, 24599, 24620, 24639, 24657, 24675, 24690, 24707, 24724, 24736, 24747, 24759, 24772, 24784,
- /* 907 */ 25008,
- /* 908 */ 25537, 25558, 25579, 25600, 25621,
- /* 913 */ 25915,
- /* 914 */ 26095, 26111, 26128, 26145, 26166,
- /* 919 */ 26259, 26275, 26291, 26308, 26325, 26346, 26370, 26394, 26417, 26440,
- /* 929 */ 26627, 26654, 26672, 26690, 26712, 26736, 26757, 26776, 26797, 26818, 26840,
- /* 940 */ 26902, 26908, -1, 26921, -1, -1, -1, 26934, -1, -1, -1, 26946, -1, 26958, -1, 26970,
- /* 956 */ 27477, 27512, 27547, 27579, 27611, 27643, 27675, 27709,
- /* 964 */ 27773, 27787, 27800, 27818, 27831, 27844, 27865, 27889,
- /* 972 */ 27965, 27976, 27986, 28002, 28028, 28044, 28070, 28086, 28112, 28128, 28154, -1, -1, 28179, 28200, 28231, 28248, 28269, 28286, 28307, 28328,
- /* 993 */ 28374, 28392, 28411, 28428, 28445,
- /* 998 */ 28588, 28603, 28624, 28646,
- /* 1002 */ 28680, 28696,
- /* 1004 */ 28863, 28874, 28884, 28895, 28907, 28920, 28934, 28946,
- /* 1012 */ 28974, 28984, 28993, 29003, 29014, 29026, 29039, 29050,
- /* 1020 */ 29471, 29482, 29492, 29516, -1, 29527, 29547,
- /* 1027 */ 29565, -1, -1, -1, -1, 29572, -1, -1, -1, -1, 29579, -1, -1, -1, -1, 29586, -1, 29593, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 29600, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 29607, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 29614, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 29621, -1, -1, -1, -1, 29628, -1, -1, -1, -1, 29635, -1, -1, -1, -1, 29642, -1, -1, -1, -1, -1, -1, 29649, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 29656, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 29663, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 29670, -1, -1, -1, -1, -1, -1, 29677, -1, -1, -1, -1, 29684, -1, -1, -1, -1, 29691, -1, -1, -1, -1, 29698, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 29705, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 29712, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 29719, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 29726, -1, 29733, -1, -1, -1, -1, 29740, -1, -1, -1, -1, 29747, -1, -1, -1, -1, 29754,
- /* 1283 */ 29837, 29844, 29864, 29869,
- /* 1287 */ 30015, 30028, 30047, 30069,
- /* 1291 */ 30530, 30550,
- /* 1293 */ 30562, 30576, 30589,
- /* 1296 */ 32233, 32255, 32275, 32296, 32317,
- /* 1301 */ 32372, 32389, 32405, 32420, 32436, 32452, 32476, 32500, 32524, 32548, 32569, 32585, 32600, 32624, 32647, 32671, 32691, 32710,
- /* 1319 */ 32753, 32766, 32780, 32792, 32805,
- /* 1324 */ 32889, 32906, 32923, 32939, 32955, 32971, 32987, 33004, 33022,
- /* 1333 */ 33094, 33101, 33115, 33129, 33143, 33157,
- /* 1339 */ 33167, 33179, 33190, 33201,
- /* 1343 */ 33373, -1, -1, 33388,
- /* 1347 */ 34188, 34211, 34234,
- /* 1350 */ 34337, 34350, 34363,
- /* 1353 */ 34375, 34393, 34413, 34433,
- /* 1357 */ 34461, 34477, 34494,
- /* 1360 */ 34521, 34536,
- /* 1362 */ -1, 34654, 34677, 34700, 34723, 34738, 34751, 34763, 34780, -1, 34799, 34819, 34839, 34858, 35589, 34870, 34887, 34904, 35601, 35612, 34936, 34965, 34976, 35002, 35020, 35037, 35056, 35074, 35093, 35109, 35125, 35145, 35167, 35189, 35203, 35222, 35238, 35632, -1, 35248, 35263, -1, 35281, 35304, 35329, 35351, 35376, 7971, 7998, 35398, -1, 35426, 35445, 35463, 35483, 35502, 35522, 35546, 35567, -1, -1, 35646, 35666,
- /* 1425 */ 36208, 36221, 36233,
- /* 1428 */ 36245, 36258, 36270,
- /* 1431 */ 36284, 36298, 36310,
- /* 1434 */ 36472, 36485, 36499,
- /* 1437 */ 36509, 36522, 36532, 36546,
- /* 1441 */ 36574, 36587, 36599, 36618,
- /* 1445 */ 34521, 34536, 36715,
- /* 1448 */ 36733, 36759, 36785, 36810,
- /* 1452 */ 37635, 37646, 37654, 37670,
- /* 1456 */ 37696, 37722, 37747, 37772, 37795, 37816, 37843, 37872,
- /* 1464 */ 38219, 38231, 38244, 38257,
- /* 1468 */ 38277, 38291, 38299, 38308, 38318, 38327, 38339, 38354, 38369, 38386, 38403, 38417, 38429, -1, 38447, -1, 38465, 38477, 38491, 38505, 38519, 38530, 38541,
- /* 1491 */ 38562, 38575, -1, -1, 38588, 38600, 38612, 38624,
- /* 1499 */ 38649, 38658, 38667, 38680,
- /* 1503 */ 38757, 38772, 38790, 38818, 38848, 38879, 38907, 38937,
- /* 1511 */ 13470, 13015, 13214, 953,
- /* 1515 */ 245, 39862, 20659, 953,
- /* 1519 */ 40481, 40497, 40514, 40541,
+ /* 0 */ 509, 31780, 1753, 15296,
+ /* 4 */ 31818, 31838, 31850, 31856, 8050, 31860,
+ /* 10 */ 31906, 31856, 391, 31915,
+ /* 14 */ 31945, 31856, 8540, 31954,
+ /* 18 */ 32033, 32038, 32046, 32054,
+ /* 22 */ 32080, 31829,
+ /* 24 */ 32095, 32105,
+ /* 26 */ 36348, 36359, 36375, 36390, 36406, 36420, 36433, 36448, 36463, 36478, 36490, 36509, 36529, 36547, 36566, 36581, 36596, 36618, 36633, 36648, 36663, 36679, 36693, 36720, 36747, 36774, 36801, 36825, 36845,
+ /* 55 */ 36864, 36885,
+ /* 57 */ 36348, 36359, 36375, 36390, 36406, 36420, 36433, -1, -1, 36478, 36490, 36509, 36529, 36547, -1, -1, 36596, 36618, 36633, 36648, 36663, 36679, 36693, 36720, 36747, 36774, 36801, 36825, 36845,
+ /* 86 */ 38826, 38838, 38850, 38862,
+ /* 90 */ 39207, -1, -1, -1, -1, -1, -1, -1, 39218, 39230, -1, -1, 39242, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 39253,
+ /* 123 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 39279,
+ /* 186 */ 39358, 39367, 39376, 39394, 39412, 39421, 39430, 39439,
+ /* 194 */ 39488, 39509, 39530, 39553, 39576, 39596, 39616, 39641,
+ /* 202 */ 39673, 39697, 39715, 39734, 39754, 39773, 39795, 39820, 39845, 39872, 39899, 39923, 39945, 39973, 39998, 40026,
+ /* 218 */ 40134, 40146, 40165, 40184,
+ /* 222 */ 40211, 40235, 40253, 40272, 40292, 40311, 40333, 40358, 40383, 40410, 40437, 40461, 40483, 40511, 40536, 40564, 40592, 40614, 40638, 40662, 40686, 40707, 40728, 40753, 40782, 40807, 40833, 40856, 40880, 40907, 40935, 40963, 40991, 41013, 41035, 41059, 41079, 41099, 41119, 41139, 41159, 41179, 41199, 41238, 41277, 41306, 41335, 41364, 41393, 41422, 41451, 41482, 41512, 41543, 41573, 41603, 41634, 41665, 41685, 41707, 41725, 41752, 41776, 41802,
+ /* 286 */ 41853, 41874, 41895, 41918, 41941, 41961, 41981, 42006, 42027, 42053, 42073, 42095, 42121, 42142, 42166, 42193,
+ /* 302 */ 42313, 42332, 42351, 42370, 42389, 42408, 42427, 42446, 42465, 42480, 42495, 42510, 42527, 42548, 42569, 42589,
+ /* 318 */ 42771, 42783, 42797, 42821, 42851, 42876, 42907, 42927,
+ /* 326 */ 42993, 43020, 43046, 43073, 43104, 43133, 43163, 43197,
+ /* 334 */ 43419, 43442,
+ /* 336 */ 43419, 43442, 43482, 43511,
+ /* 340 */ 43557, 43578, 43600,
+ /* 343 */ 43742, 43774, 43807, 43840,
+ /* 347 */ 44019,
+ /* 348 */ 44318, 44334, 44350, 44366, 44382, 44398, 44414, 44430, 44446, 44462, 44478, 44494, 44509, 44524, 44539, 44555,
+ /* 364 */ 44982, 45013, 45041, 45070,
+ /* 368 */ 45115, 45136, 45157, 45178, 45199, -1, -1, 45220, 45241, -1, -1, -1, 45263, 45284, 45305, 45327,
+ /* 384 */ 45355, 45368, 45391, 45414, 45437, 45457, 45478, 45499, 45520, 45546, 45572, 45598, 45625, 45652, 45679,
+ /* 399 */ 45718, 45743, 45769, 45795, 45821, 45846, 45871,
+ /* 406 */ 45907, 45930, 45953, 45976,
+ /* 410 */ 46010, 46034, 46058, 46082,
+ /* 414 */ 46118, 46143, 46168, 46193,
+ /* 418 */ 46236, 46253, 46270, 46287,
+ /* 422 */ 44982, 45013, 45041, 46315,
+ /* 426 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 46430, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 46444, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 46458,
+ /* 667 */ 47349, 47367, 47382,
+ /* 670 */ 48894, 48904, 48917, 48931,
+ /* 674 */ 49228, 49243, 49259, 49275,
+ /* 678 */ 49440, 49450, 49468, 49478,
+ /* 682 */ 45115, 45136, 45157, -1, 45199, 49877, 49899,
+ /* 689 */ 45355, -1, -1, -1, 45437, 45457, 45478, 45499, 45520, 45546, 45572, 45598, 45625, 45652, 45679, -1, 49924, 49951,
+ /* 707 */ 49979, 49989, 49994, 49999,
+ /* 711 */ 50141, 50157,
+ /* 713 */ 50635, 50658, 50681, 50704,
+ /* 717 */ 50751, 50776, 50801, 50826,
+ /* 721 */ 50876, 50900, 50924, 50948,
+ /* 725 */ 50980, 51005, 51030, 51055,
+ /* 729 */ 51089, 51114, 51139, 51164,
+ /* 733 */ 51198, 51224, 51250, 51276,
+ /* 737 */ 51312, 51335, 51358, 51381,
+ /* 741 */ 51411, 51445, 51480, 51515,
+ /* 745 */ 51558, 51592, 51627, 51662,
+ /* 749 */ 51705, 51728, 51751, 51774,
+ /* 753 */ 51804, 51838, 51873, 51908,
+ /* 757 */ 51956, 51990, 52025, 52060,
+ /* 761 */ 52108, 52136, 52164, 52192,
+ /* 765 */ 52232, 52271, 52311, 52351,
+ /* 769 */ 52409, 52448, 52488, 52528,
+ /* 773 */ 52681, 52694, 52707, 52720, 52741, 52760, 52778, 52796, 52811, 52828, 52845, 52857, 52868, 52880, 52893, 52905,
+ /* 789 */ 53129,
+ /* 790 */ 53658, 53679, 53700, 53721, 53742,
+ /* 795 */ 54003,
+ /* 796 */ 54183, 54199, 54216, 54233, 54254,
+ /* 801 */ 54347, 54363, 54379, 54396, 54413, 54434, 54458, 54482, 54505, 54528,
+ /* 811 */ 54715, 54742, 54760, 54778, 54800, 54824, 54845, 54864, 54885, 54906, 54928,
+ /* 822 */ 54990, 54996, -1, 55009, -1, -1, -1, 55022, -1, -1, -1, 55034, -1, 55046, -1, 55058,
+ /* 838 */ 55565, 55600, 55635, 55667, 55699, 55731, 55763, 55797,
+ /* 846 */ 55861, 55875, 55888, 55906, 55919, 55932, 55953, 55977,
+ /* 854 */ 56053, 56064, 56074, 56090, 56116, 56132, 56158, 56174, 56200, 56216, 56242, -1, -1, 56267, 56288, 56319, 56336, 56357, 56374, 56395, 56416,
+ /* 875 */ 56462, 56480, 56499, 56516, 56533,
+ /* 880 */ 56661, 56676, 56697, 56719,
+ /* 884 */ 56753, 56769,
+ /* 886 */ 56877, 56888, 56898, 56909, 56921, 56934, 56948, 56960,
+ /* 894 */ 56988, 56998, 57007, 57017, 57028, 57040, 57053, 57064,
+ /* 902 */ 57485, 57496, 57506, 57530, -1, 57541, 57561,
+ /* 909 */ 57579, -1, -1, -1, -1, 57586, -1, -1, -1, -1, 57593, -1, -1, -1, -1, 57600, -1, 57607, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 57614, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 57621, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 57628, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 57635, -1, -1, -1, -1, 57642, -1, -1, -1, -1, 57649, -1, -1, -1, -1, 57656, -1, -1, -1, -1, -1, -1, 57663, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 57670, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 57677, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 57684, -1, -1, -1, -1, -1, -1, 57691, -1, -1, -1, -1, 57698, -1, -1, -1, -1, 57705, -1, -1, -1, -1, 57712, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 57719, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 57726, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 57733, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 57740, -1, 57747, -1, -1, -1, -1, 57754, -1, -1, -1, -1, 57761, -1, -1, -1, -1, 57768,
+ /* 1165 */ 57851, 57858, 57878, 57883,
+ /* 1169 */ 58029, 58042, 58061, 58083,
+ /* 1173 */ 58464, 58484,
+ /* 1175 */ 58496, 58510, 58523,
+ /* 1178 */ 60045, 60067, 60087, 60108, 60129,
+ /* 1183 */ 60172, 60189, 60205, 60220, 60236, 60252, 60276, 60300, 60324, 60348, 60369, 60385, 60400, 60424, 60447, 60471, 60491, 60510,
+ /* 1201 */ 60553, 60566, 60580, 60592, 60605,
+ /* 1206 */ 60678, 60695, 60712, 60728, 60744, 60760, 60776, 60793, 60811,
+ /* 1215 */ 60874, 60881, 60895, 60909, 60923, 60937,
+ /* 1221 */ 60947, 60959, 60970, 60981,
+ /* 1225 */ 61153, -1, -1, 61168,
+ /* 1229 */ 61807, 61830, 61853,
+ /* 1232 */ 61956, 61969, 61982,
+ /* 1235 */ 61994, 62012, 62032, 62052,
+ /* 1239 */ 62080, 62096, 62113,
+ /* 1242 */ 62140, 62155,
+ /* 1244 */ -1, 62233, 62256, 62279, 62302, 62317, 62330, 62342, 62359, -1, 62378, 62398, 62418, 62437, -1, 62449, 62466, 62483, 62499, -1, 62515, 62544, 62555, 62581, 62599, 62616, 62635, 62653, 62672, 62688, 62704, 62724, 62746, 62768, 62782, 62801, 62817, -1, -1, 62827, 62842, -1, 62860, 62883, 62908, 62930, 62955, 38463, 38490, 62977, -1, 63005, 63024, 63042, 63062, 63081, 63101, 63125, 63146,
+ /* 1303 */ 63589, 63602, 63614,
+ /* 1306 */ 63626, 63639, 63651,
+ /* 1309 */ 63665, 63679, 63691,
+ /* 1312 */ 63822, 63835, 63849,
+ /* 1315 */ 63859, 63872, 63882, 63896,
+ /* 1319 */ 63924, 63937, 63949, 63968,
+ /* 1323 */ 62140, 62155, 64065,
+ /* 1326 */ 64083, 64109, 64135, 64160,
+ /* 1330 */ 64939, 64950, 64958, 64974,
+ /* 1334 */ 65000, 65026, 65051, 65076, 65099, 65120, 65147, 65176,
+ /* 1342 */ 65513, 65525, 65538, 65551,
+ /* 1346 */ 65571, 65585, 65593, 65602, 65612, 65621, 65633, 65648, 65663, 65680, 65697, 65711, 65723, -1, 65741, -1, 65759, 65771, 65785, 65799, 65813, 65824, 65835,
+ /* 1369 */ 65856, 65869, -1, -1, 65882, 65894, 65906, 65918,
+ /* 1377 */ 65943, 65952, 65961, 65974,
+ /* 1381 */ 66051, 66066, 66084, 66112, 66142, 66173, 66201, 66231,
+ /* 1389 */ 40211, 40235, 40253, 40272, 40292, 40311, 40333, 40358, 40383, 40410, 40437, 40461, 40483, 40511, 40536, 40564, 40592, 40614, 40638, 40662, 40686, 40707, 40728, 40753, 40782, 40807, 40833, 40856, 40880, 40907, 40935, 67123, 40991, 41013, 41035, 41059, 41079, 41099, 41119, 41139, 41159, 41179, 67143, 67171, 67210, 67249, 67271, 67299, 67327, 67360, 67388, 67416, 67449, 67473, 67500, 67533, 67572, 41665, 41685, 67600, 67622, 67644, 67668, 41802,
+ /* 1453 */ 41853, 41874, 41895, 41918, 41941, 41961, 67712, 42006, 67738, 42053, 67762,
+ /* 1464 */ 67804, 67820, 67836, 67852, 67868, 67884, 67900, 67918, 67935, 67953, 67970, 67987, 68005,
+ /* 1477 */ 68040, 68056, 68072, 68088, 68104, 68120, 68136, 68152, 68168, 68185, 68202, 68219, 68237, 68255,
+ /* 1491 */ 68292, 68310, 68328, 68346, 68364, 68382, 68400, 68418, 68436, 68454,
+ /* 1501 */ 68522, 68538, 68554, 68570, 68586, 68602,
+ /* 1507 */ 245, 69573, 48894, 8050,
+ /* 1511 */ -1, 62233, 62256, 62279, 62302, 62317, 62330, 62342, 62359, -1, 62378, 62398, 62418, 62437, 70608, 62449, 62466, 62483, 70620, 70631, 62515, 62544, 62555, 62581, 62599, 62616, 62635, 62653, -1, -1, 62704, 62724, 62746, 62768, 62782, 62801, 62817, 70651, -1, 62827, 62842, -1, 62860, 62883, 62908, 62930, 62955, 38463, 38490, 62977, -1, 63005, 63024, 63042, 63062, 63081, 63101, 63125, 63146, -1, -1, 70665, 70685,
+ /* 1574 */ 70886, 70902, 70919, 70946,
+ /* 1578 */ 42477, 42492, 42507, 8050,
};
#endif
diff -Nru mesa-17.2.4/src/amd/common/sid_tables.py mesa-17.3.3/src/amd/common/sid_tables.py
--- mesa-17.2.4/src/amd/common/sid_tables.py 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/common/sid_tables.py 2018-01-18 21:30:28.000000000 +0000
@@ -25,8 +25,12 @@
*/
'''
-import sys
+import collections
+import functools
+import itertools
+import os.path
import re
+import sys
class StringTable:
@@ -131,91 +135,203 @@
self.s_name = s_name
self.name = strip_prefix(s_name)
self.values = []
- self.varname_values = '%s__%s__values' % (reg.r_name.lower(), self.name.lower())
+
+ def format(self, string_table, idx_table):
+ if len(self.values):
+ values_offsets = []
+ for value in self.values:
+ while value[1] >= len(values_offsets):
+ values_offsets.append(-1)
+ values_offsets[value[1]] = string_table.add(strip_prefix(value[0]))
+ return '{%s, %s(~0u), %s, %s}' % (
+ string_table.add(self.name), self.s_name,
+ len(values_offsets), idx_table.add(values_offsets))
+ else:
+ return '{%s, %s(~0u)}' % (string_table.add(self.name), self.s_name)
+
+ def __eq__(self, other):
+ return (self.s_name == other.s_name and
+ self.name == other.name and
+ len(self.values) == len(other.values) and
+ all(a[0] == b[0] and a[1] == b[1] for a, b, in zip(self.values, other.values)))
+
+ def __ne__(self, other):
+ return not (self == other)
+
+
+class FieldTable:
+ """
+ A class for collecting multiple arrays of register fields in a single big
+ array that is used by indexing (to avoid relocations in the resulting binary)
+ """
+ def __init__(self):
+ self.table = []
+ self.idxs = set()
+ self.name_to_idx = collections.defaultdict(lambda: [])
+
+ def add(self, array):
+ """
+ Add an array of Field objects, and return the index of where to find
+ the array in the table.
+ """
+ # Check if we can find the array in the table already
+ for base_idx in self.name_to_idx.get(array[0].name, []):
+ if base_idx + len(array) > len(self.table):
+ continue
+
+ for i, a in enumerate(array):
+ b = self.table[base_idx + i]
+ if a != b:
+ break
+ else:
+ return base_idx
+
+ base_idx = len(self.table)
+ self.idxs.add(base_idx)
+
+ for field in array:
+ self.name_to_idx[field.name].append(len(self.table))
+ self.table.append(field)
+
+ return base_idx
+
+ def emit(self, filp, string_table, idx_table):
+ """
+ Write
+ static const struct si_field sid_fields_table[] = { ... };
+ to filp.
+ """
+ idxs = sorted(self.idxs) + [len(self.table)]
+
+ filp.write('static const struct si_field sid_fields_table[] = {\n')
+
+ for start, end in zip(idxs, idxs[1:]):
+ filp.write('\t/* %s */\n' % (start))
+ for field in self.table[start:end]:
+ filp.write('\t%s,\n' % (field.format(string_table, idx_table)))
+
+ filp.write('};\n')
+
class Reg:
def __init__(self, r_name):
self.r_name = r_name
self.name = strip_prefix(r_name)
self.fields = []
- self.own_fields = True
+
+ def __eq__(self, other):
+ if not isinstance(other, Reg):
+ return False
+ return (self.r_name == other.r_name and
+ self.name == other.name and
+ len(self.fields) == len(other.fields) and
+ all(a == b for a, b in zip(self.fields, other.fields)))
+
+ def __ne__(self, other):
+ return not (self == other)
def strip_prefix(s):
'''Strip prefix in the form ._.*_, e.g. R_001234_'''
return s[s[2:].find('_')+3:]
-def parse(filename, regs, packets):
- stream = open(filename)
-
- for line in stream:
- if not line.startswith('#define '):
- continue
-
- line = line[8:].strip()
- if line.startswith('R_'):
- name = line.split()[0]
-
- for it in regs:
- if it.r_name == name:
- reg = it
- break
- else:
- reg = Reg(name)
- regs.append(reg)
+class Asic:
+ """
+ Store the registers of one ASIC class / group of classes.
+ """
+ def __init__(self, name):
+ self.name = name
+ self.registers = []
- elif line.startswith('S_'):
- name = line[:line.find('(')]
+ def parse(self, filp, packets, older_asics):
+ """
+ Parse registers from the given header file. Packets are separately
+ stored in the packets array.
+ """
+ for line in filp:
+ if not line.startswith('#define '):
+ continue
+
+ line = line[8:].strip()
+
+ if line.startswith('R_'):
+ name = line.split()[0]
+
+ for it in self.registers:
+ if it.r_name == name:
+ sys.exit('Duplicate register define: %s' % (name))
+ else:
+ reg = Reg(name)
+ self.registers.append(reg)
- for it in reg.fields:
- if it.s_name == name:
- field = it
- break
- else:
- field = Field(reg, name)
- reg.fields.append(field)
+ elif line.startswith('S_'):
+ name = line[:line.find('(')]
- elif line.startswith('V_'):
- split = line.split()
- name = split[0]
- value = int(split[1], 0)
-
- for (n,v) in field.values:
- if n == name:
- if v != value:
- sys.exit('Value mismatch: name = ' + name)
-
- field.values.append((name, value))
-
- elif line.startswith('PKT3_') and line.find('0x') != -1 and line.find('(') == -1:
- packets.append(line.split()[0])
-
- # Copy fields to indexed registers which have their fields only defined
- # at register index 0.
- # For example, copy fields from CB_COLOR0_INFO to CB_COLORn_INFO, n > 0.
- match_number = re.compile('[0-9]+')
- reg_dict = dict()
-
- # Create a dict of registers with fields and '0' in their name
- for reg in regs:
- if len(reg.fields) and reg.name.find('0') != -1:
- reg_dict[reg.name] = reg
-
- # Assign fields
- for reg in regs:
- if not len(reg.fields):
- reg0 = reg_dict.get(match_number.sub('0', reg.name))
- if reg0 != None:
- reg.fields = reg0.fields
- reg.fields_owner = reg0
- reg.own_fields = False
+ for it in reg.fields:
+ if it.s_name == name:
+ sys.exit('Duplicate field define: %s' % (name))
+ else:
+ field = Field(reg, name)
+ reg.fields.append(field)
+ elif line.startswith('V_'):
+ split = line.split()
+ name = split[0]
+ value = int(split[1], 0)
+
+ for (n,v) in field.values:
+ if n == name:
+ sys.exit('Duplicate value define: name = ' + name)
+
+ field.values.append((name, value))
+
+ elif line.startswith('PKT3_') and line.find('0x') != -1 and line.find('(') == -1:
+ packets.append(line.split()[0])
+
+ # Copy values for corresponding fields from older ASICs if they were
+ # not redefined
+ for reg in self.registers:
+ old_reg = False
+ for field in reg.fields:
+ if len(field.values) > 0:
+ continue
+ if old_reg is False:
+ for old_reg in itertools.chain(
+ *(asic.registers for asic in reversed(older_asics))):
+ if old_reg.name == reg.name:
+ break
+ else:
+ old_reg = None
+ if old_reg is not None:
+ for old_field in old_reg.fields:
+ if old_field.name == field.name:
+ field.values = old_field.values
+ break
+
+ # Copy fields to indexed registers which have their fields only defined
+ # at register index 0.
+ # For example, copy fields from CB_COLOR0_INFO to CB_COLORn_INFO, n > 0.
+ match_number = re.compile('[0-9]+')
+ reg_dict = dict()
+
+ # Create a dict of registers with fields and '0' in their name
+ for reg in self.registers:
+ if len(reg.fields) and reg.name.find('0') != -1:
+ reg_dict[reg.name] = reg
+
+ # Assign fields
+ for reg in self.registers:
+ if not len(reg.fields):
+ reg0 = reg_dict.get(match_number.sub('0', reg.name))
+ if reg0 != None:
+ reg.fields = reg0.fields
-def write_tables(regs, packets):
+def write_tables(asics, packets):
strings = StringTable()
strings_offsets = IntTable("int")
+ fields = FieldTable()
print '/* This file is autogenerated by sid_tables.py from sid.h. Do not edit directly. */'
print
@@ -250,40 +366,28 @@
print '};'
print
- print 'static const struct si_field sid_fields_table[] = {'
-
- fields_idx = 0
- for reg in regs:
- if len(reg.fields) and reg.own_fields:
- print '\t/* %s */' % (fields_idx)
-
- reg.fields_idx = fields_idx
+ regs = {}
+ for asic in asics:
+ print 'static const struct si_reg %s_reg_table[] = {' % (asic.name)
+ for reg in asic.registers:
+ # Only output a register that was changed or added relative to
+ # the previous generation
+ previous = regs.get(reg.r_name, None)
+ if previous == reg:
+ continue
+
+ if len(reg.fields):
+ print '\t{%s, %s, %s, %s},' % (strings.add(reg.name), reg.r_name,
+ len(reg.fields), fields.add(reg.fields))
+ else:
+ print '\t{%s, %s},' % (strings.add(reg.name), reg.r_name)
- for field in reg.fields:
- if len(field.values):
- values_offsets = []
- for value in field.values:
- while value[1] >= len(values_offsets):
- values_offsets.append(-1)
- values_offsets[value[1]] = strings.add(strip_prefix(value[0]))
- print '\t{%s, %s(~0u), %s, %s},' % (
- strings.add(field.name), field.s_name,
- len(values_offsets), strings_offsets.add(values_offsets))
- else:
- print '\t{%s, %s(~0u)},' % (strings.add(field.name), field.s_name)
- fields_idx += 1
+ regs[reg.r_name] = reg
+ print '};'
+ print
- print '};'
- print
+ fields.emit(sys.stdout, strings, strings_offsets)
- print 'static const struct si_reg sid_reg_table[] = {'
- for reg in regs:
- if len(reg.fields):
- print '\t{%s, %s, %s, %s},' % (strings.add(reg.name), reg.r_name,
- len(reg.fields), reg.fields_idx if reg.own_fields else reg.fields_owner.fields_idx)
- else:
- print '\t{%s, %s},' % (strings.add(reg.name), reg.r_name)
- print '};'
print
strings.emit(sys.stdout, "sid_strings")
@@ -297,11 +401,16 @@
def main():
- regs = []
+ asics = []
packets = []
for arg in sys.argv[1:]:
- parse(arg, regs, packets)
- write_tables(regs, packets)
+ basename = os.path.basename(arg)
+ m = re.match(r'(.*)\.h', basename)
+ asic = Asic(m.group(1))
+ with open(arg) as filp:
+ asic.parse(filp, packets, asics)
+ asics.append(asic)
+ write_tables(asics, packets)
if __name__ == '__main__':
diff -Nru mesa-17.2.4/src/amd/Makefile.common.am mesa-17.3.3/src/amd/Makefile.common.am
--- mesa-17.2.4/src/amd/Makefile.common.am 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/Makefile.common.am 2018-01-18 21:30:28.000000000 +0000
@@ -58,13 +58,8 @@
$(AMD_COMMON_FILES) \
$(AMD_COMPILER_FILES) \
$(AMD_DEBUG_FILES) \
- $(AMD_GENERATED_FILES)
-
-# nir_to_llvm requires LLVM 3.9, which is only required as a minimum when
-# radv is built.
-if HAVE_RADEON_VULKAN
-common_libamd_common_la_SOURCES += $(AMD_NIR_FILES)
-endif
+ $(AMD_GENERATED_FILES) \
+ $(AMD_NIR_FILES)
endif
common_libamd_common_la_LIBADD = $(LIBELF_LIBS)
diff -Nru mesa-17.2.4/src/amd/Makefile.in mesa-17.3.3/src/amd/Makefile.in
--- mesa-17.2.4/src/amd/Makefile.in 2017-10-30 14:49:58.000000000 +0000
+++ mesa-17.3.3/src/amd/Makefile.in 2018-01-18 21:30:38.000000000 +0000
@@ -153,13 +153,10 @@
host_triplet = @host@
target_triplet = @target@
@HAVE_GALLIUM_LLVM_TRUE@am__append_1 = $(COMMON_LIBS)
-
-# nir_to_llvm requires LLVM 3.9, which is only required as a minimum when
-# radv is built.
-@HAVE_GALLIUM_LLVM_TRUE@@HAVE_RADEON_VULKAN_TRUE@am__append_2 = $(AMD_NIR_FILES)
subdir = src/amd
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_gnu_make.m4 \
+am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_compile_flag.m4 \
+ $(top_srcdir)/m4/ax_check_gnu_make.m4 \
$(top_srcdir)/m4/ax_check_python_mako_module.m4 \
$(top_srcdir)/m4/ax_gcc_builtin.m4 \
$(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
@@ -208,10 +205,10 @@
common/ac_binary.c common/ac_binary.h common/ac_exp_param.h \
common/ac_llvm_build.c common/ac_llvm_build.h \
common/ac_llvm_helper.cpp common/ac_llvm_util.c \
- common/ac_llvm_util.h common/ac_shader_info.c \
- common/ac_shader_info.h common/ac_debug.c common/ac_debug.h \
- common/sid_tables.h common/ac_nir_to_llvm.c \
- common/ac_nir_to_llvm.h
+ common/ac_llvm_util.h common/ac_shader_abi.h \
+ common/ac_shader_info.c common/ac_shader_info.h \
+ common/ac_debug.c common/ac_debug.h common/sid_tables.h \
+ common/ac_nir_to_llvm.c common/ac_nir_to_llvm.h
am__objects_2 = common/common_libamd_common_la-ac_gpu_info.lo \
common/common_libamd_common_la-ac_surface.lo
am__objects_3 = common/common_libamd_common_la-ac_binary.lo \
@@ -222,11 +219,10 @@
am__objects_4 = common/common_libamd_common_la-ac_debug.lo
am__objects_5 =
am__objects_6 = common/common_libamd_common_la-ac_nir_to_llvm.lo
-@HAVE_GALLIUM_LLVM_TRUE@@HAVE_RADEON_VULKAN_TRUE@am__objects_7 = $(am__objects_6)
@HAVE_GALLIUM_LLVM_TRUE@am_common_libamd_common_la_OBJECTS = \
@HAVE_GALLIUM_LLVM_TRUE@ $(am__objects_2) $(am__objects_3) \
@HAVE_GALLIUM_LLVM_TRUE@ $(am__objects_4) $(am__objects_5) \
-@HAVE_GALLIUM_LLVM_TRUE@ $(am__objects_7)
+@HAVE_GALLIUM_LLVM_TRUE@ $(am__objects_6)
common_libamd_common_la_OBJECTS = \
$(am_common_libamd_common_la_OBJECTS)
common_libamd_common_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
@@ -456,9 +452,9 @@
NVVIEUX_LIBS = @NVVIEUX_LIBS@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
-OMX_CFLAGS = @OMX_CFLAGS@
-OMX_LIBS = @OMX_LIBS@
-OMX_LIB_INSTALL_DIR = @OMX_LIB_INSTALL_DIR@
+OMX_BELLAGIO_CFLAGS = @OMX_BELLAGIO_CFLAGS@
+OMX_BELLAGIO_LIBS = @OMX_BELLAGIO_LIBS@
+OMX_BELLAGIO_LIB_INSTALL_DIR = @OMX_BELLAGIO_LIB_INSTALL_DIR@
OPENCL_LIBNAME = @OPENCL_LIBNAME@
OPENCL_VERSION = @OPENCL_VERSION@
OSMESA_LIB = @OSMESA_LIB@
@@ -512,6 +508,8 @@
VA_LIB_INSTALL_DIR = @VA_LIB_INSTALL_DIR@
VA_MAJOR = @VA_MAJOR@
VA_MINOR = @VA_MINOR@
+VC5_SIMULATOR_CFLAGS = @VC5_SIMULATOR_CFLAGS@
+VC5_SIMULATOR_LIBS = @VC5_SIMULATOR_LIBS@
VDPAU_CFLAGS = @VDPAU_CFLAGS@
VDPAU_LIBS = @VDPAU_LIBS@
VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@
@@ -523,12 +521,15 @@
VL_CFLAGS = @VL_CFLAGS@
VL_LIBS = @VL_LIBS@
VULKAN_ICD_INSTALL_DIR = @VULKAN_ICD_INSTALL_DIR@
-WAYLAND_CFLAGS = @WAYLAND_CFLAGS@
-WAYLAND_LIBS = @WAYLAND_LIBS@
+WAYLAND_CLIENT_CFLAGS = @WAYLAND_CLIENT_CFLAGS@
+WAYLAND_CLIENT_LIBS = @WAYLAND_CLIENT_LIBS@
WAYLAND_PROTOCOLS_DATADIR = @WAYLAND_PROTOCOLS_DATADIR@
WAYLAND_SCANNER = @WAYLAND_SCANNER@
WAYLAND_SCANNER_CFLAGS = @WAYLAND_SCANNER_CFLAGS@
WAYLAND_SCANNER_LIBS = @WAYLAND_SCANNER_LIBS@
+WAYLAND_SERVER_CFLAGS = @WAYLAND_SERVER_CFLAGS@
+WAYLAND_SERVER_LIBS = @WAYLAND_SERVER_LIBS@
+WNO_OVERRIDE_INIT = @WNO_OVERRIDE_INIT@
X11_INCLUDES = @X11_INCLUDES@
XA_MAJOR = @XA_MAJOR@
XA_MINOR = @XA_MINOR@
@@ -615,7 +616,6 @@
COMMON_HEADER_FILES = \
common/gfx9d.h \
common/sid.h \
- common/r600d_common.h \
common/amd_family.h \
common/amd_kernel_code_t.h \
common/amdgpu_id.h
@@ -662,6 +662,7 @@
common/ac_llvm_helper.cpp \
common/ac_llvm_util.c \
common/ac_llvm_util.h \
+ common/ac_shader_abi.h \
common/ac_shader_info.c \
common/ac_shader_info.h
@@ -731,11 +732,13 @@
@HAVE_GALLIUM_LLVM_TRUE@ $(VISIBILITY_CXXFLAGS) \
@HAVE_GALLIUM_LLVM_TRUE@ $(LLVM_CXXFLAGS)
-@HAVE_GALLIUM_LLVM_TRUE@common_libamd_common_la_SOURCES = \
+@HAVE_GALLIUM_LLVM_TRUE@common_libamd_common_la_SOURCES = \
@HAVE_GALLIUM_LLVM_TRUE@ $(AMD_COMMON_FILES) \
@HAVE_GALLIUM_LLVM_TRUE@ $(AMD_COMPILER_FILES) \
@HAVE_GALLIUM_LLVM_TRUE@ $(AMD_DEBUG_FILES) \
-@HAVE_GALLIUM_LLVM_TRUE@ $(AMD_GENERATED_FILES) $(am__append_2)
+@HAVE_GALLIUM_LLVM_TRUE@ $(AMD_GENERATED_FILES) \
+@HAVE_GALLIUM_LLVM_TRUE@ $(AMD_NIR_FILES)
+
common_libamd_common_la_LIBADD = $(LIBELF_LIBS)
BUILT_SOURCES = $(AMD_GENERATED_FILES)
all: $(BUILT_SOURCES)
diff -Nru mesa-17.2.4/src/amd/Makefile.sources mesa-17.3.3/src/amd/Makefile.sources
--- mesa-17.2.4/src/amd/Makefile.sources 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/Makefile.sources 2018-01-18 21:30:28.000000000 +0000
@@ -1,7 +1,6 @@
COMMON_HEADER_FILES = \
common/gfx9d.h \
common/sid.h \
- common/r600d_common.h \
common/amd_family.h \
common/amd_kernel_code_t.h \
common/amdgpu_id.h
@@ -48,6 +47,7 @@
common/ac_llvm_helper.cpp \
common/ac_llvm_util.c \
common/ac_llvm_util.h \
+ common/ac_shader_abi.h \
common/ac_shader_info.c \
common/ac_shader_info.h
diff -Nru mesa-17.2.4/src/amd/meson.build mesa-17.3.3/src/amd/meson.build
--- mesa-17.2.4/src/amd/meson.build 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/amd/meson.build 2018-02-01 16:17:32.000000000 +0000
@@ -0,0 +1,27 @@
+# Copyright © 2017 Intel Corporation
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+inc_amd = include_directories('.')
+
+subdir('addrlib')
+subdir('common')
+if with_amd_vk
+ subdir('vulkan')
+endif
diff -Nru mesa-17.2.4/src/amd/vulkan/dev_icd.json.in mesa-17.3.3/src/amd/vulkan/dev_icd.json.in
--- mesa-17.2.4/src/amd/vulkan/dev_icd.json.in 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/vulkan/dev_icd.json.in 2018-01-18 21:30:28.000000000 +0000
@@ -1,7 +1,7 @@
{
"file_format_version": "1.0.0",
"ICD": {
- "library_path": "@build_libdir@/libvulkan_radeon.so",
+ "library_path": "@libvulkan_radeon_path@",
"api_version": "1.0.3"
}
}
diff -Nru mesa-17.2.4/src/amd/vulkan/Makefile.am mesa-17.3.3/src/amd/vulkan/Makefile.am
--- mesa-17.2.4/src/amd/vulkan/Makefile.am 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/vulkan/Makefile.am 2018-01-18 21:30:28.000000000 +0000
@@ -90,13 +90,13 @@
if HAVE_PLATFORM_WAYLAND
AM_CPPFLAGS += \
- $(WAYLAND_CFLAGS) \
+ $(WAYLAND_CLIENT_CFLAGS) \
-DVK_USE_PLATFORM_WAYLAND_KHR
VULKAN_SOURCES += $(VULKAN_WSI_WAYLAND_FILES)
VULKAN_LIB_DEPS += \
- $(WAYLAND_LIBS)
+ $(WAYLAND_CLIENT_LIBS)
endif
noinst_LTLIBRARIES = libvulkan_common.la
@@ -107,12 +107,19 @@
vulkan_api_xml = $(top_srcdir)/src/vulkan/registry/vk.xml
-radv_entrypoints.c: radv_entrypoints_gen.py $(vulkan_api_xml)
+radv_entrypoints.c: radv_entrypoints_gen.py radv_extensions.py $(vulkan_api_xml)
$(MKDIR_GEN)
$(AM_V_GEN)$(PYTHON2) $(srcdir)/radv_entrypoints_gen.py \
--xml $(vulkan_api_xml) --outdir $(builddir)
radv_entrypoints.h: radv_entrypoints.c
+radv_extensions.c: radv_extensions.py \
+ $(vulkan_api_xml)
+ $(MKDIR_GEN)
+ $(AM_V_GEN)$(PYTHON2) $(srcdir)/radv_extensions.py \
+ --xml $(vulkan_api_xml) \
+ --out $@
+
vk_format_table.c: vk_format_table.py \
vk_format_parse.py \
vk_format_layout.csv
@@ -125,6 +132,7 @@
dev_icd.json.in \
radeon_icd.json.in \
radv_entrypoints_gen.py \
+ radv_extensions.py \
vk_format_layout.csv \
vk_format_parse.py \
vk_format_table.py
@@ -149,7 +157,7 @@
dev_icd.json : dev_icd.json.in
$(AM_V_GEN) $(SED) \
- -e "s#@build_libdir@#${abs_top_builddir}/${LIB_DIR}#" \
+ -e "s#@libvulkan_radeon_path@#${abs_top_builddir}/${LIB_DIR}/libvulkan_radeon.so#" \
< $(srcdir)/dev_icd.json.in > $@
radeon_icd.@host_cpu@.json : radeon_icd.json.in
diff -Nru mesa-17.2.4/src/amd/vulkan/Makefile.in mesa-17.3.3/src/amd/vulkan/Makefile.in
--- mesa-17.2.4/src/amd/vulkan/Makefile.in 2017-10-30 14:49:58.000000000 +0000
+++ mesa-17.3.3/src/amd/vulkan/Makefile.in 2018-01-18 21:30:38.000000000 +0000
@@ -144,16 +144,17 @@
@HAVE_PLATFORM_X11_TRUE@am__append_2 = $(VULKAN_WSI_X11_FILES)
@HAVE_PLATFORM_X11_TRUE@am__append_3 = $(XCB_DRI3_LIBS)
@HAVE_PLATFORM_WAYLAND_TRUE@am__append_4 = \
-@HAVE_PLATFORM_WAYLAND_TRUE@ $(WAYLAND_CFLAGS) \
+@HAVE_PLATFORM_WAYLAND_TRUE@ $(WAYLAND_CLIENT_CFLAGS) \
@HAVE_PLATFORM_WAYLAND_TRUE@ -DVK_USE_PLATFORM_WAYLAND_KHR
@HAVE_PLATFORM_WAYLAND_TRUE@am__append_5 = $(VULKAN_WSI_WAYLAND_FILES)
@HAVE_PLATFORM_WAYLAND_TRUE@am__append_6 = \
-@HAVE_PLATFORM_WAYLAND_TRUE@ $(WAYLAND_LIBS)
+@HAVE_PLATFORM_WAYLAND_TRUE@ $(WAYLAND_CLIENT_LIBS)
subdir = src/amd/vulkan
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_gnu_make.m4 \
+am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_compile_flag.m4 \
+ $(top_srcdir)/m4/ax_check_gnu_make.m4 \
$(top_srcdir)/m4/ax_check_python_mako_module.m4 \
$(top_srcdir)/m4/ax_gcc_builtin.m4 \
$(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
@@ -201,41 +202,42 @@
LTLIBRARIES = $(lib_LTLIBRARIES) $(noinst_LTLIBRARIES)
libvulkan_common_la_LIBADD =
am__libvulkan_common_la_SOURCES_DIST = radv_entrypoints.c \
- radv_entrypoints.h radv_cmd_buffer.c radv_cs.h radv_debug.h \
- radv_device.c radv_descriptor_set.c radv_descriptor_set.h \
- radv_formats.c radv_image.c radv_meta.c radv_meta.h \
- radv_meta_blit.c radv_meta_blit2d.c radv_meta_buffer.c \
- radv_meta_bufimage.c radv_meta_clear.c radv_meta_copy.c \
- radv_meta_decompress.c radv_meta_fast_clear.c \
- radv_meta_resolve.c radv_meta_resolve_cs.c \
- radv_meta_resolve_fs.c radv_pass.c radv_pipeline.c \
- radv_pipeline_cache.c radv_private.h radv_radeon_winsys.h \
- radv_query.c radv_util.c radv_util.h radv_wsi.c \
- si_cmd_buffer.c vk_format_table.c vk_format.h \
- winsys/amdgpu/radv_amdgpu_bo.c winsys/amdgpu/radv_amdgpu_bo.h \
- winsys/amdgpu/radv_amdgpu_cs.c winsys/amdgpu/radv_amdgpu_cs.h \
+ radv_entrypoints.h radv_extensions.c radv_cmd_buffer.c \
+ radv_cs.h radv_debug.c radv_debug.h radv_device.c \
+ radv_descriptor_set.c radv_descriptor_set.h radv_formats.c \
+ radv_image.c radv_meta.c radv_meta.h radv_meta_blit.c \
+ radv_meta_blit2d.c radv_meta_buffer.c radv_meta_bufimage.c \
+ radv_meta_clear.c radv_meta_copy.c radv_meta_decompress.c \
+ radv_meta_fast_clear.c radv_meta_resolve.c \
+ radv_meta_resolve_cs.c radv_meta_resolve_fs.c radv_pass.c \
+ radv_pipeline.c radv_pipeline_cache.c radv_private.h \
+ radv_radeon_winsys.h radv_shader.c radv_shader.h radv_query.c \
+ radv_util.c radv_util.h radv_wsi.c si_cmd_buffer.c \
+ vk_format_table.c vk_format.h winsys/amdgpu/radv_amdgpu_bo.c \
+ winsys/amdgpu/radv_amdgpu_bo.h winsys/amdgpu/radv_amdgpu_cs.c \
+ winsys/amdgpu/radv_amdgpu_cs.h \
winsys/amdgpu/radv_amdgpu_surface.c \
winsys/amdgpu/radv_amdgpu_surface.h \
winsys/amdgpu/radv_amdgpu_winsys.c \
winsys/amdgpu/radv_amdgpu_winsys.h \
winsys/amdgpu/radv_amdgpu_winsys_public.h radv_wsi_x11.c \
radv_wsi_wayland.c
-am__objects_1 = radv_entrypoints.lo
+am__objects_1 = radv_entrypoints.lo radv_extensions.lo
am__dirstamp = $(am__leading_dot)dirstamp
am__objects_2 = winsys/amdgpu/radv_amdgpu_bo.lo \
winsys/amdgpu/radv_amdgpu_cs.lo \
winsys/amdgpu/radv_amdgpu_surface.lo \
winsys/amdgpu/radv_amdgpu_winsys.lo
-am__objects_3 = radv_cmd_buffer.lo radv_device.lo \
+am__objects_3 = radv_cmd_buffer.lo radv_debug.lo radv_device.lo \
radv_descriptor_set.lo radv_formats.lo radv_image.lo \
radv_meta.lo radv_meta_blit.lo radv_meta_blit2d.lo \
radv_meta_buffer.lo radv_meta_bufimage.lo radv_meta_clear.lo \
radv_meta_copy.lo radv_meta_decompress.lo \
radv_meta_fast_clear.lo radv_meta_resolve.lo \
radv_meta_resolve_cs.lo radv_meta_resolve_fs.lo radv_pass.lo \
- radv_pipeline.lo radv_pipeline_cache.lo radv_query.lo \
- radv_util.lo radv_wsi.lo si_cmd_buffer.lo vk_format_table.lo \
- $(am__objects_2)
+ radv_pipeline.lo radv_pipeline_cache.lo radv_shader.lo \
+ radv_query.lo radv_util.lo radv_wsi.lo si_cmd_buffer.lo \
+ vk_format_table.lo $(am__objects_2)
am__objects_4 = radv_wsi_x11.lo
@HAVE_PLATFORM_X11_TRUE@am__objects_5 = $(am__objects_4)
am__objects_6 = radv_wsi_wayland.lo
@@ -494,9 +496,9 @@
NVVIEUX_LIBS = @NVVIEUX_LIBS@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
-OMX_CFLAGS = @OMX_CFLAGS@
-OMX_LIBS = @OMX_LIBS@
-OMX_LIB_INSTALL_DIR = @OMX_LIB_INSTALL_DIR@
+OMX_BELLAGIO_CFLAGS = @OMX_BELLAGIO_CFLAGS@
+OMX_BELLAGIO_LIBS = @OMX_BELLAGIO_LIBS@
+OMX_BELLAGIO_LIB_INSTALL_DIR = @OMX_BELLAGIO_LIB_INSTALL_DIR@
OPENCL_LIBNAME = @OPENCL_LIBNAME@
OPENCL_VERSION = @OPENCL_VERSION@
OSMESA_LIB = @OSMESA_LIB@
@@ -550,6 +552,8 @@
VA_LIB_INSTALL_DIR = @VA_LIB_INSTALL_DIR@
VA_MAJOR = @VA_MAJOR@
VA_MINOR = @VA_MINOR@
+VC5_SIMULATOR_CFLAGS = @VC5_SIMULATOR_CFLAGS@
+VC5_SIMULATOR_LIBS = @VC5_SIMULATOR_LIBS@
VDPAU_CFLAGS = @VDPAU_CFLAGS@
VDPAU_LIBS = @VDPAU_LIBS@
VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@
@@ -561,12 +565,15 @@
VL_CFLAGS = @VL_CFLAGS@
VL_LIBS = @VL_LIBS@
VULKAN_ICD_INSTALL_DIR = @VULKAN_ICD_INSTALL_DIR@
-WAYLAND_CFLAGS = @WAYLAND_CFLAGS@
-WAYLAND_LIBS = @WAYLAND_LIBS@
+WAYLAND_CLIENT_CFLAGS = @WAYLAND_CLIENT_CFLAGS@
+WAYLAND_CLIENT_LIBS = @WAYLAND_CLIENT_LIBS@
WAYLAND_PROTOCOLS_DATADIR = @WAYLAND_PROTOCOLS_DATADIR@
WAYLAND_SCANNER = @WAYLAND_SCANNER@
WAYLAND_SCANNER_CFLAGS = @WAYLAND_SCANNER_CFLAGS@
WAYLAND_SCANNER_LIBS = @WAYLAND_SCANNER_LIBS@
+WAYLAND_SERVER_CFLAGS = @WAYLAND_SERVER_CFLAGS@
+WAYLAND_SERVER_LIBS = @WAYLAND_SERVER_LIBS@
+WNO_OVERRIDE_INIT = @WNO_OVERRIDE_INIT@
X11_INCLUDES = @X11_INCLUDES@
XA_MAJOR = @XA_MAJOR@
XA_MINOR = @XA_MINOR@
@@ -664,6 +671,7 @@
VULKAN_FILES := \
radv_cmd_buffer.c \
radv_cs.h \
+ radv_debug.c \
radv_debug.h \
radv_device.c \
radv_descriptor_set.c \
@@ -688,6 +696,8 @@
radv_pipeline_cache.c \
radv_private.h \
radv_radeon_winsys.h \
+ radv_shader.c \
+ radv_shader.h \
radv_query.c \
radv_util.c \
radv_util.h \
@@ -705,7 +715,8 @@
VULKAN_GENERATED_FILES := \
radv_entrypoints.c \
- radv_entrypoints.h
+ radv_entrypoints.h \
+ radv_extensions.c
noinst_HEADERS = \
$(top_srcdir)/include/vulkan/vk_platform.h \
@@ -755,6 +766,7 @@
dev_icd.json.in \
radeon_icd.json.in \
radv_entrypoints_gen.py \
+ radv_extensions.py \
vk_format_layout.csv \
vk_format_parse.py \
vk_format_table.py
@@ -886,9 +898,11 @@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dummy.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radv_cmd_buffer.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radv_debug.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radv_descriptor_set.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radv_device.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radv_entrypoints.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radv_extensions.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radv_formats.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radv_image.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radv_meta.Plo@am__quote@
@@ -907,6 +921,7 @@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radv_pipeline.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radv_pipeline_cache.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radv_query.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radv_shader.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radv_util.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radv_wsi.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radv_wsi_wayland.Plo@am__quote@
@@ -1215,12 +1230,19 @@
.PRECIOUS: Makefile
-radv_entrypoints.c: radv_entrypoints_gen.py $(vulkan_api_xml)
+radv_entrypoints.c: radv_entrypoints_gen.py radv_extensions.py $(vulkan_api_xml)
$(MKDIR_GEN)
$(AM_V_GEN)$(PYTHON2) $(srcdir)/radv_entrypoints_gen.py \
--xml $(vulkan_api_xml) --outdir $(builddir)
radv_entrypoints.h: radv_entrypoints.c
+radv_extensions.c: radv_extensions.py \
+ $(vulkan_api_xml)
+ $(MKDIR_GEN)
+ $(AM_V_GEN)$(PYTHON2) $(srcdir)/radv_extensions.py \
+ --xml $(vulkan_api_xml) \
+ --out $@
+
vk_format_table.c: vk_format_table.py \
vk_format_parse.py \
vk_format_layout.csv
@@ -1228,7 +1250,7 @@
dev_icd.json : dev_icd.json.in
$(AM_V_GEN) $(SED) \
- -e "s#@build_libdir@#${abs_top_builddir}/${LIB_DIR}#" \
+ -e "s#@libvulkan_radeon_path@#${abs_top_builddir}/${LIB_DIR}/libvulkan_radeon.so#" \
< $(srcdir)/dev_icd.json.in > $@
radeon_icd.@host_cpu@.json : radeon_icd.json.in
diff -Nru mesa-17.2.4/src/amd/vulkan/Makefile.sources mesa-17.3.3/src/amd/vulkan/Makefile.sources
--- mesa-17.2.4/src/amd/vulkan/Makefile.sources 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/vulkan/Makefile.sources 2018-01-18 21:30:28.000000000 +0000
@@ -33,6 +33,7 @@
VULKAN_FILES := \
radv_cmd_buffer.c \
radv_cs.h \
+ radv_debug.c \
radv_debug.h \
radv_device.c \
radv_descriptor_set.c \
@@ -57,6 +58,8 @@
radv_pipeline_cache.c \
radv_private.h \
radv_radeon_winsys.h \
+ radv_shader.c \
+ radv_shader.h \
radv_query.c \
radv_util.c \
radv_util.h \
@@ -74,5 +77,6 @@
VULKAN_GENERATED_FILES := \
radv_entrypoints.c \
- radv_entrypoints.h
+ radv_entrypoints.h \
+ radv_extensions.c
diff -Nru mesa-17.2.4/src/amd/vulkan/meson.build mesa-17.3.3/src/amd/vulkan/meson.build
--- mesa-17.2.4/src/amd/vulkan/meson.build 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/amd/vulkan/meson.build 2018-02-01 16:17:32.000000000 +0000
@@ -0,0 +1,140 @@
+# Copyright © 2017 Intel Corporation
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+radv_entrypoints = custom_target(
+ 'radv_entrypoints.[ch]',
+ input : ['radv_entrypoints_gen.py', vk_api_xml],
+ output : ['radv_entrypoints.h', 'radv_entrypoints.c'],
+ command : [prog_python2, '@INPUT0@', '--xml', '@INPUT1@',
+ '--outdir', meson.current_build_dir()],
+ depend_files : files('radv_extensions.py'),
+)
+
+radv_extensions_c = custom_target(
+ 'radv_extensions.c',
+ input : ['radv_extensions.py', vk_api_xml],
+ output : ['radv_extensions.c'],
+ command : [prog_python2, '@INPUT0@', '--xml', '@INPUT1@',
+ '--out', '@OUTPUT@'],
+)
+
+vk_format_table_c = custom_target(
+ 'vk_format_table.c',
+ input : ['vk_format_table.py', 'vk_format_layout.csv'],
+ output : 'vk_format_table.c',
+ command : [prog_python2, '@INPUT@'],
+ depend_files : files('vk_format_parse.py'),
+ capture : true,
+)
+
+libradv_files = files(
+ 'winsys/amdgpu/radv_amdgpu_bo.c',
+ 'winsys/amdgpu/radv_amdgpu_bo.h',
+ 'winsys/amdgpu/radv_amdgpu_cs.c',
+ 'winsys/amdgpu/radv_amdgpu_cs.h',
+ 'winsys/amdgpu/radv_amdgpu_surface.c',
+ 'winsys/amdgpu/radv_amdgpu_surface.h',
+ 'winsys/amdgpu/radv_amdgpu_winsys.c',
+ 'winsys/amdgpu/radv_amdgpu_winsys.h',
+ 'winsys/amdgpu/radv_amdgpu_winsys_public.h',
+ 'radv_cmd_buffer.c',
+ 'radv_cs.h',
+ 'radv_debug.c',
+ 'radv_debug.h',
+ 'radv_device.c',
+ 'radv_descriptor_set.c',
+ 'radv_descriptor_set.h',
+ 'radv_formats.c',
+ 'radv_image.c',
+ 'radv_meta.c',
+ 'radv_meta.h',
+ 'radv_meta_blit.c',
+ 'radv_meta_blit2d.c',
+ 'radv_meta_buffer.c',
+ 'radv_meta_bufimage.c',
+ 'radv_meta_clear.c',
+ 'radv_meta_copy.c',
+ 'radv_meta_decompress.c',
+ 'radv_meta_fast_clear.c',
+ 'radv_meta_resolve.c',
+ 'radv_meta_resolve_cs.c',
+ 'radv_meta_resolve_fs.c',
+ 'radv_pass.c',
+ 'radv_pipeline.c',
+ 'radv_pipeline_cache.c',
+ 'radv_private.h',
+ 'radv_radeon_winsys.h',
+ 'radv_shader.c',
+ 'radv_shader.h',
+ 'radv_query.c',
+ 'radv_util.c',
+ 'radv_util.h',
+ 'radv_wsi.c',
+ 'si_cmd_buffer.c',
+ 'vk_format.h',
+)
+
+radv_deps = []
+radv_flags = []
+
+if with_platform_x11
+ radv_deps += dep_xcb_dri3
+ radv_flags += [
+ '-DVK_USE_PLATFORM_XCB_KHR',
+ '-DVK_USE_PLATFORM_XLIB_KHR',
+ ]
+ libradv_files += files('radv_wsi_x11.c')
+endif
+
+if with_platform_wayland
+ radv_deps += dep_wayland_client
+ radv_flags += '-DVK_USE_PLATFORM_WAYLAND_KHR'
+ libradv_files += files('radv_wsi_wayland.c')
+endif
+
+libvulkan_radeon = shared_library(
+ 'vulkan_radeon',
+ [libradv_files, radv_entrypoints, radv_extensions_c, nir_opcodes_h, vk_format_table_c],
+ include_directories : [inc_common, inc_amd, inc_amd_common, inc_compiler,
+ inc_vulkan_util, inc_vulkan_wsi],
+ link_with : [libamd_common, libamdgpu_addrlib, libvulkan_util,
+ libvulkan_wsi, libnir, libmesa_util],
+ dependencies : [dep_llvm, dep_libdrm_amdgpu, dep_thread, dep_elf, dep_dl,
+ dep_m, dep_valgrind],
+ c_args : [c_vis_args, no_override_init_args, radv_flags],
+ link_args : [ld_args_bsymbolic, ld_args_gc_sections],
+ install : true,
+)
+
+radv_data = configuration_data()
+radv_data.set('install_libdir', join_paths(get_option('prefix'), get_option('libdir')))
+radv_data.set('libvulkan_radeon_path', libvulkan_radeon.full_path())
+
+configure_file(
+ configuration : radv_data,
+ input : 'radeon_icd.json.in',
+ output : 'radeon_icd.@0@.json'.format(target_machine.cpu()),
+ install_dir : with_vulkan_icd_dir,
+)
+configure_file(
+ configuration : radv_data,
+ input : 'dev_icd.json.in',
+ output : 'dev_icd.json'
+)
diff -Nru mesa-17.2.4/src/amd/vulkan/radv_cmd_buffer.c mesa-17.3.3/src/amd/vulkan/radv_cmd_buffer.c
--- mesa-17.2.4/src/amd/vulkan/radv_cmd_buffer.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/vulkan/radv_cmd_buffer.c 2018-01-18 21:30:28.000000000 +0000
@@ -27,10 +27,12 @@
#include "radv_private.h"
#include "radv_radeon_winsys.h"
+#include "radv_shader.h"
#include "radv_cs.h"
#include "sid.h"
#include "gfx9d.h"
#include "vk_format.h"
+#include "radv_debug.h"
#include "radv_meta.h"
#include "ac_debug.h"
@@ -76,19 +78,23 @@
},
};
-void
+static void
radv_dynamic_state_copy(struct radv_dynamic_state *dest,
const struct radv_dynamic_state *src,
uint32_t copy_mask)
{
+ /* Make sure to copy the number of viewports/scissors because they can
+ * only be specified at pipeline creation time.
+ */
+ dest->viewport.count = src->viewport.count;
+ dest->scissor.count = src->scissor.count;
+
if (copy_mask & (1 << VK_DYNAMIC_STATE_VIEWPORT)) {
- dest->viewport.count = src->viewport.count;
typed_memcpy(dest->viewport.viewports, src->viewport.viewports,
src->viewport.count);
}
if (copy_mask & (1 << VK_DYNAMIC_STATE_SCISSOR)) {
- dest->scissor.count = src->scissor.count;
typed_memcpy(dest->scissor.scissors, src->scissor.scissors,
src->scissor.count);
}
@@ -141,7 +147,6 @@
VkCommandBuffer* pCommandBuffer)
{
struct radv_cmd_buffer *cmd_buffer;
- VkResult result;
unsigned ring;
cmd_buffer = vk_alloc(&pool->alloc, sizeof(*cmd_buffer), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
@@ -170,8 +175,8 @@
cmd_buffer->cs = device->ws->cs_create(device->ws, ring);
if (!cmd_buffer->cs) {
- result = VK_ERROR_OUT_OF_HOST_MEMORY;
- goto fail;
+ vk_free(&cmd_buffer->pool->alloc, cmd_buffer);
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
}
*pCommandBuffer = radv_cmd_buffer_to_handle(cmd_buffer);
@@ -181,11 +186,6 @@
list_inithead(&cmd_buffer->upload.list);
return VK_SUCCESS;
-
-fail:
- vk_free(&cmd_buffer->pool->alloc, cmd_buffer);
-
- return result;
}
static void
@@ -207,7 +207,8 @@
vk_free(&cmd_buffer->pool->alloc, cmd_buffer);
}
-static void radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
+static VkResult
+radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
{
cmd_buffer->device->ws->cs_reset(cmd_buffer->cs);
@@ -219,6 +220,7 @@
free(up);
}
+ cmd_buffer->push_constant_stages = 0;
cmd_buffer->scratch_size_needed = 0;
cmd_buffer->compute_scratch_size_needed = 0;
cmd_buffer->esgs_ring_size_needed = 0;
@@ -231,7 +233,7 @@
cmd_buffer->upload.upload_bo, 8);
cmd_buffer->upload.offset = 0;
- cmd_buffer->record_fail = false;
+ cmd_buffer->record_result = VK_SUCCESS;
cmd_buffer->ring_offsets_idx = -1;
@@ -242,6 +244,8 @@
&fence_ptr);
cmd_buffer->gfx9_fence_bo = cmd_buffer->upload.upload_bo;
}
+
+ return cmd_buffer->record_result;
}
static bool
@@ -262,7 +266,7 @@
RADEON_FLAG_CPU_ACCESS);
if (!bo) {
- cmd_buffer->record_fail = true;
+ cmd_buffer->record_result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
return false;
}
@@ -271,7 +275,7 @@
upload = malloc(sizeof(*upload));
if (!upload) {
- cmd_buffer->record_fail = true;
+ cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY;
device->ws->buffer_destroy(bo);
return false;
}
@@ -286,7 +290,7 @@
cmd_buffer->upload.map = device->ws->buffer_map(cmd_buffer->upload.upload_bo);
if (!cmd_buffer->upload.map) {
- cmd_buffer->record_fail = true;
+ cmd_buffer->record_result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
return false;
}
@@ -331,6 +335,19 @@
return true;
}
+static void
+radv_emit_write_data_packet(struct radeon_winsys_cs *cs, uint64_t va,
+ unsigned count, const uint32_t *data)
+{
+ radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + count, 0));
+ radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
+ S_370_WR_CONFIRM(1) |
+ S_370_ENGINE_SEL(V_370_ME));
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ radeon_emit_array(cs, data, count);
+}
+
void radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer)
{
struct radv_device *device = cmd_buffer->device;
@@ -340,24 +357,104 @@
if (!device->trace_bo)
return;
- va = device->ws->buffer_get_va(device->trace_bo);
+ va = radv_buffer_get_va(device->trace_bo);
+ if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY)
+ va += 4;
MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 7);
++cmd_buffer->state.trace_id;
device->ws->cs_add_buffer(cs, device->trace_bo, 8);
- radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
- radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
- S_370_WR_CONFIRM(1) |
- S_370_ENGINE_SEL(V_370_ME));
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
- radeon_emit(cs, cmd_buffer->state.trace_id);
+ radv_emit_write_data_packet(cs, va, 1, &cmd_buffer->state.trace_id);
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
radeon_emit(cs, AC_ENCODE_TRACE_POINT(cmd_buffer->state.trace_id));
}
static void
+radv_cmd_buffer_after_draw(struct radv_cmd_buffer *cmd_buffer)
+{
+ if (cmd_buffer->device->instance->debug_flags & RADV_DEBUG_SYNC_SHADERS) {
+ enum radv_cmd_flush_bits flags;
+
+ /* Force wait for graphics/compute engines to be idle. */
+ flags = RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
+ RADV_CMD_FLAG_CS_PARTIAL_FLUSH;
+
+ si_cs_emit_cache_flush(cmd_buffer->cs, false,
+ cmd_buffer->device->physical_device->rad_info.chip_class,
+ NULL, 0,
+ radv_cmd_buffer_uses_mec(cmd_buffer),
+ flags);
+ }
+
+ radv_cmd_buffer_trace_emit(cmd_buffer);
+}
+
+static void
+radv_save_pipeline(struct radv_cmd_buffer *cmd_buffer,
+ struct radv_pipeline *pipeline, enum ring_type ring)
+{
+ struct radv_device *device = cmd_buffer->device;
+ struct radeon_winsys_cs *cs = cmd_buffer->cs;
+ uint32_t data[2];
+ uint64_t va;
+
+ if (!device->trace_bo)
+ return;
+
+ va = radv_buffer_get_va(device->trace_bo);
+
+ switch (ring) {
+ case RING_GFX:
+ va += 8;
+ break;
+ case RING_COMPUTE:
+ va += 16;
+ break;
+ default:
+ assert(!"invalid ring type");
+ }
+
+ MAYBE_UNUSED unsigned cdw_max = radeon_check_space(device->ws,
+ cmd_buffer->cs, 6);
+
+ data[0] = (uintptr_t)pipeline;
+ data[1] = (uintptr_t)pipeline >> 32;
+
+ device->ws->cs_add_buffer(cs, device->trace_bo, 8);
+ radv_emit_write_data_packet(cs, va, 2, data);
+}
+
+static void
+radv_save_descriptors(struct radv_cmd_buffer *cmd_buffer)
+{
+ struct radv_device *device = cmd_buffer->device;
+ struct radeon_winsys_cs *cs = cmd_buffer->cs;
+ uint32_t data[MAX_SETS * 2] = {};
+ uint64_t va;
+
+ if (!device->trace_bo)
+ return;
+
+ va = radv_buffer_get_va(device->trace_bo) + 24;
+
+ MAYBE_UNUSED unsigned cdw_max = radeon_check_space(device->ws,
+ cmd_buffer->cs, 4 + MAX_SETS * 2);
+
+ for (int i = 0; i < MAX_SETS; i++) {
+ struct radv_descriptor_set *set = cmd_buffer->state.descriptors[i];
+ if (!set)
+ continue;
+
+ data[i * 2] = (uintptr_t)set;
+ data[i * 2 + 1] = (uintptr_t)set >> 32;
+ }
+
+ device->ws->cs_add_buffer(cs, device->trace_bo, 8);
+ radv_emit_write_data_packet(cs, va, MAX_SETS * 2, data);
+}
+
+static void
radv_emit_graphics_blend_state(struct radv_cmd_buffer *cmd_buffer,
struct radv_pipeline *pipeline)
{
@@ -391,45 +488,24 @@
radeon_set_context_reg(cmd_buffer->cs, R_028010_DB_RENDER_OVERRIDE2, ds->db_render_override2);
}
-/* 12.4 fixed-point */
-static unsigned radv_pack_float_12p4(float x)
-{
- return x <= 0 ? 0 :
- x >= 4096 ? 0xffff : x * 16;
-}
-
-uint32_t
-radv_shader_stage_to_user_data_0(gl_shader_stage stage, bool has_gs, bool has_tess)
-{
- switch (stage) {
- case MESA_SHADER_FRAGMENT:
- return R_00B030_SPI_SHADER_USER_DATA_PS_0;
- case MESA_SHADER_VERTEX:
- if (has_tess)
- return R_00B530_SPI_SHADER_USER_DATA_LS_0;
- else
- return has_gs ? R_00B330_SPI_SHADER_USER_DATA_ES_0 : R_00B130_SPI_SHADER_USER_DATA_VS_0;
- case MESA_SHADER_GEOMETRY:
- return R_00B230_SPI_SHADER_USER_DATA_GS_0;
- case MESA_SHADER_COMPUTE:
- return R_00B900_COMPUTE_USER_DATA_0;
- case MESA_SHADER_TESS_CTRL:
- return R_00B430_SPI_SHADER_USER_DATA_HS_0;
- case MESA_SHADER_TESS_EVAL:
- if (has_gs)
- return R_00B330_SPI_SHADER_USER_DATA_ES_0;
- else
- return R_00B130_SPI_SHADER_USER_DATA_VS_0;
- default:
- unreachable("unknown shader");
- }
-}
-
struct ac_userdata_info *
radv_lookup_user_sgpr(struct radv_pipeline *pipeline,
gl_shader_stage stage,
int idx)
{
+ if (stage == MESA_SHADER_VERTEX) {
+ if (pipeline->shaders[MESA_SHADER_VERTEX])
+ return &pipeline->shaders[MESA_SHADER_VERTEX]->info.user_sgprs_locs.shader_data[idx];
+ if (pipeline->shaders[MESA_SHADER_TESS_CTRL])
+ return &pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.user_sgprs_locs.shader_data[idx];
+ if (pipeline->shaders[MESA_SHADER_GEOMETRY])
+ return &pipeline->shaders[MESA_SHADER_GEOMETRY]->info.user_sgprs_locs.shader_data[idx];
+ } else if (stage == MESA_SHADER_TESS_EVAL) {
+ if (pipeline->shaders[MESA_SHADER_TESS_EVAL])
+ return &pipeline->shaders[MESA_SHADER_TESS_EVAL]->info.user_sgprs_locs.shader_data[idx];
+ if (pipeline->shaders[MESA_SHADER_GEOMETRY])
+ return &pipeline->shaders[MESA_SHADER_GEOMETRY]->info.user_sgprs_locs.shader_data[idx];
+ }
return &pipeline->shaders[stage]->info.user_sgprs_locs.shader_data[idx];
}
@@ -440,7 +516,7 @@
int idx, uint64_t va)
{
struct ac_userdata_info *loc = radv_lookup_user_sgpr(pipeline, stage, idx);
- uint32_t base_reg = radv_shader_stage_to_user_data_0(stage, radv_pipeline_has_gs(pipeline), radv_pipeline_has_tess(pipeline));
+ uint32_t base_reg = radv_shader_stage_to_user_data_0(stage, cmd_buffer->device->physical_device->rad_info.chip_class, radv_pipeline_has_gs(pipeline), radv_pipeline_has_tess(pipeline));
if (loc->sgpr_idx == -1)
return;
assert(loc->num_sgprs == 2);
@@ -462,13 +538,14 @@
radeon_emit(cmd_buffer->cs, ms->pa_sc_aa_mask[0]);
radeon_emit(cmd_buffer->cs, ms->pa_sc_aa_mask[1]);
- radeon_set_context_reg(cmd_buffer->cs, CM_R_028804_DB_EQAA, ms->db_eqaa);
- radeon_set_context_reg(cmd_buffer->cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, ms->pa_sc_mode_cntl_1);
+ radeon_set_context_reg(cmd_buffer->cs, R_028804_DB_EQAA, ms->db_eqaa);
+ radeon_set_context_reg(cmd_buffer->cs, R_028A4C_PA_SC_MODE_CNTL_1, ms->pa_sc_mode_cntl_1);
- if (old_pipeline && num_samples == old_pipeline->graphics.ms.num_samples)
+ if (old_pipeline && num_samples == old_pipeline->graphics.ms.num_samples &&
+ old_pipeline->shaders[MESA_SHADER_FRAGMENT]->info.info.ps.needs_sample_positions == pipeline->shaders[MESA_SHADER_FRAGMENT]->info.info.ps.needs_sample_positions)
return;
- radeon_set_context_reg_seq(cmd_buffer->cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2);
+ radeon_set_context_reg_seq(cmd_buffer->cs, R_028BDC_PA_SC_LINE_CNTL, 2);
radeon_emit(cmd_buffer->cs, ms->pa_sc_line_cntl);
radeon_emit(cmd_buffer->cs, ms->pa_sc_aa_config);
@@ -482,7 +559,7 @@
if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.info.ps.needs_sample_positions) {
uint32_t offset;
struct ac_userdata_info *loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_FRAGMENT, AC_UD_PS_SAMPLE_POS_OFFSET);
- uint32_t base_reg = radv_shader_stage_to_user_data_0(MESA_SHADER_FRAGMENT, radv_pipeline_has_gs(pipeline), radv_pipeline_has_tess(pipeline));
+ uint32_t base_reg = radv_shader_stage_to_user_data_0(MESA_SHADER_FRAGMENT, cmd_buffer->device->physical_device->rad_info.chip_class, radv_pipeline_has_gs(pipeline), radv_pipeline_has_tess(pipeline));
if (loc->sgpr_idx == -1)
return;
assert(loc->num_sgprs == 1);
@@ -518,29 +595,47 @@
radeon_set_context_reg(cmd_buffer->cs, R_028810_PA_CL_CLIP_CNTL,
raster->pa_cl_clip_cntl);
-
radeon_set_context_reg(cmd_buffer->cs, R_0286D4_SPI_INTERP_CONTROL_0,
raster->spi_interp_control);
-
- radeon_set_context_reg_seq(cmd_buffer->cs, R_028A00_PA_SU_POINT_SIZE, 2);
- unsigned tmp = (unsigned)(1.0 * 8.0);
- radeon_emit(cmd_buffer->cs, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp));
- radeon_emit(cmd_buffer->cs, S_028A04_MIN_SIZE(radv_pack_float_12p4(0)) |
- S_028A04_MAX_SIZE(radv_pack_float_12p4(8192/2))); /* R_028A04_PA_SU_POINT_MINMAX */
-
radeon_set_context_reg(cmd_buffer->cs, R_028BE4_PA_SU_VTX_CNTL,
raster->pa_su_vtx_cntl);
-
radeon_set_context_reg(cmd_buffer->cs, R_028814_PA_SU_SC_MODE_CNTL,
raster->pa_su_sc_mode_cntl);
}
-static inline void
-radv_emit_prefetch(struct radv_cmd_buffer *cmd_buffer, uint64_t va,
- unsigned size)
+static void
+radv_emit_shader_prefetch(struct radv_cmd_buffer *cmd_buffer,
+ struct radv_shader_variant *shader)
{
+ struct radeon_winsys *ws = cmd_buffer->device->ws;
+ struct radeon_winsys_cs *cs = cmd_buffer->cs;
+ uint64_t va;
+
+ if (!shader)
+ return;
+
+ va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
+
+ ws->cs_add_buffer(cs, shader->bo, 8);
if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK)
- si_cp_dma_prefetch(cmd_buffer, va, size);
+ si_cp_dma_prefetch(cmd_buffer, va, shader->code_size);
+}
+
+static void
+radv_emit_shaders_prefetch(struct radv_cmd_buffer *cmd_buffer,
+ struct radv_pipeline *pipeline)
+{
+ radv_emit_shader_prefetch(cmd_buffer,
+ pipeline->shaders[MESA_SHADER_VERTEX]);
+ radv_emit_shader_prefetch(cmd_buffer,
+ pipeline->shaders[MESA_SHADER_TESS_CTRL]);
+ radv_emit_shader_prefetch(cmd_buffer,
+ pipeline->shaders[MESA_SHADER_TESS_EVAL]);
+ radv_emit_shader_prefetch(cmd_buffer,
+ pipeline->shaders[MESA_SHADER_GEOMETRY]);
+ radv_emit_shader_prefetch(cmd_buffer, pipeline->gs_copy_shader);
+ radv_emit_shader_prefetch(cmd_buffer,
+ pipeline->shaders[MESA_SHADER_FRAGMENT]);
}
static void
@@ -549,13 +644,9 @@
struct radv_shader_variant *shader,
struct ac_vs_output_info *outinfo)
{
- struct radeon_winsys *ws = cmd_buffer->device->ws;
- uint64_t va = ws->buffer_get_va(shader->bo);
+ uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
unsigned export_count;
- ws->cs_add_buffer(cmd_buffer->cs, shader->bo, 8);
- radv_emit_prefetch(cmd_buffer, va, shader->code_size);
-
export_count = MAX2(1, outinfo->param_exports);
radeon_set_context_reg(cmd_buffer->cs, R_0286C4_SPI_VS_OUT_CONFIG,
S_0286C4_VS_EXPORT_COUNT(export_count - 1));
@@ -599,11 +690,7 @@
struct radv_shader_variant *shader,
struct ac_es_output_info *outinfo)
{
- struct radeon_winsys *ws = cmd_buffer->device->ws;
- uint64_t va = ws->buffer_get_va(shader->bo);
-
- ws->cs_add_buffer(cmd_buffer->cs, shader->bo, 8);
- radv_emit_prefetch(cmd_buffer, va, shader->code_size);
+ uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
radeon_set_context_reg(cmd_buffer->cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
outinfo->esgs_itemsize / 4);
@@ -618,13 +705,9 @@
radv_emit_hw_ls(struct radv_cmd_buffer *cmd_buffer,
struct radv_shader_variant *shader)
{
- struct radeon_winsys *ws = cmd_buffer->device->ws;
- uint64_t va = ws->buffer_get_va(shader->bo);
+ uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
uint32_t rsrc2 = shader->rsrc2;
- ws->cs_add_buffer(cmd_buffer->cs, shader->bo, 8);
- radv_emit_prefetch(cmd_buffer, va, shader->code_size);
-
radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B520_SPI_SHADER_PGM_LO_LS, 2);
radeon_emit(cmd_buffer->cs, va >> 8);
radeon_emit(cmd_buffer->cs, va >> 40);
@@ -643,17 +726,24 @@
radv_emit_hw_hs(struct radv_cmd_buffer *cmd_buffer,
struct radv_shader_variant *shader)
{
- struct radeon_winsys *ws = cmd_buffer->device->ws;
- uint64_t va = ws->buffer_get_va(shader->bo);
+ uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
- ws->cs_add_buffer(cmd_buffer->cs, shader->bo, 8);
- radv_emit_prefetch(cmd_buffer, va, shader->code_size);
-
- radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B420_SPI_SHADER_PGM_LO_HS, 4);
- radeon_emit(cmd_buffer->cs, va >> 8);
- radeon_emit(cmd_buffer->cs, va >> 40);
- radeon_emit(cmd_buffer->cs, shader->rsrc1);
- radeon_emit(cmd_buffer->cs, shader->rsrc2);
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
+ radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B410_SPI_SHADER_PGM_LO_LS, 2);
+ radeon_emit(cmd_buffer->cs, va >> 8);
+ radeon_emit(cmd_buffer->cs, va >> 40);
+
+ radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B428_SPI_SHADER_PGM_RSRC1_HS, 2);
+ radeon_emit(cmd_buffer->cs, shader->rsrc1);
+ radeon_emit(cmd_buffer->cs, shader->rsrc2 |
+ S_00B42C_LDS_SIZE(cmd_buffer->state.pipeline->graphics.tess.lds_size));
+ } else {
+ radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B420_SPI_SHADER_PGM_LO_HS, 4);
+ radeon_emit(cmd_buffer->cs, va >> 8);
+ radeon_emit(cmd_buffer->cs, va >> 40);
+ radeon_emit(cmd_buffer->cs, shader->rsrc1);
+ radeon_emit(cmd_buffer->cs, shader->rsrc2);
+ }
}
static void
@@ -662,9 +752,12 @@
{
struct radv_shader_variant *vs;
- assert (pipeline->shaders[MESA_SHADER_VERTEX]);
+ radeon_set_context_reg(cmd_buffer->cs, R_028A84_VGT_PRIMITIVEID_EN, pipeline->graphics.vgt_primitiveid_en);
+ /* Skip shaders merged into HS/GS */
vs = pipeline->shaders[MESA_SHADER_VERTEX];
+ if (!vs)
+ return;
if (vs->info.vs.as_ls)
radv_emit_hw_ls(cmd_buffer, vs);
@@ -672,8 +765,6 @@
radv_emit_hw_es(cmd_buffer, vs, &vs->info.vs.es_info);
else
radv_emit_hw_vs(cmd_buffer, pipeline, vs, &vs->info.vs.outinfo);
-
- radeon_set_context_reg(cmd_buffer->cs, R_028A84_VGT_PRIMITIVEID_EN, pipeline->graphics.vgt_primitiveid_en);
}
@@ -689,10 +780,12 @@
tcs = pipeline->shaders[MESA_SHADER_TESS_CTRL];
tes = pipeline->shaders[MESA_SHADER_TESS_EVAL];
- if (tes->info.tes.as_es)
- radv_emit_hw_es(cmd_buffer, tes, &tes->info.tes.es_info);
- else
- radv_emit_hw_vs(cmd_buffer, pipeline, tes, &tes->info.tes.outinfo);
+ if (tes) {
+ if (tes->info.tes.as_es)
+ radv_emit_hw_es(cmd_buffer, tes, &tes->info.tes.es_info);
+ else
+ radv_emit_hw_vs(cmd_buffer, pipeline, tes, &tes->info.tes.outinfo);
+ }
radv_emit_hw_hs(cmd_buffer, tcs);
@@ -710,7 +803,7 @@
loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_TESS_CTRL, AC_UD_TCS_OFFCHIP_LAYOUT);
if (loc->sgpr_idx != -1) {
- uint32_t base_reg = radv_shader_stage_to_user_data_0(MESA_SHADER_TESS_CTRL, radv_pipeline_has_gs(pipeline), radv_pipeline_has_tess(pipeline));
+ uint32_t base_reg = radv_shader_stage_to_user_data_0(MESA_SHADER_TESS_CTRL, cmd_buffer->device->physical_device->rad_info.chip_class, radv_pipeline_has_gs(pipeline), radv_pipeline_has_tess(pipeline));
assert(loc->num_sgprs == 4);
assert(!loc->indirect);
radeon_set_sh_reg_seq(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, 4);
@@ -723,7 +816,7 @@
loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_TESS_EVAL, AC_UD_TES_OFFCHIP_LAYOUT);
if (loc->sgpr_idx != -1) {
- uint32_t base_reg = radv_shader_stage_to_user_data_0(MESA_SHADER_TESS_EVAL, radv_pipeline_has_gs(pipeline), radv_pipeline_has_tess(pipeline));
+ uint32_t base_reg = radv_shader_stage_to_user_data_0(MESA_SHADER_TESS_EVAL, cmd_buffer->device->physical_device->rad_info.chip_class, radv_pipeline_has_gs(pipeline), radv_pipeline_has_tess(pipeline));
assert(loc->num_sgprs == 1);
assert(!loc->indirect);
@@ -733,7 +826,7 @@
loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_VERTEX, AC_UD_VS_LS_TCS_IN_LAYOUT);
if (loc->sgpr_idx != -1) {
- uint32_t base_reg = radv_shader_stage_to_user_data_0(MESA_SHADER_VERTEX, radv_pipeline_has_gs(pipeline), radv_pipeline_has_tess(pipeline));
+ uint32_t base_reg = radv_shader_stage_to_user_data_0(MESA_SHADER_VERTEX, cmd_buffer->device->physical_device->rad_info.chip_class, radv_pipeline_has_gs(pipeline), radv_pipeline_has_tess(pipeline));
assert(loc->num_sgprs == 1);
assert(!loc->indirect);
@@ -746,7 +839,6 @@
radv_emit_geometry_shader(struct radv_cmd_buffer *cmd_buffer,
struct radv_pipeline *pipeline)
{
- struct radeon_winsys *ws = cmd_buffer->device->ws;
struct radv_shader_variant *gs;
uint64_t va;
@@ -779,15 +871,28 @@
S_028B90_CNT(MIN2(gs_num_invocations, 127)) |
S_028B90_ENABLE(gs_num_invocations > 0));
- va = ws->buffer_get_va(gs->bo);
- ws->cs_add_buffer(cmd_buffer->cs, gs->bo, 8);
- radv_emit_prefetch(cmd_buffer, va, gs->code_size);
+ va = radv_buffer_get_va(gs->bo) + gs->bo_offset;
- radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B220_SPI_SHADER_PGM_LO_GS, 4);
- radeon_emit(cmd_buffer->cs, va >> 8);
- radeon_emit(cmd_buffer->cs, va >> 40);
- radeon_emit(cmd_buffer->cs, gs->rsrc1);
- radeon_emit(cmd_buffer->cs, gs->rsrc2);
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
+ radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B210_SPI_SHADER_PGM_LO_ES, 2);
+ radeon_emit(cmd_buffer->cs, va >> 8);
+ radeon_emit(cmd_buffer->cs, va >> 40);
+
+ radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B228_SPI_SHADER_PGM_RSRC1_GS, 2);
+ radeon_emit(cmd_buffer->cs, gs->rsrc1);
+ radeon_emit(cmd_buffer->cs, gs->rsrc2 |
+ S_00B22C_LDS_SIZE(pipeline->graphics.gs.lds_size));
+
+ radeon_set_context_reg(cmd_buffer->cs, R_028A44_VGT_GS_ONCHIP_CNTL, pipeline->graphics.gs.vgt_gs_onchip_cntl);
+ radeon_set_context_reg(cmd_buffer->cs, R_028A94_VGT_GS_MAX_PRIMS_PER_SUBGROUP, pipeline->graphics.gs.vgt_gs_max_prims_per_subgroup);
+ radeon_set_context_reg(cmd_buffer->cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE, pipeline->graphics.gs.vgt_esgs_ring_itemsize);
+ } else {
+ radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B220_SPI_SHADER_PGM_LO_GS, 4);
+ radeon_emit(cmd_buffer->cs, va >> 8);
+ radeon_emit(cmd_buffer->cs, va >> 40);
+ radeon_emit(cmd_buffer->cs, gs->rsrc1);
+ radeon_emit(cmd_buffer->cs, gs->rsrc2);
+ }
radv_emit_hw_vs(cmd_buffer, pipeline, pipeline->gs_copy_shader, &pipeline->gs_copy_shader->info.vs.outinfo);
@@ -812,7 +917,6 @@
radv_emit_fragment_shader(struct radv_cmd_buffer *cmd_buffer,
struct radv_pipeline *pipeline)
{
- struct radeon_winsys *ws = cmd_buffer->device->ws;
struct radv_shader_variant *ps;
uint64_t va;
unsigned spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1);
@@ -820,10 +924,7 @@
assert (pipeline->shaders[MESA_SHADER_FRAGMENT]);
ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
-
- va = ws->buffer_get_va(ps->bo);
- ws->cs_add_buffer(cmd_buffer->cs, ps->bo, 8);
- radv_emit_prefetch(cmd_buffer, va, ps->code_size);
+ va = radv_buffer_get_va(ps->bo) + ps->bo_offset;
radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B020_SPI_SHADER_PGM_LO_PS, 4);
radeon_emit(cmd_buffer->cs, va >> 8);
@@ -840,7 +941,7 @@
radeon_set_context_reg(cmd_buffer->cs, R_0286D0_SPI_PS_INPUT_ADDR,
ps->config.spi_ps_input_addr);
- if (ps->info.fs.force_persample)
+ if (ps->info.info.ps.force_persample)
spi_baryc_cntl |= S_0286E0_POS_FLOAT_LOCATION(2);
radeon_set_context_reg(cmd_buffer->cs, R_0286D8_SPI_PS_IN_CONTROL,
@@ -870,25 +971,24 @@
}
}
-static void polaris_set_vgt_vertex_reuse(struct radv_cmd_buffer *cmd_buffer,
- struct radv_pipeline *pipeline)
+static void
+radv_emit_vgt_vertex_reuse(struct radv_cmd_buffer *cmd_buffer,
+ struct radv_pipeline *pipeline)
{
- uint32_t vtx_reuse_depth = 30;
+ struct radeon_winsys_cs *cs = cmd_buffer->cs;
+
if (cmd_buffer->device->physical_device->rad_info.family < CHIP_POLARIS10)
return;
- if (pipeline->shaders[MESA_SHADER_TESS_EVAL]) {
- if (pipeline->shaders[MESA_SHADER_TESS_EVAL]->info.tes.spacing == TESS_SPACING_FRACTIONAL_ODD)
- vtx_reuse_depth = 14;
- }
- radeon_set_context_reg(cmd_buffer->cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL,
- vtx_reuse_depth);
+ radeon_set_context_reg(cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL,
+ pipeline->graphics.vtx_reuse_depth);
}
static void
-radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer,
- struct radv_pipeline *pipeline)
+radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer)
{
+ struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
+
if (!pipeline || cmd_buffer->state.emitted_pipeline == pipeline)
return;
@@ -900,7 +1000,7 @@
radv_emit_tess_shaders(cmd_buffer, pipeline);
radv_emit_geometry_shader(cmd_buffer, pipeline);
radv_emit_fragment_shader(cmd_buffer, pipeline);
- polaris_set_vgt_vertex_reuse(cmd_buffer, pipeline);
+ radv_emit_vgt_vertex_reuse(cmd_buffer, pipeline);
cmd_buffer->scratch_size_needed =
MAX2(cmd_buffer->scratch_size_needed,
@@ -924,7 +1024,11 @@
}
radeon_set_context_reg(cmd_buffer->cs, R_028A6C_VGT_GS_OUT_PRIM_TYPE, pipeline->graphics.gs_out);
+ radv_save_pipeline(cmd_buffer, pipeline, RING_GFX);
+
cmd_buffer->state.emitted_pipeline = pipeline;
+
+ cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_PIPELINE;
}
static void
@@ -952,6 +1056,73 @@
}
static void
+radv_emit_line_width(struct radv_cmd_buffer *cmd_buffer)
+{
+ unsigned width = cmd_buffer->state.dynamic.line_width * 8;
+
+ radeon_set_context_reg(cmd_buffer->cs, R_028A08_PA_SU_LINE_CNTL,
+ S_028A08_WIDTH(CLAMP(width, 0, 0xFFF)));
+}
+
+static void
+radv_emit_blend_constants(struct radv_cmd_buffer *cmd_buffer)
+{
+ struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
+
+ radeon_set_context_reg_seq(cmd_buffer->cs, R_028414_CB_BLEND_RED, 4);
+ radeon_emit_array(cmd_buffer->cs, (uint32_t *)d->blend_constants, 4);
+}
+
+static void
+radv_emit_stencil(struct radv_cmd_buffer *cmd_buffer)
+{
+ struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
+
+ radeon_set_context_reg_seq(cmd_buffer->cs,
+ R_028430_DB_STENCILREFMASK, 2);
+ radeon_emit(cmd_buffer->cs,
+ S_028430_STENCILTESTVAL(d->stencil_reference.front) |
+ S_028430_STENCILMASK(d->stencil_compare_mask.front) |
+ S_028430_STENCILWRITEMASK(d->stencil_write_mask.front) |
+ S_028430_STENCILOPVAL(1));
+ radeon_emit(cmd_buffer->cs,
+ S_028434_STENCILTESTVAL_BF(d->stencil_reference.back) |
+ S_028434_STENCILMASK_BF(d->stencil_compare_mask.back) |
+ S_028434_STENCILWRITEMASK_BF(d->stencil_write_mask.back) |
+ S_028434_STENCILOPVAL_BF(1));
+}
+
+static void
+radv_emit_depth_bounds(struct radv_cmd_buffer *cmd_buffer)
+{
+ struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
+
+ radeon_set_context_reg(cmd_buffer->cs, R_028020_DB_DEPTH_BOUNDS_MIN,
+ fui(d->depth_bounds.min));
+ radeon_set_context_reg(cmd_buffer->cs, R_028024_DB_DEPTH_BOUNDS_MAX,
+ fui(d->depth_bounds.max));
+}
+
+static void
+radv_emit_depth_biais(struct radv_cmd_buffer *cmd_buffer)
+{
+ struct radv_raster_state *raster = &cmd_buffer->state.pipeline->graphics.raster;
+ struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
+ unsigned slope = fui(d->depth_bias.slope * 16.0f);
+ unsigned bias = fui(d->depth_bias.bias * cmd_buffer->state.offset_scale);
+
+ if (G_028814_POLY_OFFSET_FRONT_ENABLE(raster->pa_su_sc_mode_cntl)) {
+ radeon_set_context_reg_seq(cmd_buffer->cs,
+ R_028B7C_PA_SU_POLY_OFFSET_CLAMP, 5);
+ radeon_emit(cmd_buffer->cs, fui(d->depth_bias.clamp)); /* CLAMP */
+ radeon_emit(cmd_buffer->cs, slope); /* FRONT SCALE */
+ radeon_emit(cmd_buffer->cs, bias); /* FRONT OFFSET */
+ radeon_emit(cmd_buffer->cs, slope); /* BACK SCALE */
+ radeon_emit(cmd_buffer->cs, bias); /* BACK OFFSET */
+ }
+}
+
+static void
radv_emit_fb_color_state(struct radv_cmd_buffer *cmd_buffer,
int index,
struct radv_color_buffer_info *cb)
@@ -1066,7 +1237,7 @@
VkClearDepthStencilValue ds_clear_value,
VkImageAspectFlags aspects)
{
- uint64_t va = cmd_buffer->device->ws->buffer_get_va(image->bo);
+ uint64_t va = radv_buffer_get_va(image->bo);
va += image->offset + image->clear_value_offset;
unsigned reg_offset = 0, reg_count = 0;
@@ -1106,7 +1277,7 @@
radv_load_depth_clear_regs(struct radv_cmd_buffer *cmd_buffer,
struct radv_image *image)
{
- uint64_t va = cmd_buffer->device->ws->buffer_get_va(image->bo);
+ uint64_t va = radv_buffer_get_va(image->bo);
va += image->offset + image->clear_value_offset;
if (!image->surface.htile_size)
@@ -1138,7 +1309,7 @@
bool value)
{
uint64_t pred_val = value;
- uint64_t va = cmd_buffer->device->ws->buffer_get_va(image->bo);
+ uint64_t va = radv_buffer_get_va(image->bo);
va += image->offset + image->dcc_pred_offset;
if (!image->surface.dcc_size)
@@ -1162,7 +1333,7 @@
int idx,
uint32_t color_values[2])
{
- uint64_t va = cmd_buffer->device->ws->buffer_get_va(image->bo);
+ uint64_t va = radv_buffer_get_va(image->bo);
va += image->offset + image->clear_value_offset;
if (!image->cmask.size && !image->surface.dcc_size)
@@ -1189,7 +1360,7 @@
struct radv_image *image,
int idx)
{
- uint64_t va = cmd_buffer->device->ws->buffer_get_va(image->bo);
+ uint64_t va = radv_buffer_get_va(image->bo);
va += image->offset + image->clear_value_offset;
if (!image->cmask.size && !image->surface.dcc_size)
@@ -1246,9 +1417,9 @@
struct radv_attachment_info *att = &framebuffer->attachments[idx];
struct radv_image *image = att->attachment->image;
cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, att->attachment->bo, 8);
- uint32_t queue_mask = radv_image_queue_family_mask(image,
- cmd_buffer->queue_family_index,
- cmd_buffer->queue_family_index);
+ MAYBE_UNUSED uint32_t queue_mask = radv_image_queue_family_mask(image,
+ cmd_buffer->queue_family_index,
+ cmd_buffer->queue_family_index);
/* We currently don't support writing decompressed HTILE */
assert(radv_layout_has_htile(image, layout, queue_mask) ==
radv_layout_is_htile_compressed(image, layout, queue_mask));
@@ -1277,6 +1448,31 @@
radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));
}
+
+ cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_FRAMEBUFFER;
+}
+
+static void
+radv_emit_index_buffer(struct radv_cmd_buffer *cmd_buffer)
+{
+ struct radeon_winsys_cs *cs = cmd_buffer->cs;
+
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
+ radeon_set_uconfig_reg_idx(cs, R_03090C_VGT_INDEX_TYPE,
+ 2, cmd_buffer->state.index_type);
+ } else {
+ radeon_emit(cs, PKT3(PKT3_INDEX_TYPE, 0, 0));
+ radeon_emit(cs, cmd_buffer->state.index_type);
+ }
+
+ radeon_emit(cs, PKT3(PKT3_INDEX_BASE, 1, 0));
+ radeon_emit(cs, cmd_buffer->state.index_va);
+ radeon_emit(cs, cmd_buffer->state.index_va >> 32);
+
+ radeon_emit(cs, PKT3(PKT3_INDEX_BUFFER_SIZE, 0, 0));
+ radeon_emit(cs, cmd_buffer->state.max_index_count);
+
+ cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_INDEX_BUFFER;
}
void radv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer)
@@ -1308,8 +1504,6 @@
static void
radv_cmd_buffer_flush_dynamic_state(struct radv_cmd_buffer *cmd_buffer)
{
- struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
-
if (G_028810_DX_RASTERIZATION_KILL(cmd_buffer->state.pipeline->graphics.raster.pa_cl_clip_cntl))
return;
@@ -1319,54 +1513,26 @@
if (cmd_buffer->state.dirty & (RADV_CMD_DIRTY_DYNAMIC_SCISSOR | RADV_CMD_DIRTY_DYNAMIC_VIEWPORT))
radv_emit_scissor(cmd_buffer);
- if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_DYNAMIC_LINE_WIDTH) {
- unsigned width = cmd_buffer->state.dynamic.line_width * 8;
- radeon_set_context_reg(cmd_buffer->cs, R_028A08_PA_SU_LINE_CNTL,
- S_028A08_WIDTH(CLAMP(width, 0, 0xFFF)));
- }
+ if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_DYNAMIC_LINE_WIDTH)
+ radv_emit_line_width(cmd_buffer);
- if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS) {
- radeon_set_context_reg_seq(cmd_buffer->cs, R_028414_CB_BLEND_RED, 4);
- radeon_emit_array(cmd_buffer->cs, (uint32_t*)d->blend_constants, 4);
- }
+ if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS)
+ radv_emit_blend_constants(cmd_buffer);
if (cmd_buffer->state.dirty & (RADV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE |
RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK |
- RADV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK)) {
- radeon_set_context_reg_seq(cmd_buffer->cs, R_028430_DB_STENCILREFMASK, 2);
- radeon_emit(cmd_buffer->cs, S_028430_STENCILTESTVAL(d->stencil_reference.front) |
- S_028430_STENCILMASK(d->stencil_compare_mask.front) |
- S_028430_STENCILWRITEMASK(d->stencil_write_mask.front) |
- S_028430_STENCILOPVAL(1));
- radeon_emit(cmd_buffer->cs, S_028434_STENCILTESTVAL_BF(d->stencil_reference.back) |
- S_028434_STENCILMASK_BF(d->stencil_compare_mask.back) |
- S_028434_STENCILWRITEMASK_BF(d->stencil_write_mask.back) |
- S_028434_STENCILOPVAL_BF(1));
- }
+ RADV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK))
+ radv_emit_stencil(cmd_buffer);
if (cmd_buffer->state.dirty & (RADV_CMD_DIRTY_PIPELINE |
- RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS)) {
- radeon_set_context_reg(cmd_buffer->cs, R_028020_DB_DEPTH_BOUNDS_MIN, fui(d->depth_bounds.min));
- radeon_set_context_reg(cmd_buffer->cs, R_028024_DB_DEPTH_BOUNDS_MAX, fui(d->depth_bounds.max));
- }
+ RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS))
+ radv_emit_depth_bounds(cmd_buffer);
if (cmd_buffer->state.dirty & (RADV_CMD_DIRTY_PIPELINE |
- RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS)) {
- struct radv_raster_state *raster = &cmd_buffer->state.pipeline->graphics.raster;
- unsigned slope = fui(d->depth_bias.slope * 16.0f);
- unsigned bias = fui(d->depth_bias.bias * cmd_buffer->state.offset_scale);
-
- if (G_028814_POLY_OFFSET_FRONT_ENABLE(raster->pa_su_sc_mode_cntl)) {
- radeon_set_context_reg_seq(cmd_buffer->cs, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, 5);
- radeon_emit(cmd_buffer->cs, fui(d->depth_bias.clamp)); /* CLAMP */
- radeon_emit(cmd_buffer->cs, slope); /* FRONT SCALE */
- radeon_emit(cmd_buffer->cs, bias); /* FRONT OFFSET */
- radeon_emit(cmd_buffer->cs, slope); /* BACK SCALE */
- radeon_emit(cmd_buffer->cs, bias); /* BACK OFFSET */
- }
- }
+ RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS))
+ radv_emit_depth_biais(cmd_buffer);
- cmd_buffer->state.dirty = 0;
+ cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_DYNAMIC_ALL;
}
static void
@@ -1377,7 +1543,7 @@
gl_shader_stage stage)
{
struct ac_userdata_info *desc_set_loc = &pipeline->shaders[stage]->info.user_sgprs_locs.descriptor_sets[idx];
- uint32_t base_reg = radv_shader_stage_to_user_data_0(stage, radv_pipeline_has_gs(pipeline), radv_pipeline_has_tess(pipeline));
+ uint32_t base_reg = radv_shader_stage_to_user_data_0(stage, cmd_buffer->device->physical_device->rad_info.chip_class, radv_pipeline_has_gs(pipeline), radv_pipeline_has_tess(pipeline));
if (desc_set_loc->sgpr_idx == -1 || desc_set_loc->indirect)
return;
@@ -1415,18 +1581,15 @@
radv_flush_push_descriptors(struct radv_cmd_buffer *cmd_buffer)
{
struct radv_descriptor_set *set = &cmd_buffer->push_descriptors.set;
- uint32_t *ptr = NULL;
unsigned bo_offset;
- if (!radv_cmd_buffer_upload_alloc(cmd_buffer, set->size, 32,
- &bo_offset,
- (void**) &ptr))
+ if (!radv_cmd_buffer_upload_data(cmd_buffer, set->size, 32,
+ set->mapped_ptr,
+ &bo_offset))
return;
- set->va = cmd_buffer->device->ws->buffer_get_va(cmd_buffer->upload.upload_bo);
+ set->va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
set->va += bo_offset;
-
- memcpy(ptr, set->mapped_ptr, set->size);
}
static void
@@ -1450,7 +1613,7 @@
uptr[1] = set_va >> 32;
}
- uint64_t va = cmd_buffer->device->ws->buffer_get_va(cmd_buffer->upload.upload_bo);
+ uint64_t va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
va += offset;
if (cmd_buffer->state.pipeline) {
@@ -1501,9 +1664,7 @@
cmd_buffer->cs,
MAX_SETS * MESA_SHADER_STAGES * 4);
- for (i = 0; i < MAX_SETS; i++) {
- if (!(cmd_buffer->state.descriptors_dirty & (1u << i)))
- continue;
+ for_each_bit(i, cmd_buffer->state.descriptors_dirty) {
struct radv_descriptor_set *set = cmd_buffer->state.descriptors[i];
if (!set)
continue;
@@ -1512,6 +1673,9 @@
}
cmd_buffer->state.descriptors_dirty = 0;
cmd_buffer->state.push_descriptors_dirty = false;
+
+ radv_save_descriptors(cmd_buffer);
+
assert(cmd_buffer->cs->cdw <= cdw_max);
}
@@ -1538,7 +1702,7 @@
memcpy((char*)ptr + layout->push_constant_size, cmd_buffer->dynamic_buffers,
16 * layout->dynamic_offset_count);
- va = cmd_buffer->device->ws->buffer_get_va(cmd_buffer->upload.upload_bo);
+ va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
va += offset;
MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws,
@@ -1555,123 +1719,131 @@
assert(cmd_buffer->cs->cdw <= cdw_max);
}
-static void radv_emit_primitive_reset_state(struct radv_cmd_buffer *cmd_buffer,
- bool indexed_draw)
-{
- int32_t primitive_reset_en = indexed_draw && cmd_buffer->state.pipeline->graphics.prim_restart_enable;
-
- if (primitive_reset_en != cmd_buffer->state.last_primitive_reset_en) {
- cmd_buffer->state.last_primitive_reset_en = primitive_reset_en;
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
- radeon_set_uconfig_reg(cmd_buffer->cs, R_03092C_VGT_MULTI_PRIM_IB_RESET_EN,
- primitive_reset_en);
- } else {
- radeon_set_context_reg(cmd_buffer->cs, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN,
- primitive_reset_en);
- }
- }
-
- if (primitive_reset_en) {
- uint32_t primitive_reset_index = cmd_buffer->state.index_type ? 0xffffffffu : 0xffffu;
-
- if (primitive_reset_index != cmd_buffer->state.last_primitive_reset_index) {
- cmd_buffer->state.last_primitive_reset_index = primitive_reset_index;
- radeon_set_context_reg(cmd_buffer->cs, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX,
- primitive_reset_index);
- }
- }
-}
-
-static void
-radv_cmd_buffer_update_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer)
+static bool
+radv_cmd_buffer_update_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer, bool pipeline_is_dirty)
{
struct radv_device *device = cmd_buffer->device;
- if ((cmd_buffer->state.pipeline != cmd_buffer->state.emitted_pipeline || cmd_buffer->state.vb_dirty) &&
- cmd_buffer->state.pipeline->num_vertex_attribs &&
- cmd_buffer->state.pipeline->shaders[MESA_SHADER_VERTEX]->info.info.vs.has_vertex_buffers) {
+ if ((pipeline_is_dirty || cmd_buffer->state.vb_dirty) &&
+ cmd_buffer->state.pipeline->vertex_elements.count &&
+ radv_get_vertex_shader(cmd_buffer->state.pipeline)->info.info.vs.has_vertex_buffers) {
+ struct radv_vertex_elements_info *velems = &cmd_buffer->state.pipeline->vertex_elements;
unsigned vb_offset;
void *vb_ptr;
uint32_t i = 0;
- uint32_t num_attribs = cmd_buffer->state.pipeline->num_vertex_attribs;
+ uint32_t count = velems->count;
uint64_t va;
/* allocate some descriptor state for vertex buffers */
- radv_cmd_buffer_upload_alloc(cmd_buffer, num_attribs * 16, 256,
- &vb_offset, &vb_ptr);
+ if (!radv_cmd_buffer_upload_alloc(cmd_buffer, count * 16, 256,
+ &vb_offset, &vb_ptr))
+ return false;
- for (i = 0; i < num_attribs; i++) {
+ for (i = 0; i < count; i++) {
uint32_t *desc = &((uint32_t *)vb_ptr)[i * 4];
uint32_t offset;
- int vb = cmd_buffer->state.pipeline->va_binding[i];
+ int vb = velems->binding[i];
struct radv_buffer *buffer = cmd_buffer->state.vertex_bindings[vb].buffer;
uint32_t stride = cmd_buffer->state.pipeline->binding_stride[vb];
device->ws->cs_add_buffer(cmd_buffer->cs, buffer->bo, 8);
- va = device->ws->buffer_get_va(buffer->bo);
+ va = radv_buffer_get_va(buffer->bo);
- offset = cmd_buffer->state.vertex_bindings[vb].offset + cmd_buffer->state.pipeline->va_offset[i];
+ offset = cmd_buffer->state.vertex_bindings[vb].offset + velems->offset[i];
va += offset + buffer->offset;
desc[0] = va;
desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(stride);
if (cmd_buffer->device->physical_device->rad_info.chip_class <= CIK && stride)
- desc[2] = (buffer->size - offset - cmd_buffer->state.pipeline->va_format_size[i]) / stride + 1;
+ desc[2] = (buffer->size - offset - velems->format_size[i]) / stride + 1;
else
desc[2] = buffer->size - offset;
- desc[3] = cmd_buffer->state.pipeline->va_rsrc_word3[i];
+ desc[3] = velems->rsrc_word3[i];
}
- va = device->ws->buffer_get_va(cmd_buffer->upload.upload_bo);
+ va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
va += vb_offset;
radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.pipeline, MESA_SHADER_VERTEX,
AC_UD_VS_VERTEX_BUFFERS, va);
}
- cmd_buffer->state.vb_dirty = 0;
+ cmd_buffer->state.vb_dirty = false;
+
+ return true;
}
-static void
-radv_cmd_buffer_flush_state(struct radv_cmd_buffer *cmd_buffer,
- bool indexed_draw, bool instanced_draw,
- bool indirect_draw,
- uint32_t draw_vertex_count)
+static bool
+radv_upload_graphics_shader_descriptors(struct radv_cmd_buffer *cmd_buffer, bool pipeline_is_dirty)
{
- struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
- uint32_t ia_multi_vgt_param;
-
- MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws,
- cmd_buffer->cs, 4096);
+ if (!radv_cmd_buffer_update_vertex_descriptors(cmd_buffer, pipeline_is_dirty))
+ return false;
- radv_cmd_buffer_update_vertex_descriptors(cmd_buffer);
+ radv_flush_descriptors(cmd_buffer, VK_SHADER_STAGE_ALL_GRAPHICS);
+ radv_flush_constants(cmd_buffer, cmd_buffer->state.pipeline,
+ VK_SHADER_STAGE_ALL_GRAPHICS);
- if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE)
- radv_emit_graphics_pipeline(cmd_buffer, pipeline);
+ return true;
+}
- if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_RENDER_TARGETS)
- radv_emit_framebuffer_state(cmd_buffer);
+static void
+radv_emit_draw_registers(struct radv_cmd_buffer *cmd_buffer, bool indexed_draw,
+ bool instanced_draw, bool indirect_draw,
+ uint32_t draw_vertex_count)
+{
+ struct radeon_info *info = &cmd_buffer->device->physical_device->rad_info;
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ struct radeon_winsys_cs *cs = cmd_buffer->cs;
+ uint32_t ia_multi_vgt_param;
+ int32_t primitive_reset_en;
- ia_multi_vgt_param = si_get_ia_multi_vgt_param(cmd_buffer, instanced_draw, indirect_draw, draw_vertex_count);
- if (cmd_buffer->state.last_ia_multi_vgt_param != ia_multi_vgt_param) {
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9)
- radeon_set_uconfig_reg_idx(cmd_buffer->cs, R_030960_IA_MULTI_VGT_PARAM, 4, ia_multi_vgt_param);
- else if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK)
- radeon_set_context_reg_idx(cmd_buffer->cs, R_028AA8_IA_MULTI_VGT_PARAM, 1, ia_multi_vgt_param);
- else
- radeon_set_context_reg(cmd_buffer->cs, R_028AA8_IA_MULTI_VGT_PARAM, ia_multi_vgt_param);
- cmd_buffer->state.last_ia_multi_vgt_param = ia_multi_vgt_param;
+ /* Draw state. */
+ ia_multi_vgt_param =
+ si_get_ia_multi_vgt_param(cmd_buffer, instanced_draw,
+ indirect_draw, draw_vertex_count);
+
+ if (state->last_ia_multi_vgt_param != ia_multi_vgt_param) {
+ if (info->chip_class >= GFX9) {
+ radeon_set_uconfig_reg_idx(cs,
+ R_030960_IA_MULTI_VGT_PARAM,
+ 4, ia_multi_vgt_param);
+ } else if (info->chip_class >= CIK) {
+ radeon_set_context_reg_idx(cs,
+ R_028AA8_IA_MULTI_VGT_PARAM,
+ 1, ia_multi_vgt_param);
+ } else {
+ radeon_set_context_reg(cs, R_028AA8_IA_MULTI_VGT_PARAM,
+ ia_multi_vgt_param);
+ }
+ state->last_ia_multi_vgt_param = ia_multi_vgt_param;
}
- radv_cmd_buffer_flush_dynamic_state(cmd_buffer);
-
- radv_emit_primitive_reset_state(cmd_buffer, indexed_draw);
+ /* Primitive restart. */
+ primitive_reset_en =
+ indexed_draw && state->pipeline->graphics.prim_restart_enable;
- radv_flush_descriptors(cmd_buffer, VK_SHADER_STAGE_ALL_GRAPHICS);
- radv_flush_constants(cmd_buffer, cmd_buffer->state.pipeline,
- VK_SHADER_STAGE_ALL_GRAPHICS);
+ if (primitive_reset_en != state->last_primitive_reset_en) {
+ state->last_primitive_reset_en = primitive_reset_en;
+ if (info->chip_class >= GFX9) {
+ radeon_set_uconfig_reg(cs,
+ R_03092C_VGT_MULTI_PRIM_IB_RESET_EN,
+ primitive_reset_en);
+ } else {
+ radeon_set_context_reg(cs,
+ R_028A94_VGT_MULTI_PRIM_IB_RESET_EN,
+ primitive_reset_en);
+ }
+ }
- assert(cmd_buffer->cs->cdw <= cdw_max);
+ if (primitive_reset_en) {
+ uint32_t primitive_reset_index =
+ state->index_type ? 0xffffffffu : 0xffffu;
- si_emit_cache_flush(cmd_buffer);
+ if (primitive_reset_index != state->last_primitive_reset_index) {
+ radeon_set_context_reg(cs,
+ R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX,
+ primitive_reset_index);
+ state->last_primitive_reset_index = primitive_reset_index;
+ }
+ }
}
static void radv_stage_flush(struct radv_cmd_buffer *cmd_buffer,
@@ -1696,8 +1868,7 @@
VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT |
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT)) {
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH;
- } else if (src_stage_mask & (VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT |
- VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT |
+ } else if (src_stage_mask & (VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT |
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
VK_PIPELINE_STAGE_VERTEX_SHADER_BIT)) {
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VS_PARTIAL_FLUSH;
@@ -1748,11 +1919,11 @@
switch ((VkAccessFlagBits)(1 << b)) {
case VK_ACCESS_INDIRECT_COMMAND_READ_BIT:
case VK_ACCESS_INDEX_READ_BIT:
- case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT:
break;
case VK_ACCESS_UNIFORM_READ_BIT:
flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1 | RADV_CMD_FLAG_INV_SMEM_L1;
break;
+ case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT:
case VK_ACCESS_SHADER_READ_BIT:
case VK_ACCESS_TRANSFER_READ_BIT:
case VK_ACCESS_INPUT_ATTACHMENT_READ_BIT:
@@ -1835,10 +2006,10 @@
cmd_buffer->state.subpass = subpass;
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_RENDER_TARGETS;
+ cmd_buffer->state.dirty |= RADV_CMD_DIRTY_FRAMEBUFFER;
}
-static void
+static VkResult
radv_cmd_state_setup_attachments(struct radv_cmd_buffer *cmd_buffer,
struct radv_render_pass *pass,
const VkRenderPassBeginInfo *info)
@@ -1847,7 +2018,7 @@
if (pass->attachment_count == 0) {
state->attachments = NULL;
- return;
+ return VK_SUCCESS;
}
state->attachments = vk_alloc(&cmd_buffer->pool->alloc,
@@ -1855,8 +2026,8 @@
sizeof(state->attachments[0]),
8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (state->attachments == NULL) {
- /* FIXME: Propagate VK_ERROR_OUT_OF_HOST_MEMORY to vkEndCommandBuffer */
- abort();
+ cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY;
+ return cmd_buffer->record_result;
}
for (uint32_t i = 0; i < pass->attachment_count; ++i) {
@@ -1885,6 +2056,7 @@
}
state->attachments[i].pending_clear_aspects = clear_aspects;
+ state->attachments[i].cleared_views = 0;
if (clear_aspects && info) {
assert(info->clearValueCount > i);
state->attachments[i].clear_value = info->pClearValues[i];
@@ -1892,6 +2064,8 @@
state->attachments[i].current_layout = att->initial_layout;
}
+
+ return VK_SUCCESS;
}
VkResult radv_AllocateCommandBuffers(
@@ -1916,12 +2090,11 @@
list_del(&cmd_buffer->pool_link);
list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers);
- radv_reset_cmd_buffer(cmd_buffer);
+ result = radv_reset_cmd_buffer(cmd_buffer);
cmd_buffer->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
cmd_buffer->level = pAllocateInfo->level;
pCommandBuffers[i] = radv_cmd_buffer_to_handle(cmd_buffer);
- result = VK_SUCCESS;
} else {
result = radv_create_cmd_buffer(device, pool, pAllocateInfo->level,
&pCommandBuffers[i]);
@@ -1962,15 +2135,14 @@
VkCommandBufferResetFlags flags)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- radv_reset_cmd_buffer(cmd_buffer);
- return VK_SUCCESS;
+ return radv_reset_cmd_buffer(cmd_buffer);
}
static void emit_gfx_buffer_state(struct radv_cmd_buffer *cmd_buffer)
{
struct radv_device *device = cmd_buffer->device;
if (device->gfx_init) {
- uint64_t va = device->ws->buffer_get_va(device->gfx_init);
+ uint64_t va = radv_buffer_get_va(device->gfx_init);
device->ws->cs_add_buffer(cmd_buffer->cs, device->gfx_init, 8);
radeon_emit(cmd_buffer->cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
radeon_emit(cmd_buffer->cs, va);
@@ -1985,7 +2157,11 @@
const VkCommandBufferBeginInfo *pBeginInfo)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- radv_reset_cmd_buffer(cmd_buffer);
+ VkResult result;
+
+ result = radv_reset_cmd_buffer(cmd_buffer);
+ if (result != VK_SUCCESS)
+ return result;
memset(&cmd_buffer->state, 0, sizeof(cmd_buffer->state));
cmd_buffer->state.last_primitive_reset_en = -1;
@@ -1996,7 +2172,6 @@
switch (cmd_buffer->queue_family_index) {
case RADV_QUEUE_GENERAL:
emit_gfx_buffer_state(cmd_buffer);
- radv_set_db_count_control(cmd_buffer);
break;
case RADV_QUEUE_COMPUTE:
si_init_compute(cmd_buffer);
@@ -2008,18 +2183,22 @@
}
if (pBeginInfo->flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT) {
+ assert(pBeginInfo->pInheritanceInfo);
cmd_buffer->state.framebuffer = radv_framebuffer_from_handle(pBeginInfo->pInheritanceInfo->framebuffer);
cmd_buffer->state.pass = radv_render_pass_from_handle(pBeginInfo->pInheritanceInfo->renderPass);
struct radv_subpass *subpass =
&cmd_buffer->state.pass->subpasses[pBeginInfo->pInheritanceInfo->subpass];
- radv_cmd_state_setup_attachments(cmd_buffer, cmd_buffer->state.pass, NULL);
+ result = radv_cmd_state_setup_attachments(cmd_buffer, cmd_buffer->state.pass, NULL);
+ if (result != VK_SUCCESS)
+ return result;
+
radv_cmd_buffer_set_subpass(cmd_buffer, subpass, false);
}
radv_cmd_buffer_trace_emit(cmd_buffer);
- return VK_SUCCESS;
+ return result;
}
void radv_CmdBindVertexBuffers(
@@ -2039,8 +2218,9 @@
for (uint32_t i = 0; i < bindingCount; i++) {
vb[firstBinding + i].buffer = radv_buffer_from_handle(pBuffers[i]);
vb[firstBinding + i].offset = pOffsets[i];
- cmd_buffer->state.vb_dirty |= 1 << (firstBinding + i);
}
+
+ cmd_buffer->state.vb_dirty = true;
}
void radv_CmdBindIndexBuffer(
@@ -2053,7 +2233,7 @@
RADV_FROM_HANDLE(radv_buffer, index_buffer, buffer);
cmd_buffer->state.index_type = indexType; /* vk matches hw */
- cmd_buffer->state.index_va = cmd_buffer->device->ws->buffer_get_va(index_buffer->bo);
+ cmd_buffer->state.index_va = radv_buffer_get_va(index_buffer->bo);
cmd_buffer->state.index_va += index_buffer->offset + offset;
int index_size_shift = cmd_buffer->state.index_type ? 2 : 1;
@@ -2142,7 +2322,7 @@
if (!set->mapped_ptr) {
cmd_buffer->push_descriptors.capacity = 0;
- cmd_buffer->record_fail = true;
+ cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY;
return false;
}
@@ -2164,6 +2344,7 @@
struct radv_descriptor_set *push_set = &cmd_buffer->meta_push_descriptors;
unsigned bo_offset;
+ assert(set == 0);
assert(layout->set[set].layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR);
push_set->size = layout->set[set].layout->size;
@@ -2174,7 +2355,7 @@
(void**) &push_set->mapped_ptr))
return;
- push_set->va = cmd_buffer->device->ws->buffer_get_va(cmd_buffer->upload.upload_bo);
+ push_set->va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
push_set->va += bo_offset;
radv_update_descriptor_sets(cmd_buffer->device, cmd_buffer,
@@ -2258,16 +2439,17 @@
si_emit_cache_flush(cmd_buffer);
}
- if (!cmd_buffer->device->ws->cs_finalize(cmd_buffer->cs) ||
- cmd_buffer->record_fail)
+ vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments);
+
+ if (!cmd_buffer->device->ws->cs_finalize(cmd_buffer->cs))
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
- return VK_SUCCESS;
+
+ return cmd_buffer->record_result;
}
static void
radv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer)
{
- struct radeon_winsys *ws = cmd_buffer->device->ws;
struct radv_shader_variant *compute_shader;
struct radv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
uint64_t va;
@@ -2278,10 +2460,9 @@
cmd_buffer->state.emitted_compute_pipeline = pipeline;
compute_shader = pipeline->shaders[MESA_SHADER_COMPUTE];
- va = ws->buffer_get_va(compute_shader->bo);
+ va = radv_buffer_get_va(compute_shader->bo) + compute_shader->bo_offset;
- ws->cs_add_buffer(cmd_buffer->cs, compute_shader->bo, 8);
- radv_emit_prefetch(cmd_buffer, va, compute_shader->code_size);
+ radv_emit_shader_prefetch(cmd_buffer, compute_shader);
MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws,
cmd_buffer->cs, 16);
@@ -2313,6 +2494,7 @@
S_00B81C_NUM_THREAD_FULL(compute_shader->info.cs.block_size[2]));
assert(cmd_buffer->cs->cdw <= cdw_max);
+ radv_save_pipeline(cmd_buffer, pipeline, RING_COMPUTE);
}
static void radv_mark_descriptor_sets_dirty(struct radv_cmd_buffer *cmd_buffer)
@@ -2331,14 +2513,20 @@
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
- radv_mark_descriptor_sets_dirty(cmd_buffer);
-
switch (pipelineBindPoint) {
case VK_PIPELINE_BIND_POINT_COMPUTE:
+ if (cmd_buffer->state.compute_pipeline == pipeline)
+ return;
+ radv_mark_descriptor_sets_dirty(cmd_buffer);
+
cmd_buffer->state.compute_pipeline = pipeline;
cmd_buffer->push_constant_stages |= VK_SHADER_STAGE_COMPUTE_BIT;
break;
case VK_PIPELINE_BIND_POINT_GRAPHICS:
+ if (cmd_buffer->state.pipeline == pipeline)
+ return;
+ radv_mark_descriptor_sets_dirty(cmd_buffer);
+
cmd_buffer->state.pipeline = pipeline;
if (!pipeline)
break;
@@ -2382,10 +2570,10 @@
const VkViewport* pViewports)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ MAYBE_UNUSED const uint32_t total_count = firstViewport + viewportCount;
- const uint32_t total_count = firstViewport + viewportCount;
- if (cmd_buffer->state.dynamic.viewport.count < total_count)
- cmd_buffer->state.dynamic.viewport.count = total_count;
+ assert(firstViewport < MAX_VIEWPORTS);
+ assert(total_count >= 1 && total_count <= MAX_VIEWPORTS);
memcpy(cmd_buffer->state.dynamic.viewport.viewports + firstViewport,
pViewports, viewportCount * sizeof(*pViewports));
@@ -2400,10 +2588,10 @@
const VkRect2D* pScissors)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ MAYBE_UNUSED const uint32_t total_count = firstScissor + scissorCount;
- const uint32_t total_count = firstScissor + scissorCount;
- if (cmd_buffer->state.dynamic.scissor.count < total_count)
- cmd_buffer->state.dynamic.scissor.count = total_count;
+ assert(firstScissor < MAX_SCISSORS);
+ assert(total_count >= 1 && total_count <= MAX_SCISSORS);
memcpy(cmd_buffer->state.dynamic.scissor.scissors + firstScissor,
pScissors, scissorCount * sizeof(*pScissors));
@@ -2511,6 +2699,8 @@
{
RADV_FROM_HANDLE(radv_cmd_buffer, primary, commandBuffer);
+ assert(commandBufferCount > 0);
+
/* Emit pending flushes on primary prior to executing secondary */
si_emit_cache_flush(primary);
@@ -2538,18 +2728,48 @@
assert(secondary->ring_offsets_idx == primary->ring_offsets_idx);
}
primary->device->ws->cs_execute_secondary(primary->cs, secondary->cs);
- }
- /* if we execute secondary we need to re-emit out pipelines */
- if (commandBufferCount) {
- primary->state.emitted_pipeline = NULL;
- primary->state.emitted_compute_pipeline = NULL;
- primary->state.dirty |= RADV_CMD_DIRTY_PIPELINE;
- primary->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_ALL;
- primary->state.last_primitive_reset_en = -1;
- primary->state.last_primitive_reset_index = 0;
- radv_mark_descriptor_sets_dirty(primary);
+
+ /* When the secondary command buffer is compute only we don't
+ * need to re-emit the current graphics pipeline.
+ */
+ if (secondary->state.emitted_pipeline) {
+ primary->state.emitted_pipeline =
+ secondary->state.emitted_pipeline;
+ }
+
+ /* When the secondary command buffer is graphics only we don't
+ * need to re-emit the current compute pipeline.
+ */
+ if (secondary->state.emitted_compute_pipeline) {
+ primary->state.emitted_compute_pipeline =
+ secondary->state.emitted_compute_pipeline;
+ }
+
+ /* Only re-emit the draw packets when needed. */
+ if (secondary->state.last_primitive_reset_en != -1) {
+ primary->state.last_primitive_reset_en =
+ secondary->state.last_primitive_reset_en;
+ }
+
+ if (secondary->state.last_primitive_reset_index) {
+ primary->state.last_primitive_reset_index =
+ secondary->state.last_primitive_reset_index;
+ }
+
+ if (secondary->state.last_ia_multi_vgt_param) {
+ primary->state.last_ia_multi_vgt_param =
+ secondary->state.last_ia_multi_vgt_param;
+ }
}
+
+ /* After executing commands from secondary buffers we have to dirty
+ * some states.
+ */
+ primary->state.dirty |= RADV_CMD_DIRTY_PIPELINE |
+ RADV_CMD_DIRTY_INDEX_BUFFER |
+ RADV_CMD_DIRTY_DYNAMIC_ALL;
+ radv_mark_descriptor_sets_dirty(primary);
}
VkResult radv_CreateCommandPool(
@@ -2612,10 +2832,13 @@
VkCommandPoolResetFlags flags)
{
RADV_FROM_HANDLE(radv_cmd_pool, pool, commandPool);
+ VkResult result;
list_for_each_entry(struct radv_cmd_buffer, cmd_buffer,
&pool->cmd_buffers, pool_link) {
- radv_reset_cmd_buffer(cmd_buffer);
+ result = radv_reset_cmd_buffer(cmd_buffer);
+ if (result != VK_SUCCESS)
+ return result;
}
return VK_SUCCESS;
@@ -2648,11 +2871,15 @@
MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws,
cmd_buffer->cs, 2048);
+ MAYBE_UNUSED VkResult result;
cmd_buffer->state.framebuffer = framebuffer;
cmd_buffer->state.pass = pass;
cmd_buffer->state.render_area = pRenderPassBegin->renderArea;
- radv_cmd_state_setup_attachments(cmd_buffer, pass, pRenderPassBegin);
+
+ result = radv_cmd_state_setup_attachments(cmd_buffer, pass, pRenderPassBegin);
+ if (result != VK_SUCCESS)
+ return;
radv_cmd_buffer_set_subpass(cmd_buffer, pass->subpasses, true);
assert(cmd_buffer->cs->cdw <= cdw_max);
@@ -2675,127 +2902,65 @@
radv_cmd_buffer_clear_subpass(cmd_buffer);
}
-void radv_CmdDraw(
- VkCommandBuffer commandBuffer,
- uint32_t vertexCount,
- uint32_t instanceCount,
- uint32_t firstVertex,
- uint32_t firstInstance)
+static void radv_emit_view_index(struct radv_cmd_buffer *cmd_buffer, unsigned index)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
-
- radv_cmd_buffer_flush_state(cmd_buffer, false, (instanceCount > 1), false, vertexCount);
-
- MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 10);
-
- assert(cmd_buffer->state.pipeline->graphics.vtx_base_sgpr);
- radeon_set_sh_reg_seq(cmd_buffer->cs, cmd_buffer->state.pipeline->graphics.vtx_base_sgpr,
- cmd_buffer->state.pipeline->graphics.vtx_emit_num);
- radeon_emit(cmd_buffer->cs, firstVertex);
- radeon_emit(cmd_buffer->cs, firstInstance);
- if (cmd_buffer->state.pipeline->graphics.vtx_emit_num == 3)
- radeon_emit(cmd_buffer->cs, 0);
+ struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
+ for (unsigned stage = 0; stage < MESA_SHADER_STAGES; ++stage) {
+ if (!pipeline->shaders[stage])
+ continue;
+ struct ac_userdata_info *loc = radv_lookup_user_sgpr(pipeline, stage, AC_UD_VIEW_INDEX);
+ if (loc->sgpr_idx == -1)
+ continue;
+ uint32_t base_reg = radv_shader_stage_to_user_data_0(stage, cmd_buffer->device->physical_device->rad_info.chip_class, radv_pipeline_has_gs(pipeline), radv_pipeline_has_tess(pipeline));
+ radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, index);
- radeon_emit(cmd_buffer->cs, PKT3(PKT3_NUM_INSTANCES, 0, cmd_buffer->state.predicating));
- radeon_emit(cmd_buffer->cs, instanceCount);
+ }
+ if (pipeline->gs_copy_shader) {
+ struct ac_userdata_info *loc = &pipeline->gs_copy_shader->info.user_sgprs_locs.shader_data[AC_UD_VIEW_INDEX];
+ if (loc->sgpr_idx != -1) {
+ uint32_t base_reg = R_00B130_SPI_SHADER_USER_DATA_VS_0;
+ radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, index);
+ }
+ }
+}
+static void
+radv_cs_emit_draw_packet(struct radv_cmd_buffer *cmd_buffer,
+ uint32_t vertex_count)
+{
radeon_emit(cmd_buffer->cs, PKT3(PKT3_DRAW_INDEX_AUTO, 1, cmd_buffer->state.predicating));
- radeon_emit(cmd_buffer->cs, vertexCount);
+ radeon_emit(cmd_buffer->cs, vertex_count);
radeon_emit(cmd_buffer->cs, V_0287F0_DI_SRC_SEL_AUTO_INDEX |
- S_0287F0_USE_OPAQUE(0));
-
- assert(cmd_buffer->cs->cdw <= cdw_max);
-
- radv_cmd_buffer_trace_emit(cmd_buffer);
+ S_0287F0_USE_OPAQUE(0));
}
-void radv_CmdDrawIndexed(
- VkCommandBuffer commandBuffer,
- uint32_t indexCount,
- uint32_t instanceCount,
- uint32_t firstIndex,
- int32_t vertexOffset,
- uint32_t firstInstance)
+static void
+radv_cs_emit_draw_indexed_packet(struct radv_cmd_buffer *cmd_buffer,
+ uint64_t index_va,
+ uint32_t index_count)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- int index_size = cmd_buffer->state.index_type ? 4 : 2;
- uint64_t index_va;
-
- radv_cmd_buffer_flush_state(cmd_buffer, true, (instanceCount > 1), false, indexCount);
-
- MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 16);
-
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
- radeon_set_uconfig_reg_idx(cmd_buffer->cs, R_03090C_VGT_INDEX_TYPE,
- 2, cmd_buffer->state.index_type);
- } else {
- radeon_emit(cmd_buffer->cs, PKT3(PKT3_INDEX_TYPE, 0, 0));
- radeon_emit(cmd_buffer->cs, cmd_buffer->state.index_type);
- }
-
- assert(cmd_buffer->state.pipeline->graphics.vtx_base_sgpr);
- radeon_set_sh_reg_seq(cmd_buffer->cs, cmd_buffer->state.pipeline->graphics.vtx_base_sgpr,
- cmd_buffer->state.pipeline->graphics.vtx_emit_num);
- radeon_emit(cmd_buffer->cs, vertexOffset);
- radeon_emit(cmd_buffer->cs, firstInstance);
- if (cmd_buffer->state.pipeline->graphics.vtx_emit_num == 3)
- radeon_emit(cmd_buffer->cs, 0);
-
- radeon_emit(cmd_buffer->cs, PKT3(PKT3_NUM_INSTANCES, 0, 0));
- radeon_emit(cmd_buffer->cs, instanceCount);
-
- index_va = cmd_buffer->state.index_va;
- index_va += firstIndex * index_size;
radeon_emit(cmd_buffer->cs, PKT3(PKT3_DRAW_INDEX_2, 4, false));
radeon_emit(cmd_buffer->cs, cmd_buffer->state.max_index_count);
radeon_emit(cmd_buffer->cs, index_va);
- radeon_emit(cmd_buffer->cs, (index_va >> 32UL) & 0xFF);
- radeon_emit(cmd_buffer->cs, indexCount);
+ radeon_emit(cmd_buffer->cs, index_va >> 32);
+ radeon_emit(cmd_buffer->cs, index_count);
radeon_emit(cmd_buffer->cs, V_0287F0_DI_SRC_SEL_DMA);
-
- assert(cmd_buffer->cs->cdw <= cdw_max);
- radv_cmd_buffer_trace_emit(cmd_buffer);
}
static void
-radv_emit_indirect_draw(struct radv_cmd_buffer *cmd_buffer,
- VkBuffer _buffer,
- VkDeviceSize offset,
- VkBuffer _count_buffer,
- VkDeviceSize count_offset,
- uint32_t draw_count,
- uint32_t stride,
- bool indexed)
+radv_cs_emit_indirect_draw_packet(struct radv_cmd_buffer *cmd_buffer,
+ bool indexed,
+ uint32_t draw_count,
+ uint64_t count_va,
+ uint32_t stride)
{
- RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
- RADV_FROM_HANDLE(radv_buffer, count_buffer, _count_buffer);
struct radeon_winsys_cs *cs = cmd_buffer->cs;
unsigned di_src_sel = indexed ? V_0287F0_DI_SRC_SEL_DMA
- : V_0287F0_DI_SRC_SEL_AUTO_INDEX;
- uint64_t indirect_va = cmd_buffer->device->ws->buffer_get_va(buffer->bo);
- indirect_va += offset + buffer->offset;
- uint64_t count_va = 0;
-
- if (count_buffer) {
- count_va = cmd_buffer->device->ws->buffer_get_va(count_buffer->bo);
- count_va += count_offset + count_buffer->offset;
-
- cmd_buffer->device->ws->cs_add_buffer(cs, count_buffer->bo, 8);
- }
-
- if (!draw_count)
- return;
-
- cmd_buffer->device->ws->cs_add_buffer(cs, buffer->bo, 8);
- bool draw_id_enable = cmd_buffer->state.pipeline->shaders[MESA_SHADER_VERTEX]->info.info.vs.needs_draw_id;
+ : V_0287F0_DI_SRC_SEL_AUTO_INDEX;
+ bool draw_id_enable = radv_get_vertex_shader(cmd_buffer->state.pipeline)->info.info.vs.needs_draw_id;
uint32_t base_reg = cmd_buffer->state.pipeline->graphics.vtx_base_sgpr;
assert(base_reg);
- radeon_emit(cs, PKT3(PKT3_SET_BASE, 2, 0));
- radeon_emit(cs, 1);
- radeon_emit(cs, indirect_va);
- radeon_emit(cs, indirect_va >> 32);
-
if (draw_count == 1 && !count_va && !draw_id_enable) {
radeon_emit(cs, PKT3(indexed ? PKT3_DRAW_INDEX_INDIRECT :
PKT3_DRAW_INDIRECT, 3, false));
@@ -2819,123 +2984,531 @@
radeon_emit(cs, stride); /* stride */
radeon_emit(cs, di_src_sel);
}
-
- radv_cmd_buffer_trace_emit(cmd_buffer);
}
+struct radv_draw_info {
+ /**
+ * Number of vertices.
+ */
+ uint32_t count;
+
+ /**
+ * Index of the first vertex.
+ */
+ int32_t vertex_offset;
+
+ /**
+ * First instance id.
+ */
+ uint32_t first_instance;
+
+ /**
+ * Number of instances.
+ */
+ uint32_t instance_count;
+
+ /**
+ * First index (indexed draws only).
+ */
+ uint32_t first_index;
+
+ /**
+ * Whether it's an indexed draw.
+ */
+ bool indexed;
+
+ /**
+ * Indirect draw parameters resource.
+ */
+ struct radv_buffer *indirect;
+ uint64_t indirect_offset;
+ uint32_t stride;
+
+ /**
+ * Draw count parameters resource.
+ */
+ struct radv_buffer *count_buffer;
+ uint64_t count_buffer_offset;
+};
+
static void
-radv_cmd_draw_indirect_count(VkCommandBuffer commandBuffer,
- VkBuffer buffer,
- VkDeviceSize offset,
- VkBuffer countBuffer,
- VkDeviceSize countBufferOffset,
- uint32_t maxDrawCount,
- uint32_t stride)
+radv_emit_draw_packets(struct radv_cmd_buffer *cmd_buffer,
+ const struct radv_draw_info *info)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- radv_cmd_buffer_flush_state(cmd_buffer, false, false, true, 0);
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ struct radeon_winsys *ws = cmd_buffer->device->ws;
+ struct radeon_winsys_cs *cs = cmd_buffer->cs;
- MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws,
- cmd_buffer->cs, 14);
+ if (info->indirect) {
+ uint64_t va = radv_buffer_get_va(info->indirect->bo);
+ uint64_t count_va = 0;
- radv_emit_indirect_draw(cmd_buffer, buffer, offset,
- countBuffer, countBufferOffset, maxDrawCount, stride, false);
+ va += info->indirect->offset + info->indirect_offset;
- assert(cmd_buffer->cs->cdw <= cdw_max);
+ ws->cs_add_buffer(cs, info->indirect->bo, 8);
+
+ radeon_emit(cs, PKT3(PKT3_SET_BASE, 2, 0));
+ radeon_emit(cs, 1);
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+
+ if (info->count_buffer) {
+ count_va = radv_buffer_get_va(info->count_buffer->bo);
+ count_va += info->count_buffer->offset +
+ info->count_buffer_offset;
+
+ ws->cs_add_buffer(cs, info->count_buffer->bo, 8);
+ }
+
+ if (!state->subpass->view_mask) {
+ radv_cs_emit_indirect_draw_packet(cmd_buffer,
+ info->indexed,
+ info->count,
+ count_va,
+ info->stride);
+ } else {
+ unsigned i;
+ for_each_bit(i, state->subpass->view_mask) {
+ radv_emit_view_index(cmd_buffer, i);
+
+ radv_cs_emit_indirect_draw_packet(cmd_buffer,
+ info->indexed,
+ info->count,
+ count_va,
+ info->stride);
+ }
+ }
+ } else {
+ assert(state->pipeline->graphics.vtx_base_sgpr);
+ radeon_set_sh_reg_seq(cs, state->pipeline->graphics.vtx_base_sgpr,
+ state->pipeline->graphics.vtx_emit_num);
+ radeon_emit(cs, info->vertex_offset);
+ radeon_emit(cs, info->first_instance);
+ if (state->pipeline->graphics.vtx_emit_num == 3)
+ radeon_emit(cs, 0);
+
+ radeon_emit(cs, PKT3(PKT3_NUM_INSTANCES, 0, state->predicating));
+ radeon_emit(cs, info->instance_count);
+
+ if (info->indexed) {
+ int index_size = state->index_type ? 4 : 2;
+ uint64_t index_va;
+
+ index_va = state->index_va;
+ index_va += info->first_index * index_size;
+
+ if (!state->subpass->view_mask) {
+ radv_cs_emit_draw_indexed_packet(cmd_buffer,
+ index_va,
+ info->count);
+ } else {
+ unsigned i;
+ for_each_bit(i, state->subpass->view_mask) {
+ radv_emit_view_index(cmd_buffer, i);
+
+ radv_cs_emit_draw_indexed_packet(cmd_buffer,
+ index_va,
+ info->count);
+ }
+ }
+ } else {
+ if (!state->subpass->view_mask) {
+ radv_cs_emit_draw_packet(cmd_buffer, info->count);
+ } else {
+ unsigned i;
+ for_each_bit(i, state->subpass->view_mask) {
+ radv_emit_view_index(cmd_buffer, i);
+
+ radv_cs_emit_draw_packet(cmd_buffer,
+ info->count);
+ }
+ }
+ }
+ }
}
+/**
+ * Emit the graphics state that is flagged dirty on the command buffer ahead
+ * of the draw described by info.
+ *
+ * The graphics pipeline and the framebuffer state are emitted when their
+ * dirty bits are set, the index buffer is handled as described below, the
+ * dynamic state is flushed and finally the draw registers are emitted.
+ */
static void
-radv_cmd_draw_indexed_indirect_count(
- VkCommandBuffer commandBuffer,
- VkBuffer buffer,
- VkDeviceSize offset,
- VkBuffer countBuffer,
- VkDeviceSize countBufferOffset,
- uint32_t maxDrawCount,
- uint32_t stride)
+radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer,
+ const struct radv_draw_info *info)
 {
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- uint64_t index_va;
- radv_cmd_buffer_flush_state(cmd_buffer, true, false, true, 0);
+ if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE)
+ radv_emit_graphics_pipeline(cmd_buffer);
- index_va = cmd_buffer->state.index_va;
+ if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER)
+ radv_emit_framebuffer_state(cmd_buffer);
- MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 21);
+ /* The index buffer is only emitted for indexed draws. */
+ if (info->indexed) {
+ if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_INDEX_BUFFER)
+ radv_emit_index_buffer(cmd_buffer);
+ } else {
+ /* On CI and later, non-indexed draws overwrite VGT_INDEX_TYPE,
+ * so the state must be re-emitted before the next indexed
+ * draw.
+ */
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK)
+ cmd_buffer->state.dirty |= RADV_CMD_DIRTY_INDEX_BUFFER;
+ }
- radeon_emit(cmd_buffer->cs, PKT3(PKT3_INDEX_TYPE, 0, 0));
- radeon_emit(cmd_buffer->cs, cmd_buffer->state.index_type);
+ radv_cmd_buffer_flush_dynamic_state(cmd_buffer);
- radeon_emit(cmd_buffer->cs, PKT3(PKT3_INDEX_BASE, 1, 0));
- radeon_emit(cmd_buffer->cs, index_va);
- radeon_emit(cmd_buffer->cs, index_va >> 32);
+ /* Indirect draws pass a count of 0 here because info->count then holds
+ * the number of draws rather than a vertex/index count (see the
+ * vkCmdDraw*Indirect* entry points).
+ */
+ radv_emit_draw_registers(cmd_buffer, info->indexed,
+ info->instance_count > 1, info->indirect,
+ info->indirect ? 0 : info->count);
+}
- radeon_emit(cmd_buffer->cs, PKT3(PKT3_INDEX_BUFFER_SIZE, 0, 0));
- radeon_emit(cmd_buffer->cs, cmd_buffer->state.max_index_count);
+/**
+ * Common implementation behind the vkCmdDraw* entry points.
+ *
+ * Reserves command stream space, then emits graphics state, cache flushes,
+ * descriptor uploads, the draw packets and the shader prefetch in an order
+ * that depends on whether a pipeline sync is pending in state.flush_bits.
+ *
+ * Returns early, without emitting the draw packets, when
+ * radv_upload_graphics_shader_descriptors() reports failure.
+ * NOTE(review): on that early return radv_cmd_buffer_after_draw() is not
+ * called either - confirm this is intended.
+ */
+static void
+radv_draw(struct radv_cmd_buffer *cmd_buffer,
+ const struct radv_draw_info *info)
+{
+ /* True only when the pipeline dirty bit is set AND a pipeline is bound
+ * AND it differs from the one that was last emitted. It gates the shader
+ * prefetch and is forwarded to the descriptor upload.
+ */
+ bool pipeline_is_dirty =
+ (cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE) &&
+ cmd_buffer->state.pipeline &&
+ cmd_buffer->state.pipeline != cmd_buffer->state.emitted_pipeline;
+
+ /* Reserve 4096 dwords up front; checked by the assert at the end. */
+ MAYBE_UNUSED unsigned cdw_max =
+ radeon_check_space(cmd_buffer->device->ws,
+ cmd_buffer->cs, 4096);
+
+ /* Use optimal packet order based on whether we need to sync the
+ * pipeline.
+ */
+ if (cmd_buffer->state.flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB |
+ RADV_CMD_FLAG_FLUSH_AND_INV_DB |
+ RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
+ RADV_CMD_FLAG_CS_PARTIAL_FLUSH)) {
+ /* If we have to wait for idle, set all states first, so that
+ * all SET packets are processed in parallel with previous draw
+ * calls. Then upload descriptors, set shader pointers, and
+ * draw, and prefetch at the end. This ensures that the time
+ * the CUs are idle is very short. (there are only SET_SH
+ * packets between the wait and the draw)
+ */
+ radv_emit_all_graphics_states(cmd_buffer, info);
+ si_emit_cache_flush(cmd_buffer);
+ /* <-- CUs are idle here --> */
+
+ if (!radv_upload_graphics_shader_descriptors(cmd_buffer, pipeline_is_dirty))
+ return;
+
+ radv_emit_draw_packets(cmd_buffer, info);
+ /* <-- CUs are busy here --> */
+
+ /* Start prefetches after the draw has been started. Both will
+ * run in parallel, but starting the draw first is more
+ * important.
+ */
+ if (pipeline_is_dirty) {
+ radv_emit_shaders_prefetch(cmd_buffer,
+ cmd_buffer->state.pipeline);
+ }
+ } else {
+ /* If we don't wait for idle, start prefetches first, then set
+ * states, and draw at the end.
+ */
+ si_emit_cache_flush(cmd_buffer);
- radv_emit_indirect_draw(cmd_buffer, buffer, offset,
- countBuffer, countBufferOffset, maxDrawCount, stride, true);
+ if (pipeline_is_dirty) {
+ radv_emit_shaders_prefetch(cmd_buffer,
+ cmd_buffer->state.pipeline);
+ }
+
+ if (!radv_upload_graphics_shader_descriptors(cmd_buffer, pipeline_is_dirty))
+ return;
+
+ radv_emit_all_graphics_states(cmd_buffer, info);
+ radv_emit_draw_packets(cmd_buffer, info);
+ }
assert(cmd_buffer->cs->cdw <= cdw_max);
+ radv_cmd_buffer_after_draw(cmd_buffer);
+}
+
+/**
+ * vkCmdDraw entry point: records a direct, non-indexed draw by describing it
+ * in a radv_draw_info and handing it to radv_draw().
+ */
+void radv_CmdDraw(
+	VkCommandBuffer commandBuffer,
+	uint32_t vertexCount,
+	uint32_t instanceCount,
+	uint32_t firstVertex,
+	uint32_t firstInstance)
+{
+	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+
+	/* Members that are not named stay zero, which leaves the draw
+	 * non-indexed and without an indirect or count buffer.
+	 */
+	const struct radv_draw_info info = {
+		.count = vertexCount,
+		.instance_count = instanceCount,
+		.first_instance = firstInstance,
+		.vertex_offset = firstVertex,
+	};
+
+	radv_draw(cmd_buffer, &info);
+}
+
+/**
+ * vkCmdDrawIndexed entry point: records a direct, indexed draw by describing
+ * it in a radv_draw_info and handing it to radv_draw().
+ */
+void radv_CmdDrawIndexed(
+	VkCommandBuffer commandBuffer,
+	uint32_t indexCount,
+	uint32_t instanceCount,
+	uint32_t firstIndex,
+	int32_t vertexOffset,
+	uint32_t firstInstance)
+{
+	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+
+	/* Members that are not named stay zero (no indirect/count buffer). */
+	const struct radv_draw_info info = {
+		.indexed = true,
+		.count = indexCount,
+		.instance_count = instanceCount,
+		.first_index = firstIndex,
+		.vertex_offset = vertexOffset,
+		.first_instance = firstInstance,
+	};
+
+	radv_draw(cmd_buffer, &info);
}
+/**
+ * vkCmdDrawIndirect entry point: records non-indexed draws whose parameters
+ * live in _buffer at the given offset. drawCount and stride are forwarded
+ * unchanged in the draw description.
+ */
void radv_CmdDrawIndirect(
VkCommandBuffer commandBuffer,
- VkBuffer buffer,
+ VkBuffer _buffer,
VkDeviceSize offset,
uint32_t drawCount,
uint32_t stride)
{
- radv_cmd_draw_indirect_count(commandBuffer, buffer, offset,
- VK_NULL_HANDLE, 0, drawCount, stride);
+	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+	RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
+
+	/* Members that are not named stay zero (non-indexed, no count buffer). */
+	const struct radv_draw_info info = {
+		.count = drawCount,
+		.indirect = buffer,
+		.indirect_offset = offset,
+		.stride = stride,
+	};
+
+	radv_draw(cmd_buffer, &info);
}
+/**
+ * vkCmdDrawIndexedIndirect entry point: records indexed draws whose
+ * parameters live in _buffer at the given offset. drawCount and stride are
+ * forwarded unchanged in the draw description.
+ */
void radv_CmdDrawIndexedIndirect(
VkCommandBuffer commandBuffer,
- VkBuffer buffer,
+ VkBuffer _buffer,
VkDeviceSize offset,
uint32_t drawCount,
uint32_t stride)
{
- radv_cmd_draw_indexed_indirect_count(commandBuffer, buffer, offset,
- VK_NULL_HANDLE, 0, drawCount, stride);
+	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+	RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
+
+	/* Members that are not named stay zero (no count buffer). */
+	const struct radv_draw_info info = {
+		.indexed = true,
+		.count = drawCount,
+		.indirect = buffer,
+		.indirect_offset = offset,
+		.stride = stride,
+	};
+
+	radv_draw(cmd_buffer, &info);
}
+/**
+ * vkCmdDrawIndirectCountAMD entry point: records non-indexed indirect draws
+ * with an additional count buffer. maxDrawCount is stored as the draw
+ * description's count.
+ */
void radv_CmdDrawIndirectCountAMD(
VkCommandBuffer commandBuffer,
- VkBuffer buffer,
+ VkBuffer _buffer,
VkDeviceSize offset,
- VkBuffer countBuffer,
+ VkBuffer _countBuffer,
VkDeviceSize countBufferOffset,
uint32_t maxDrawCount,
uint32_t stride)
{
- radv_cmd_draw_indirect_count(commandBuffer, buffer, offset,
- countBuffer, countBufferOffset,
- maxDrawCount, stride);
+	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+	RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
+	RADV_FROM_HANDLE(radv_buffer, count_buffer, _countBuffer);
+
+	/* Members that are not named stay zero (non-indexed). */
+	const struct radv_draw_info info = {
+		.count = maxDrawCount,
+		.indirect = buffer,
+		.indirect_offset = offset,
+		.count_buffer = count_buffer,
+		.count_buffer_offset = countBufferOffset,
+		.stride = stride,
+	};
+
+	radv_draw(cmd_buffer, &info);
}
+/**
+ * vkCmdDrawIndexedIndirectCountAMD entry point: records indexed indirect
+ * draws with an additional count buffer. maxDrawCount is stored as the draw
+ * description's count.
+ */
void radv_CmdDrawIndexedIndirectCountAMD(
VkCommandBuffer commandBuffer,
- VkBuffer buffer,
+ VkBuffer _buffer,
VkDeviceSize offset,
- VkBuffer countBuffer,
+ VkBuffer _countBuffer,
VkDeviceSize countBufferOffset,
uint32_t maxDrawCount,
uint32_t stride)
{
- radv_cmd_draw_indexed_indirect_count(commandBuffer, buffer, offset,
- countBuffer, countBufferOffset,
- maxDrawCount, stride);
+	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+	RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
+	RADV_FROM_HANDLE(radv_buffer, count_buffer, _countBuffer);
+
+	/* Every member that the indexed indirect-count path uses is named. */
+	const struct radv_draw_info info = {
+		.indexed = true,
+		.count = maxDrawCount,
+		.indirect = buffer,
+		.indirect_offset = offset,
+		.count_buffer = count_buffer,
+		.count_buffer_offset = countBufferOffset,
+		.stride = stride,
+	};
+
+	radv_draw(cmd_buffer, &info);
+}
+
+/**
+ * Description of one compute dispatch, consumed by radv_dispatch() and
+ * radv_emit_dispatch_packets().
+ */
+struct radv_dispatch_info {
+ /**
+ * Determine the layout of the grid (in block units) to be used.
+ * Only read for direct dispatches (indirect == NULL).
+ * NOTE(review): when unaligned is set, radv_emit_dispatch_packets()
+ * passes these values through round_up_u32() together with the
+ * shader's block size, so they are presumably thread counts in that
+ * case - confirm.
+ */
+ uint32_t blocks[3];
+
+ /**
+ * Whether it's an unaligned compute dispatch.
+ */
+ bool unaligned;
+
+ /**
+ * Indirect compute parameters resource.
+ * A non-NULL buffer selects the indirect dispatch path; the
+ * parameters are read at the buffer's own offset plus indirect_offset.
+ */
+ struct radv_buffer *indirect;
+ uint64_t indirect_offset;
+};
+
+/**
+ * Emit the command stream packets for one compute dispatch.
+ *
+ * Handles both the indirect path (info->indirect set) and the direct path,
+ * including the unaligned direct variant that programs partial thread
+ * groups. When the compute shader has a grid-size user SGPR location
+ * (loc->sgpr_idx != -1), the grid size is also written there.
+ */
+static void
+radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer,
+ const struct radv_dispatch_info *info)
+{
+ struct radv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
+ struct radv_shader_variant *compute_shader = pipeline->shaders[MESA_SHADER_COMPUTE];
+ struct radeon_winsys *ws = cmd_buffer->device->ws;
+ struct radeon_winsys_cs *cs = cmd_buffer->cs;
+ struct ac_userdata_info *loc;
+ unsigned dispatch_initiator;
+ uint8_t grid_used;
+
+ /* Number of grid-size components that get written to user SGPRs. */
+ grid_used = compute_shader->info.info.cs.grid_components_used;
+
+ loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_COMPUTE,
+ AC_UD_CS_GRID_SIZE);
+
+ /* Reserve 25 dwords; the assert at the end checks against this. */
+ MAYBE_UNUSED unsigned cdw_max = radeon_check_space(ws, cs, 25);
+
+ dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1) |
+ S_00B800_FORCE_START_AT_000(1);
+
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) {
+ /* If the KMD allows it (there is a KMD hw register for it),
+ * allow launching waves out-of-order.
+ */
+ dispatch_initiator |= S_00B800_ORDER_MODE(1);
+ }
+
+ if (info->indirect) {
+ uint64_t va = radv_buffer_get_va(info->indirect->bo);
+
+ va += info->indirect->offset + info->indirect_offset;
+
+ ws->cs_add_buffer(cs, info->indirect->bo, 8);
+
+ if (loc->sgpr_idx != -1) {
+ /* Copy each used grid-size dword from the indirect buffer
+ * (memory source) into the matching compute user-data
+ * register (register destination).
+ */
+ for (unsigned i = 0; i < grid_used; ++i) {
+ radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+ radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) |
+ COPY_DATA_DST_SEL(COPY_DATA_REG));
+ radeon_emit(cs, (va + 4 * i));
+ radeon_emit(cs, (va + 4 * i) >> 32);
+ radeon_emit(cs, ((R_00B900_COMPUTE_USER_DATA_0
+ + loc->sgpr_idx * 4) >> 2) + i);
+ radeon_emit(cs, 0);
+ }
+ }
+
+ if (radv_cmd_buffer_uses_mec(cmd_buffer)) {
+ /* MEC path: DISPATCH_INDIRECT carries the 64-bit address. */
+ radeon_emit(cs, PKT3(PKT3_DISPATCH_INDIRECT, 2, 0) |
+ PKT3_SHADER_TYPE_S(1));
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ radeon_emit(cs, dispatch_initiator);
+ } else {
+ /* Otherwise the address is programmed with SET_BASE and
+ * DISPATCH_INDIRECT carries a 0 dword (presumably the
+ * offset relative to that base - confirm).
+ */
+ radeon_emit(cs, PKT3(PKT3_SET_BASE, 2, 0) |
+ PKT3_SHADER_TYPE_S(1));
+ radeon_emit(cs, 1);
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+
+ radeon_emit(cs, PKT3(PKT3_DISPATCH_INDIRECT, 1, 0) |
+ PKT3_SHADER_TYPE_S(1));
+ radeon_emit(cs, 0);
+ radeon_emit(cs, dispatch_initiator);
+ }
+ } else {
+ /* Local copy: the unaligned path below rewrites these values. */
+ unsigned blocks[3] = { info->blocks[0], info->blocks[1], info->blocks[2] };
+
+ if (info->unaligned) {
+ unsigned *cs_block_size = compute_shader->info.cs.block_size;
+ unsigned remainder[3];
+
+ /* If aligned, these should be an entire block size,
+ * not 0.
+ */
+ remainder[0] = blocks[0] + cs_block_size[0] -
+ align_u32_npot(blocks[0], cs_block_size[0]);
+ remainder[1] = blocks[1] + cs_block_size[1] -
+ align_u32_npot(blocks[1], cs_block_size[1]);
+ remainder[2] = blocks[2] + cs_block_size[2] -
+ align_u32_npot(blocks[2], cs_block_size[2]);
+
+ blocks[0] = round_up_u32(blocks[0], cs_block_size[0]);
+ blocks[1] = round_up_u32(blocks[1], cs_block_size[1]);
+ blocks[2] = round_up_u32(blocks[2], cs_block_size[2]);
+
+ /* Program the full and partial thread counts per dimension. */
+ radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3);
+ radeon_emit(cs,
+ S_00B81C_NUM_THREAD_FULL(cs_block_size[0]) |
+ S_00B81C_NUM_THREAD_PARTIAL(remainder[0]));
+ radeon_emit(cs,
+ S_00B81C_NUM_THREAD_FULL(cs_block_size[1]) |
+ S_00B81C_NUM_THREAD_PARTIAL(remainder[1]));
+ radeon_emit(cs,
+ S_00B81C_NUM_THREAD_FULL(cs_block_size[2]) |
+ S_00B81C_NUM_THREAD_PARTIAL(remainder[2]));
+
+ dispatch_initiator |= S_00B800_PARTIAL_TG_EN(1);
+ }
+
+ if (loc->sgpr_idx != -1) {
+ assert(!loc->indirect);
+ assert(loc->num_sgprs == grid_used);
+
+ /* Write the (possibly rounded) grid size to the user SGPRs. */
+ radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0 +
+ loc->sgpr_idx * 4, grid_used);
+ radeon_emit(cs, blocks[0]);
+ if (grid_used > 1)
+ radeon_emit(cs, blocks[1]);
+ if (grid_used > 2)
+ radeon_emit(cs, blocks[2]);
+ }
+
+ radeon_emit(cs, PKT3(PKT3_DISPATCH_DIRECT, 3, 0) |
+ PKT3_SHADER_TYPE_S(1));
+ radeon_emit(cs, blocks[0]);
+ radeon_emit(cs, blocks[1]);
+ radeon_emit(cs, blocks[2]);
+ radeon_emit(cs, dispatch_initiator);
+ }
+
+ assert(cmd_buffer->cs->cdw <= cdw_max);
}
+/**
+ * Common implementation of a compute dispatch: emits the compute pipeline,
+ * flushes descriptors and constants for the compute stage, emits pending
+ * cache flushes, then emits the dispatch packets described by info and runs
+ * the after-draw hook.
+ */
static void
-radv_flush_compute_state(struct radv_cmd_buffer *cmd_buffer)
+radv_dispatch(struct radv_cmd_buffer *cmd_buffer,
+ const struct radv_dispatch_info *info)
{
radv_emit_compute_pipeline(cmd_buffer);
+
radv_flush_descriptors(cmd_buffer, VK_SHADER_STAGE_COMPUTE_BIT);
radv_flush_constants(cmd_buffer, cmd_buffer->state.compute_pipeline,
VK_SHADER_STAGE_COMPUTE_BIT);
+
si_emit_cache_flush(cmd_buffer);
+
+ radv_emit_dispatch_packets(cmd_buffer, info);
+
+ radv_cmd_buffer_after_draw(cmd_buffer);
}
void radv_CmdDispatch(
@@ -2945,34 +3518,13 @@
uint32_t z)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_dispatch_info info = {};
- radv_flush_compute_state(cmd_buffer);
-
- MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 10);
-
- struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.compute_pipeline,
- MESA_SHADER_COMPUTE, AC_UD_CS_GRID_SIZE);
- if (loc->sgpr_idx != -1) {
- assert(!loc->indirect);
- uint8_t grid_used = cmd_buffer->state.compute_pipeline->shaders[MESA_SHADER_COMPUTE]->info.info.cs.grid_components_used;
- assert(loc->num_sgprs == grid_used);
- radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4, grid_used);
- radeon_emit(cmd_buffer->cs, x);
- if (grid_used > 1)
- radeon_emit(cmd_buffer->cs, y);
- if (grid_used > 2)
- radeon_emit(cmd_buffer->cs, z);
- }
-
- radeon_emit(cmd_buffer->cs, PKT3(PKT3_DISPATCH_DIRECT, 3, 0) |
- PKT3_SHADER_TYPE_S(1));
- radeon_emit(cmd_buffer->cs, x);
- radeon_emit(cmd_buffer->cs, y);
- radeon_emit(cmd_buffer->cs, z);
- radeon_emit(cmd_buffer->cs, 1);
+ info.blocks[0] = x;
+ info.blocks[1] = y;
+ info.blocks[2] = z;
- assert(cmd_buffer->cs->cdw <= cdw_max);
- radv_cmd_buffer_trace_emit(cmd_buffer);
+ radv_dispatch(cmd_buffer, &info);
}
void radv_CmdDispatchIndirect(
@@ -2982,50 +3534,12 @@
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
- uint64_t va = cmd_buffer->device->ws->buffer_get_va(buffer->bo);
- va += buffer->offset + offset;
-
- cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, buffer->bo, 8);
+ struct radv_dispatch_info info = {};
- radv_flush_compute_state(cmd_buffer);
+ info.indirect = buffer;
+ info.indirect_offset = offset;
- MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 25);
- struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.compute_pipeline,
- MESA_SHADER_COMPUTE, AC_UD_CS_GRID_SIZE);
- if (loc->sgpr_idx != -1) {
- uint8_t grid_used = cmd_buffer->state.compute_pipeline->shaders[MESA_SHADER_COMPUTE]->info.info.cs.grid_components_used;
- for (unsigned i = 0; i < grid_used; ++i) {
- radeon_emit(cmd_buffer->cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cmd_buffer->cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) |
- COPY_DATA_DST_SEL(COPY_DATA_REG));
- radeon_emit(cmd_buffer->cs, (va + 4 * i));
- radeon_emit(cmd_buffer->cs, (va + 4 * i) >> 32);
- radeon_emit(cmd_buffer->cs, ((R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4) >> 2) + i);
- radeon_emit(cmd_buffer->cs, 0);
- }
- }
-
- if (radv_cmd_buffer_uses_mec(cmd_buffer)) {
- radeon_emit(cmd_buffer->cs, PKT3(PKT3_DISPATCH_INDIRECT, 2, 0) |
- PKT3_SHADER_TYPE_S(1));
- radeon_emit(cmd_buffer->cs, va);
- radeon_emit(cmd_buffer->cs, va >> 32);
- radeon_emit(cmd_buffer->cs, 1);
- } else {
- radeon_emit(cmd_buffer->cs, PKT3(PKT3_SET_BASE, 2, 0) |
- PKT3_SHADER_TYPE_S(1));
- radeon_emit(cmd_buffer->cs, 1);
- radeon_emit(cmd_buffer->cs, va);
- radeon_emit(cmd_buffer->cs, va >> 32);
-
- radeon_emit(cmd_buffer->cs, PKT3(PKT3_DISPATCH_INDIRECT, 1, 0) |
- PKT3_SHADER_TYPE_S(1));
- radeon_emit(cmd_buffer->cs, 0);
- radeon_emit(cmd_buffer->cs, 1);
- }
-
- assert(cmd_buffer->cs->cdw <= cdw_max);
- radv_cmd_buffer_trace_emit(cmd_buffer);
+ radv_dispatch(cmd_buffer, &info);
}
void radv_unaligned_dispatch(
@@ -3034,55 +3548,14 @@
uint32_t y,
uint32_t z)
{
- struct radv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
- struct radv_shader_variant *compute_shader = pipeline->shaders[MESA_SHADER_COMPUTE];
- uint32_t blocks[3], remainder[3];
-
- blocks[0] = round_up_u32(x, compute_shader->info.cs.block_size[0]);
- blocks[1] = round_up_u32(y, compute_shader->info.cs.block_size[1]);
- blocks[2] = round_up_u32(z, compute_shader->info.cs.block_size[2]);
-
- /* If aligned, these should be an entire block size, not 0 */
- remainder[0] = x + compute_shader->info.cs.block_size[0] - align_u32_npot(x, compute_shader->info.cs.block_size[0]);
- remainder[1] = y + compute_shader->info.cs.block_size[1] - align_u32_npot(y, compute_shader->info.cs.block_size[1]);
- remainder[2] = z + compute_shader->info.cs.block_size[2] - align_u32_npot(z, compute_shader->info.cs.block_size[2]);
-
- radv_flush_compute_state(cmd_buffer);
-
- MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 15);
+ struct radv_dispatch_info info = {};
- radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3);
- radeon_emit(cmd_buffer->cs,
- S_00B81C_NUM_THREAD_FULL(compute_shader->info.cs.block_size[0]) |
- S_00B81C_NUM_THREAD_PARTIAL(remainder[0]));
- radeon_emit(cmd_buffer->cs,
- S_00B81C_NUM_THREAD_FULL(compute_shader->info.cs.block_size[1]) |
- S_00B81C_NUM_THREAD_PARTIAL(remainder[1]));
- radeon_emit(cmd_buffer->cs,
- S_00B81C_NUM_THREAD_FULL(compute_shader->info.cs.block_size[2]) |
- S_00B81C_NUM_THREAD_PARTIAL(remainder[2]));
+ info.blocks[0] = x;
+ info.blocks[1] = y;
+ info.blocks[2] = z;
+ info.unaligned = 1;
- struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.compute_pipeline,
- MESA_SHADER_COMPUTE, AC_UD_CS_GRID_SIZE);
- if (loc->sgpr_idx != -1) {
- uint8_t grid_used = cmd_buffer->state.compute_pipeline->shaders[MESA_SHADER_COMPUTE]->info.info.cs.grid_components_used;
- radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4, grid_used);
- radeon_emit(cmd_buffer->cs, blocks[0]);
- if (grid_used > 1)
- radeon_emit(cmd_buffer->cs, blocks[1]);
- if (grid_used > 2)
- radeon_emit(cmd_buffer->cs, blocks[2]);
- }
- radeon_emit(cmd_buffer->cs, PKT3(PKT3_DISPATCH_DIRECT, 3, 0) |
- PKT3_SHADER_TYPE_S(1));
- radeon_emit(cmd_buffer->cs, blocks[0]);
- radeon_emit(cmd_buffer->cs, blocks[1]);
- radeon_emit(cmd_buffer->cs, blocks[2]);
- radeon_emit(cmd_buffer->cs, S_00B800_COMPUTE_SHADER_EN(1) |
- S_00B800_PARTIAL_TG_EN(1));
-
- assert(cmd_buffer->cs->cdw <= cdw_max);
- radv_cmd_buffer_trace_emit(cmd_buffer);
+ radv_dispatch(cmd_buffer, &info);
}
void radv_CmdEndRenderPass(
@@ -3110,7 +3583,8 @@
/*
* For HTILE we have the following interesting clear words:
- * 0x0000030f: Uncompressed.
+ * 0x0000030f: Uncompressed for depth+stencil HTILE.
+ * 0x0000000f: Uncompressed for depth only HTILE.
* 0xfffffff0: Clear depth to 1.0
* 0x00000000: Clear depth to 0.0
*/
@@ -3159,7 +3633,8 @@
radv_initialize_htile(cmd_buffer, image, range, 0);
} else if (!radv_layout_is_htile_compressed(image, src_layout, src_queue_mask) &&
radv_layout_is_htile_compressed(image, dst_layout, dst_queue_mask)) {
- radv_initialize_htile(cmd_buffer, image, range, 0xffffffff);
+ uint32_t clear_value = vk_format_is_stencil(image->vk_format) ? 0x30f : 0xf;
+ radv_initialize_htile(cmd_buffer, image, range, clear_value);
} else if (radv_layout_is_htile_compressed(image, src_layout, src_queue_mask) &&
!radv_layout_is_htile_compressed(image, dst_layout, dst_queue_mask)) {
VkImageSubresourceRange local_range = *range;
@@ -3198,8 +3673,7 @@
VkImageLayout dst_layout,
unsigned src_queue_mask,
unsigned dst_queue_mask,
- const VkImageSubresourceRange *range,
- VkImageAspectFlags pending_clears)
+ const VkImageSubresourceRange *range)
{
if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED) {
if (image->fmask.size)
@@ -3235,8 +3709,7 @@
VkImageLayout dst_layout,
unsigned src_queue_mask,
unsigned dst_queue_mask,
- const VkImageSubresourceRange *range,
- VkImageAspectFlags pending_clears)
+ const VkImageSubresourceRange *range)
{
if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED) {
radv_initialize_dcc(cmd_buffer, image, 0x20202020u);
@@ -3281,17 +3754,15 @@
dst_queue_mask, range,
pending_clears);
- if (image->cmask.size)
+ if (image->cmask.size || image->fmask.size)
radv_handle_cmask_image_transition(cmd_buffer, image, src_layout,
dst_layout, src_queue_mask,
- dst_queue_mask, range,
- pending_clears);
+ dst_queue_mask, range);
if (image->surface.dcc_size)
radv_handle_dcc_image_transition(cmd_buffer, image, src_layout,
dst_layout, src_queue_mask,
- dst_queue_mask, range,
- pending_clears);
+ dst_queue_mask, range);
}
void radv_CmdPipelineBarrier(
@@ -3353,7 +3824,7 @@
unsigned value)
{
struct radeon_winsys_cs *cs = cmd_buffer->cs;
- uint64_t va = cmd_buffer->device->ws->buffer_get_va(event->bo);
+ uint64_t va = radv_buffer_get_va(event->bo);
cmd_buffer->device->ws->cs_add_buffer(cs, event->bo, 8);
@@ -3365,8 +3836,8 @@
si_cs_emit_write_event_eop(cs,
cmd_buffer->state.predicating,
cmd_buffer->device->physical_device->rad_info.chip_class,
- false,
- EVENT_TYPE_BOTTOM_OF_PIPE_TS, 0,
+ radv_cmd_buffer_uses_mec(cmd_buffer),
+ V_028A90_BOTTOM_OF_PIPE_TS, 0,
1, va, 2, value);
assert(cmd_buffer->cs->cdw <= cdw_max);
@@ -3409,7 +3880,7 @@
for (unsigned i = 0; i < eventCount; ++i) {
RADV_FROM_HANDLE(radv_event, event, pEvents[i]);
- uint64_t va = cmd_buffer->device->ws->buffer_get_va(event->bo);
+ uint64_t va = radv_buffer_get_va(event->bo);
cmd_buffer->device->ws->cs_add_buffer(cs, event->bo, 8);
diff -Nru mesa-17.2.4/src/amd/vulkan/radv_cs.h mesa-17.3.3/src/amd/vulkan/radv_cs.h
--- mesa-17.2.4/src/amd/vulkan/radv_cs.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/vulkan/radv_cs.h 2018-01-18 21:30:28.000000000 +0000
@@ -28,7 +28,7 @@
#include
#include
#include
-#include "r600d_common.h"
+#include "sid.h"
static inline unsigned radeon_check_space(struct radeon_winsys *ws,
struct radeon_winsys_cs *cs,
@@ -41,11 +41,11 @@
+/* Emit the two-dword header of a SET_CONFIG_REG packet covering num
+ * registers starting at reg. The space assert reserves num additional
+ * dwords, so the caller is presumably expected to emit the num register
+ * values right after this call.
+ */
static inline void radeon_set_config_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
{
- assert(reg < R600_CONTEXT_REG_OFFSET);
+ assert(reg < SI_CONTEXT_REG_OFFSET);
assert(cs->cdw + 2 + num <= cs->max_dw);
assert(num);
radeon_emit(cs, PKT3(PKT3_SET_CONFIG_REG, num, 0));
- radeon_emit(cs, (reg - R600_CONFIG_REG_OFFSET) >> 2);
+ /* The register is encoded as a dword index relative to the config base. */
+ radeon_emit(cs, (reg - SI_CONFIG_REG_OFFSET) >> 2);
}
static inline void radeon_set_config_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
@@ -56,11 +56,11 @@
+/* Emit the two-dword header of a SET_CONTEXT_REG packet covering num
+ * registers starting at reg. The space assert reserves num additional
+ * dwords, so the caller is presumably expected to emit the num register
+ * values right after this call.
+ */
static inline void radeon_set_context_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
{
- assert(reg >= R600_CONTEXT_REG_OFFSET);
+ assert(reg >= SI_CONTEXT_REG_OFFSET);
assert(cs->cdw + 2 + num <= cs->max_dw);
assert(num);
radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, num, 0));
- radeon_emit(cs, (reg - R600_CONTEXT_REG_OFFSET) >> 2);
+ /* The register is encoded as a dword index relative to the context base. */
+ radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2);
}
static inline void radeon_set_context_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
@@ -74,10 +74,10 @@
unsigned reg, unsigned idx,
unsigned value)
{
- assert(reg >= R600_CONTEXT_REG_OFFSET);
+ assert(reg >= SI_CONTEXT_REG_OFFSET);
assert(cs->cdw + 3 <= cs->max_dw);
radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, 1, 0));
- radeon_emit(cs, (reg - R600_CONTEXT_REG_OFFSET) >> 2 | (idx << 28));
+ radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2 | (idx << 28));
radeon_emit(cs, value);
}
diff -Nru mesa-17.2.4/src/amd/vulkan/radv_debug.c mesa-17.3.3/src/amd/vulkan/radv_debug.c
--- mesa-17.2.4/src/amd/vulkan/radv_debug.c 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/amd/vulkan/radv_debug.c 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,736 @@
+/*
+ * Copyright © 2016 Red Hat.
+ * Copyright © 2016 Bas Nieuwenhuizen
+ *
+ * based in part on anv driver which is:
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include
+#include
+#include
+
+#include "sid.h"
+#include "gfx9d.h"
+#include "ac_debug.h"
+#include "radv_debug.h"
+#include "radv_shader.h"
+
+#define TRACE_BO_SIZE 4096
+
+#define COLOR_RESET "\033[0m"
+#define COLOR_RED "\033[31m"
+#define COLOR_GREEN "\033[1;32m"
+#define COLOR_YELLOW "\033[1;33m"
+#define COLOR_CYAN "\033[1;36m"
+
+/* Trace BO layout (offsets are 4 bytes):
+ *
+ * [0]: primary trace ID
+ * [1]: secondary trace ID
+ * [2-3]: 64-bit GFX pipeline pointer
+ * [4-5]: 64-bit COMPUTE pipeline pointer
+ * [6-7]: 64-bit descriptor set #0 pointer
+ * ...
+ * [68-69]: 64-bit descriptor set #31 pointer
+ */
+
+/**
+ * Create, map and zero the trace buffer object of a device.
+ *
+ * Returns false when the buffer cannot be created or mapped, true otherwise.
+ * NOTE(review): when mapping fails, device->trace_bo stays allocated; the
+ * caller presumably releases it on its error path - confirm.
+ */
+bool
+radv_init_trace(struct radv_device *device)
+{
+	struct radeon_winsys *winsys = device->ws;
+
+	device->trace_bo = winsys->buffer_create(winsys, TRACE_BO_SIZE, 8,
+						 RADEON_DOMAIN_VRAM,
+						 RADEON_FLAG_CPU_ACCESS);
+	if (device->trace_bo == NULL)
+		return false;
+
+	device->trace_id_ptr = winsys->buffer_map(device->trace_bo);
+	if (device->trace_id_ptr == NULL)
+		return false;
+
+	/* Start from an all-zero trace buffer. */
+	memset(device->trace_id_ptr, 0, TRACE_BO_SIZE);
+
+	/* The result is ignored and no fault address is requested; the call
+	 * is made with a pointer to device->dmesg_timestamp.
+	 */
+	ac_vm_fault_occured(device->physical_device->rad_info.chip_class,
+			    &device->dmesg_timestamp, NULL);
+
+	return true;
+}
+
+/**
+ * Write the trace ID and a dump of the command stream cs to the file named
+ * by the RADV_TRACE_FILE environment variable.
+ *
+ * Nothing is written when the variable is unset or the file cannot be
+ * opened; a diagnostic is printed to stderr instead.
+ */
+static void
+radv_dump_trace(struct radv_device *device, struct radeon_winsys_cs *cs)
+{
+	const char *filename = getenv("RADV_TRACE_FILE");
+	FILE *f;
+
+	/* getenv() returns NULL for an unset variable; passing that to
+	 * fopen() or to a "%s" conversion is undefined behavior.
+	 */
+	if (!filename) {
+		fprintf(stderr, "Failed to write trace dump: RADV_TRACE_FILE is not set\n");
+		return;
+	}
+
+	f = fopen(filename, "w");
+	if (!f) {
+		fprintf(stderr, "Failed to write trace dump to %s\n", filename);
+		return;
+	}
+
+	fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr);
+	device->ws->cs_dump(cs, f, (const int*)device->trace_id_ptr, 2);
+	fclose(f);
+}
+
+/**
+ * Read one register at the given offset through the winsys and, when the
+ * read succeeds, print its decoded value to f.
+ */
+static void
+radv_dump_mmapped_reg(struct radv_device *device, FILE *f, unsigned offset)
+{
+	struct radeon_winsys *winsys = device->ws;
+	uint32_t reg_value;
+
+	/* Nothing is printed when the register cannot be read. */
+	if (!winsys->read_registers(winsys, offset, 1, &reg_value))
+		return;
+
+	ac_dump_reg(f, device->physical_device->rad_info.chip_class,
+		    offset, reg_value, ~0);
+}
+
+/**
+ * Print the memory-mapped status registers (GRBM, SDMA, SRBM, CP) to f.
+ *
+ * Nothing is printed on DRM 2.x older than 2.42. Only GRBM_STATUS is printed
+ * on DRM versions older than 3.1.0. The SRBM registers are only read on
+ * chips up to and including VI.
+ */
+static void
+radv_dump_debug_registers(struct radv_device *device, FILE *f)
+{
+	struct radeon_info *info = &device->physical_device->rad_info;
+
+	if (info->drm_major == 2 && info->drm_minor < 42)
+		return; /* no radeon support */
+
+	fprintf(f, "Memory-mapped registers:\n");
+	radv_dump_mmapped_reg(device, f, R_008010_GRBM_STATUS);
+
+	/* No other registers can be read on DRM < 3.1.0.
+	 *
+	 * The (major, minor) pair is compared as a whole: testing
+	 * "minor < 1" on its own would also reject any later X.0 version.
+	 */
+	if (info->drm_major < 3 ||
+	    (info->drm_major == 3 && info->drm_minor < 1)) {
+		fprintf(f, "\n");
+		return;
+	}
+
+	radv_dump_mmapped_reg(device, f, R_008008_GRBM_STATUS2);
+	radv_dump_mmapped_reg(device, f, R_008014_GRBM_STATUS_SE0);
+	radv_dump_mmapped_reg(device, f, R_008018_GRBM_STATUS_SE1);
+	radv_dump_mmapped_reg(device, f, R_008038_GRBM_STATUS_SE2);
+	radv_dump_mmapped_reg(device, f, R_00803C_GRBM_STATUS_SE3);
+	radv_dump_mmapped_reg(device, f, R_00D034_SDMA0_STATUS_REG);
+	radv_dump_mmapped_reg(device, f, R_00D834_SDMA1_STATUS_REG);
+	if (info->chip_class <= VI) {
+		radv_dump_mmapped_reg(device, f, R_000E50_SRBM_STATUS);
+		radv_dump_mmapped_reg(device, f, R_000E4C_SRBM_STATUS2);
+		radv_dump_mmapped_reg(device, f, R_000E54_SRBM_STATUS3);
+	}
+	radv_dump_mmapped_reg(device, f, R_008680_CP_STAT);
+	radv_dump_mmapped_reg(device, f, R_008674_CP_STALLED_STAT1);
+	radv_dump_mmapped_reg(device, f, R_008678_CP_STALLED_STAT2);
+	radv_dump_mmapped_reg(device, f, R_008670_CP_STALLED_STAT3);
+	radv_dump_mmapped_reg(device, f, R_008210_CP_CPC_STATUS);
+	radv_dump_mmapped_reg(device, f, R_008214_CP_CPC_BUSY_STAT);
+	radv_dump_mmapped_reg(device, f, R_008218_CP_CPC_STALLED_STAT1);
+	radv_dump_mmapped_reg(device, f, R_00821C_CP_CPF_STATUS);
+	radv_dump_mmapped_reg(device, f, R_008220_CP_CPF_BUSY_STAT);
+	radv_dump_mmapped_reg(device, f, R_008224_CP_CPF_STALLED_STAT1);
+	fprintf(f, "\n");
+}
+
+/**
+ * Return a printable name for a Vulkan descriptor type, or "UNKNOWN" for a
+ * type that is not in the table below.
+ */
+static const char *
+radv_get_descriptor_name(enum VkDescriptorType type)
+{
+	static const struct {
+		enum VkDescriptorType type;
+		const char *name;
+	} known_types[] = {
+		{ VK_DESCRIPTOR_TYPE_SAMPLER, "SAMPLER" },
+		{ VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, "COMBINED_IMAGE_SAMPLER" },
+		{ VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, "SAMPLED_IMAGE" },
+		{ VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, "STORAGE_IMAGE" },
+		{ VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, "UNIFORM_TEXEL_BUFFER" },
+		{ VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, "STORAGE_TEXEL_BUFFER" },
+		{ VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, "UNIFORM_BUFFER" },
+		{ VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, "STORAGE_BUFFER" },
+		{ VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, "UNIFORM_BUFFER_DYNAMIC" },
+		{ VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC, "STORAGE_BUFFER_DYNAMIC" },
+		{ VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT, "INPUT_ATTACHMENT" },
+	};
+	const unsigned num_known = sizeof(known_types) / sizeof(known_types[0]);
+
+	for (unsigned k = 0; k < num_known; k++) {
+		if (known_types[k].type == type)
+			return known_types[k].name;
+	}
+
+	return "UNKNOWN";
+}
+
+/**
+ * Print the four dwords of a buffer descriptor, decoded as the consecutive
+ * registers starting at SQ_BUF_RSRC_WORD0.
+ */
+static void
+radv_dump_buffer_descriptor(enum chip_class chip_class, const uint32_t *desc,
+			    FILE *f)
+{
+	unsigned reg = R_008F00_SQ_BUF_RSRC_WORD0;
+
+	fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n");
+
+	/* One register (4 bytes apart) per descriptor dword. */
+	for (unsigned dw = 0; dw < 4; dw++, reg += 4)
+		ac_dump_reg(f, chip_class, reg, desc[dw], 0xffffffff);
+}
+
+/**
+ * Print an image descriptor: eight image dwords followed by eight FMASK
+ * dwords, both decoded as the registers starting at SQ_IMG_RSRC_WORD0.
+ */
+static void
+radv_dump_image_descriptor(enum chip_class chip_class, const uint32_t *desc,
+			   FILE *f)
+{
+	const uint32_t *image_words = desc;
+	const uint32_t *fmask_words = desc + 8;
+	unsigned dw;
+
+	fprintf(f, COLOR_CYAN " Image:" COLOR_RESET "\n");
+	for (dw = 0; dw < 8; dw++) {
+		ac_dump_reg(f, chip_class,
+			    R_008F10_SQ_IMG_RSRC_WORD0 + dw * 4,
+			    image_words[dw], 0xffffffff);
+	}
+
+	/* The FMASK part uses the same register layout as the image part. */
+	fprintf(f, COLOR_CYAN " FMASK:" COLOR_RESET "\n");
+	for (dw = 0; dw < 8; dw++) {
+		ac_dump_reg(f, chip_class,
+			    R_008F10_SQ_IMG_RSRC_WORD0 + dw * 4,
+			    fmask_words[dw], 0xffffffff);
+	}
+}
+
+/**
+ * Print the four dwords of a sampler descriptor, decoded as the consecutive
+ * registers starting at SQ_IMG_SAMP_WORD0.
+ */
+static void
+radv_dump_sampler_descriptor(enum chip_class chip_class, const uint32_t *desc,
+			     FILE *f)
+{
+	unsigned reg = R_008F30_SQ_IMG_SAMP_WORD0;
+
+	fprintf(f, COLOR_CYAN " Sampler state:" COLOR_RESET "\n");
+
+	/* One register (4 bytes apart) per descriptor dword. */
+	for (unsigned dw = 0; dw < 4; dw++, reg += 4)
+		ac_dump_reg(f, chip_class, reg, desc[dw], 0xffffffff);
+}
+
+/**
+ * Print a combined image+sampler descriptor: the image part at the start of
+ * desc, then the sampler part that begins 16 dwords in.
+ */
+static void
+radv_dump_combined_image_sampler_descriptor(enum chip_class chip_class,
+					    const uint32_t *desc, FILE *f)
+{
+	const uint32_t *image_desc = desc;
+	const uint32_t *sampler_desc = desc + 16;
+
+	radv_dump_image_descriptor(chip_class, image_desc, f);
+	radv_dump_sampler_descriptor(chip_class, sampler_desc, f);
+}
+
+/* Decode and print the descriptor words of one binding according to its
+ * descriptor type. Dynamic buffer types are not dumped yet.
+ */
+static void
+radv_dump_binding_descriptor(enum chip_class chip_class,
+			     enum VkDescriptorType type,
+			     const uint32_t *desc, FILE *f)
+{
+	switch (type) {
+	case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+	case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+	case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+	case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+		radv_dump_buffer_descriptor(chip_class, desc, f);
+		break;
+	case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+	case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+	case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+		radv_dump_image_descriptor(chip_class, desc, f);
+		break;
+	case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+		radv_dump_combined_image_sampler_descriptor(chip_class, desc, f);
+		break;
+	case VK_DESCRIPTOR_TYPE_SAMPLER:
+		radv_dump_sampler_descriptor(chip_class, desc, f);
+		break;
+	case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+	case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+		/* todo */
+		break;
+	default:
+		assert(!"unknown descriptor type");
+		break;
+	}
+}
+
+/**
+ * Print one descriptor set to f: its address and size, the raw dwords of its
+ * mapping, a summary of its layout and, for every binding, the binding's
+ * layout fields followed by its decoded descriptor.
+ *
+ * A NULL set is silently skipped.
+ */
+static void
+radv_dump_descriptor_set(enum chip_class chip_class,
+			 struct radv_descriptor_set *set, unsigned id, FILE *f)
+{
+	const struct radv_descriptor_set_layout *layout;
+	int dw, b;
+
+	if (!set)
+		return;
+	layout = set->layout;
+
+	fprintf(f, "** descriptor set (%d) **\n", id);
+	fprintf(f, "va: 0x%"PRIx64"\n", set->va);
+	fprintf(f, "size: %d\n", set->size);
+	fprintf(f, "mapped_ptr:\n");
+
+	/* Raw dump; set->size is divided by 4 to get the dword count. */
+	for (dw = 0; dw < set->size / 4; dw++)
+		fprintf(f, "\t[0x%x] = 0x%08x\n", dw, set->mapped_ptr[dw]);
+	fprintf(f, "\n");
+
+	fprintf(f, "\t*** layout ***\n");
+	fprintf(f, "\tbinding_count: %d\n", layout->binding_count);
+	fprintf(f, "\tsize: %d\n", layout->size);
+	fprintf(f, "\tshader_stages: %x\n", layout->shader_stages);
+	fprintf(f, "\tdynamic_shader_stages: %x\n",
+		layout->dynamic_shader_stages);
+	fprintf(f, "\tbuffer_count: %d\n", layout->buffer_count);
+	fprintf(f, "\tdynamic_offset_count: %d\n",
+		layout->dynamic_offset_count);
+	fprintf(f, "\n");
+
+	for (b = 0; b < layout->binding_count; b++) {
+		/* The binding offset is in bytes; mapped_ptr is in dwords. */
+		uint32_t *desc =
+			set->mapped_ptr + layout->binding[b].offset / 4;
+
+		fprintf(f, "\t\t**** binding layout (%d) ****\n", b);
+		fprintf(f, "\t\ttype: %s\n",
+			radv_get_descriptor_name(layout->binding[b].type));
+		fprintf(f, "\t\tarray_size: %d\n",
+			layout->binding[b].array_size);
+		fprintf(f, "\t\toffset: %d\n",
+			layout->binding[b].offset);
+		fprintf(f, "\t\tbuffer_offset: %d\n",
+			layout->binding[b].buffer_offset);
+		fprintf(f, "\t\tdynamic_offset_offset: %d\n",
+			layout->binding[b].dynamic_offset_offset);
+		fprintf(f, "\t\tdynamic_offset_count: %d\n",
+			layout->binding[b].dynamic_offset_count);
+		fprintf(f, "\t\tsize: %d\n",
+			layout->binding[b].size);
+		fprintf(f, "\t\timmutable_samplers_offset: %d\n",
+			layout->binding[b].immutable_samplers_offset);
+		fprintf(f, "\t\timmutable_samplers_equal: %d\n",
+			layout->binding[b].immutable_samplers_equal);
+		fprintf(f, "\n");
+
+		radv_dump_binding_descriptor(chip_class,
+					     layout->binding[b].type,
+					     desc, f);
+		fprintf(f, "\n");
+	}
+	fprintf(f, "\n\n");
+}
+
+static void /* Dump each of the MAX_SETS descriptor set pointers stored in the device trace buffer. */
+radv_dump_descriptors(struct radv_pipeline *pipeline, FILE *f)
+{
+ struct radv_device *device = pipeline->device;
+ enum chip_class chip_class = device->physical_device->rad_info.chip_class;
+ uint64_t *ptr = (uint64_t *)device->trace_id_ptr; /* trace buffer viewed as an array of 64-bit slots */
+ int i;
+
+ fprintf(f, "List of descriptors:\n");
+ for (i = 0; i < MAX_SETS; i++) {
+ struct radv_descriptor_set *set =
+ (struct radv_descriptor_set *)ptr[i + 3]; /* set pointers are read from slot 3 onwards; NULL is tolerated by the callee */
+
+ radv_dump_descriptor_set(chip_class, set, i, f);
+ }
+}
+
+struct radv_shader_inst {
+ char text[160]; /* one disasm line, NUL-terminated, with the PC/off/size annotation appended */
+ unsigned offset; /* instruction offset in bytes: sum of the sizes of the preceding instructions */
+ unsigned size; /* instruction size in bytes = 4 or 8 */
+};
+
+/* Split a disassembly string into lines and add them to the array pointed
+ * to by "instructions", starting at index *num; *num is advanced once per line. */
+static void si_add_split_disasm(const char *disasm,
+ uint64_t start_addr,
+ unsigned *num,
+ struct radv_shader_inst *instructions)
+{
+ struct radv_shader_inst *last_inst = *num ? &instructions[*num - 1] : NULL;
+ char *next;
+
+ while ((next = strchr(disasm, '\n'))) { /* any text after the final newline is ignored */
+ struct radv_shader_inst *inst = &instructions[*num]; /* NOTE(review): no capacity check here; the caller must size the array */
+ unsigned len = next - disasm;
+
+ assert(len < ARRAY_SIZE(inst->text)); /* NOTE(review): this assert is the only guard on the memcpy below */
+ memcpy(inst->text, disasm, len);
+ inst->text[len] = 0;
+ inst->offset = last_inst ? last_inst->offset + last_inst->size : 0; /* running byte offset */
+
+ const char *semicolon = strchr(disasm, ';'); /* search starts at this line but is not limited to it */
+ assert(semicolon);
+ /* More than 16 chars after ";" means the instruction is 8 bytes long. */
+ inst->size = next - semicolon > 16 ? 8 : 4;
+
+ snprintf(inst->text + len, ARRAY_SIZE(inst->text) - len,
+ " [PC=0x%"PRIx64", off=%u, size=%u]",
+ start_addr + inst->offset, inst->offset, inst->size);
+
+ last_inst = inst;
+ (*num)++;
+ disasm = next + 1;
+ }
+}
+
+static void /* Print the shader's disassembly and mark every instruction that a wave is currently executing. */
+radv_dump_annotated_shader(struct radv_pipeline *pipeline,
+ struct radv_shader_variant *shader,
+ gl_shader_stage stage,
+ struct ac_wave_info *waves, unsigned num_waves,
+ FILE *f)
+{
+ uint64_t start_addr, end_addr;
+ unsigned i;
+
+ if (!shader)
+ return;
+
+ start_addr = radv_buffer_get_va(shader->bo) + shader->bo_offset;
+ end_addr = start_addr + shader->code_size;
+
+ /* See if any wave executes the shader. */
+ for (i = 0; i < num_waves; i++) {
+ if (start_addr <= waves[i].pc && waves[i].pc <= end_addr)
+ break;
+ }
+
+ if (i == num_waves)
+ return; /* the shader is not being executed */
+
+ /* Remember the first found wave. The waves are sorted according to PC. */
+ waves = &waves[i];
+ num_waves -= i;
+
+ /* Get the list of instructions.
+ * Buffer size / 4 is the upper bound of the instruction count.
+ */
+ unsigned num_inst = 0;
+ struct radv_shader_inst *instructions = calloc(shader->code_size / 4, sizeof(struct radv_shader_inst));
+ if (!instructions)
+ return; /* allocation failed or was zero-sized: skip the annotated dump instead of writing through NULL */
+ si_add_split_disasm(shader->disasm_string,
+ start_addr, &num_inst, instructions);
+
+ fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n",
+ radv_get_shader_name(shader, stage));
+
+ /* Print instructions with annotations. */
+ for (i = 0; i < num_inst; i++) {
+ struct radv_shader_inst *inst = &instructions[i];
+
+ fprintf(f, "%s\n", inst->text);
+
+ /* Print which waves execute the instruction right now. */
+ while (num_waves && start_addr + inst->offset == waves->pc) {
+ fprintf(f,
+ " " COLOR_GREEN "^ SE%u SH%u CU%u "
+ "SIMD%u WAVE%u EXEC=%016"PRIx64 " ",
+ waves->se, waves->sh, waves->cu, waves->simd,
+ waves->wave, waves->exec);
+
+ if (inst->size == 4) {
+ fprintf(f, "INST32=%08X" COLOR_RESET "\n",
+ waves->inst_dw0);
+ } else {
+ fprintf(f, "INST64=%08X %08X" COLOR_RESET "\n",
+ waves->inst_dw0, waves->inst_dw1);
+ }
+
+ waves->matched = true; /* lets the caller list the waves that matched no dumped shader */
+ waves = &waves[1];
+ num_waves--;
+ }
+ }
+
+ fprintf(f, "\n\n");
+ free(instructions);
+}
+
+static void /* Query the active waves, annotate the pipeline's shaders with them, then list the waves that matched none. */
+radv_dump_annotated_shaders(struct radv_pipeline *pipeline,
+ struct radv_shader_variant *compute_shader,
+ FILE *f)
+{
+ struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP];
+ unsigned num_waves = ac_get_wave_info(waves); /* fills waves[] and returns how many entries are valid */
+ unsigned mask;
+
+ fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET
+ "\n\n", num_waves);
+
+ /* Dump annotated active graphics shaders. */
+ mask = pipeline->active_stages;
+ while (mask) {
+ int stage = u_bit_scan(&mask); /* NOTE(review): presumably pops one set bit and returns its index - confirm */
+
+ radv_dump_annotated_shader(pipeline, pipeline->shaders[stage],
+ stage, waves, num_waves, f);
+ }
+
+ radv_dump_annotated_shader(pipeline, compute_shader,
+ MESA_SHADER_COMPUTE, waves, num_waves, f);
+
+ /* Print waves executing shaders that are not currently bound. */
+ unsigned i;
+ bool found = false; /* becomes true once the section header has been printed */
+ for (i = 0; i < num_waves; i++) {
+ if (waves[i].matched) /* already reported next to an instruction above */
+ continue;
+
+ if (!found) {
+ fprintf(f, COLOR_CYAN
+ "Waves not executing currently-bound shaders:"
+ COLOR_RESET "\n");
+ found = true;
+ }
+ fprintf(f, " SE%u SH%u CU%u SIMD%u WAVE%u EXEC=%016"PRIx64
+ " INST=%08X %08X PC=%"PRIx64"\n",
+ waves[i].se, waves[i].sh, waves[i].cu, waves[i].simd,
+ waves[i].wave, waves[i].exec, waves[i].inst_dw0,
+ waves[i].inst_dw1, waves[i].pc);
+ }
+ if (found)
+ fprintf(f, "\n\n");
+}
+
+static void /* Write one shader's SPIR-V, NIR, disassembly and statistics to f; does nothing for a NULL shader. */
+radv_dump_shader(struct radv_pipeline *pipeline,
+ struct radv_shader_variant *shader, gl_shader_stage stage,
+ FILE *f)
+{
+ if (!shader)
+ return;
+
+ fprintf(f, "%s:\n\n", radv_get_shader_name(shader, stage));
+
+ if (shader->spirv) { /* SPIR-V is only printed when the variant carries it */
+ fprintf(f, "SPIRV:\n");
+ radv_print_spirv(shader->spirv, shader->spirv_size, f);
+ }
+
+ if (shader->nir) { /* likewise for the NIR */
+ fprintf(f, "NIR:\n");
+ nir_print_shader(shader->nir, f);
+ }
+
+ fprintf(f, "DISASM:\n%s\n", shader->disasm_string); /* goes to f like the rest of the dump, not hard-coded stderr */
+
+ radv_shader_dump_stats(pipeline->device, shader, stage, f);
+}
+
+static void /* Dump the shader of every stage set in pipeline->active_stages, then compute_shader. */
+radv_dump_shaders(struct radv_pipeline *pipeline,
+ struct radv_shader_variant *compute_shader, FILE *f)
+{
+ unsigned mask;
+
+ /* Dump active graphics shaders. */
+ mask = pipeline->active_stages;
+ while (mask) {
+ int stage = u_bit_scan(&mask); /* NOTE(review): presumably pops one set bit and returns its index - confirm */
+
+ radv_dump_shader(pipeline, pipeline->shaders[stage], stage, f);
+ }
+
+ radv_dump_shader(pipeline, compute_shader, MESA_SHADER_COMPUTE, f); /* returns immediately when compute_shader is NULL */
+}
+
+static void /* Dump shaders, annotated disassembly and descriptors for the saved graphics pipeline. */
+radv_dump_graphics_state(struct radv_pipeline *graphics_pipeline,
+ struct radv_pipeline *compute_pipeline, FILE *f)
+{
+ struct radv_shader_variant *compute_shader =
+ compute_pipeline ? compute_pipeline->shaders[MESA_SHADER_COMPUTE] : NULL; /* compute shader is included when a compute pipeline is present */
+
+ if (!graphics_pipeline) /* without a graphics pipeline nothing is dumped, not even the compute shader */
+ return;
+
+ radv_dump_shaders(graphics_pipeline, compute_shader, f);
+ radv_dump_annotated_shaders(graphics_pipeline, compute_shader, f);
+ radv_dump_descriptors(graphics_pipeline, f);
+}
+
+static void /* Dump shaders, annotated disassembly and descriptors for the saved compute pipeline. */
+radv_dump_compute_state(struct radv_pipeline *compute_pipeline, FILE *f)
+{
+ if (!compute_pipeline) /* nothing to dump */
+ return;
+
+ radv_dump_shaders(compute_pipeline,
+ compute_pipeline->shaders[MESA_SHADER_COMPUTE], f);
+ radv_dump_annotated_shaders(compute_pipeline,
+ compute_pipeline->shaders[MESA_SHADER_COMPUTE],
+ f);
+ radv_dump_descriptors(compute_pipeline, f);
+}
+
+static struct radv_pipeline * /* Return the graphics pipeline pointer stored in the device trace buffer. */
+radv_get_saved_graphics_pipeline(struct radv_device *device)
+{
+ uint64_t *ptr = (uint64_t *)device->trace_id_ptr; /* trace buffer viewed as 64-bit slots */
+
+ return (struct radv_pipeline *)ptr[1]; /* slot 1 is read as the graphics pipeline pointer */
+}
+
+static struct radv_pipeline * /* Return the compute pipeline pointer stored in the device trace buffer. */
+radv_get_saved_compute_pipeline(struct radv_device *device)
+{
+ uint64_t *ptr = (uint64_t *)device->trace_id_ptr; /* trace buffer viewed as 64-bit slots */
+
+ return (struct radv_pipeline *)ptr[2]; /* slot 2 is read as the compute pipeline pointer */
+}
+
+static void /* Copy the last 60 lines of dmesg output to f; silently does nothing if popen() fails. */
+radv_dump_dmesg(FILE *f)
+{
+ char line[2000];
+ FILE *p;
+
+ p = popen("dmesg | tail -n60", "r"); /* runs the pipeline through the shell and reads its stdout */
+ if (!p)
+ return;
+
+ fprintf(f, "\nLast 60 lines of dmesg:\n\n");
+ while (fgets(line, sizeof(line), p)) /* lines longer than the buffer are forwarded in pieces */
+ fputs(line, f);
+ fprintf(f, "\n");
+
+ pclose(p);
+}
+
+static void /* Print the name of every bit set in the instance's debug_flags and perftest_flags. */
+radv_dump_enabled_options(struct radv_device *device, FILE *f)
+{
+ uint64_t mask;
+
+ fprintf(f, "Enabled debug options: ");
+
+ mask = device->instance->debug_flags;
+ while (mask) { /* one iteration per set bit */
+ int i = u_bit_scan64(&mask); /* i is passed on as the option index for the name lookup */
+ fprintf(f, "%s, ", radv_get_debug_option_name(i));
+ }
+ fprintf(f, "\n");
+
+ fprintf(f, "Enabled perftest options: ");
+
+ mask = device->instance->perftest_flags;
+ while (mask) { /* same walk over the perftest mask */
+ int i = u_bit_scan64(&mask);
+ fprintf(f, "%s, ", radv_get_perftest_option_name(i));
+ }
+ fprintf(f, "\n");
+}
+
+static void /* Print chip name, device name, DRM version and, when available, kernel release and LLVM version. */
+radv_dump_device_name(struct radv_device *device, FILE *f)
+{
+ struct radeon_info *info = &device->physical_device->rad_info;
+ char llvm_string[32] = {}, kernel_version[128] = {}; /* both start empty so they can be printed unconditionally */
+ struct utsname uname_data;
+ const char *chip_name;
+
+ chip_name = device->ws->get_chip_name(device->ws);
+
+ if (uname(&uname_data) == 0) /* kernel release is appended only when uname() succeeds */
+ snprintf(kernel_version, sizeof(kernel_version),
+ " / %s", uname_data.release);
+
+ if (HAVE_LLVM > 0) { /* major is taken from bits 8..15 of HAVE_LLVM, minor from bits 0..7 */
+ snprintf(llvm_string, sizeof(llvm_string),
+ ", LLVM %i.%i.%i", (HAVE_LLVM >> 8) & 0xff,
+ HAVE_LLVM & 0xff, MESA_LLVM_VERSION_PATCH);
+ }
+
+ fprintf(f, "Device name: %s (%s DRM %i.%i.%i%s%s)\n\n",
+ chip_name, device->physical_device->name,
+ info->drm_major, info->drm_minor, info->drm_patchlevel,
+ kernel_version, llvm_string);
+}
+
+static bool /* Return true when the winsys ctx_wait_idle() call for this queue returns false. */
+radv_gpu_hang_occured(struct radv_queue *queue, enum ring_type ring)
+{
+ struct radeon_winsys *ws = queue->device->ws;
+
+ if (!ws->ctx_wait_idle(queue->hw_ctx, ring, queue->queue_idx)) /* a failed wait is treated as a hang */
+ return true;
+
+ return false;
+}
+
+void /* If the queue hung (or, with RADV_DEBUG_VM_FAULTS, a VM fault is reported), print a full report to stderr and abort(). */
+radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_winsys_cs *cs)
+{
+ struct radv_pipeline *graphics_pipeline, *compute_pipeline;
+ struct radv_device *device = queue->device;
+ enum ring_type ring;
+ uint64_t addr; /* only written by ac_vm_fault_occured(); only read when vm_fault_occurred is true */
+
+ ring = radv_queue_family_to_ring(queue->queue_family_index);
+
+ bool hang_occurred = radv_gpu_hang_occured(queue, ring);
+ bool vm_fault_occurred = false;
+ if (queue->device->instance->debug_flags & RADV_DEBUG_VM_FAULTS) /* VM fault detection is opt-in */
+ vm_fault_occurred = ac_vm_fault_occured(device->physical_device->rad_info.chip_class,
+ &device->dmesg_timestamp, &addr);
+ if (!hang_occurred && !vm_fault_occurred) /* common case: nothing wrong, return quietly */
+ return;
+
+ graphics_pipeline = radv_get_saved_graphics_pipeline(device);
+ compute_pipeline = radv_get_saved_compute_pipeline(device);
+
+ fprintf(stderr, "GPU hang report:\n\n");
+ radv_dump_device_name(device, stderr);
+
+ radv_dump_enabled_options(device, stderr);
+ radv_dump_dmesg(stderr);
+
+ if (vm_fault_occurred) {
+ fprintf(stderr, "VM fault report.\n\n");
+ fprintf(stderr, "Failing VM page: 0x%08"PRIx64"\n\n", addr);
+ }
+
+ radv_dump_debug_registers(device, stderr);
+
+ switch (ring) { /* the state dump depends on which ring the queue maps to */
+ case RING_GFX:
+ radv_dump_graphics_state(graphics_pipeline, compute_pipeline,
+ stderr);
+ break;
+ case RING_COMPUTE:
+ radv_dump_compute_state(compute_pipeline, stderr);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ radv_dump_trace(queue->device, cs);
+ abort(); /* the process is terminated after the report; this function does not return in that case */
+}
+
+void /* Write the SPIR-V binary to a temporary file and print the output of "spirv-dis" on it to fp. */
+radv_print_spirv(uint32_t *data, uint32_t size, FILE *fp)
+{
+ char path[] = "/tmp/fileXXXXXX"; /* template filled in by mkstemp() */
+ char line[2048], command[128];
+ FILE *p;
+ int fd;
+
+ /* Dump the binary into a temporary file. */
+ fd = mkstemp(path);
+ if (fd < 0)
+ return;
+
+ if (write(fd, data, size) == -1) /* NOTE(review): a short write is not detected here */
+ goto fail;
+
+ sprintf(command, "spirv-dis %s", path); /* fits: path has a fixed 15-character length */
+
+ /* Disassemble using spirv-dis if installed. */
+ p = popen(command, "r");
+ if (p) {
+ while (fgets(line, sizeof(line), p))
+ fprintf(fp, "%s", line);
+ pclose(p);
+ }
+
+fail:
+ close(fd); /* reached on both the success and the write-error path */
+ unlink(path);
+}
diff -Nru mesa-17.2.4/src/amd/vulkan/radv_debug.h mesa-17.3.3/src/amd/vulkan/radv_debug.h
--- mesa-17.2.4/src/amd/vulkan/radv_debug.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/vulkan/radv_debug.h 2018-01-18 21:30:28.000000000 +0000
@@ -24,6 +24,8 @@
#ifndef RADV_DEBUG_H
#define RADV_DEBUG_H
+#include "radv_private.h"
+
enum {
RADV_DEBUG_NO_FAST_CLEARS = 0x1,
RADV_DEBUG_NO_DCC = 0x2,
@@ -35,10 +37,24 @@
RADV_DEBUG_UNSAFE_MATH = 0x80,
RADV_DEBUG_ALL_BOS = 0x100,
RADV_DEBUG_NO_IBS = 0x200,
+ RADV_DEBUG_DUMP_SPIRV = 0x400,
+ RADV_DEBUG_VM_FAULTS = 0x800,
+ RADV_DEBUG_ZERO_VRAM = 0x1000,
+ RADV_DEBUG_SYNC_SHADERS = 0x2000,
};
enum {
- RADV_PERFTEST_BATCHCHAIN = 0x1,
+ RADV_PERFTEST_NO_BATCHCHAIN = 0x1,
RADV_PERFTEST_SISCHED = 0x2,
};
+
+bool
+radv_init_trace(struct radv_device *device);
+
+void
+radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_winsys_cs *cs);
+
+void
+radv_print_spirv(uint32_t *data, uint32_t size, FILE *fp);
+
#endif
diff -Nru mesa-17.2.4/src/amd/vulkan/radv_descriptor_set.c mesa-17.3.3/src/amd/vulkan/radv_descriptor_set.c
--- mesa-17.2.4/src/amd/vulkan/radv_descriptor_set.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/vulkan/radv_descriptor_set.c 2018-01-18 21:30:28.000000000 +0000
@@ -67,6 +67,7 @@
set_layout->binding_count = max_binding + 1;
set_layout->shader_stages = 0;
set_layout->dynamic_shader_stages = 0;
+ set_layout->has_immutable_samplers = false;
set_layout->size = 0;
memset(set_layout->binding, 0, size - sizeof(struct radv_descriptor_set_layout));
@@ -132,6 +133,7 @@
if (binding->pImmutableSamplers) {
set_layout->binding[b].immutable_samplers_offset = samplers_offset;
set_layout->binding[b].immutable_samplers_equal = true;
+ set_layout->has_immutable_samplers = true;
for (uint32_t i = 0; i < binding->descriptorCount; i++)
@@ -299,7 +301,7 @@
if (pool->current_offset + layout_size <= pool->size) {
set->bo = pool->bo;
set->mapped_ptr = (uint32_t*)(pool->mapped_ptr + pool->current_offset);
- set->va = device->ws->buffer_get_va(set->bo) + pool->current_offset;
+ set->va = radv_buffer_get_va(set->bo) + pool->current_offset;
pool->current_offset += layout_size;
list_addtail(&set->vram_list, &pool->vram_list);
} else if (!pool->host_memory_base) {
@@ -323,27 +325,29 @@
}
set->bo = pool->bo;
set->mapped_ptr = (uint32_t*)(pool->mapped_ptr + offset);
- set->va = device->ws->buffer_get_va(set->bo) + offset;
+ set->va = radv_buffer_get_va(set->bo) + offset;
list_add(&set->vram_list, prev);
} else
return vk_error(VK_ERROR_OUT_OF_POOL_MEMORY_KHR);
}
- for (unsigned i = 0; i < layout->binding_count; ++i) {
- if (!layout->binding[i].immutable_samplers_offset ||
- layout->binding[i].immutable_samplers_equal)
- continue;
-
- unsigned offset = layout->binding[i].offset / 4;
- if (layout->binding[i].type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
- offset += 16;
-
- const uint32_t *samplers = (const uint32_t*)((const char*)layout + layout->binding[i].immutable_samplers_offset);
- for (unsigned j = 0; j < layout->binding[i].array_size; ++j) {
- memcpy(set->mapped_ptr + offset, samplers + 4 * j, 16);
- offset += layout->binding[i].size / 4;
- }
+ if (layout->has_immutable_samplers) {
+ for (unsigned i = 0; i < layout->binding_count; ++i) {
+ if (!layout->binding[i].immutable_samplers_offset ||
+ layout->binding[i].immutable_samplers_equal)
+ continue;
+
+ unsigned offset = layout->binding[i].offset / 4;
+ if (layout->binding[i].type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
+ offset += 16;
+
+ const uint32_t *samplers = (const uint32_t*)((const char*)layout + layout->binding[i].immutable_samplers_offset);
+ for (unsigned j = 0; j < layout->binding[i].array_size; ++j) {
+ memcpy(set->mapped_ptr + offset, samplers + 4 * j, 16);
+ offset += layout->binding[i].size / 4;
+ }
+ }
}
*out_set = set;
return VK_SUCCESS;
@@ -556,7 +560,7 @@
const VkDescriptorBufferInfo *buffer_info)
{
RADV_FROM_HANDLE(radv_buffer, buffer, buffer_info->buffer);
- uint64_t va = device->ws->buffer_get_va(buffer->bo);
+ uint64_t va = radv_buffer_get_va(buffer->bo);
uint32_t range = buffer_info->range;
if (buffer_info->range == VK_WHOLE_SIZE)
@@ -585,7 +589,7 @@
const VkDescriptorBufferInfo *buffer_info)
{
RADV_FROM_HANDLE(radv_buffer, buffer, buffer_info->buffer);
- uint64_t va = device->ws->buffer_get_va(buffer->bo);
+ uint64_t va = radv_buffer_get_va(buffer->bo);
unsigned size = buffer_info->range;
if (buffer_info->range == VK_WHOLE_SIZE)
@@ -735,8 +739,59 @@
}
}
- if (descriptorCopyCount)
- radv_finishme("copy descriptors");
+
+ for (i = 0; i < descriptorCopyCount; i++) { /* handle each VkCopyDescriptorSet entry */
+ const VkCopyDescriptorSet *copyset = &pDescriptorCopies[i];
+ RADV_FROM_HANDLE(radv_descriptor_set, src_set,
+ copyset->srcSet);
+ RADV_FROM_HANDLE(radv_descriptor_set, dst_set,
+ copyset->dstSet);
+ const struct radv_descriptor_set_binding_layout *src_binding_layout =
+ src_set->layout->binding + copyset->srcBinding;
+ const struct radv_descriptor_set_binding_layout *dst_binding_layout =
+ dst_set->layout->binding + copyset->dstBinding;
+ uint32_t *src_ptr = src_set->mapped_ptr;
+ uint32_t *dst_ptr = dst_set->mapped_ptr;
+ struct radeon_winsys_bo **src_buffer_list = src_set->descriptors;
+ struct radeon_winsys_bo **dst_buffer_list = dst_set->descriptors;
+
+ src_ptr += src_binding_layout->offset / 4; /* binding offsets are divided by 4 to step in dwords */
+ dst_ptr += dst_binding_layout->offset / 4;
+
+ src_ptr += src_binding_layout->size * copyset->srcArrayElement / 4; /* skip to the first array element to copy */
+ dst_ptr += dst_binding_layout->size * copyset->dstArrayElement / 4;
+
+ src_buffer_list += src_binding_layout->buffer_offset;
+ src_buffer_list += copyset->srcArrayElement;
+
+ dst_buffer_list += dst_binding_layout->buffer_offset;
+ dst_buffer_list += copyset->dstArrayElement;
+
+ for (j = 0; j < copyset->descriptorCount; ++j) {
+ switch (src_binding_layout->type) {
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: { /* dynamic buffers: copy the range entry instead of descriptor memory */
+ unsigned src_idx = copyset->srcArrayElement + j;
+ unsigned dst_idx = copyset->dstArrayElement + j;
+ struct radv_descriptor_range *src_range, *dst_range;
+ src_idx += src_binding_layout->dynamic_offset_offset;
+ dst_idx += dst_binding_layout->dynamic_offset_offset;
+
+ src_range = src_set->dynamic_descriptors + src_idx;
+ dst_range = dst_set->dynamic_descriptors + dst_idx;
+ *dst_range = *src_range;
+ break;
+ }
+ default:
+ memcpy(dst_ptr, src_ptr, src_binding_layout->size);
+ }
+ src_ptr += src_binding_layout->size / 4;
+ dst_ptr += dst_binding_layout->size / 4;
+ *dst_buffer_list = *src_buffer_list; /* both pointers advance below, so indexing them by j as well would stride by two */
+ ++src_buffer_list;
+ ++dst_buffer_list;
+ }
+ }
}
void radv_UpdateDescriptorSets(
diff -Nru mesa-17.2.4/src/amd/vulkan/radv_descriptor_set.h mesa-17.3.3/src/amd/vulkan/radv_descriptor_set.h
--- mesa-17.2.4/src/amd/vulkan/radv_descriptor_set.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/vulkan/radv_descriptor_set.h 2018-01-18 21:30:28.000000000 +0000
@@ -68,6 +68,8 @@
/* Number of dynamic offsets used by this descriptor set */
uint16_t dynamic_offset_count;
+ bool has_immutable_samplers;
+
/* Bindings in this descriptor set */
struct radv_descriptor_set_binding_layout binding[0];
};
diff -Nru mesa-17.2.4/src/amd/vulkan/radv_device.c mesa-17.3.3/src/amd/vulkan/radv_device.c
--- mesa-17.2.4/src/amd/vulkan/radv_device.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/vulkan/radv_device.c 2018-01-18 21:30:28.000000000 +0000
@@ -29,7 +29,9 @@
#include
#include
#include
+#include "radv_debug.h"
#include "radv_private.h"
+#include "radv_shader.h"
#include "radv_cs.h"
#include "util/disk_cache.h"
#include "util/strtod.h"
@@ -63,204 +65,123 @@
}
static void
-radv_get_device_uuid(drmDevicePtr device, void *uuid) {
- memset(uuid, 0, VK_UUID_SIZE);
- memcpy((char*)uuid + 0, &device->businfo.pci->domain, 2);
- memcpy((char*)uuid + 2, &device->businfo.pci->bus, 1);
- memcpy((char*)uuid + 3, &device->businfo.pci->dev, 1);
- memcpy((char*)uuid + 4, &device->businfo.pci->func, 1);
-}
-
-static const VkExtensionProperties instance_extensions[] = {
- {
- .extensionName = VK_KHR_SURFACE_EXTENSION_NAME,
- .specVersion = 25,
- },
-#ifdef VK_USE_PLATFORM_XCB_KHR
- {
- .extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME,
- .specVersion = 6,
- },
-#endif
-#ifdef VK_USE_PLATFORM_XLIB_KHR
- {
- .extensionName = VK_KHR_XLIB_SURFACE_EXTENSION_NAME,
- .specVersion = 6,
- },
-#endif
-#ifdef VK_USE_PLATFORM_WAYLAND_KHR
- {
- .extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME,
- .specVersion = 6,
- },
-#endif
- {
- .extensionName = VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME,
- .specVersion = 1,
- },
- {
- .extensionName = VK_KHR_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME,
- .specVersion = 1,
- },
- {
- .extensionName = VK_KHR_EXTERNAL_SEMAPHORE_CAPABILITIES_EXTENSION_NAME,
- .specVersion = 1,
- },
-};
-
-static const VkExtensionProperties common_device_extensions[] = {
- {
- .extensionName = VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME,
- .specVersion = 1,
- },
- {
- .extensionName = VK_KHR_INCREMENTAL_PRESENT_EXTENSION_NAME,
- .specVersion = 1,
- },
- {
- .extensionName = VK_KHR_MAINTENANCE1_EXTENSION_NAME,
- .specVersion = 1,
- },
- {
- .extensionName = VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME,
- .specVersion = 1,
- },
- {
- .extensionName = VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME,
- .specVersion = 1,
- },
- {
- .extensionName = VK_KHR_SWAPCHAIN_EXTENSION_NAME,
- .specVersion = 68,
- },
- {
- .extensionName = VK_AMD_DRAW_INDIRECT_COUNT_EXTENSION_NAME,
- .specVersion = 1,
- },
- {
- .extensionName = VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME,
- .specVersion = 1,
- },
- {
- .extensionName = VK_KHR_GET_MEMORY_REQUIREMENTS_2_EXTENSION_NAME,
- .specVersion = 1,
- },
- {
- .extensionName = VK_KHR_DEDICATED_ALLOCATION_EXTENSION_NAME,
- .specVersion = 1,
- },
- {
- .extensionName = VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME,
- .specVersion = 1,
- },
- {
- .extensionName = VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME,
- .specVersion = 1,
- },
- {
- .extensionName = VK_KHR_STORAGE_BUFFER_STORAGE_CLASS_EXTENSION_NAME,
- .specVersion = 1,
- },
- {
- .extensionName = VK_KHR_VARIABLE_POINTERS_EXTENSION_NAME,
- .specVersion = 1,
- },
-};
-static const VkExtensionProperties ext_sema_device_extensions[] = {
- {
- .extensionName = VK_KHR_EXTERNAL_SEMAPHORE_EXTENSION_NAME,
- .specVersion = 1,
- },
- {
- .extensionName = VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME,
- .specVersion = 1,
- },
-};
-
-static VkResult
-radv_extensions_register(struct radv_instance *instance,
- struct radv_extensions *extensions,
- const VkExtensionProperties *new_ext,
- uint32_t num_ext)
+radv_get_driver_uuid(void *uuid)
{
- size_t new_size;
- VkExtensionProperties *new_ptr;
-
- assert(new_ext && num_ext > 0);
-
- if (!new_ext)
- return VK_ERROR_INITIALIZATION_FAILED;
-
- new_size = (extensions->num_ext + num_ext) * sizeof(VkExtensionProperties);
- new_ptr = vk_realloc(&instance->alloc, extensions->ext_array,
- new_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
-
- /* Old array continues to be valid, update nothing */
- if (!new_ptr)
- return VK_ERROR_OUT_OF_HOST_MEMORY;
-
- memcpy(&new_ptr[extensions->num_ext], new_ext,
- num_ext * sizeof(VkExtensionProperties));
- extensions->ext_array = new_ptr;
- extensions->num_ext += num_ext;
-
- return VK_SUCCESS;
+ ac_compute_driver_uuid(uuid, VK_UUID_SIZE);
}
static void
-radv_extensions_finish(struct radv_instance *instance,
- struct radv_extensions *extensions)
+radv_get_device_uuid(struct radeon_info *info, void *uuid)
{
- assert(extensions);
-
- if (!extensions)
- radv_loge("Attemted to free invalid extension struct\n");
-
- if (extensions->ext_array)
- vk_free(&instance->alloc, extensions->ext_array);
+ ac_compute_device_uuid(info, uuid, VK_UUID_SIZE);
}
-static bool
-is_extension_enabled(const VkExtensionProperties *extensions,
- size_t num_ext,
- const char *name)
+static void
+radv_get_device_name(enum radeon_family family, char *name, size_t name_len)
{
- assert(extensions && name);
+ const char *chip_string;
+ char llvm_string[32] = {};
- for (uint32_t i = 0; i < num_ext; i++) {
- if (strcmp(name, extensions[i].extensionName) == 0)
- return true;
+ switch (family) {
+ case CHIP_TAHITI: chip_string = "AMD RADV TAHITI"; break;
+ case CHIP_PITCAIRN: chip_string = "AMD RADV PITCAIRN"; break;
+ case CHIP_VERDE: chip_string = "AMD RADV CAPE VERDE"; break;
+ case CHIP_OLAND: chip_string = "AMD RADV OLAND"; break;
+ case CHIP_HAINAN: chip_string = "AMD RADV HAINAN"; break;
+ case CHIP_BONAIRE: chip_string = "AMD RADV BONAIRE"; break;
+ case CHIP_KAVERI: chip_string = "AMD RADV KAVERI"; break;
+ case CHIP_KABINI: chip_string = "AMD RADV KABINI"; break;
+ case CHIP_HAWAII: chip_string = "AMD RADV HAWAII"; break;
+ case CHIP_MULLINS: chip_string = "AMD RADV MULLINS"; break;
+ case CHIP_TONGA: chip_string = "AMD RADV TONGA"; break;
+ case CHIP_ICELAND: chip_string = "AMD RADV ICELAND"; break;
+ case CHIP_CARRIZO: chip_string = "AMD RADV CARRIZO"; break;
+ case CHIP_FIJI: chip_string = "AMD RADV FIJI"; break;
+ case CHIP_POLARIS10: chip_string = "AMD RADV POLARIS10"; break;
+ case CHIP_POLARIS11: chip_string = "AMD RADV POLARIS11"; break;
+ case CHIP_POLARIS12: chip_string = "AMD RADV POLARIS12"; break;
+ case CHIP_STONEY: chip_string = "AMD RADV STONEY"; break;
+ case CHIP_VEGA10: chip_string = "AMD RADV VEGA"; break;
+ case CHIP_RAVEN: chip_string = "AMD RADV RAVEN"; break;
+ default: chip_string = "AMD RADV unknown"; break;
+ }
+
+ if (HAVE_LLVM > 0) {
+ snprintf(llvm_string, sizeof(llvm_string),
+ " (LLVM %i.%i.%i)", (HAVE_LLVM >> 8) & 0xff,
+ HAVE_LLVM & 0xff, MESA_LLVM_VERSION_PATCH);
}
- return false;
+ snprintf(name, name_len, "%s%s", chip_string, llvm_string);
}
-static const char *
-get_chip_name(enum radeon_family family)
+static void
+radv_physical_device_init_mem_types(struct radv_physical_device *device)
{
- switch (family) {
- case CHIP_TAHITI: return "AMD RADV TAHITI";
- case CHIP_PITCAIRN: return "AMD RADV PITCAIRN";
- case CHIP_VERDE: return "AMD RADV CAPE VERDE";
- case CHIP_OLAND: return "AMD RADV OLAND";
- case CHIP_HAINAN: return "AMD RADV HAINAN";
- case CHIP_BONAIRE: return "AMD RADV BONAIRE";
- case CHIP_KAVERI: return "AMD RADV KAVERI";
- case CHIP_KABINI: return "AMD RADV KABINI";
- case CHIP_HAWAII: return "AMD RADV HAWAII";
- case CHIP_MULLINS: return "AMD RADV MULLINS";
- case CHIP_TONGA: return "AMD RADV TONGA";
- case CHIP_ICELAND: return "AMD RADV ICELAND";
- case CHIP_CARRIZO: return "AMD RADV CARRIZO";
- case CHIP_FIJI: return "AMD RADV FIJI";
- case CHIP_POLARIS10: return "AMD RADV POLARIS10";
- case CHIP_POLARIS11: return "AMD RADV POLARIS11";
- case CHIP_POLARIS12: return "AMD RADV POLARIS12";
- case CHIP_STONEY: return "AMD RADV STONEY";
- case CHIP_VEGA10: return "AMD RADV VEGA";
- case CHIP_RAVEN: return "AMD RADV RAVEN";
- default: return "AMD RADV unknown";
+ STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);
+ uint64_t visible_vram_size = MIN2(device->rad_info.vram_size,
+ device->rad_info.vram_vis_size);
+
+ int vram_index = -1, visible_vram_index = -1, gart_index = -1;
+ device->memory_properties.memoryHeapCount = 0;
+ if (device->rad_info.vram_size - visible_vram_size > 0) {
+ vram_index = device->memory_properties.memoryHeapCount++;
+ device->memory_properties.memoryHeaps[vram_index] = (VkMemoryHeap) {
+ .size = device->rad_info.vram_size - visible_vram_size,
+ .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
+ };
+ }
+ if (visible_vram_size) {
+ visible_vram_index = device->memory_properties.memoryHeapCount++;
+ device->memory_properties.memoryHeaps[visible_vram_index] = (VkMemoryHeap) {
+ .size = visible_vram_size,
+ .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
+ };
+ }
+ if (device->rad_info.gart_size > 0) {
+ gart_index = device->memory_properties.memoryHeapCount++;
+ device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap) {
+ .size = device->rad_info.gart_size,
+ .flags = 0,
+ };
}
+
+ STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);
+ unsigned type_count = 0;
+ if (vram_index >= 0) {
+ device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM;
+ device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
+ .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
+ .heapIndex = vram_index,
+ };
+ }
+ if (gart_index >= 0) {
+ device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_WRITE_COMBINE;
+ device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
+ .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+ VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+ .heapIndex = gart_index,
+ };
+ }
+ if (visible_vram_index >= 0) {
+ device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM_CPU_ACCESS;
+ device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
+ .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+ VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+ .heapIndex = visible_vram_index,
+ };
+ }
+ if (gart_index >= 0) {
+ device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_CACHED;
+ device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
+ .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+ VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
+ VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
+ .heapIndex = gart_index,
+ };
+ }
+ device->memory_properties.memoryTypeCount = type_count;
}
static VkResult
@@ -311,7 +232,9 @@
goto fail;
}
- if (radv_device_get_cache_uuid(device->rad_info.family, device->uuid)) {
+ radv_get_device_name(device->rad_info.family, device->name, sizeof(device->name));
+
+ if (radv_device_get_cache_uuid(device->rad_info.family, device->cache_uuid)) {
radv_finish_wsi(device);
device->ws->destroy(device->ws);
result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
@@ -319,26 +242,22 @@
goto fail;
}
- result = radv_extensions_register(instance,
- &device->extensions,
- common_device_extensions,
- ARRAY_SIZE(common_device_extensions));
- if (result != VK_SUCCESS)
- goto fail;
+ /* These flags affect shader compilation. */
+ uint64_t shader_env_flags =
+ (device->instance->perftest_flags & RADV_PERFTEST_SISCHED ? 0x1 : 0) |
+ (device->instance->debug_flags & RADV_DEBUG_UNSAFE_MATH ? 0x2 : 0);
- if (device->rad_info.has_syncobj) {
- result = radv_extensions_register(instance,
- &device->extensions,
- ext_sema_device_extensions,
- ARRAY_SIZE(ext_sema_device_extensions));
- if (result != VK_SUCCESS)
- goto fail;
- }
+ /* The gpu id is already embedded in the uuid so we just pass "radv"
+ * when creating the cache.
+ */
+ char buf[VK_UUID_SIZE * 2 + 1];
+ disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2);
+ device->disk_cache = disk_cache_create(device->name, buf, shader_env_flags);
fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
- device->name = get_chip_name(device->rad_info.family);
- radv_get_device_uuid(drm_device, device->device_uuid);
+ radv_get_driver_uuid(&device->device_uuid);
+ radv_get_device_uuid(&device->rad_info, &device->device_uuid);
if (device->rad_info.family == CHIP_STONEY ||
device->rad_info.chip_class >= GFX9) {
@@ -346,6 +265,12 @@
device->rbplus_allowed = device->rad_info.family == CHIP_STONEY;
}
+ /* The mere presence of CLEAR_STATE in the IB causes random GPU hangs
+ * on SI.
+ */
+ device->has_clear_state = device->rad_info.chip_class >= CIK;
+
+ radv_physical_device_init_mem_types(device);
return VK_SUCCESS;
fail:
@@ -356,9 +281,9 @@
static void
radv_physical_device_finish(struct radv_physical_device *device)
{
- radv_extensions_finish(device->instance, &device->extensions);
radv_finish_wsi(device);
device->ws->destroy(device->ws);
+ disk_cache_destroy(device->disk_cache); /* tear down the handle assigned from disk_cache_create() */
close(device->local_fd);
}
@@ -400,15 +325,33 @@
{"unsafemath", RADV_DEBUG_UNSAFE_MATH},
{"allbos", RADV_DEBUG_ALL_BOS},
{"noibs", RADV_DEBUG_NO_IBS},
+ {"spirv", RADV_DEBUG_DUMP_SPIRV},
+ {"vmfaults", RADV_DEBUG_VM_FAULTS},
+ {"zerovram", RADV_DEBUG_ZERO_VRAM},
+ {"syncshaders", RADV_DEBUG_SYNC_SHADERS},
{NULL, 0}
};
+const char *
+radv_get_debug_option_name(int id)
+{
+ assert(id < ARRAY_SIZE(radv_debug_options) - 1); /* -1 excludes the {NULL, 0} terminator entry */
+ return radv_debug_options[id].string; /* id is used as a table index, not as a flag mask */
+}
+
static const struct debug_control radv_perftest_options[] = {
- {"batchchain", RADV_PERFTEST_BATCHCHAIN},
+ {"nobatchchain", RADV_PERFTEST_NO_BATCHCHAIN},
{"sisched", RADV_PERFTEST_SISCHED},
{NULL, 0}
};
+const char *
+radv_get_perftest_option_name(int id)
+{
+ assert(id < ARRAY_SIZE(radv_perftest_options) - 1); /* bound by the table indexed below; -1 excludes the {NULL, 0} terminator */
+ return radv_perftest_options[id].string; /* id is used as a table index, not as a flag mask */
+}
+
VkResult radv_CreateInstance(
const VkInstanceCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
@@ -436,9 +379,8 @@
}
for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
- if (!is_extension_enabled(instance_extensions,
- ARRAY_SIZE(instance_extensions),
- pCreateInfo->ppEnabledExtensionNames[i]))
+ const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
+ if (!radv_instance_extension_supported(ext_name))
return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
}
@@ -511,7 +453,7 @@
for (unsigned i = 0; i < (unsigned)max_devices; i++) {
if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
devices[i]->bustype == DRM_BUS_PCI &&
- devices[i]->deviceinfo.pci->vendor_id == 0x1002) {
+ devices[i]->deviceinfo.pci->vendor_id == ATI_VENDOR_ID) {
result = radv_physical_device_init(instance->physicalDevices +
instance->physicalDeviceCount,
@@ -559,8 +501,6 @@
VkPhysicalDevice physicalDevice,
VkPhysicalDeviceFeatures* pFeatures)
{
- RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
- bool is_gfx9 = pdevice->rad_info.chip_class >= GFX9;
memset(pFeatures, 0, sizeof(*pFeatures));
*pFeatures = (VkPhysicalDeviceFeatures) {
@@ -568,8 +508,8 @@
.fullDrawIndexUint32 = true,
.imageCubeArray = true,
.independentBlend = true,
- .geometryShader = !is_gfx9,
- .tessellationShader = !is_gfx9,
+ .geometryShader = true,
+ .tessellationShader = true,
.sampleRateShading = true,
.dualSrcBlend = true,
.logicOp = true,
@@ -624,6 +564,13 @@
features->variablePointers = false;
break;
}
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES_KHX: {
+ VkPhysicalDeviceMultiviewFeaturesKHX *features = (VkPhysicalDeviceMultiviewFeaturesKHX*)ext;
+ features->multiview = true;
+ features->multiviewGeometryShader = true;
+ features->multiviewTessellationShader = true;
+ break;
+ }
default:
break;
}
@@ -765,9 +712,9 @@
};
*pProperties = (VkPhysicalDeviceProperties) {
- .apiVersion = VK_MAKE_VERSION(1, 0, 42),
+ .apiVersion = radv_physical_device_api_version(pdevice),
.driverVersion = vk_get_driver_version(),
- .vendorID = 0x1002,
+ .vendorID = ATI_VENDOR_ID,
.deviceID = pdevice->rad_info.pci_id,
.deviceType = pdevice->rad_info.has_dedicated_vram ? VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU : VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
.limits = limits,
@@ -775,7 +722,7 @@
};
strcpy(pProperties->deviceName, pdevice->name);
- memcpy(pProperties->pipelineCacheUUID, pdevice->uuid, VK_UUID_SIZE);
+ memcpy(pProperties->pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);
}
void radv_GetPhysicalDeviceProperties2KHR(
@@ -795,11 +742,23 @@
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHR: {
VkPhysicalDeviceIDPropertiesKHR *properties = (VkPhysicalDeviceIDPropertiesKHR*)ext;
- radv_device_get_cache_uuid(0, properties->driverUUID);
+ memcpy(properties->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
memcpy(properties->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
properties->deviceLUIDValid = false;
break;
}
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES_KHX: {
+ VkPhysicalDeviceMultiviewPropertiesKHX *properties = (VkPhysicalDeviceMultiviewPropertiesKHX*)ext;
+ properties->maxMultiviewViewCount = MAX_VIEWS;
+ properties->maxMultiviewInstanceIndex = INT_MAX;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES_KHR: {
+ VkPhysicalDevicePointClippingPropertiesKHR *properties =
+ (VkPhysicalDevicePointClippingPropertiesKHR*)ext;
+ properties->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES_KHR;
+ break;
+ }
default:
break;
}
@@ -902,49 +861,7 @@
{
RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
- STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);
-
- pMemoryProperties->memoryTypeCount = RADV_MEM_TYPE_COUNT;
- pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM] = (VkMemoryType) {
- .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
- .heapIndex = RADV_MEM_HEAP_VRAM,
- };
- pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_WRITE_COMBINE] = (VkMemoryType) {
- .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
- VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
- .heapIndex = RADV_MEM_HEAP_GTT,
- };
- pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM_CPU_ACCESS] = (VkMemoryType) {
- .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
- VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
- VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
- .heapIndex = RADV_MEM_HEAP_VRAM_CPU_ACCESS,
- };
- pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_CACHED] = (VkMemoryType) {
- .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
- VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
- VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
- .heapIndex = RADV_MEM_HEAP_GTT,
- };
-
- STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);
- uint64_t visible_vram_size = MIN2(physical_device->rad_info.vram_size,
- physical_device->rad_info.vram_vis_size);
-
- pMemoryProperties->memoryHeapCount = RADV_MEM_HEAP_COUNT;
- pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM] = (VkMemoryHeap) {
- .size = physical_device->rad_info.vram_size -
- visible_vram_size,
- .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
- };
- pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = (VkMemoryHeap) {
- .size = visible_vram_size,
- .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
- };
- pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_GTT] = (VkMemoryHeap) {
- .size = physical_device->rad_info.gart_size,
- .flags = 0,
- };
+ *pMemoryProperties = physical_device->memory_properties;
}
void radv_GetPhysicalDeviceMemoryProperties2KHR(
@@ -955,16 +872,40 @@
&pMemoryProperties->memoryProperties);
}
+static enum radeon_ctx_priority
+radv_get_queue_global_priority(const VkDeviceQueueGlobalPriorityCreateInfoEXT *pObj)
+{
+ /* Map the requested Vulkan global priority to a winsys context priority; default to MEDIUM when a specific global priority isn't requested */
+ if (!pObj)
+ return RADEON_CTX_PRIORITY_MEDIUM;
+
+ switch(pObj->globalPriority) {
+ case VK_QUEUE_GLOBAL_PRIORITY_REALTIME:
+ return RADEON_CTX_PRIORITY_REALTIME;
+ case VK_QUEUE_GLOBAL_PRIORITY_HIGH:
+ return RADEON_CTX_PRIORITY_HIGH;
+ case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM:
+ return RADEON_CTX_PRIORITY_MEDIUM;
+ case VK_QUEUE_GLOBAL_PRIORITY_LOW:
+ return RADEON_CTX_PRIORITY_LOW;
+ default: /* any value other than the four handled above */
+ unreachable("Illegal global priority value");
+ return RADEON_CTX_PRIORITY_INVALID; /* fallback return placed after unreachable() */
+ }
+}
+
static int
radv_queue_init(struct radv_device *device, struct radv_queue *queue,
- int queue_family_index, int idx)
+ int queue_family_index, int idx,
+ const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority)
{
queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
queue->device = device;
queue->queue_family_index = queue_family_index;
queue->queue_idx = idx;
+ queue->priority = radv_get_queue_global_priority(global_priority);
- queue->hw_ctx = device->ws->ctx_create(device->ws);
+ queue->hw_ctx = device->ws->ctx_create(device->ws, queue->priority);
if (!queue->hw_ctx)
return VK_ERROR_OUT_OF_HOST_MEMORY;
@@ -977,6 +918,8 @@
if (queue->hw_ctx)
queue->device->ws->ctx_destroy(queue->hw_ctx);
+ if (queue->initial_full_flush_preamble_cs)
+ queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
if (queue->initial_preamble_cs)
queue->device->ws->cs_destroy(queue->initial_preamble_cs);
if (queue->continue_preamble_cs)
@@ -1041,9 +984,8 @@
struct radv_device *device;
for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
- if (!is_extension_enabled(physical_device->extensions.ext_array,
- physical_device->extensions.num_ext,
- pCreateInfo->ppEnabledExtensionNames[i]))
+ const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
+ if (!radv_physical_device_extension_supported(physical_device, ext_name))
return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
}
@@ -1072,17 +1014,22 @@
device->instance = physical_device->instance;
device->physical_device = physical_device;
- device->debug_flags = device->instance->debug_flags;
-
device->ws = physical_device->ws;
if (pAllocator)
device->alloc = *pAllocator;
else
device->alloc = physical_device->instance->alloc;
+ mtx_init(&device->shader_slab_mutex, mtx_plain);
+ list_inithead(&device->shader_slabs);
+
for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
uint32_t qfi = queue_create->queueFamilyIndex;
+ const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority =
+ vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT);
+
+ assert(!global_priority || device->physical_device->rad_info.has_ctx_priority);
device->queues[qfi] = vk_alloc(&device->alloc,
queue_create->queueCount * sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
@@ -1096,7 +1043,7 @@
device->queue_count[qfi] = queue_create->queueCount;
for (unsigned q = 0; q < queue_create->queueCount; q++) {
- result = radv_queue_init(device, &device->queues[qfi][q], qfi, q);
+ result = radv_queue_init(device, &device->queues[qfi][q], qfi, q, global_priority);
if (result != VK_SUCCESS)
goto fail;
}
@@ -1132,6 +1079,11 @@
device->physical_device->rad_info.chip_class >= VI &&
device->physical_device->rad_info.max_se >= 2;
+ if (getenv("RADV_TRACE_FILE")) {
+ if (!radv_init_trace(device))
+ goto fail;
+ }
+
result = radv_device_init_meta(device);
if (result != VK_SUCCESS)
goto fail;
@@ -1152,52 +1104,6 @@
break;
}
device->ws->cs_finalize(device->empty_cs[family]);
-
- device->flush_cs[family] = device->ws->cs_create(device->ws, family);
- switch (family) {
- case RADV_QUEUE_GENERAL:
- case RADV_QUEUE_COMPUTE:
- si_cs_emit_cache_flush(device->flush_cs[family],
- false,
- device->physical_device->rad_info.chip_class,
- NULL, 0,
- family == RADV_QUEUE_COMPUTE && device->physical_device->rad_info.chip_class >= CIK,
- RADV_CMD_FLAG_INV_ICACHE |
- RADV_CMD_FLAG_INV_SMEM_L1 |
- RADV_CMD_FLAG_INV_VMEM_L1 |
- RADV_CMD_FLAG_INV_GLOBAL_L2);
- break;
- }
- device->ws->cs_finalize(device->flush_cs[family]);
-
- device->flush_shader_cs[family] = device->ws->cs_create(device->ws, family);
- switch (family) {
- case RADV_QUEUE_GENERAL:
- case RADV_QUEUE_COMPUTE:
- si_cs_emit_cache_flush(device->flush_shader_cs[family],
- false,
- device->physical_device->rad_info.chip_class,
- NULL, 0,
- family == RADV_QUEUE_COMPUTE && device->physical_device->rad_info.chip_class >= CIK,
- family == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH) |
- RADV_CMD_FLAG_INV_ICACHE |
- RADV_CMD_FLAG_INV_SMEM_L1 |
- RADV_CMD_FLAG_INV_VMEM_L1 |
- RADV_CMD_FLAG_INV_GLOBAL_L2);
- break;
- }
- device->ws->cs_finalize(device->flush_shader_cs[family]);
- }
-
- if (getenv("RADV_TRACE_FILE")) {
- device->trace_bo = device->ws->buffer_create(device->ws, 4096, 8,
- RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS);
- if (!device->trace_bo)
- goto fail;
-
- device->trace_id_ptr = device->ws->buffer_map(device->trace_bo);
- if (!device->trace_id_ptr)
- goto fail;
}
if (device->physical_device->rad_info.chip_class >= CIK)
@@ -1260,58 +1166,15 @@
vk_free(&device->alloc, device->queues[i]);
if (device->empty_cs[i])
device->ws->cs_destroy(device->empty_cs[i]);
- if (device->flush_cs[i])
- device->ws->cs_destroy(device->flush_cs[i]);
- if (device->flush_shader_cs[i])
- device->ws->cs_destroy(device->flush_shader_cs[i]);
}
radv_device_finish_meta(device);
VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache);
radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
- vk_free(&device->alloc, device);
-}
+ radv_destroy_shader_slabs(device);
-VkResult radv_EnumerateInstanceExtensionProperties(
- const char* pLayerName,
- uint32_t* pPropertyCount,
- VkExtensionProperties* pProperties)
-{
- if (pProperties == NULL) {
- *pPropertyCount = ARRAY_SIZE(instance_extensions);
- return VK_SUCCESS;
- }
-
- *pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(instance_extensions));
- typed_memcpy(pProperties, instance_extensions, *pPropertyCount);
-
- if (*pPropertyCount < ARRAY_SIZE(instance_extensions))
- return VK_INCOMPLETE;
-
- return VK_SUCCESS;
-}
-
-VkResult radv_EnumerateDeviceExtensionProperties(
- VkPhysicalDevice physicalDevice,
- const char* pLayerName,
- uint32_t* pPropertyCount,
- VkExtensionProperties* pProperties)
-{
- RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
-
- if (pProperties == NULL) {
- *pPropertyCount = pdevice->extensions.num_ext;
- return VK_SUCCESS;
- }
-
- *pPropertyCount = MIN2(*pPropertyCount, pdevice->extensions.num_ext);
- typed_memcpy(pProperties, pdevice->extensions.ext_array, *pPropertyCount);
-
- if (*pPropertyCount < pdevice->extensions.num_ext)
- return VK_INCOMPLETE;
-
- return VK_SUCCESS;
+ vk_free(&device->alloc, device);
}
VkResult radv_EnumerateInstanceLayerProperties(
@@ -1352,21 +1215,6 @@
*pQueue = radv_queue_to_handle(&device->queues[queueFamilyIndex][queueIndex]);
}
-static void radv_dump_trace(struct radv_device *device,
- struct radeon_winsys_cs *cs)
-{
- const char *filename = getenv("RADV_TRACE_FILE");
- FILE *f = fopen(filename, "w");
- if (!f) {
- fprintf(stderr, "Failed to write trace dump to %s\n", filename);
- return;
- }
-
- fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr);
- device->ws->cs_dump(cs, f, *device->trace_id_ptr);
- fclose(f);
-}
-
static void
fill_geom_tess_rings(struct radv_queue *queue,
uint32_t *map,
@@ -1385,13 +1233,13 @@
uint32_t *desc = &map[4];
if (esgs_ring_bo)
- esgs_va = queue->device->ws->buffer_get_va(esgs_ring_bo);
+ esgs_va = radv_buffer_get_va(esgs_ring_bo);
if (gsvs_ring_bo)
- gsvs_va = queue->device->ws->buffer_get_va(gsvs_ring_bo);
+ gsvs_va = radv_buffer_get_va(gsvs_ring_bo);
if (tess_factor_ring_bo)
- tess_factor_va = queue->device->ws->buffer_get_va(tess_factor_ring_bo);
+ tess_factor_va = radv_buffer_get_va(tess_factor_ring_bo);
if (tess_offchip_ring_bo)
- tess_offchip_va = queue->device->ws->buffer_get_va(tess_offchip_ring_bo);
+ tess_offchip_va = radv_buffer_get_va(tess_offchip_ring_bo);
/* stride 0, num records - size, add tid, swizzle, elsize4,
index stride 64 */
@@ -1569,6 +1417,7 @@
uint32_t gsvs_ring_size,
bool needs_tess_rings,
bool needs_sample_positions,
+ struct radeon_winsys_cs **initial_full_flush_preamble_cs,
struct radeon_winsys_cs **initial_preamble_cs,
struct radeon_winsys_cs **continue_preamble_cs)
{
@@ -1579,7 +1428,7 @@
struct radeon_winsys_bo *gsvs_ring_bo = NULL;
struct radeon_winsys_bo *tess_factor_ring_bo = NULL;
struct radeon_winsys_bo *tess_offchip_ring_bo = NULL;
- struct radeon_winsys_cs *dest_cs[2] = {0};
+ struct radeon_winsys_cs *dest_cs[3] = {0};
bool add_tess_rings = false, add_sample_positions = false;
unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0;
unsigned max_offchip_buffers;
@@ -1604,6 +1453,7 @@
gsvs_ring_size <= queue->gsvs_ring_size &&
!add_tess_rings && !add_sample_positions &&
queue->initial_preamble_cs) {
+ *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
*initial_preamble_cs = queue->initial_preamble_cs;
*continue_preamble_cs = queue->continue_preamble_cs;
if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
@@ -1705,7 +1555,7 @@
} else
descriptor_bo = queue->descriptor_bo;
- for(int i = 0; i < 2; ++i) {
+ for(int i = 0; i < 3; ++i) {
struct radeon_winsys_cs *cs = NULL;
cs = queue->device->ws->cs_create(queue->device->ws,
queue->queue_family_index ? RING_COMPUTE : RING_GFX);
@@ -1736,7 +1586,7 @@
uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);
if (scratch_bo) {
- uint64_t scratch_va = queue->device->ws->buffer_get_va(scratch_bo);
+ uint64_t scratch_va = radv_buffer_get_va(scratch_bo);
uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
S_008F04_SWIZZLE_ENABLE(1);
map[0] = scratch_va;
@@ -1774,7 +1624,7 @@
}
if (tess_factor_ring_bo) {
- uint64_t tf_va = queue->device->ws->buffer_get_va(tess_factor_ring_bo);
+ uint64_t tf_va = radv_buffer_get_va(tess_factor_ring_bo);
if (queue->device->physical_device->rad_info.chip_class >= CIK) {
radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE,
S_030938_SIZE(tess_factor_ring_size / 4));
@@ -1796,24 +1646,36 @@
}
if (descriptor_bo) {
- uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
- R_00B130_SPI_SHADER_USER_DATA_VS_0,
- R_00B230_SPI_SHADER_USER_DATA_GS_0,
- R_00B330_SPI_SHADER_USER_DATA_ES_0,
- R_00B430_SPI_SHADER_USER_DATA_HS_0,
- R_00B530_SPI_SHADER_USER_DATA_LS_0};
-
- uint64_t va = queue->device->ws->buffer_get_va(descriptor_bo);
-
- for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
- radeon_set_sh_reg_seq(cs, regs[i], 2);
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
+ uint64_t va = radv_buffer_get_va(descriptor_bo);
+ if (queue->device->physical_device->rad_info.chip_class >= GFX9) {
+ uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
+ R_00B130_SPI_SHADER_USER_DATA_VS_0,
+ R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
+ R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
+
+ for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
+ radeon_set_sh_reg_seq(cs, regs[i], 2);
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ }
+ } else {
+ uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
+ R_00B130_SPI_SHADER_USER_DATA_VS_0,
+ R_00B230_SPI_SHADER_USER_DATA_GS_0,
+ R_00B330_SPI_SHADER_USER_DATA_ES_0,
+ R_00B430_SPI_SHADER_USER_DATA_HS_0,
+ R_00B530_SPI_SHADER_USER_DATA_LS_0};
+
+ for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
+ radeon_set_sh_reg_seq(cs, regs[i], 2);
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ }
}
}
if (compute_scratch_bo) {
- uint64_t scratch_va = queue->device->ws->buffer_get_va(compute_scratch_bo);
+ uint64_t scratch_va = radv_buffer_get_va(compute_scratch_bo);
uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
S_008F04_SWIZZLE_ENABLE(1);
@@ -1824,7 +1686,19 @@
radeon_emit(cs, rsrc1);
}
- if (!i) {
+ if (i == 0) {
+ si_cs_emit_cache_flush(cs,
+ false,
+ queue->device->physical_device->rad_info.chip_class,
+ NULL, 0,
+ queue->queue_family_index == RING_COMPUTE &&
+ queue->device->physical_device->rad_info.chip_class >= CIK,
+ (queue->queue_family_index == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
+ RADV_CMD_FLAG_INV_ICACHE |
+ RADV_CMD_FLAG_INV_SMEM_L1 |
+ RADV_CMD_FLAG_INV_VMEM_L1 |
+ RADV_CMD_FLAG_INV_GLOBAL_L2);
+ } else if (i == 1) {
si_cs_emit_cache_flush(cs,
false,
queue->device->physical_device->rad_info.chip_class,
@@ -1841,14 +1715,18 @@
goto fail;
}
+ if (queue->initial_full_flush_preamble_cs)
+ queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
+
if (queue->initial_preamble_cs)
queue->device->ws->cs_destroy(queue->initial_preamble_cs);
if (queue->continue_preamble_cs)
queue->device->ws->cs_destroy(queue->continue_preamble_cs);
- queue->initial_preamble_cs = dest_cs[0];
- queue->continue_preamble_cs = dest_cs[1];
+ queue->initial_full_flush_preamble_cs = dest_cs[0];
+ queue->initial_preamble_cs = dest_cs[1];
+ queue->continue_preamble_cs = dest_cs[2];
if (scratch_bo != queue->scratch_bo) {
if (queue->scratch_bo)
@@ -1897,6 +1775,7 @@
if (add_sample_positions)
queue->has_sample_positions = true;
+ *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
*initial_preamble_cs = queue->initial_preamble_cs;
*continue_preamble_cs = queue->continue_preamble_cs;
if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
@@ -1960,10 +1839,6 @@
if (sem->temp_syncobj) {
counts->syncobj[syncobj_idx++] = sem->temp_syncobj;
- if (reset_temp) {
- /* after we wait on a temp import - drop it */
- sem->temp_syncobj = 0;
- }
}
else if (sem->syncobj)
counts->syncobj[syncobj_idx++] = sem->syncobj;
@@ -1984,6 +1859,21 @@
free(sem_info->signal.sem);
}
+
+static void radv_free_temp_syncobjs(struct radv_device *device,
+ int num_sems,
+ const VkSemaphore *sems)
+{
+ for (uint32_t i = 0; i < num_sems; i++) { /* visit each of the num_sems handles in sems */
+ RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
+
+ if (sem->temp_syncobj) { /* semaphores without a temporary syncobj are left untouched */
+ device->ws->destroy_syncobj(device->ws, sem->temp_syncobj);
+ sem->temp_syncobj = 0; /* 0 is the "no temporary syncobj" value tested above */
+ }
+ }
+}
+
VkResult radv_alloc_sem_info(struct radv_winsys_sem_info *sem_info,
int num_wait_sems,
const VkSemaphore *wait_sems,
@@ -2021,7 +1911,7 @@
uint32_t scratch_size = 0;
uint32_t compute_scratch_size = 0;
uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
- struct radeon_winsys_cs *initial_preamble_cs = NULL, *continue_preamble_cs = NULL;
+ struct radeon_winsys_cs *initial_preamble_cs = NULL, *initial_flush_preamble_cs = NULL, *continue_preamble_cs = NULL;
VkResult result;
bool fence_emitted = false;
bool tess_rings_needed = false;
@@ -2046,7 +1936,7 @@
result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size,
esgs_ring_size, gsvs_ring_size, tess_rings_needed,
- sample_positions_needed,
+ sample_positions_needed, &initial_flush_preamble_cs,
&initial_preamble_cs, &continue_preamble_cs);
if (result != VK_SUCCESS)
return result;
@@ -2054,7 +1944,7 @@
for (uint32_t i = 0; i < submitCount; i++) {
struct radeon_winsys_cs **cs_array;
bool do_flush = !i || pSubmits[i].pWaitDstStageMask;
- bool can_patch = !do_flush;
+ bool can_patch = true;
uint32_t advance;
struct radv_winsys_sem_info sem_info;
@@ -2084,35 +1974,31 @@
}
cs_array = malloc(sizeof(struct radeon_winsys_cs *) *
- (pSubmits[i].commandBufferCount + do_flush));
-
- if(do_flush)
- cs_array[0] = pSubmits[i].waitSemaphoreCount ?
- queue->device->flush_shader_cs[queue->queue_family_index] :
- queue->device->flush_cs[queue->queue_family_index];
+ (pSubmits[i].commandBufferCount));
for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
pSubmits[i].pCommandBuffers[j]);
assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
- cs_array[j + do_flush] = cmd_buffer->cs;
+ cs_array[j] = cmd_buffer->cs;
if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
can_patch = false;
}
- for (uint32_t j = 0; j < pSubmits[i].commandBufferCount + do_flush; j += advance) {
+ for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j += advance) {
+ struct radeon_winsys_cs *initial_preamble = (do_flush && !j) ? initial_flush_preamble_cs : initial_preamble_cs;
advance = MIN2(max_cs_submission,
- pSubmits[i].commandBufferCount + do_flush - j);
+ pSubmits[i].commandBufferCount - j);
if (queue->device->trace_bo)
*queue->device->trace_id_ptr = 0;
sem_info.cs_emit_wait = j == 0;
- sem_info.cs_emit_signal = j + advance == pSubmits[i].commandBufferCount + do_flush;
+ sem_info.cs_emit_signal = j + advance == pSubmits[i].commandBufferCount;
ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
- advance, initial_preamble_cs, continue_preamble_cs,
+ advance, initial_preamble, continue_preamble_cs,
&sem_info,
can_patch, base_fence);
@@ -2122,19 +2008,13 @@
}
fence_emitted = true;
if (queue->device->trace_bo) {
- bool success = queue->device->ws->ctx_wait_idle(
- queue->hw_ctx,
- radv_queue_family_to_ring(
- queue->queue_family_index),
- queue->queue_idx);
-
- if (!success) { /* Hang */
- radv_dump_trace(queue->device, cs_array[j]);
- abort();
- }
+ radv_check_gpu_hangs(queue, cs_array[j]);
}
}
+ radv_free_temp_syncobjs(queue->device,
+ pSubmits[i].waitSemaphoreCount,
+ pSubmits[i].pWaitSemaphores);
radv_free_sem_info(&sem_info);
free(cs_array);
}
@@ -2222,17 +2102,18 @@
pFD);
}
-VkResult radv_AllocateMemory(
- VkDevice _device,
- const VkMemoryAllocateInfo* pAllocateInfo,
- const VkAllocationCallbacks* pAllocator,
- VkDeviceMemory* pMem)
+VkResult radv_alloc_memory(VkDevice _device,
+ const VkMemoryAllocateInfo* pAllocateInfo,
+ const VkAllocationCallbacks* pAllocator,
+ enum radv_mem_flags_bits mem_flags,
+ VkDeviceMemory* pMem)
{
RADV_FROM_HANDLE(radv_device, device, _device);
struct radv_device_memory *mem;
VkResult result;
enum radeon_bo_domain domain;
uint32_t flags = 0;
+ enum radv_mem_type mem_type_index = device->physical_device->mem_type_indices[pAllocateInfo->memoryTypeIndex];
assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
@@ -2275,20 +2156,23 @@
}
uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
- if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
- pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_CACHED)
+ if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
+ mem_type_index == RADV_MEM_TYPE_GTT_CACHED)
domain = RADEON_DOMAIN_GTT;
else
domain = RADEON_DOMAIN_VRAM;
- if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_VRAM)
+ if (mem_type_index == RADV_MEM_TYPE_VRAM)
flags |= RADEON_FLAG_NO_CPU_ACCESS;
else
flags |= RADEON_FLAG_CPU_ACCESS;
- if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
+ if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
flags |= RADEON_FLAG_GTT_WC;
+ if (mem_flags & RADV_MEM_IMPLICIT_SYNC)
+ flags |= RADEON_FLAG_IMPLICIT_SYNC;
+
mem->bo = device->ws->buffer_create(device->ws, alloc_size, device->physical_device->rad_info.max_alignment,
domain, flags);
@@ -2296,7 +2180,7 @@
result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
goto fail;
}
- mem->type_index = pAllocateInfo->memoryTypeIndex;
+ mem->type_index = mem_type_index;
out_success:
*pMem = radv_device_memory_to_handle(mem);
@@ -2308,6 +2192,15 @@
return result;
}
+VkResult radv_AllocateMemory(
+ VkDevice _device,
+ const VkMemoryAllocateInfo* pAllocateInfo,
+ const VkAllocationCallbacks* pAllocator,
+ VkDeviceMemory* pMem)
+{
+ return radv_alloc_memory(_device, pAllocateInfo, pAllocator, 0, pMem); /* thin wrapper: 0 means no radv_mem_flags_bits are set */
+}
+
void radv_FreeMemory(
VkDevice _device,
VkDeviceMemory _mem,
@@ -2380,13 +2273,14 @@
}
void radv_GetBufferMemoryRequirements(
- VkDevice device,
+ VkDevice _device,
VkBuffer _buffer,
VkMemoryRequirements* pMemoryRequirements)
{
+ RADV_FROM_HANDLE(radv_device, device, _device);
RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
- pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;
+ pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
pMemoryRequirements->alignment = 4096;
@@ -2420,13 +2314,14 @@
}
void radv_GetImageMemoryRequirements(
- VkDevice device,
+ VkDevice _device,
VkImage _image,
VkMemoryRequirements* pMemoryRequirements)
{
+ RADV_FROM_HANDLE(radv_device, device, _device);
RADV_FROM_HANDLE(radv_image, image, _image);
- pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;
+ pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
pMemoryRequirements->size = image->size;
pMemoryRequirements->alignment = image->alignment;
@@ -2483,44 +2378,74 @@
*pCommittedMemoryInBytes = 0;
}
+VkResult radv_BindBufferMemory2KHR(VkDevice device,
+ uint32_t bindInfoCount,
+ const VkBindBufferMemoryInfoKHR *pBindInfos)
+{
+ for (uint32_t i = 0; i < bindInfoCount; ++i) { /* apply each bind request in array order */
+ RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
+ RADV_FROM_HANDLE(radv_buffer, buffer, pBindInfos[i].buffer);
+ if (mem) {
+ buffer->bo = mem->bo;
+ buffer->offset = pBindInfos[i].memoryOffset;
+ } else {
+ buffer->bo = NULL;
+ buffer->offset = 0; /* no memory bound: clear the offset too, as the image variant does */
+ }
+ }
+ return VK_SUCCESS; /* always succeeds; no failure path in this function */
+}
+
VkResult radv_BindBufferMemory(
VkDevice device,
- VkBuffer _buffer,
- VkDeviceMemory _memory,
+ VkBuffer buffer,
+ VkDeviceMemory memory,
VkDeviceSize memoryOffset)
{
- RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
- RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
+ const VkBindBufferMemoryInfoKHR info = { /* single bind request built from the arguments */
+ .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO_KHR,
+ .buffer = buffer,
+ .memory = memory,
+ .memoryOffset = memoryOffset
+ };
- if (mem) {
- buffer->bo = mem->bo;
- buffer->offset = memoryOffset;
- } else {
- buffer->bo = NULL;
- buffer->offset = 0;
- }
+ return radv_BindBufferMemory2KHR(device, 1, &info); /* delegate to the batched entry point with a count of 1 */
+}
+
+VkResult radv_BindImageMemory2KHR(VkDevice device,
+ uint32_t bindInfoCount,
+ const VkBindImageMemoryInfoKHR *pBindInfos)
+{
+ for (uint32_t i = 0; i < bindInfoCount; ++i) { /* apply each bind request in array order */
+ RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
+ RADV_FROM_HANDLE(radv_image, image, pBindInfos[i].image);
+ if (mem) {
+ image->bo = mem->bo; /* the image shares the memory object's buffer object */
+ image->offset = pBindInfos[i].memoryOffset;
+ } else {
+ image->bo = NULL; /* no memory object: clear both the bo and the offset */
+ image->offset = 0;
+ }
+ }
return VK_SUCCESS;
}
+
VkResult radv_BindImageMemory(
VkDevice device,
- VkImage _image,
- VkDeviceMemory _memory,
+ VkImage image,
+ VkDeviceMemory memory,
VkDeviceSize memoryOffset)
{
- RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
- RADV_FROM_HANDLE(radv_image, image, _image);
-
- if (mem) {
- image->bo = mem->bo;
- image->offset = memoryOffset;
- } else {
- image->bo = NULL;
- image->offset = 0;
- }
+ const VkBindImageMemoryInfoKHR info = { /* single bind request built from the arguments */
+ .sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO_KHR, /* must be the image structure type, not the buffer one */
+ .image = image,
+ .memory = memory,
+ .memoryOffset = memoryOffset
+ };
- return VK_SUCCESS;
+ return radv_BindImageMemory2KHR(device, 1, &info); /* delegate to the batched entry point with a count of 1 */
}
@@ -2957,7 +2882,9 @@
/* Intensity is implemented as Red, so treat it that way. */
cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1);
- va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
+ va = radv_buffer_get_va(iview->bo) + iview->image->offset;
+
+ cb->cb_color_base = va >> 8;
if (device->physical_device->rad_info.chip_class >= GFX9) {
struct gfx9_surf_meta_flags meta;
@@ -2971,12 +2898,15 @@
S_028C74_RB_ALIGNED(meta.rb_aligned) |
S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
- va += iview->image->surface.u.gfx9.surf_offset >> 8;
+ cb->cb_color_base += iview->image->surface.u.gfx9.surf_offset >> 8;
+ cb->cb_color_base |= iview->image->surface.tile_swizzle;
} else {
const struct legacy_surf_level *level_info = &surf->u.legacy.level[iview->base_mip];
unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
- va += level_info->offset;
+ cb->cb_color_base += level_info->offset >> 8;
+ if (level_info->mode == RADEON_SURF_MODE_2D)
+ cb->cb_color_base |= iview->image->surface.tile_swizzle;
pitch_tile_max = level_info->nblk_x / 8 - 1;
slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
@@ -3003,19 +2933,15 @@
}
}
- cb->cb_color_base = va >> 8;
- if (device->physical_device->rad_info.chip_class < GFX9)
- cb->cb_color_base |= iview->image->surface.u.legacy.tile_swizzle;
/* CMASK variables */
- va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
+ va = radv_buffer_get_va(iview->bo) + iview->image->offset;
va += iview->image->cmask.offset;
cb->cb_color_cmask = va >> 8;
- va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
+ va = radv_buffer_get_va(iview->bo) + iview->image->offset;
va += iview->image->dcc_offset;
cb->cb_dcc_base = va >> 8;
- if (device->physical_device->rad_info.chip_class < GFX9)
- cb->cb_dcc_base |= iview->image->surface.u.legacy.tile_swizzle;
+ cb->cb_dcc_base |= iview->image->surface.tile_swizzle;
uint32_t max_slice = radv_surface_layer_count(iview);
cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
@@ -3029,10 +2955,9 @@
}
if (iview->image->fmask.size) {
- va = device->ws->buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
+ va = radv_buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
cb->cb_color_fmask = va >> 8;
- if (device->physical_device->rad_info.chip_class < GFX9)
- cb->cb_color_fmask |= iview->image->surface.u.legacy.tile_swizzle;
+ cb->cb_color_fmask |= iview->image->fmask.tile_swizzle;
} else {
cb->cb_color_fmask = cb->cb_color_base;
}
@@ -3088,10 +3013,10 @@
}
if (iview->image->cmask.size &&
- !(device->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
+ !(device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
- if (iview->image->surface.dcc_size && iview->base_mip < surf->num_dcc_levels)
+ if (radv_vi_dcc_enabled(iview->image, iview->base_mip))
cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
if (device->physical_device->rad_info.chip_class >= VI) {
@@ -3165,7 +3090,7 @@
}
format = radv_translate_dbformat(iview->image->vk_format);
- stencil_format = iview->image->surface.flags & RADEON_SURF_SBUFFER ?
+ stencil_format = iview->image->surface.has_stencil ?
V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;
uint32_t max_slice = radv_surface_layer_count(iview);
@@ -3175,7 +3100,7 @@
ds->db_htile_data_base = 0;
ds->db_htile_surface = 0;
- va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
+ va = radv_buffer_get_va(iview->bo) + iview->image->offset;
s_offs = z_offs = va;
if (device->physical_device->rad_info.chip_class >= GFX9) {
@@ -3196,14 +3121,25 @@
ds->db_depth_size = S_02801C_X_MAX(iview->image->info.width - 1) |
S_02801C_Y_MAX(iview->image->info.height - 1);
- /* Only use HTILE for the first level. */
- if (iview->image->surface.htile_size && !level) {
+ if (radv_htile_enabled(iview->image, level)) {
ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1);
- if (!(iview->image->surface.flags & RADEON_SURF_SBUFFER))
+ if (iview->image->tc_compatible_htile) {
+ unsigned max_zplanes = 4;
+
+ if (iview->vk_format == VK_FORMAT_D16_UNORM &&
+ iview->image->info.samples > 1)
+ max_zplanes = 2;
+
+ ds->db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes + 1) |
+ S_028038_ITERATE_FLUSH(1);
+ ds->db_stencil_info |= S_02803C_ITERATE_FLUSH(1);
+ }
+
+ if (!iview->image->surface.has_stencil)
/* Use all of the htile_buffer for depth if there's no stencil. */
ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1);
- va = device->ws->buffer_get_va(iview->bo) + iview->image->offset +
+ va = radv_buffer_get_va(iview->bo) + iview->image->offset +
iview->image->htile_offset;
ds->db_htile_data_base = va >> 8;
ds->db_htile_surface = S_028ABC_FULL_CACHE(1) |
@@ -3219,7 +3155,7 @@
z_offs += iview->image->surface.u.legacy.level[level].offset;
s_offs += iview->image->surface.u.legacy.stencil_level[level].offset;
- ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);
+ ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!iview->image->tc_compatible_htile);
ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
ds->db_stencil_info = S_028044_FORMAT(stencil_format);
@@ -3260,17 +3196,29 @@
S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
- if (iview->image->surface.htile_size && !level) {
+ if (radv_htile_enabled(iview->image, level)) {
ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);
- if (!(iview->image->surface.flags & RADEON_SURF_SBUFFER))
+ if (!iview->image->surface.has_stencil &&
+ !iview->image->tc_compatible_htile)
/* Use all of the htile_buffer for depth if there's no stencil. */
ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
- va = device->ws->buffer_get_va(iview->bo) + iview->image->offset +
+ va = radv_buffer_get_va(iview->bo) + iview->image->offset +
iview->image->htile_offset;
ds->db_htile_data_base = va >> 8;
ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
+
+ if (iview->image->tc_compatible_htile) {
+ ds->db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
+
+ if (iview->image->info.samples <= 1)
+ ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(5);
+ else if (iview->image->info.samples <= 4)
+ ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(3);
+ else
+ ds->db_z_info|= S_028040_DECOMPRESS_ON_N_ZPLANES(2);
+ }
}
}
@@ -3592,6 +3540,7 @@
RADV_FROM_HANDLE(radv_device, device, _device);
RADV_FROM_HANDLE(radv_semaphore, sem, pImportSemaphoreFdInfo->semaphore);
uint32_t syncobj_handle = 0;
+ uint32_t *syncobj_dst = NULL;
assert(pImportSemaphoreFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR);
int ret = device->ws->import_syncobj(device->ws, pImportSemaphoreFdInfo->fd, &syncobj_handle);
@@ -3599,10 +3548,15 @@
return VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR;
if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT_KHR) {
- sem->temp_syncobj = syncobj_handle;
+ syncobj_dst = &sem->temp_syncobj;
} else {
- sem->syncobj = syncobj_handle;
+ syncobj_dst = &sem->syncobj;
}
+
+ if (*syncobj_dst)
+ device->ws->destroy_syncobj(device->ws, *syncobj_dst);
+
+ *syncobj_dst = syncobj_handle;
close(pImportSemaphoreFdInfo->fd);
return VK_SUCCESS;
}
diff -Nru mesa-17.2.4/src/amd/vulkan/radv_entrypoints.c mesa-17.3.3/src/amd/vulkan/radv_entrypoints.c
--- mesa-17.2.4/src/amd/vulkan/radv_entrypoints.c 2017-10-30 14:50:58.000000000 +0000
+++ mesa-17.3.3/src/amd/vulkan/radv_entrypoints.c 2018-01-18 21:31:11.000000000 +0000
@@ -206,6 +206,8 @@
"vkGetPhysicalDeviceExternalSemaphorePropertiesKHR\0"
"vkGetSemaphoreFdKHR\0"
"vkImportSemaphoreFdKHR\0"
+ "vkBindBufferMemory2KHR\0"
+ "vkBindImageMemory2KHR\0"
"vkCreateDescriptorUpdateTemplateKHR\0"
"vkDestroyDescriptorUpdateTemplateKHR\0"
"vkUpdateDescriptorSetWithTemplateKHR\0"
@@ -216,183 +218,185 @@
;
static const struct radv_entrypoint entrypoints[] = {
- { 0, 0x38a581a6 },
- { 17, 0x9bd21af2 },
- { 35, 0x5787c327 },
- { 62, 0xba013486 },
- { 82, 0x3d2ae9ad },
- { 104, 0x52fe22c9 },
- { 134, 0x4e5fc88a },
- { 175, 0xa90da4da },
- { 211, 0x113e2f33 },
- { 239, 0x3e54b398 },
- { 275, 0xdd36a867 },
- { 316, 0x85ed23f },
- { 331, 0x1fbcc9cb },
- { 347, 0x81f69d8 },
- { 382, 0xeb27627e },
- { 421, 0x2f8566e7 },
- { 454, 0x5fd13eed },
- { 491, 0xcc920d9a },
- { 508, 0xfa4713ec },
- { 522, 0x6f8fc2a5 },
- { 538, 0xd46c5f24 },
- { 555, 0x522b85d3 },
- { 572, 0x8f6f838a },
- { 585, 0xcb977bd8 },
- { 597, 0x1a1a0e2f },
- { 611, 0xff52f051 },
- { 637, 0x1e115cca },
- { 668, 0x46e38db5 },
- { 696, 0xab98422a },
- { 726, 0x6bcbdcb },
- { 745, 0x916f1e63 },
- { 774, 0x5caaae4a },
- { 792, 0x15855f5b },
- { 827, 0x272ef8ef },
- { 874, 0xc3628a09 },
- { 892, 0x958af968 },
- { 906, 0xfc64ee3c },
- { 921, 0x684781dc },
- { 935, 0x5f391892 },
- { 952, 0x19d64c81 },
- { 968, 0xf2065e5b },
- { 986, 0xcaab1faf },
- { 1005, 0xe7188731 },
- { 1019, 0x4df27c05 },
- { 1034, 0x96d834b },
- { 1051, 0x592ae5f5 },
- { 1062, 0x6d373ba8 },
- { 1075, 0x5edcd92b },
- { 1093, 0x37819a7f },
- { 1112, 0xbf3f2cb3 },
- { 1134, 0x7d4282b9 },
- { 1149, 0x94a07a45 },
- { 1165, 0x925bd256 },
- { 1184, 0x98b27962 },
- { 1204, 0x652128c2 },
- { 1218, 0xcbfb1d96 },
- { 1233, 0x9163b686 },
- { 1261, 0xdce077ff },
- { 1279, 0xb5853953 },
- { 1298, 0xa0d3cea2 },
- { 1319, 0x2d77af6e },
- { 1341, 0xcbf6489f },
- { 1363, 0x4112a673 },
- { 1386, 0x2092a349 },
- { 1409, 0xc3499606 },
- { 1431, 0x4b59f96d },
- { 1457, 0xf70c85eb },
- { 1482, 0x6aac68af },
- { 1500, 0x451ef1ed },
- { 1523, 0x9146f879 },
- { 1547, 0x13cf03f },
- { 1563, 0x3b645153 },
- { 1580, 0x3c14cc74 },
- { 1608, 0xa4227b08 },
- { 1637, 0xfb95a8a4 },
- { 1660, 0x47bdaf30 },
- { 1684, 0x9bd85f5 },
- { 1706, 0x4c449d3a },
- { 1731, 0x7a1347b1 },
- { 1752, 0xbfd090ae },
- { 1775, 0x887a38c4 },
- { 1795, 0xdc428e58 },
- { 1816, 0x109a9c18 },
- { 1835, 0x16f14324 },
- { 1855, 0xa9820d22 },
- { 1882, 0x820fe476 },
- { 1902, 0xd5d83a0a },
- { 1923, 0x6da9f7fd },
- { 1942, 0x8c0c811a },
- { 1967, 0xb9db2b91 },
- { 1988, 0xc54f7327 },
- { 2009, 0xaffb5725 },
- { 2028, 0x847dc731 },
- { 2049, 0x3af9fd84 },
- { 2067, 0x53d6c2b },
- { 2084, 0x48f28c7f },
- { 2100, 0x32282165 },
- { 2118, 0x30f14d07 },
- { 2136, 0x1c989dfb },
- { 2159, 0x7b3a8a63 },
- { 2179, 0xa8f534e2 },
- { 2206, 0xe7c4b134 },
- { 2231, 0x83e2b024 },
- { 2256, 0x28c7a5da },
- { 2280, 0x4c22d870 },
- { 2301, 0xa9c83f1d },
- { 2324, 0x9912c1a1 },
- { 2334, 0xbe5a8058 },
- { 2351, 0xe9ac41bf },
- { 2369, 0x94e7ed36 },
- { 2394, 0xbd58e867 },
- { 2408, 0xd6353005 },
- { 2430, 0xc939a0da },
- { 2446, 0x278effa9 },
- { 2461, 0x331ebf89 },
- { 2476, 0x929847e },
- { 2499, 0x68cddbac },
- { 2522, 0xd2986b5e },
- { 2540, 0x5bdd2ae0 },
- { 2556, 0xb4bc8d08 },
- { 2577, 0x4f88e4ba },
- { 2605, 0x93cb5cb8 },
- { 2627, 0x671bb594 },
- { 2645, 0xe257f075 },
- { 2659, 0x4fccce28 },
- { 2675, 0x3b9346b3 },
- { 2691, 0x97fccfe8 },
- { 2712, 0xf5064ea4 },
- { 2728, 0xd556fd22 },
- { 2742, 0x2f614082 },
- { 2762, 0xec4d324c },
- { 2782, 0xdee8c6d4 },
- { 2808, 0xb1c6b468 },
- { 2827, 0xcb7a58e3 },
- { 2848, 0x2eeec2f9 },
- { 2865, 0xdcdb0235 },
- { 2884, 0x9eaabe40 },
- { 2905, 0xf204ce7d },
- { 2925, 0x1a687885 },
- { 2962, 0x77890558 },
- { 3004, 0xe32227c8 },
- { 3041, 0x31c3cbd1 },
- { 3083, 0xcdefcaa8 },
- { 3104, 0x5a93ab74 },
- { 3126, 0x57695f28 },
- { 3150, 0xc3fedb2e },
- { 3172, 0xfc5fb6ce },
- { 3190, 0x2b2a4b79 },
- { 3216, 0x84e085ac },
- { 3265, 0xa693bc66 },
- { 3288, 0x34a063ab },
- { 3334, 0xc5e5b106 },
- { 3356, 0x41782cb9 },
- { 3401, 0xe5ad0a50 },
- { 3427, 0xc86e9287 },
- { 3460, 0x6a9a3636 },
- { 3492, 0xcd15838c },
- { 3526, 0x9099cbbb },
- { 3566, 0x102ff7ea },
- { 3611, 0x5ceb2bed },
- { 3656, 0xc8c3da3d },
- { 3696, 0x8746ed72 },
- { 3747, 0xf17232a1 },
- { 3773, 0x51177c8d },
- { 3794, 0xee68b389 },
- { 3841, 0x503c14c5 },
- { 3858, 0xb028a792 },
- { 3885, 0x984c3fa7 },
- { 3935, 0x3e0e9884 },
- { 3955, 0x36337c05 },
- { 3978, 0x5189488a },
- { 4014, 0xaa83901e },
- { 4051, 0x214ad230 },
- { 4088, 0x3d528981 },
- { 4126, 0x78dbe98d },
- { 4160, 0x8de28366 },
- { 4193, 0x3df40f5e },
+ [0] = { 0, 0x38a581a6 }, /* vkCreateInstance */
+ [1] = { 17, 0x9bd21af2 }, /* vkDestroyInstance */
+ [2] = { 35, 0x5787c327 }, /* vkEnumeratePhysicalDevices */
+ [3] = { 62, 0xba013486 }, /* vkGetDeviceProcAddr */
+ [4] = { 82, 0x3d2ae9ad }, /* vkGetInstanceProcAddr */
+ [5] = { 104, 0x52fe22c9 }, /* vkGetPhysicalDeviceProperties */
+ [6] = { 134, 0x4e5fc88a }, /* vkGetPhysicalDeviceQueueFamilyProperties */
+ [7] = { 175, 0xa90da4da }, /* vkGetPhysicalDeviceMemoryProperties */
+ [8] = { 211, 0x113e2f33 }, /* vkGetPhysicalDeviceFeatures */
+ [9] = { 239, 0x3e54b398 }, /* vkGetPhysicalDeviceFormatProperties */
+ [10] = { 275, 0xdd36a867 }, /* vkGetPhysicalDeviceImageFormatProperties */
+ [11] = { 316, 0x85ed23f }, /* vkCreateDevice */
+ [12] = { 331, 0x1fbcc9cb }, /* vkDestroyDevice */
+ [13] = { 347, 0x81f69d8 }, /* vkEnumerateInstanceLayerProperties */
+ [14] = { 382, 0xeb27627e }, /* vkEnumerateInstanceExtensionProperties */
+ [15] = { 421, 0x2f8566e7 }, /* vkEnumerateDeviceLayerProperties */
+ [16] = { 454, 0x5fd13eed }, /* vkEnumerateDeviceExtensionProperties */
+ [17] = { 491, 0xcc920d9a }, /* vkGetDeviceQueue */
+ [18] = { 508, 0xfa4713ec }, /* vkQueueSubmit */
+ [19] = { 522, 0x6f8fc2a5 }, /* vkQueueWaitIdle */
+ [20] = { 538, 0xd46c5f24 }, /* vkDeviceWaitIdle */
+ [21] = { 555, 0x522b85d3 }, /* vkAllocateMemory */
+ [22] = { 572, 0x8f6f838a }, /* vkFreeMemory */
+ [23] = { 585, 0xcb977bd8 }, /* vkMapMemory */
+ [24] = { 597, 0x1a1a0e2f }, /* vkUnmapMemory */
+ [25] = { 611, 0xff52f051 }, /* vkFlushMappedMemoryRanges */
+ [26] = { 637, 0x1e115cca }, /* vkInvalidateMappedMemoryRanges */
+ [27] = { 668, 0x46e38db5 }, /* vkGetDeviceMemoryCommitment */
+ [28] = { 696, 0xab98422a }, /* vkGetBufferMemoryRequirements */
+ [29] = { 726, 0x6bcbdcb }, /* vkBindBufferMemory */
+ [30] = { 745, 0x916f1e63 }, /* vkGetImageMemoryRequirements */
+ [31] = { 774, 0x5caaae4a }, /* vkBindImageMemory */
+ [32] = { 792, 0x15855f5b }, /* vkGetImageSparseMemoryRequirements */
+ [33] = { 827, 0x272ef8ef }, /* vkGetPhysicalDeviceSparseImageFormatProperties */
+ [34] = { 874, 0xc3628a09 }, /* vkQueueBindSparse */
+ [35] = { 892, 0x958af968 }, /* vkCreateFence */
+ [36] = { 906, 0xfc64ee3c }, /* vkDestroyFence */
+ [37] = { 921, 0x684781dc }, /* vkResetFences */
+ [38] = { 935, 0x5f391892 }, /* vkGetFenceStatus */
+ [39] = { 952, 0x19d64c81 }, /* vkWaitForFences */
+ [40] = { 968, 0xf2065e5b }, /* vkCreateSemaphore */
+ [41] = { 986, 0xcaab1faf }, /* vkDestroySemaphore */
+ [42] = { 1005, 0xe7188731 }, /* vkCreateEvent */
+ [43] = { 1019, 0x4df27c05 }, /* vkDestroyEvent */
+ [44] = { 1034, 0x96d834b }, /* vkGetEventStatus */
+ [45] = { 1051, 0x592ae5f5 }, /* vkSetEvent */
+ [46] = { 1062, 0x6d373ba8 }, /* vkResetEvent */
+ [47] = { 1075, 0x5edcd92b }, /* vkCreateQueryPool */
+ [48] = { 1093, 0x37819a7f }, /* vkDestroyQueryPool */
+ [49] = { 1112, 0xbf3f2cb3 }, /* vkGetQueryPoolResults */
+ [50] = { 1134, 0x7d4282b9 }, /* vkCreateBuffer */
+ [51] = { 1149, 0x94a07a45 }, /* vkDestroyBuffer */
+ [52] = { 1165, 0x925bd256 }, /* vkCreateBufferView */
+ [53] = { 1184, 0x98b27962 }, /* vkDestroyBufferView */
+ [54] = { 1204, 0x652128c2 }, /* vkCreateImage */
+ [55] = { 1218, 0xcbfb1d96 }, /* vkDestroyImage */
+ [56] = { 1233, 0x9163b686 }, /* vkGetImageSubresourceLayout */
+ [57] = { 1261, 0xdce077ff }, /* vkCreateImageView */
+ [58] = { 1279, 0xb5853953 }, /* vkDestroyImageView */
+ [59] = { 1298, 0xa0d3cea2 }, /* vkCreateShaderModule */
+ [60] = { 1319, 0x2d77af6e }, /* vkDestroyShaderModule */
+ [61] = { 1341, 0xcbf6489f }, /* vkCreatePipelineCache */
+ [62] = { 1363, 0x4112a673 }, /* vkDestroyPipelineCache */
+ [63] = { 1386, 0x2092a349 }, /* vkGetPipelineCacheData */
+ [64] = { 1409, 0xc3499606 }, /* vkMergePipelineCaches */
+ [65] = { 1431, 0x4b59f96d }, /* vkCreateGraphicsPipelines */
+ [66] = { 1457, 0xf70c85eb }, /* vkCreateComputePipelines */
+ [67] = { 1482, 0x6aac68af }, /* vkDestroyPipeline */
+ [68] = { 1500, 0x451ef1ed }, /* vkCreatePipelineLayout */
+ [69] = { 1523, 0x9146f879 }, /* vkDestroyPipelineLayout */
+ [70] = { 1547, 0x13cf03f }, /* vkCreateSampler */
+ [71] = { 1563, 0x3b645153 }, /* vkDestroySampler */
+ [72] = { 1580, 0x3c14cc74 }, /* vkCreateDescriptorSetLayout */
+ [73] = { 1608, 0xa4227b08 }, /* vkDestroyDescriptorSetLayout */
+ [74] = { 1637, 0xfb95a8a4 }, /* vkCreateDescriptorPool */
+ [75] = { 1660, 0x47bdaf30 }, /* vkDestroyDescriptorPool */
+ [76] = { 1684, 0x9bd85f5 }, /* vkResetDescriptorPool */
+ [77] = { 1706, 0x4c449d3a }, /* vkAllocateDescriptorSets */
+ [78] = { 1731, 0x7a1347b1 }, /* vkFreeDescriptorSets */
+ [79] = { 1752, 0xbfd090ae }, /* vkUpdateDescriptorSets */
+ [80] = { 1775, 0x887a38c4 }, /* vkCreateFramebuffer */
+ [81] = { 1795, 0xdc428e58 }, /* vkDestroyFramebuffer */
+ [82] = { 1816, 0x109a9c18 }, /* vkCreateRenderPass */
+ [83] = { 1835, 0x16f14324 }, /* vkDestroyRenderPass */
+ [84] = { 1855, 0xa9820d22 }, /* vkGetRenderAreaGranularity */
+ [85] = { 1882, 0x820fe476 }, /* vkCreateCommandPool */
+ [86] = { 1902, 0xd5d83a0a }, /* vkDestroyCommandPool */
+ [87] = { 1923, 0x6da9f7fd }, /* vkResetCommandPool */
+ [88] = { 1942, 0x8c0c811a }, /* vkAllocateCommandBuffers */
+ [89] = { 1967, 0xb9db2b91 }, /* vkFreeCommandBuffers */
+ [90] = { 1988, 0xc54f7327 }, /* vkBeginCommandBuffer */
+ [91] = { 2009, 0xaffb5725 }, /* vkEndCommandBuffer */
+ [92] = { 2028, 0x847dc731 }, /* vkResetCommandBuffer */
+ [93] = { 2049, 0x3af9fd84 }, /* vkCmdBindPipeline */
+ [94] = { 2067, 0x53d6c2b }, /* vkCmdSetViewport */
+ [95] = { 2084, 0x48f28c7f }, /* vkCmdSetScissor */
+ [96] = { 2100, 0x32282165 }, /* vkCmdSetLineWidth */
+ [97] = { 2118, 0x30f14d07 }, /* vkCmdSetDepthBias */
+ [98] = { 2136, 0x1c989dfb }, /* vkCmdSetBlendConstants */
+ [99] = { 2159, 0x7b3a8a63 }, /* vkCmdSetDepthBounds */
+ [100] = { 2179, 0xa8f534e2 }, /* vkCmdSetStencilCompareMask */
+ [101] = { 2206, 0xe7c4b134 }, /* vkCmdSetStencilWriteMask */
+ [102] = { 2231, 0x83e2b024 }, /* vkCmdSetStencilReference */
+ [103] = { 2256, 0x28c7a5da }, /* vkCmdBindDescriptorSets */
+ [104] = { 2280, 0x4c22d870 }, /* vkCmdBindIndexBuffer */
+ [105] = { 2301, 0xa9c83f1d }, /* vkCmdBindVertexBuffers */
+ [106] = { 2324, 0x9912c1a1 }, /* vkCmdDraw */
+ [107] = { 2334, 0xbe5a8058 }, /* vkCmdDrawIndexed */
+ [108] = { 2351, 0xe9ac41bf }, /* vkCmdDrawIndirect */
+ [109] = { 2369, 0x94e7ed36 }, /* vkCmdDrawIndexedIndirect */
+ [110] = { 2394, 0xbd58e867 }, /* vkCmdDispatch */
+ [111] = { 2408, 0xd6353005 }, /* vkCmdDispatchIndirect */
+ [112] = { 2430, 0xc939a0da }, /* vkCmdCopyBuffer */
+ [113] = { 2446, 0x278effa9 }, /* vkCmdCopyImage */
+ [114] = { 2461, 0x331ebf89 }, /* vkCmdBlitImage */
+ [115] = { 2476, 0x929847e }, /* vkCmdCopyBufferToImage */
+ [116] = { 2499, 0x68cddbac }, /* vkCmdCopyImageToBuffer */
+ [117] = { 2522, 0xd2986b5e }, /* vkCmdUpdateBuffer */
+ [118] = { 2540, 0x5bdd2ae0 }, /* vkCmdFillBuffer */
+ [119] = { 2556, 0xb4bc8d08 }, /* vkCmdClearColorImage */
+ [120] = { 2577, 0x4f88e4ba }, /* vkCmdClearDepthStencilImage */
+ [121] = { 2605, 0x93cb5cb8 }, /* vkCmdClearAttachments */
+ [122] = { 2627, 0x671bb594 }, /* vkCmdResolveImage */
+ [123] = { 2645, 0xe257f075 }, /* vkCmdSetEvent */
+ [124] = { 2659, 0x4fccce28 }, /* vkCmdResetEvent */
+ [125] = { 2675, 0x3b9346b3 }, /* vkCmdWaitEvents */
+ [126] = { 2691, 0x97fccfe8 }, /* vkCmdPipelineBarrier */
+ [127] = { 2712, 0xf5064ea4 }, /* vkCmdBeginQuery */
+ [128] = { 2728, 0xd556fd22 }, /* vkCmdEndQuery */
+ [129] = { 2742, 0x2f614082 }, /* vkCmdResetQueryPool */
+ [130] = { 2762, 0xec4d324c }, /* vkCmdWriteTimestamp */
+ [131] = { 2782, 0xdee8c6d4 }, /* vkCmdCopyQueryPoolResults */
+ [132] = { 2808, 0xb1c6b468 }, /* vkCmdPushConstants */
+ [133] = { 2827, 0xcb7a58e3 }, /* vkCmdBeginRenderPass */
+ [134] = { 2848, 0x2eeec2f9 }, /* vkCmdNextSubpass */
+ [135] = { 2865, 0xdcdb0235 }, /* vkCmdEndRenderPass */
+ [136] = { 2884, 0x9eaabe40 }, /* vkCmdExecuteCommands */
+ [137] = { 2905, 0xf204ce7d }, /* vkDestroySurfaceKHR */
+ [138] = { 2925, 0x1a687885 }, /* vkGetPhysicalDeviceSurfaceSupportKHR */
+ [139] = { 2962, 0x77890558 }, /* vkGetPhysicalDeviceSurfaceCapabilitiesKHR */
+ [140] = { 3004, 0xe32227c8 }, /* vkGetPhysicalDeviceSurfaceFormatsKHR */
+ [141] = { 3041, 0x31c3cbd1 }, /* vkGetPhysicalDeviceSurfacePresentModesKHR */
+ [142] = { 3083, 0xcdefcaa8 }, /* vkCreateSwapchainKHR */
+ [143] = { 3104, 0x5a93ab74 }, /* vkDestroySwapchainKHR */
+ [144] = { 3126, 0x57695f28 }, /* vkGetSwapchainImagesKHR */
+ [145] = { 3150, 0xc3fedb2e }, /* vkAcquireNextImageKHR */
+ [146] = { 3172, 0xfc5fb6ce }, /* vkQueuePresentKHR */
+ [147] = { 3190, 0x2b2a4b79 }, /* vkCreateWaylandSurfaceKHR */
+ [148] = { 3216, 0x84e085ac }, /* vkGetPhysicalDeviceWaylandPresentationSupportKHR */
+ [149] = { 3265, 0xa693bc66 }, /* vkCreateXlibSurfaceKHR */
+ [150] = { 3288, 0x34a063ab }, /* vkGetPhysicalDeviceXlibPresentationSupportKHR */
+ [151] = { 3334, 0xc5e5b106 }, /* vkCreateXcbSurfaceKHR */
+ [152] = { 3356, 0x41782cb9 }, /* vkGetPhysicalDeviceXcbPresentationSupportKHR */
+ [153] = { 3401, 0xe5ad0a50 }, /* vkCmdDrawIndirectCountAMD */
+ [154] = { 3427, 0xc86e9287 }, /* vkCmdDrawIndexedIndirectCountAMD */
+ [155] = { 3460, 0x6a9a3636 }, /* vkGetPhysicalDeviceFeatures2KHR */
+ [156] = { 3492, 0xcd15838c }, /* vkGetPhysicalDeviceProperties2KHR */
+ [157] = { 3526, 0x9099cbbb }, /* vkGetPhysicalDeviceFormatProperties2KHR */
+ [158] = { 3566, 0x102ff7ea }, /* vkGetPhysicalDeviceImageFormatProperties2KHR */
+ [159] = { 3611, 0x5ceb2bed }, /* vkGetPhysicalDeviceQueueFamilyProperties2KHR */
+ [160] = { 3656, 0xc8c3da3d }, /* vkGetPhysicalDeviceMemoryProperties2KHR */
+ [161] = { 3696, 0x8746ed72 }, /* vkGetPhysicalDeviceSparseImageFormatProperties2KHR */
+ [162] = { 3747, 0xf17232a1 }, /* vkCmdPushDescriptorSetKHR */
+ [163] = { 3773, 0x51177c8d }, /* vkTrimCommandPoolKHR */
+ [164] = { 3794, 0xee68b389 }, /* vkGetPhysicalDeviceExternalBufferPropertiesKHR */
+ [165] = { 3841, 0x503c14c5 }, /* vkGetMemoryFdKHR */
+ [166] = { 3858, 0xb028a792 }, /* vkGetMemoryFdPropertiesKHR */
+ [167] = { 3885, 0x984c3fa7 }, /* vkGetPhysicalDeviceExternalSemaphorePropertiesKHR */
+ [168] = { 3935, 0x3e0e9884 }, /* vkGetSemaphoreFdKHR */
+ [169] = { 3955, 0x36337c05 }, /* vkImportSemaphoreFdKHR */
+ [170] = { 3978, 0x6878d3ce }, /* vkBindBufferMemory2KHR */
+ [171] = { 4001, 0xf18729ad }, /* vkBindImageMemory2KHR */
+ [172] = { 4023, 0x5189488a }, /* vkCreateDescriptorUpdateTemplateKHR */
+ [173] = { 4059, 0xaa83901e }, /* vkDestroyDescriptorUpdateTemplateKHR */
+ [174] = { 4096, 0x214ad230 }, /* vkUpdateDescriptorSetWithTemplateKHR */
+ [175] = { 4133, 0x3d528981 }, /* vkCmdPushDescriptorSetWithTemplateKHR */
+ [176] = { 4171, 0x78dbe98d }, /* vkGetBufferMemoryRequirements2KHR */
+ [177] = { 4205, 0x8de28366 }, /* vkGetImageMemoryRequirements2KHR */
+ [178] = { 4238, 0x3df40f5e }, /* vkGetImageSparseMemoryRequirements2KHR */
};
/* Weak aliases for all potential implementations. These will resolve to
@@ -582,6 +586,8 @@
void radv_GetPhysicalDeviceExternalSemaphorePropertiesKHR(VkPhysicalDevice physicalDevice, const VkPhysicalDeviceExternalSemaphoreInfoKHR* pExternalSemaphoreInfo, VkExternalSemaphorePropertiesKHR* pExternalSemaphoreProperties) __attribute__ ((weak));
VkResult radv_GetSemaphoreFdKHR(VkDevice device, const VkSemaphoreGetFdInfoKHR* pGetFdInfo, int* pFd) __attribute__ ((weak));
VkResult radv_ImportSemaphoreFdKHR(VkDevice device, const VkImportSemaphoreFdInfoKHR* pImportSemaphoreFdInfo) __attribute__ ((weak));
+ VkResult radv_BindBufferMemory2KHR(VkDevice device, uint32_t bindInfoCount, const VkBindBufferMemoryInfoKHR* pBindInfos) __attribute__ ((weak));
+ VkResult radv_BindImageMemory2KHR(VkDevice device, uint32_t bindInfoCount, const VkBindImageMemoryInfoKHR* pBindInfos) __attribute__ ((weak));
VkResult radv_CreateDescriptorUpdateTemplateKHR(VkDevice device, const VkDescriptorUpdateTemplateCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDescriptorUpdateTemplateKHR* pDescriptorUpdateTemplate) __attribute__ ((weak));
void radv_DestroyDescriptorUpdateTemplateKHR(VkDevice device, VkDescriptorUpdateTemplateKHR descriptorUpdateTemplate, const VkAllocationCallbacks* pAllocator) __attribute__ ((weak));
void radv_UpdateDescriptorSetWithTemplateKHR(VkDevice device, VkDescriptorSet descriptorSet, VkDescriptorUpdateTemplateKHR descriptorUpdateTemplate, const void* pData) __attribute__ ((weak));
@@ -773,6 +779,8 @@
.GetPhysicalDeviceExternalSemaphorePropertiesKHR = radv_GetPhysicalDeviceExternalSemaphorePropertiesKHR,
.GetSemaphoreFdKHR = radv_GetSemaphoreFdKHR,
.ImportSemaphoreFdKHR = radv_ImportSemaphoreFdKHR,
+ .BindBufferMemory2KHR = radv_BindBufferMemory2KHR,
+ .BindImageMemory2KHR = radv_BindImageMemory2KHR,
.CreateDescriptorUpdateTemplateKHR = radv_CreateDescriptorUpdateTemplateKHR,
.DestroyDescriptorUpdateTemplateKHR = radv_DestroyDescriptorUpdateTemplateKHR,
.UpdateDescriptorSetWithTemplateKHR = radv_UpdateDescriptorSetWithTemplateKHR,
@@ -793,8 +801,8 @@
* collisions entries:
* 0 115
* 1 32
- * 2 11
- * 3 8
+ * 2 12
+ * 3 9
* 4 4
* 5 1
* 6 3
@@ -835,7 +843,7 @@
0x004c,
none,
0x0069,
- 0x00ab,
+ 0x00ad,
none,
none,
none,
@@ -872,7 +880,7 @@
0x0088,
0x0091,
none,
- 0x00ac,
+ 0x00ae,
0x005c,
0x0033,
none,
@@ -918,7 +926,7 @@
0x0028,
none,
0x0068,
- 0x00b0,
+ 0x00b2,
0x00a1,
0x003e,
0x0048,
@@ -936,7 +944,7 @@
none,
0x0027,
0x0081,
- 0x00af,
+ 0x00b1,
0x005d,
0x008a,
0x0003,
@@ -965,7 +973,7 @@
0x0016,
none,
0x003d,
- 0x00ae,
+ 0x00b0,
0x006a,
0x003b,
none,
@@ -981,7 +989,7 @@
0x0004,
0x004f,
0x0029,
- 0x00aa,
+ 0x00ac,
0x004e,
0x0095,
0x0031,
@@ -1035,7 +1043,7 @@
0x0085,
none,
none,
- none,
+ 0x00ab,
0x000f,
0x007e,
none,
@@ -1048,8 +1056,8 @@
0x008b,
0x0079,
0x0001,
- 0x00ad,
- none,
+ 0x00af,
+ 0x00aa,
0x002d,
none,
none,
diff -Nru mesa-17.2.4/src/amd/vulkan/radv_entrypoints_gen.py mesa-17.3.3/src/amd/vulkan/radv_entrypoints_gen.py
--- mesa-17.2.4/src/amd/vulkan/radv_entrypoints_gen.py 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/vulkan/radv_entrypoints_gen.py 2018-01-18 21:30:28.000000000 +0000
@@ -25,212 +25,184 @@
import argparse
import functools
import os
-import textwrap
import xml.etree.cElementTree as et
from mako.template import Template
-MAX_API_VERSION = 1.0
-
-SUPPORTED_EXTENSIONS = [
- 'VK_AMD_draw_indirect_count',
- 'VK_NV_dedicated_allocation',
- 'VK_KHR_descriptor_update_template',
- 'VK_KHR_get_physical_device_properties2',
- 'VK_KHR_incremental_present',
- 'VK_KHR_maintenance1',
- 'VK_KHR_push_descriptor',
- 'VK_KHR_sampler_mirror_clamp_to_edge',
- 'VK_KHR_shader_draw_parameters',
- 'VK_KHR_surface',
- 'VK_KHR_swapchain',
- 'VK_KHR_wayland_surface',
- 'VK_KHR_xcb_surface',
- 'VK_KHR_xlib_surface',
- 'VK_KHR_get_memory_requirements2',
- 'VK_KHR_dedicated_allocation',
- 'VK_KHR_external_memory_capabilities',
- 'VK_KHR_external_memory',
- 'VK_KHR_external_memory_fd',
- 'VK_KHR_storage_buffer_storage_class',
- 'VK_KHR_variable_pointers',
- 'VK_KHR_external_semaphore_capabilities',
- 'VK_KHR_external_semaphore',
- 'VK_KHR_external_semaphore_fd',
-]
+from radv_extensions import *
# We generate a static hash table for entry point lookup
# (vkGetProcAddress). We use a linear congruential generator for our hash
# function and a power-of-two size table. The prime numbers are determined
# experimentally.
-TEMPLATE_H = Template(textwrap.dedent("""\
- /* This file generated from ${filename}, don't edit directly. */
-
- struct radv_dispatch_table {
- union {
- void *entrypoints[${len(entrypoints)}];
- struct {
- % for _, name, _, _, _, guard in entrypoints:
- % if guard is not None:
- #ifdef ${guard}
- PFN_vk${name} ${name};
- #else
- void *${name};
- # endif
- % else:
- PFN_vk${name} ${name};
- % endif
- % endfor
- };
- };
- };
-
- % for type_, name, args, num, h, guard in entrypoints:
- % if guard is not None:
- #ifdef ${guard}
- % endif
- ${type_} radv_${name}(${args});
- % if guard is not None:
- #endif // ${guard}
- % endif
- % endfor
- """), output_encoding='utf-8')
-
-TEMPLATE_C = Template(textwrap.dedent(u"""\
- /*
- * Copyright © 2015 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
- /* This file generated from ${filename}, don't edit directly. */
-
- #include "radv_private.h"
-
- struct radv_entrypoint {
- uint32_t name;
- uint32_t hash;
- };
-
- /* We use a big string constant to avoid lots of reloctions from the entry
- * point table to lots of little strings. The entries in the entry point table
- * store the index into this big string.
- */
-
- static const char strings[] =
- % for _, name, _, _, _, _ in entrypoints:
- "vk${name}\\0"
- % endfor
- ;
-
- static const struct radv_entrypoint entrypoints[] = {
- % for _, _, _, num, h, _ in entrypoints:
- { ${offsets[num]}, ${'{:0=#8x}'.format(h)} },
- % endfor
- };
-
- /* Weak aliases for all potential implementations. These will resolve to
- * NULL if they're not defined, which lets the resolve_entrypoint() function
- * either pick the correct entry point.
- */
+TEMPLATE_H = Template("""\
+/* This file generated from ${filename}, don't edit directly. */
- % for layer in ['radv']:
- % for type_, name, args, _, _, guard in entrypoints:
+struct radv_dispatch_table {
+ union {
+ void *entrypoints[${len(entrypoints)}];
+ struct {
+ % for _, name, _, _, _, guard in entrypoints:
% if guard is not None:
- #ifdef ${guard}
- % endif
- ${type_} ${layer}_${name}(${args}) __attribute__ ((weak));
- % if guard is not None:
- #endif // ${guard}
- % endif
- % endfor
-
- const struct radv_dispatch_table ${layer}_layer = {
- % for _, name, args, _, _, guard in entrypoints:
- % if guard is not None:
- #ifdef ${guard}
- % endif
- .${name} = ${layer}_${name},
- % if guard is not None:
- #endif // ${guard}
- % endif
- % endfor
- };
- % endfor
-
- static void * __attribute__ ((noinline))
- radv_resolve_entrypoint(uint32_t index)
- {
- return radv_layer.entrypoints[index];
- }
-
- /* Hash table stats:
- * size ${hash_size} entries
- * collisions entries:
- % for i in xrange(10):
- * ${i}${'+' if i == 9 else ''} ${collisions[i]}
- % endfor
- */
-
- #define none ${'{:#x}'.format(none)}
- static const uint16_t map[] = {
- % for i in xrange(0, hash_size, 8):
- % for j in xrange(i, i + 8):
- ## This is 6 because the 0x is counted in the length
- % if mapping[j] & 0xffff == 0xffff:
- none,
+#ifdef ${guard}
+ PFN_vk${name} ${name};
+#else
+ void *${name};
+# endif
% else:
- ${'{:0=#6x}'.format(mapping[j] & 0xffff)},
+ PFN_vk${name} ${name};
% endif
% endfor
- % endfor
- };
+ };
+ };
+};
- void *
- radv_lookup_entrypoint(const char *name)
- {
- static const uint32_t prime_factor = ${prime_factor};
- static const uint32_t prime_step = ${prime_step};
- const struct radv_entrypoint *e;
- uint32_t hash, h, i;
- const char *p;
-
- hash = 0;
- for (p = name; *p; p++)
- hash = hash * prime_factor + *p;
-
- h = hash;
- do {
- i = map[h & ${hash_mask}];
- if (i == none)
- return NULL;
- e = &entrypoints[i];
- h += prime_step;
- } while (e->hash != hash);
+% for type_, name, args, num, h, guard in entrypoints:
+ % if guard is not None:
+#ifdef ${guard}
+ % endif
+ ${type_} radv_${name}(${args});
+ % if guard is not None:
+#endif // ${guard}
+ % endif
+% endfor
+""", output_encoding='utf-8')
+
+TEMPLATE_C = Template(u"""\
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/* This file generated from ${filename}, don't edit directly. */
+
+#include "radv_private.h"
+
+struct radv_entrypoint {
+ uint32_t name;
+ uint32_t hash;
+};
+
+/* We use a big string constant to avoid lots of relocations from the entry
+ * point table to lots of little strings. The entries in the entry point table
+ * store the index into this big string.
+ */
+
+static const char strings[] =
+% for _, name, _, _, _, _ in entrypoints:
+ "vk${name}\\0"
+% endfor
+;
+
+static const struct radv_entrypoint entrypoints[] = {
+% for _, name, _, num, h, _ in entrypoints:
+ [${num}] = { ${offsets[num]}, ${'{:0=#8x}'.format(h)} }, /* vk${name} */
+% endfor
+};
+
+/* Weak aliases for all potential implementations. These will resolve to
+ * NULL if they're not defined, which lets the resolve_entrypoint() function
+ * pick the correct entry point.
+ */
+
+% for layer in ['radv']:
+ % for type_, name, args, _, _, guard in entrypoints:
+ % if guard is not None:
+#ifdef ${guard}
+ % endif
+ ${type_} ${layer}_${name}(${args}) __attribute__ ((weak));
+ % if guard is not None:
+#endif // ${guard}
+ % endif
+ % endfor
+
+ const struct radv_dispatch_table ${layer}_layer = {
+ % for _, name, args, _, _, guard in entrypoints:
+ % if guard is not None:
+#ifdef ${guard}
+ % endif
+ .${name} = ${layer}_${name},
+ % if guard is not None:
+#endif // ${guard}
+ % endif
+ % endfor
+ };
+% endfor
+
+static void * __attribute__ ((noinline))
+radv_resolve_entrypoint(uint32_t index)
+{
+ return radv_layer.entrypoints[index];
+}
+
+/* Hash table stats:
+ * size ${hash_size} entries
+ * collisions entries:
+% for i in xrange(10):
+ * ${i}${'+' if i == 9 else ''} ${collisions[i]}
+% endfor
+ */
+
+#define none ${'{:#x}'.format(none)}
+static const uint16_t map[] = {
+% for i in xrange(0, hash_size, 8):
+ % for j in xrange(i, i + 8):
+ ## This is 6 because the 0x is counted in the length
+ % if mapping[j] & 0xffff == 0xffff:
+ none,
+ % else:
+ ${'{:0=#6x}'.format(mapping[j] & 0xffff)},
+ % endif
+ % endfor
+% endfor
+};
+
+void *
+radv_lookup_entrypoint(const char *name)
+{
+ static const uint32_t prime_factor = ${prime_factor};
+ static const uint32_t prime_step = ${prime_step};
+ const struct radv_entrypoint *e;
+ uint32_t hash, h, i;
+ const char *p;
+
+ hash = 0;
+ for (p = name; *p; p++)
+ hash = hash * prime_factor + *p;
+
+ h = hash;
+ do {
+ i = map[h & ${hash_mask}];
+ if (i == none)
+ return NULL;
+ e = &entrypoints[i];
+ h += prime_step;
+ } while (e->hash != hash);
- if (strcmp(name, strings + e->name) != 0)
- return NULL;
+ if (strcmp(name, strings + e->name) != 0)
+ return NULL;
- return radv_resolve_entrypoint(i);
- }"""), output_encoding='utf-8')
+ return radv_resolve_entrypoint(i);
+}""", output_encoding='utf-8')
NONE = 0xffff
HASH_SIZE = 256
@@ -247,28 +219,29 @@
lambda h, c: (h * PRIME_FACTOR + ord(c)) & U32_MASK, name, 0)
-def get_entrypoints(doc, entrypoints_to_defines):
+def get_entrypoints(doc, entrypoints_to_defines, start_index):
"""Extract the entry points from the registry."""
entrypoints = []
enabled_commands = set()
for feature in doc.findall('./feature'):
assert feature.attrib['api'] == 'vulkan'
- if float(feature.attrib['number']) > MAX_API_VERSION:
+ if VkVersion(feature.attrib['number']) > MAX_API_VERSION:
continue
for command in feature.findall('./require/command'):
enabled_commands.add(command.attrib['name'])
+ supported = set(ext.name for ext in EXTENSIONS)
for extension in doc.findall('.extensions/extension'):
- if extension.attrib['name'] not in SUPPORTED_EXTENSIONS:
+ if extension.attrib['name'] not in supported:
continue
assert extension.attrib['supported'] == 'vulkan'
for command in extension.findall('./require/command'):
enabled_commands.add(command.attrib['name'])
- index = 0
+ index = start_index
for command in doc.findall('./commands/command'):
type = command.find('./proto/type').text
fullname = command.find('./proto/name').text
@@ -337,12 +310,22 @@
parser = argparse.ArgumentParser()
parser.add_argument('--outdir', help='Where to write the files.',
required=True)
- parser.add_argument('--xml', help='Vulkan API XML file.', required=True)
+ parser.add_argument('--xml',
+ help='Vulkan API XML file.',
+ required=True,
+ action='append',
+ dest='xml_files')
args = parser.parse_args()
- doc = et.parse(args.xml)
- entrypoints = get_entrypoints(doc, get_entrypoints_defines(doc))
+ entrypoints = []
+
+ for filename in args.xml_files:
+ doc = et.parse(filename)
+ entrypoints += get_entrypoints(doc, get_entrypoints_defines(doc),
+ start_index=len(entrypoints))
+ # For outputting entrypoints.h we generate a radv_EntryPoint() prototype
+ # per entry point.
with open(os.path.join(args.outdir, 'radv_entrypoints.h'), 'wb') as f:
f.write(TEMPLATE_H.render(entrypoints=entrypoints,
filename=os.path.basename(__file__)))
diff -Nru mesa-17.2.4/src/amd/vulkan/radv_entrypoints.h mesa-17.3.3/src/amd/vulkan/radv_entrypoints.h
--- mesa-17.2.4/src/amd/vulkan/radv_entrypoints.h 2017-10-30 14:50:58.000000000 +0000
+++ mesa-17.3.3/src/amd/vulkan/radv_entrypoints.h 2018-01-18 21:31:11.000000000 +0000
@@ -2,7 +2,7 @@
struct radv_dispatch_table {
union {
- void *entrypoints[177];
+ void *entrypoints[179];
struct {
PFN_vkCreateInstance CreateInstance;
PFN_vkDestroyInstance DestroyInstance;
@@ -198,6 +198,8 @@
PFN_vkGetPhysicalDeviceExternalSemaphorePropertiesKHR GetPhysicalDeviceExternalSemaphorePropertiesKHR;
PFN_vkGetSemaphoreFdKHR GetSemaphoreFdKHR;
PFN_vkImportSemaphoreFdKHR ImportSemaphoreFdKHR;
+ PFN_vkBindBufferMemory2KHR BindBufferMemory2KHR;
+ PFN_vkBindImageMemory2KHR BindImageMemory2KHR;
PFN_vkCreateDescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplateKHR;
PFN_vkDestroyDescriptorUpdateTemplateKHR DestroyDescriptorUpdateTemplateKHR;
PFN_vkUpdateDescriptorSetWithTemplateKHR UpdateDescriptorSetWithTemplateKHR;
@@ -391,6 +393,8 @@
void radv_GetPhysicalDeviceExternalSemaphorePropertiesKHR(VkPhysicalDevice physicalDevice, const VkPhysicalDeviceExternalSemaphoreInfoKHR* pExternalSemaphoreInfo, VkExternalSemaphorePropertiesKHR* pExternalSemaphoreProperties);
VkResult radv_GetSemaphoreFdKHR(VkDevice device, const VkSemaphoreGetFdInfoKHR* pGetFdInfo, int* pFd);
VkResult radv_ImportSemaphoreFdKHR(VkDevice device, const VkImportSemaphoreFdInfoKHR* pImportSemaphoreFdInfo);
+ VkResult radv_BindBufferMemory2KHR(VkDevice device, uint32_t bindInfoCount, const VkBindBufferMemoryInfoKHR* pBindInfos);
+ VkResult radv_BindImageMemory2KHR(VkDevice device, uint32_t bindInfoCount, const VkBindImageMemoryInfoKHR* pBindInfos);
VkResult radv_CreateDescriptorUpdateTemplateKHR(VkDevice device, const VkDescriptorUpdateTemplateCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDescriptorUpdateTemplateKHR* pDescriptorUpdateTemplate);
void radv_DestroyDescriptorUpdateTemplateKHR(VkDevice device, VkDescriptorUpdateTemplateKHR descriptorUpdateTemplate, const VkAllocationCallbacks* pAllocator);
void radv_UpdateDescriptorSetWithTemplateKHR(VkDevice device, VkDescriptorSet descriptorSet, VkDescriptorUpdateTemplateKHR descriptorUpdateTemplate, const void* pData);
diff -Nru mesa-17.2.4/src/amd/vulkan/radv_extensions.c mesa-17.3.3/src/amd/vulkan/radv_extensions.c
--- mesa-17.2.4/src/amd/vulkan/radv_extensions.c 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/amd/vulkan/radv_extensions.c 2018-01-18 21:31:11.000000000 +0000
@@ -0,0 +1,407 @@
+/*
+ * Copyright 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "radv_private.h"
+
+#include "vk_util.h"
+
+/* Convert the VK_USE_PLATFORM_* defines to booleans.
+ *
+ * Each macro ends up defined as `true` if the build defined it and `false`
+ * otherwise, so it can appear in ordinary C expressions (RADV_HAS_SURFACE
+ * below, and the `if (...)`/`return` sites later in this file) instead of
+ * only in #ifdef tests.
+ */
+#ifdef VK_USE_PLATFORM_ANDROID_KHR
+# undef VK_USE_PLATFORM_ANDROID_KHR
+# define VK_USE_PLATFORM_ANDROID_KHR true
+#else
+# define VK_USE_PLATFORM_ANDROID_KHR false
+#endif
+#ifdef VK_USE_PLATFORM_WAYLAND_KHR
+# undef VK_USE_PLATFORM_WAYLAND_KHR
+# define VK_USE_PLATFORM_WAYLAND_KHR true
+#else
+# define VK_USE_PLATFORM_WAYLAND_KHR false
+#endif
+#ifdef VK_USE_PLATFORM_XCB_KHR
+# undef VK_USE_PLATFORM_XCB_KHR
+# define VK_USE_PLATFORM_XCB_KHR true
+#else
+# define VK_USE_PLATFORM_XCB_KHR false
+#endif
+#ifdef VK_USE_PLATFORM_XLIB_KHR
+# undef VK_USE_PLATFORM_XLIB_KHR
+# define VK_USE_PLATFORM_XLIB_KHR true
+#else
+# define VK_USE_PLATFORM_XLIB_KHR false
+#endif
+
+/* And ANDROID too.  Neither ANDROID nor VK_USE_PLATFORM_ANDROID_KHR is
+ * referenced again in this file; RADV_HAS_SURFACE below covers only the
+ * Wayland, XCB and Xlib platforms. */
+#ifdef ANDROID
+# undef ANDROID
+# define ANDROID true
+#else
+# define ANDROID false
+#endif
+
+#define RADV_HAS_SURFACE (VK_USE_PLATFORM_WAYLAND_KHR || VK_USE_PLATFORM_XCB_KHR || VK_USE_PLATFORM_XLIB_KHR)
+
+/* Report whether the named instance extension is exposed by this build.
+ * Names that are not in the table are reported as unsupported. */
+bool
+radv_instance_extension_supported(const char *name)
+{
+	static const struct {
+		const char *ext_name;
+		bool exposed;
+	} known[] = {
+		{ "VK_KHR_external_memory_capabilities", true },
+		{ "VK_KHR_external_semaphore_capabilities", true },
+		{ "VK_KHR_get_physical_device_properties2", true },
+		{ "VK_KHR_surface", RADV_HAS_SURFACE },
+		{ "VK_KHR_wayland_surface", VK_USE_PLATFORM_WAYLAND_KHR },
+		{ "VK_KHR_xcb_surface", VK_USE_PLATFORM_XCB_KHR },
+		{ "VK_KHR_xlib_surface", VK_USE_PLATFORM_XLIB_KHR },
+	};
+	unsigned idx;
+
+	for (idx = 0; idx < sizeof(known) / sizeof(known[0]); idx++) {
+		if (!strcmp(name, known[idx].ext_name))
+			return known[idx].exposed;
+	}
+
+	return false;
+}
+
+/* Write the instance extensions exposed by this build through the
+ * pProperties/pPropertyCount out-array and return the out-array status.
+ * pLayerName is not consulted. */
+VkResult radv_EnumerateInstanceExtensionProperties(
+	const char* pLayerName,
+	uint32_t* pPropertyCount,
+	VkExtensionProperties* pProperties)
+{
+	/* Candidates in the order they are reported. */
+	static const struct {
+		bool exposed;
+		VkExtensionProperties props;
+	} candidates[] = {
+		{ true,
+		  { .extensionName = "VK_KHR_external_memory_capabilities", .specVersion = 1 } },
+		{ true,
+		  { .extensionName = "VK_KHR_external_semaphore_capabilities", .specVersion = 1 } },
+		{ true,
+		  { .extensionName = "VK_KHR_get_physical_device_properties2", .specVersion = 1 } },
+		{ RADV_HAS_SURFACE,
+		  { .extensionName = "VK_KHR_surface", .specVersion = 25 } },
+		{ VK_USE_PLATFORM_WAYLAND_KHR,
+		  { .extensionName = "VK_KHR_wayland_surface", .specVersion = 6 } },
+		{ VK_USE_PLATFORM_XCB_KHR,
+		  { .extensionName = "VK_KHR_xcb_surface", .specVersion = 6 } },
+		{ VK_USE_PLATFORM_XLIB_KHR,
+		  { .extensionName = "VK_KHR_xlib_surface", .specVersion = 6 } },
+	};
+	VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount);
+	unsigned idx;
+
+	for (idx = 0; idx < sizeof(candidates) / sizeof(candidates[0]); idx++) {
+		if (!candidates[idx].exposed)
+			continue;
+
+		vk_outarray_append(&out, prop) {
+			*prop = candidates[idx].props;
+		}
+	}
+
+	return vk_outarray_status(&out);
+}
+
+/* Return the Vulkan API version reported for a physical device.  The value
+ * is a fixed VK_MAKE_VERSION(1, 0, 57); `dev` is not consulted. */
+uint32_t
+radv_physical_device_api_version(struct radv_physical_device *dev)
+{
+	const uint32_t api_version = VK_MAKE_VERSION(1, 0, 57);
+
+	return api_version;
+}
+
+/* Report whether the named device extension is exposed for `device`.
+ * Unknown names are reported as unsupported. */
+bool
+radv_physical_device_extension_supported(struct radv_physical_device *device,
+                                         const char *name)
+{
+	/* Extensions that are exposed regardless of the device. */
+	static const char *const unconditional[] = {
+		"VK_KHR_bind_memory2",
+		"VK_KHR_dedicated_allocation",
+		"VK_KHR_descriptor_update_template",
+		"VK_KHR_external_memory",
+		"VK_KHR_external_memory_fd",
+		"VK_KHR_get_memory_requirements2",
+		"VK_KHR_image_format_list",
+		"VK_KHR_incremental_present",
+		"VK_KHR_maintenance1",
+		"VK_KHR_maintenance2",
+		"VK_KHR_push_descriptor",
+		"VK_KHR_relaxed_block_layout",
+		"VK_KHR_sampler_mirror_clamp_to_edge",
+		"VK_KHR_shader_draw_parameters",
+		"VK_KHR_storage_buffer_storage_class",
+		"VK_KHR_variable_pointers",
+		"VK_KHX_multiview",
+		"VK_AMD_draw_indirect_count",
+	};
+	unsigned idx;
+
+	for (idx = 0; idx < sizeof(unconditional) / sizeof(unconditional[0]); idx++) {
+		if (!strcmp(name, unconditional[idx]))
+			return true;
+	}
+
+	/* The remaining extensions depend on the build or on the device; the
+	 * device is only inspected once one of these names has matched. */
+	if (!strcmp(name, "VK_KHR_swapchain"))
+		return RADV_HAS_SURFACE;
+
+	if (!strcmp(name, "VK_KHR_external_semaphore") ||
+	    !strcmp(name, "VK_KHR_external_semaphore_fd"))
+		return device->rad_info.has_syncobj;
+
+	if (!strcmp(name, "VK_EXT_global_priority"))
+		return device->rad_info.has_ctx_priority;
+
+	if (!strcmp(name, "VK_AMD_rasterization_order"))
+		return device->rad_info.chip_class >= VI && device->rad_info.max_se >= 2;
+
+	return false;
+}
+
+/* Write the device extensions exposed for `physicalDevice` through the
+ * pProperties/pPropertyCount out-array and return the out-array status.
+ * pLayerName is not consulted. */
+VkResult radv_EnumerateDeviceExtensionProperties(
+	VkPhysicalDevice physicalDevice,
+	const char* pLayerName,
+	uint32_t* pPropertyCount,
+	VkExtensionProperties* pProperties)
+{
+	/* Condition under which a candidate extension is reported. */
+	enum ext_gate {
+		GATE_ALWAYS,
+		GATE_SURFACE,
+		GATE_SYNCOBJ,
+		GATE_CTX_PRIORITY,
+		GATE_RASTER_ORDER,
+	};
+	/* Candidates in the order they are reported. */
+	static const struct {
+		enum ext_gate gate;
+		VkExtensionProperties props;
+	} candidates[] = {
+		{ GATE_ALWAYS,
+		  { .extensionName = "VK_KHR_bind_memory2", .specVersion = 1 } },
+		{ GATE_ALWAYS,
+		  { .extensionName = "VK_KHR_dedicated_allocation", .specVersion = 1 } },
+		{ GATE_ALWAYS,
+		  { .extensionName = "VK_KHR_descriptor_update_template", .specVersion = 1 } },
+		{ GATE_ALWAYS,
+		  { .extensionName = "VK_KHR_external_memory", .specVersion = 1 } },
+		{ GATE_ALWAYS,
+		  { .extensionName = "VK_KHR_external_memory_fd", .specVersion = 1 } },
+		{ GATE_SYNCOBJ,
+		  { .extensionName = "VK_KHR_external_semaphore", .specVersion = 1 } },
+		{ GATE_SYNCOBJ,
+		  { .extensionName = "VK_KHR_external_semaphore_fd", .specVersion = 1 } },
+		{ GATE_ALWAYS,
+		  { .extensionName = "VK_KHR_get_memory_requirements2", .specVersion = 1 } },
+		{ GATE_ALWAYS,
+		  { .extensionName = "VK_KHR_image_format_list", .specVersion = 1 } },
+		{ GATE_ALWAYS,
+		  { .extensionName = "VK_KHR_incremental_present", .specVersion = 1 } },
+		{ GATE_ALWAYS,
+		  { .extensionName = "VK_KHR_maintenance1", .specVersion = 1 } },
+		{ GATE_ALWAYS,
+		  { .extensionName = "VK_KHR_maintenance2", .specVersion = 1 } },
+		{ GATE_ALWAYS,
+		  { .extensionName = "VK_KHR_push_descriptor", .specVersion = 1 } },
+		{ GATE_ALWAYS,
+		  { .extensionName = "VK_KHR_relaxed_block_layout", .specVersion = 1 } },
+		{ GATE_ALWAYS,
+		  { .extensionName = "VK_KHR_sampler_mirror_clamp_to_edge", .specVersion = 1 } },
+		{ GATE_ALWAYS,
+		  { .extensionName = "VK_KHR_shader_draw_parameters", .specVersion = 1 } },
+		{ GATE_ALWAYS,
+		  { .extensionName = "VK_KHR_storage_buffer_storage_class", .specVersion = 1 } },
+		{ GATE_SURFACE,
+		  { .extensionName = "VK_KHR_swapchain", .specVersion = 68 } },
+		{ GATE_ALWAYS,
+		  { .extensionName = "VK_KHR_variable_pointers", .specVersion = 1 } },
+		{ GATE_ALWAYS,
+		  { .extensionName = "VK_KHX_multiview", .specVersion = 1 } },
+		{ GATE_CTX_PRIORITY,
+		  { .extensionName = "VK_EXT_global_priority", .specVersion = 1 } },
+		{ GATE_ALWAYS,
+		  { .extensionName = "VK_AMD_draw_indirect_count", .specVersion = 1 } },
+		{ GATE_RASTER_ORDER,
+		  { .extensionName = "VK_AMD_rasterization_order", .specVersion = 1 } },
+	};
+	RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
+	VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount);
+	/* Evaluate each gate once for this device. */
+	const bool gate_open[] = {
+		[GATE_ALWAYS] = true,
+		[GATE_SURFACE] = RADV_HAS_SURFACE,
+		[GATE_SYNCOBJ] = device->rad_info.has_syncobj,
+		[GATE_CTX_PRIORITY] = device->rad_info.has_ctx_priority,
+		[GATE_RASTER_ORDER] = device->rad_info.chip_class >= VI &&
+		                      device->rad_info.max_se >= 2,
+	};
+	unsigned idx;
+
+	for (idx = 0; idx < sizeof(candidates) / sizeof(candidates[0]); idx++) {
+		if (!gate_open[candidates[idx].gate])
+			continue;
+
+		vk_outarray_append(&out, prop) {
+			*prop = candidates[idx].props;
+		}
+	}
+
+	return vk_outarray_status(&out);
+}
diff -Nru mesa-17.2.4/src/amd/vulkan/radv_extensions.py mesa-17.3.3/src/amd/vulkan/radv_extensions.py
--- mesa-17.2.4/src/amd/vulkan/radv_extensions.py 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/amd/vulkan/radv_extensions.py 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,278 @@
+COPYRIGHT = """\
+/*
+ * Copyright 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+"""
+
+import argparse
+import copy
+import re
+import xml.etree.cElementTree as et
+
+from mako.template import Template
+
+MAX_API_VERSION = '1.0.57'
+
+class Extension:
+ def __init__(self, name, ext_version, enable):
+ self.name = name
+ self.ext_version = int(ext_version)
+ if enable is True:
+ self.enable = 'true';
+ elif enable is False:
+ self.enable = 'false';
+ else:
+ self.enable = enable;
+
+# On Android, we disable all surface and swapchain extensions. Android's Vulkan
+# loader implements VK_KHR_surface and VK_KHR_swapchain, and applications
+# cannot access the driver's implementation. Moreover, if the driver exposes
+# those extension strings, then tests dEQP-VK.api.info.instance.extensions
+# and dEQP-VK.api.info.device fail due to the duplicated strings.
+#
+# Each entry is Extension(name, spec version, enable), where enable is either
+# a Python bool or a string holding a C expression that the template pastes
+# verbatim into the generated code.
+EXTENSIONS = [
+ Extension('VK_KHR_bind_memory2', 1, True),
+ Extension('VK_KHR_dedicated_allocation', 1, True),
+ Extension('VK_KHR_descriptor_update_template', 1, True),
+ Extension('VK_KHR_external_memory', 1, True),
+ Extension('VK_KHR_external_memory_capabilities', 1, True),
+ Extension('VK_KHR_external_memory_fd', 1, True),
+ Extension('VK_KHR_external_semaphore', 1, 'device->rad_info.has_syncobj'),
+ Extension('VK_KHR_external_semaphore_capabilities', 1, True),
+ Extension('VK_KHR_external_semaphore_fd', 1, 'device->rad_info.has_syncobj'),
+ Extension('VK_KHR_get_memory_requirements2', 1, True),
+ Extension('VK_KHR_get_physical_device_properties2', 1, True),
+ Extension('VK_KHR_image_format_list', 1, True),
+ Extension('VK_KHR_incremental_present', 1, True),
+ Extension('VK_KHR_maintenance1', 1, True),
+ Extension('VK_KHR_maintenance2', 1, True),
+ Extension('VK_KHR_push_descriptor', 1, True),
+ Extension('VK_KHR_relaxed_block_layout', 1, True),
+ Extension('VK_KHR_sampler_mirror_clamp_to_edge', 1, True),
+ Extension('VK_KHR_shader_draw_parameters', 1, True),
+ Extension('VK_KHR_storage_buffer_storage_class', 1, True),
+ Extension('VK_KHR_surface', 25, 'RADV_HAS_SURFACE'),
+ Extension('VK_KHR_swapchain', 68, 'RADV_HAS_SURFACE'),
+ Extension('VK_KHR_variable_pointers', 1, True),
+ Extension('VK_KHR_wayland_surface', 6, 'VK_USE_PLATFORM_WAYLAND_KHR'),
+ Extension('VK_KHR_xcb_surface', 6, 'VK_USE_PLATFORM_XCB_KHR'),
+ Extension('VK_KHR_xlib_surface', 6, 'VK_USE_PLATFORM_XLIB_KHR'),
+ Extension('VK_KHX_multiview', 1, True),
+ Extension('VK_EXT_global_priority', 1, 'device->rad_info.has_ctx_priority'),
+ Extension('VK_AMD_draw_indirect_count', 1, True),
+ Extension('VK_AMD_rasterization_order', 1, 'device->rad_info.chip_class >= VI && device->rad_info.max_se >= 2'),
+]
+
+class VkVersion:
+ def __init__(self, string):
+ split = string.split('.')
+ self.major = int(split[0])
+ self.minor = int(split[1])
+ if len(split) > 2:
+ assert len(split) == 3
+ self.patch = int(split[2])
+ else:
+ self.patch = None
+
+ # Sanity check. The range bits are required by the definition of the
+ # VK_MAKE_VERSION macro
+ assert self.major < 1024 and self.minor < 1024
+ assert self.patch is None or self.patch < 4096
+ assert(str(self) == string)
+
+ def __str__(self):
+ ver_list = [str(self.major), str(self.minor)]
+ if self.patch is not None:
+ ver_list.append(str(self.patch))
+ return '.'.join(ver_list)
+
+ def c_vk_version(self):
+ ver_list = [str(self.major), str(self.minor), str(self.patch)]
+ return 'VK_MAKE_VERSION(' + ', '.join(ver_list) + ')'
+
+ def __int_ver(self):
+ # This is just an expansion of VK_VERSION
+ patch = self.patch if self.patch is not None else 0
+ return (self.major << 22) | (self.minor << 12) | patch
+
+ def __cmp__(self, other):
+ # If only one of them has a patch version, "ignore" it by making
+ # other's patch version match self.
+ if (self.patch is None) != (other.patch is None):
+ other = copy.copy(other)
+ other.patch = self.patch
+
+ return self.__int_ver().__cmp__(other.__int_ver())
+
+MAX_API_VERSION = VkVersion(MAX_API_VERSION)
+
+def _init_exts_from_xml(xml):
+ """ Walk the Vulkan XML and fill out extra extension information. """
+
+ xml = et.parse(xml)
+
+ ext_name_map = {}
+ for ext in EXTENSIONS:
+ ext_name_map[ext.name] = ext
+
+ for ext_elem in xml.findall('.extensions/extension'):
+ ext_name = ext_elem.attrib['name']
+ if ext_name not in ext_name_map:
+ continue
+
+ # Workaround for VK_ANDROID_native_buffer. Its element in
+ # vk.xml lists it as supported="disabled" and provides only a stub
+ # definition. Its element in Mesa's custom
+ # vk_android_native_buffer.xml, though, lists it as
+ # supported='android-vendor' and fully defines the extension. We want
+ # to skip the element in vk.xml.
+ if ext_elem.attrib['supported'] == 'disabled':
+ assert ext_name == 'VK_ANDROID_native_buffer'
+ continue
+
+ ext = ext_name_map[ext_name]
+ ext.type = ext_elem.attrib['type']
+
+_TEMPLATE = Template(COPYRIGHT + """
+#include "radv_private.h"
+
+#include "vk_util.h"
+
+/* Convert the VK_USE_PLATFORM_* defines to booleans */
+%for platform in ['ANDROID', 'WAYLAND', 'XCB', 'XLIB']:
+#ifdef VK_USE_PLATFORM_${platform}_KHR
+# undef VK_USE_PLATFORM_${platform}_KHR
+# define VK_USE_PLATFORM_${platform}_KHR true
+#else
+# define VK_USE_PLATFORM_${platform}_KHR false
+#endif
+%endfor
+
+/* And ANDROID too */
+#ifdef ANDROID
+# undef ANDROID
+# define ANDROID true
+#else
+# define ANDROID false
+#endif
+
+#define RADV_HAS_SURFACE (VK_USE_PLATFORM_WAYLAND_KHR || \\
+ VK_USE_PLATFORM_XCB_KHR || \\
+ VK_USE_PLATFORM_XLIB_KHR)
+
+bool
+radv_instance_extension_supported(const char *name)
+{
+%for ext in instance_extensions:
+ if (strcmp(name, "${ext.name}") == 0)
+ return ${ext.enable};
+%endfor
+ return false;
+}
+
+VkResult radv_EnumerateInstanceExtensionProperties(
+ const char* pLayerName,
+ uint32_t* pPropertyCount,
+ VkExtensionProperties* pProperties)
+{
+ VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount);
+
+%for ext in instance_extensions:
+ if (${ext.enable}) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "${ext.name}",
+ .specVersion = ${ext.ext_version},
+ };
+ }
+ }
+%endfor
+
+ return vk_outarray_status(&out);
+}
+
+uint32_t
+radv_physical_device_api_version(struct radv_physical_device *dev)
+{
+ return ${MAX_API_VERSION.c_vk_version()};
+}
+
+bool
+radv_physical_device_extension_supported(struct radv_physical_device *device,
+ const char *name)
+{
+%for ext in device_extensions:
+ if (strcmp(name, "${ext.name}") == 0)
+ return ${ext.enable};
+%endfor
+ return false;
+}
+
+VkResult radv_EnumerateDeviceExtensionProperties(
+ VkPhysicalDevice physicalDevice,
+ const char* pLayerName,
+ uint32_t* pPropertyCount,
+ VkExtensionProperties* pProperties)
+{
+ RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
+ VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount);
+ (void)device;
+
+%for ext in device_extensions:
+ if (${ext.enable}) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "${ext.name}",
+ .specVersion = ${ext.ext_version},
+ };
+ }
+ }
+%endfor
+
+ return vk_outarray_status(&out);
+}
+""")
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument('--out', help='Output C file.', required=True)
+ parser.add_argument('--xml',
+ help='Vulkan API XML file.',
+ required=True,
+ action='append',
+ dest='xml_files')
+ args = parser.parse_args()
+
+ for filename in args.xml_files:
+ _init_exts_from_xml(filename)
+
+ for ext in EXTENSIONS:
+ assert ext.type == 'instance' or ext.type == 'device'
+
+ template_env = {
+ 'MAX_API_VERSION': MAX_API_VERSION,
+ 'instance_extensions': [e for e in EXTENSIONS if e.type == 'instance'],
+ 'device_extensions': [e for e in EXTENSIONS if e.type == 'device'],
+ }
+
+ with open(args.out, 'w') as f:
+ f.write(_TEMPLATE.render(**template_env))
diff -Nru mesa-17.2.4/src/amd/vulkan/radv_formats.c mesa-17.3.3/src/amd/vulkan/radv_formats.c
--- mesa-17.2.4/src/amd/vulkan/radv_formats.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/vulkan/radv_formats.c 2018-01-18 21:30:28.000000000 +0000
@@ -26,7 +26,6 @@
#include "vk_format.h"
#include "sid.h"
-#include "r600d_common.h"
#include "vk_util.h"
@@ -767,7 +766,7 @@
#define HAS_SWIZZLE(chan,swz) (desc->swizzle[chan] == VK_SWIZZLE_##swz)
if (format == VK_FORMAT_B10G11R11_UFLOAT_PACK32)
- return V_0280A0_SWAP_STD;
+ return V_028C70_SWAP_STD;
if (desc->layout != VK_FORMAT_LAYOUT_PLAIN)
return ~0U;
@@ -775,45 +774,45 @@
switch (desc->nr_channels) {
case 1:
if (HAS_SWIZZLE(0,X))
- return V_0280A0_SWAP_STD; /* X___ */
+ return V_028C70_SWAP_STD; /* X___ */
else if (HAS_SWIZZLE(3,X))
- return V_0280A0_SWAP_ALT_REV; /* ___X */
+ return V_028C70_SWAP_ALT_REV; /* ___X */
break;
case 2:
if ((HAS_SWIZZLE(0,X) && HAS_SWIZZLE(1,Y)) ||
(HAS_SWIZZLE(0,X) && HAS_SWIZZLE(1,NONE)) ||
(HAS_SWIZZLE(0,NONE) && HAS_SWIZZLE(1,Y)))
- return V_0280A0_SWAP_STD; /* XY__ */
+ return V_028C70_SWAP_STD; /* XY__ */
else if ((HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(1,X)) ||
(HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(1,NONE)) ||
(HAS_SWIZZLE(0,NONE) && HAS_SWIZZLE(1,X)))
/* YX__ */
- return (do_endian_swap ? V_0280A0_SWAP_STD : V_0280A0_SWAP_STD_REV);
+ return (do_endian_swap ? V_028C70_SWAP_STD : V_028C70_SWAP_STD_REV);
else if (HAS_SWIZZLE(0,X) && HAS_SWIZZLE(3,Y))
- return V_0280A0_SWAP_ALT; /* X__Y */
+ return V_028C70_SWAP_ALT; /* X__Y */
else if (HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(3,X))
- return V_0280A0_SWAP_ALT_REV; /* Y__X */
+ return V_028C70_SWAP_ALT_REV; /* Y__X */
break;
case 3:
if (HAS_SWIZZLE(0,X))
- return (do_endian_swap ? V_0280A0_SWAP_STD_REV : V_0280A0_SWAP_STD);
+ return (do_endian_swap ? V_028C70_SWAP_STD_REV : V_028C70_SWAP_STD);
else if (HAS_SWIZZLE(0,Z))
- return V_0280A0_SWAP_STD_REV; /* ZYX */
+ return V_028C70_SWAP_STD_REV; /* ZYX */
break;
case 4:
/* check the middle channels, the 1st and 4th channel can be NONE */
if (HAS_SWIZZLE(1,Y) && HAS_SWIZZLE(2,Z)) {
- return V_0280A0_SWAP_STD; /* XYZW */
+ return V_028C70_SWAP_STD; /* XYZW */
} else if (HAS_SWIZZLE(1,Z) && HAS_SWIZZLE(2,Y)) {
- return V_0280A0_SWAP_STD_REV; /* WZYX */
+ return V_028C70_SWAP_STD_REV; /* WZYX */
} else if (HAS_SWIZZLE(1,Y) && HAS_SWIZZLE(2,X)) {
- return V_0280A0_SWAP_ALT; /* ZYXW */
+ return V_028C70_SWAP_ALT; /* ZYXW */
} else if (HAS_SWIZZLE(1,Z) && HAS_SWIZZLE(2,W)) {
/* YZWX */
if (desc->is_array)
- return V_0280A0_SWAP_ALT_REV;
+ return V_028C70_SWAP_ALT_REV;
else
- return (do_endian_swap ? V_0280A0_SWAP_ALT : V_0280A0_SWAP_ALT_REV);
+ return (do_endian_swap ? V_028C70_SWAP_ALT : V_028C70_SWAP_ALT_REV);
}
break;
}
@@ -1323,3 +1322,88 @@
.compatibleHandleTypes = compat_flags,
};
}
+
+/* DCC channel type categories within which formats can be reinterpreted
+ * while keeping the same DCC encoding. The swizzle must also match.
+ *
+ * radv_get_dcc_channel_type() picks the category from the size and type of
+ * a format's first non-void channel. */
+enum dcc_channel_type {
+ dcc_channel_float32, /* 32-bit float */
+ dcc_channel_uint32, /* 32-bit unsigned */
+ dcc_channel_sint32, /* 32-bit, neither float nor unsigned */
+ dcc_channel_float16, /* 16-bit float */
+ dcc_channel_uint16, /* 16-bit unsigned */
+ dcc_channel_sint16, /* 16-bit, neither float nor unsigned */
+ dcc_channel_uint_10_10_10_2, /* 10-bit first channel, any type */
+ dcc_channel_uint8, /* 8-bit unsigned */
+ dcc_channel_sint8, /* 8-bit, not unsigned */
+ dcc_channel_incompatible, /* no non-void channel, or any other size */
+};
+
+/* Classify a format by the size and type of its first non-void channel.
+ * Formats with no such channel, or with a channel size other than
+ * 32/16/10/8 bits, are reported as dcc_channel_incompatible. */
+static enum dcc_channel_type
+radv_get_dcc_channel_type(const struct vk_format_description *desc)
+{
+	int first = 0;
+
+	/* Skip leading void channels. */
+	while (first < desc->nr_channels &&
+	       desc->channel[first].type == VK_FORMAT_TYPE_VOID)
+		first++;
+
+	if (first == desc->nr_channels)
+		return dcc_channel_incompatible;
+
+	const bool is_float = desc->channel[first].type == VK_FORMAT_TYPE_FLOAT;
+	const bool is_unsigned = desc->channel[first].type == VK_FORMAT_TYPE_UNSIGNED;
+	const unsigned bits = desc->channel[first].size;
+
+	if (bits == 32) {
+		if (is_float)
+			return dcc_channel_float32;
+		return is_unsigned ? dcc_channel_uint32 : dcc_channel_sint32;
+	}
+
+	if (bits == 16) {
+		if (is_float)
+			return dcc_channel_float16;
+		return is_unsigned ? dcc_channel_uint16 : dcc_channel_sint16;
+	}
+
+	if (bits == 10)
+		return dcc_channel_uint_10_10_10_2;
+
+	if (bits == 8)
+		return is_unsigned ? dcc_channel_uint8 : dcc_channel_sint8;
+
+	return dcc_channel_incompatible;
+}
+
+/* Tell whether an image of one format may be reinterpreted as the other
+ * while DCC stays enabled.  Identical formats are always compatible;
+ * otherwise the channel counts, the swizzles and the DCC channel type must
+ * all agree. */
+bool radv_dcc_formats_compatible(VkFormat format1,
+                                 VkFormat format2)
+{
+	if (format1 == format2)
+		return true;
+
+	const struct vk_format_description *lhs = vk_format_description(format1);
+	const struct vk_format_description *rhs = vk_format_description(format2);
+
+	if (lhs->nr_channels != rhs->nr_channels)
+		return false;
+
+	/* A channel's swizzles are only compared when both are at or below
+	 * VK_SWIZZLE_W; in that case they have to be equal. */
+	for (int chan = 0; chan < lhs->nr_channels; chan++) {
+		const bool both_in_range = lhs->swizzle[chan] <= VK_SWIZZLE_W &&
+		                           rhs->swizzle[chan] <= VK_SWIZZLE_W;
+
+		if (both_in_range && lhs->swizzle[chan] != rhs->swizzle[chan])
+			return false;
+	}
+
+	/* An incompatible type never matches anything, not even itself. */
+	const enum dcc_channel_type lhs_type = radv_get_dcc_channel_type(lhs);
+	if (lhs_type == dcc_channel_incompatible)
+		return false;
+
+	return lhs_type == radv_get_dcc_channel_type(rhs);
+}
+
diff -Nru mesa-17.2.4/src/amd/vulkan/radv_image.c mesa-17.3.3/src/amd/vulkan/radv_image.c
--- mesa-17.2.4/src/amd/vulkan/radv_image.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/vulkan/radv_image.c 2018-01-18 21:30:28.000000000 +0000
@@ -25,6 +25,7 @@
* IN THE SOFTWARE.
*/
+#include "radv_debug.h"
#include "radv_private.h"
#include "vk_format.h"
#include "vk_util.h"
@@ -108,28 +109,59 @@
if (is_depth) {
surface->flags |= RADEON_SURF_ZBUFFER;
+ if (!(pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) &&
+ !(pCreateInfo->flags & (VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT |
+ VK_IMAGE_CREATE_EXTENDED_USAGE_BIT_KHR)) &&
+ pCreateInfo->tiling != VK_IMAGE_TILING_LINEAR &&
+ pCreateInfo->mipLevels <= 1 &&
+ device->physical_device->rad_info.chip_class >= VI &&
+ ((pCreateInfo->format == VK_FORMAT_D32_SFLOAT ||
+ pCreateInfo->format == VK_FORMAT_D32_SFLOAT_S8_UINT) ||
+ (device->physical_device->rad_info.chip_class >= GFX9 &&
+ pCreateInfo->format == VK_FORMAT_D16_UNORM)))
+ surface->flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
}
if (is_stencil)
surface->flags |= RADEON_SURF_SBUFFER;
- surface->flags |= RADEON_SURF_HAS_TILE_MODE_INDEX;
surface->flags |= RADEON_SURF_OPTIMIZE_FOR_SPACE;
+ bool dcc_compatible_formats = radv_is_colorbuffer_format_supported(pCreateInfo->format, &blendable);
+ if (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
+ const struct VkImageFormatListCreateInfoKHR *format_list =
+ (const struct VkImageFormatListCreateInfoKHR *)
+ vk_find_struct_const(pCreateInfo->pNext,
+ IMAGE_FORMAT_LIST_CREATE_INFO_KHR);
+
+ /* We have to ignore the existence of the list if viewFormatCount = 0 */
+ if (format_list && format_list->viewFormatCount) {
+ /* compatibility is transitive, so we only need to check
+ * one format with everything else. */
+ for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
+ if (!radv_dcc_formats_compatible(pCreateInfo->format,
+ format_list->pViewFormats[i]))
+ dcc_compatible_formats = false;
+ }
+ } else {
+ dcc_compatible_formats = false;
+ }
+ }
+
if ((pCreateInfo->usage & (VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
VK_IMAGE_USAGE_STORAGE_BIT)) ||
- (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) ||
+ (pCreateInfo->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT_KHR) ||
+ !dcc_compatible_formats ||
(pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) ||
pCreateInfo->mipLevels > 1 || pCreateInfo->arrayLayers > 1 ||
device->physical_device->rad_info.chip_class < VI ||
- create_info->scanout || (device->debug_flags & RADV_DEBUG_NO_DCC) ||
- !radv_is_colorbuffer_format_supported(pCreateInfo->format, &blendable))
+ create_info->scanout || (device->instance->debug_flags & RADV_DEBUG_NO_DCC))
surface->flags |= RADEON_SURF_DISABLE_DCC;
if (create_info->scanout)
surface->flags |= RADEON_SURF_SCANOUT;
return 0;
}
-#define ATI_VENDOR_ID 0x1002
+
static uint32_t si_get_bo_metadata_word1(struct radv_device *device)
{
return (ATI_VENDOR_ID << 16) | device->physical_device->rad_info.pci_id;
@@ -172,7 +204,7 @@
{
const struct vk_format_description *desc;
unsigned stride;
- uint64_t gpu_address = device->ws->buffer_get_va(buffer->bo);
+ uint64_t gpu_address = radv_buffer_get_va(buffer->bo);
uint64_t va = gpu_address + buffer->offset;
unsigned num_format, data_format;
int first_non_void;
@@ -209,7 +241,7 @@
unsigned block_width, bool is_stencil,
uint32_t *state)
{
- uint64_t gpu_address = image->bo ? device->ws->buffer_get_va(image->bo) + image->offset : 0;
+ uint64_t gpu_address = image->bo ? radv_buffer_get_va(image->bo) + image->offset : 0;
uint64_t va = gpu_address;
enum chip_class chip_class = device->physical_device->rad_info.chip_class;
uint64_t meta_va = 0;
@@ -222,22 +254,27 @@
va += base_level_info->offset;
state[0] = va >> 8;
- if (chip_class < GFX9)
- state[0] |= image->surface.u.legacy.tile_swizzle;
+ if (chip_class >= GFX9 ||
+ base_level_info->mode == RADEON_SURF_MODE_2D)
+ state[0] |= image->surface.tile_swizzle;
state[1] &= C_008F14_BASE_ADDRESS_HI;
state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);
if (chip_class >= VI) {
state[6] &= C_008F28_COMPRESSION_EN;
state[7] = 0;
- if (image->surface.dcc_size && first_level < image->surface.num_dcc_levels) {
+ if (radv_vi_dcc_enabled(image, first_level)) {
meta_va = gpu_address + image->dcc_offset;
if (chip_class <= VI)
meta_va += base_level_info->dcc_offset;
+ } else if(image->tc_compatible_htile && image->surface.htile_size) {
+ meta_va = gpu_address + image->htile_offset;
+ }
+
+ if (meta_va) {
state[6] |= S_008F28_COMPRESSION_EN(1);
state[7] = meta_va >> 8;
- if (chip_class < GFX9)
- state[7] |= image->surface.u.legacy.tile_swizzle;
+ state[7] |= image->surface.tile_swizzle;
}
}
@@ -307,7 +344,7 @@
}
}
-static unsigned gfx9_border_color_swizzle(const unsigned char swizzle[4])
+static unsigned gfx9_border_color_swizzle(const enum vk_swizzle swizzle[4])
{
unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
@@ -412,7 +449,7 @@
state[7] = 0;
if (device->physical_device->rad_info.chip_class >= GFX9) {
- unsigned bc_swizzle = gfx9_border_color_swizzle(desc->swizzle);
+ unsigned bc_swizzle = gfx9_border_color_swizzle(swizzle);
/* Depth is the the last accessible layer on Gfx9.
* The hw doesn't need to know the total number of layers.
@@ -450,7 +487,7 @@
/* Initialize the sampler view for FMASK. */
if (image->fmask.size) {
uint32_t fmask_format, num_format;
- uint64_t gpu_address = device->ws->buffer_get_va(image->bo);
+ uint64_t gpu_address = radv_buffer_get_va(image->bo);
uint64_t va;
va = gpu_address + image->offset + image->fmask.offset;
@@ -489,8 +526,7 @@
}
fmask_state[0] = va >> 8;
- if (device->physical_device->rad_info.chip_class < GFX9)
- fmask_state[0] |= image->surface.u.legacy.tile_swizzle;
+ fmask_state[0] |= image->fmask.tile_swizzle;
fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
S_008F14_DATA_FORMAT_GFX6(fmask_format) |
S_008F14_NUM_FORMAT_GFX6(num_format);
@@ -623,6 +659,9 @@
info.samples = 1;
fmask.flags = image->surface.flags | RADEON_SURF_FMASK;
+ if (!image->shareable)
+ info.surf_index = &device->fmask_mrt_offset_counter;
+
/* Force 2D tiling if it wasn't set. This may occur when creating
* FMASK for MSAA resolve on R6xx. On R6xx, the single-sample
* destination buffer must have an FMASK too. */
@@ -651,8 +690,11 @@
out->tile_mode_index = fmask.u.legacy.tiling_index[0];
out->pitch_in_pixels = fmask.u.legacy.level[0].nblk_x;
out->bank_height = fmask.u.legacy.bankh;
+ out->tile_swizzle = fmask.tile_swizzle;
out->alignment = MAX2(256, fmask.surf_alignment);
out->size = fmask.surf_size;
+
+ assert(!out->tile_swizzle || !image->shareable);
}
static void
@@ -739,8 +781,7 @@
}
static void
-radv_image_alloc_dcc(struct radv_device *device,
- struct radv_image *image)
+radv_image_alloc_dcc(struct radv_image *image)
{
image->dcc_offset = align64(image->size, image->surface.dcc_alignment);
/* + 16 for storing the clear values + dcc pred */
@@ -751,14 +792,8 @@
}
static void
-radv_image_alloc_htile(struct radv_device *device,
- struct radv_image *image)
+radv_image_alloc_htile(struct radv_image *image)
{
- if ((device->debug_flags & RADV_DEBUG_NO_HIZ) || image->info.levels > 1) {
- image->surface.htile_size = 0;
- return;
- }
-
image->htile_offset = align64(image->size, image->surface.htile_alignment);
/* + 8 for storing the clear values */
@@ -767,6 +802,49 @@
image->alignment = align64(image->alignment, image->surface.htile_alignment);
}
+static inline bool
+radv_image_can_enable_dcc_or_cmask(struct radv_image *image)
+{
+ return image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT &&
+ (image->exclusive || image->queue_family_mask == 1);
+}
+
+static inline bool
+radv_image_can_enable_dcc(struct radv_image *image)
+{
+ return radv_image_can_enable_dcc_or_cmask(image) &&
+ image->surface.dcc_size;
+}
+
+static inline bool
+radv_image_can_enable_cmask(struct radv_image *image)
+{
+ if (image->surface.bpe > 8 && image->info.samples == 1) {
+ /* Do not enable CMASK for non-MSAA images (fast color clear)
+ * because 128 bit formats are not supported, but FMASK might
+ * still be used.
+ */
+ return false;
+ }
+
+ return radv_image_can_enable_dcc_or_cmask(image) &&
+ image->info.levels == 1 &&
+ image->info.depth == 1 &&
+ !image->surface.is_linear;
+}
+
+static inline bool
+radv_image_can_enable_fmask(struct radv_image *image)
+{
+ return image->info.samples > 1 && vk_format_is_color(image->vk_format);
+}
+
+static inline bool
+radv_image_can_enable_htile(struct radv_image *image)
+{
+ return image->info.levels == 1 && vk_format_is_depth(image->vk_format);
+}
+
VkResult
radv_image_create(VkDevice _device,
const struct radv_image_create_info *create_info,
@@ -776,7 +854,6 @@
RADV_FROM_HANDLE(radv_device, device, _device);
const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
struct radv_image *image = NULL;
- bool can_cmask_dcc = false;
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);
radv_assert(pCreateInfo->mipLevels > 0);
@@ -817,7 +894,7 @@
image->shareable = vk_find_struct_const(pCreateInfo->pNext,
EXTERNAL_MEMORY_IMAGE_CREATE_INFO_KHR) != NULL;
if (!vk_format_is_depth(pCreateInfo->format) && !create_info->scanout && !image->shareable) {
- image->info.surf_index = p_atomic_inc_return(&device->image_mrt_offset_counter) - 1;
+ image->info.surf_index = &device->image_mrt_offset_counter;
}
radv_init_surface(device, &image->surface, create_info);
@@ -827,26 +904,29 @@
image->size = image->surface.surf_size;
image->alignment = image->surface.surf_alignment;
- if (image->exclusive || image->queue_family_mask == 1)
- can_cmask_dcc = true;
-
- if ((pCreateInfo->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) &&
- image->surface.dcc_size && can_cmask_dcc)
- radv_image_alloc_dcc(device, image);
- else
+ /* Try to enable DCC first. */
+ if (radv_image_can_enable_dcc(image)) {
+ radv_image_alloc_dcc(image);
+ } else {
+ /* When DCC cannot be enabled, try CMASK. */
image->surface.dcc_size = 0;
+ if (radv_image_can_enable_cmask(image)) {
+ radv_image_alloc_cmask(device, image);
+ }
+ }
- if ((pCreateInfo->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) &&
- pCreateInfo->mipLevels == 1 &&
- !image->surface.dcc_size && image->info.depth == 1 && can_cmask_dcc &&
- !image->surface.is_linear)
- radv_image_alloc_cmask(device, image);
-
- if (image->info.samples > 1 && vk_format_is_color(pCreateInfo->format)) {
+ /* Try to enable FMASK for multisampled images. */
+ if (radv_image_can_enable_fmask(image)) {
radv_image_alloc_fmask(device, image);
- } else if (vk_format_is_depth(pCreateInfo->format)) {
-
- radv_image_alloc_htile(device, image);
+ } else {
+ /* Otherwise, try to enable HTILE for depth surfaces. */
+ if (radv_image_can_enable_htile(image) &&
+ !(device->instance->debug_flags & RADV_DEBUG_NO_HIZ)) {
+ radv_image_alloc_htile(image);
+ image->tc_compatible_htile = image->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE;
+ } else {
+ image->surface.htile_size = 0;
+ }
}
if (pCreateInfo->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) {
@@ -986,6 +1066,9 @@
VkImageLayout layout,
unsigned queue_mask)
{
+ if (image->surface.htile_size && image->tc_compatible_htile)
+ return layout != VK_IMAGE_LAYOUT_GENERAL;
+
return image->surface.htile_size &&
(layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) &&
@@ -996,6 +1079,9 @@
VkImageLayout layout,
unsigned queue_mask)
{
+ if (image->surface.htile_size && image->tc_compatible_htile)
+ return layout != VK_IMAGE_LAYOUT_GENERAL;
+
return image->surface.htile_size &&
(layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) &&
@@ -1120,8 +1206,7 @@
void radv_buffer_view_init(struct radv_buffer_view *view,
struct radv_device *device,
- const VkBufferViewCreateInfo* pCreateInfo,
- struct radv_cmd_buffer *cmd_buffer)
+ const VkBufferViewCreateInfo* pCreateInfo)
{
RADV_FROM_HANDLE(radv_buffer, buffer, pCreateInfo->buffer);
@@ -1148,7 +1233,7 @@
if (!view)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
- radv_buffer_view_init(view, device, pCreateInfo, NULL);
+ radv_buffer_view_init(view, device, pCreateInfo);
*pView = radv_buffer_view_to_handle(view);
diff -Nru mesa-17.2.4/src/amd/vulkan/radv_meta_blit2d.c mesa-17.3.3/src/amd/vulkan/radv_meta_blit2d.c
--- mesa-17.2.4/src/amd/vulkan/radv_meta_blit2d.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/vulkan/radv_meta_blit2d.c 2018-01-18 21:30:28.000000000 +0000
@@ -28,24 +28,9 @@
#include "nir/nir_builder.h"
#include "vk_format.h"
-enum blit2d_dst_type {
- /* We can bind this destination as a "normal" render target and render
- * to it just like you would anywhere else.
- */
- BLIT2D_DST_TYPE_NORMAL,
-
- /* The destination has a 3-channel RGB format. Since we can't render to
- * non-power-of-two textures, we have to bind it as a red texture and
- * select the correct component for the given red pixel in the shader.
- */
- BLIT2D_DST_TYPE_RGB,
-
- BLIT2D_NUM_DST_TYPES,
-};
-
-
enum blit2d_src_type {
BLIT2D_SRC_TYPE_IMAGE,
+ BLIT2D_SRC_TYPE_IMAGE_3D,
BLIT2D_SRC_TYPE_BUFFER,
BLIT2D_NUM_SRC_TYPES,
};
@@ -57,6 +42,8 @@
VkImageAspectFlagBits aspects)
{
VkFormat format;
+ VkImageViewType view_type = cmd_buffer->device->physical_device->rad_info.chip_class < GFX9 ? VK_IMAGE_VIEW_TYPE_2D :
+ radv_meta_get_view_type(surf->image);
if (depth_format)
format = depth_format;
@@ -67,7 +54,7 @@
&(VkImageViewCreateInfo) {
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.image = radv_image_to_handle(surf->image),
- .viewType = VK_IMAGE_VIEW_TYPE_2D,
+ .viewType = view_type,
.format = format,
.subresourceRange = {
.aspectMask = aspects,
@@ -98,7 +85,7 @@
.format = format,
.offset = src->offset,
.range = VK_WHOLE_SIZE,
- }, cmd_buffer);
+ });
}
@@ -142,6 +129,12 @@
} else {
create_iview(cmd_buffer, src_img, &tmp->iview, depth_format, aspects);
+ if (src_type == BLIT2D_SRC_TYPE_IMAGE_3D)
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+ device->meta_state.blit2d.p_layouts[src_type],
+ VK_SHADER_STAGE_FRAGMENT_BIT, 16, 4,
+ &src_img->layer);
+
radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
device->meta_state.blit2d.p_layouts[src_type],
0, /* set */
@@ -196,24 +189,14 @@
}
static void
-blit2d_unbind_dst(struct radv_cmd_buffer *cmd_buffer,
- struct blit2d_dst_temps *tmp)
-{
- VkDevice vk_device = radv_device_to_handle(cmd_buffer->device);
- radv_DestroyFramebuffer(vk_device, tmp->fb, &cmd_buffer->pool->alloc);
-}
-
-static void
bind_pipeline(struct radv_cmd_buffer *cmd_buffer,
enum blit2d_src_type src_type, unsigned fs_key)
{
VkPipeline pipeline =
cmd_buffer->device->meta_state.blit2d.pipelines[src_type][fs_key];
- if (cmd_buffer->state.pipeline != radv_pipeline_from_handle(pipeline)) {
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
- }
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
+ VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
}
static void
@@ -223,10 +206,8 @@
VkPipeline pipeline =
cmd_buffer->device->meta_state.blit2d.depth_only_pipeline[src_type];
- if (cmd_buffer->state.pipeline != radv_pipeline_from_handle(pipeline)) {
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
- }
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
+ VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
}
static void
@@ -236,10 +217,8 @@
VkPipeline pipeline =
cmd_buffer->device->meta_state.blit2d.stencil_only_pipeline[src_type];
- if (cmd_buffer->state.pipeline != radv_pipeline_from_handle(pipeline)) {
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
- }
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
+ VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
}
static void
@@ -299,10 +278,11 @@
bind_pipeline(cmd_buffer, src_type, fs_key);
} else if (aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
+ enum radv_blit_ds_layout ds_layout = radv_meta_blit_ds_to_type(dst->current_layout);
radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
&(VkRenderPassBeginInfo) {
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
- .renderPass = device->meta_state.blit2d.depth_only_rp,
+ .renderPass = device->meta_state.blit2d.depth_only_rp[ds_layout],
.framebuffer = dst_temps.fb,
.renderArea = {
.offset = { rects[r].dst_x, rects[r].dst_y, },
@@ -316,10 +296,11 @@
bind_depth_pipeline(cmd_buffer, src_type);
} else if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
+ enum radv_blit_ds_layout ds_layout = radv_meta_blit_ds_to_type(dst->current_layout);
radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
&(VkRenderPassBeginInfo) {
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
- .renderPass = device->meta_state.blit2d.stencil_only_rp,
+ .renderPass = device->meta_state.blit2d.stencil_only_rp[ds_layout],
.framebuffer = dst_temps.fb,
.renderArea = {
.offset = { rects[r].dst_x, rects[r].dst_y, },
@@ -356,7 +337,9 @@
/* At the point where we emit the draw call, all data from the
* descriptor sets, etc. has been used. We are free to delete it.
*/
- blit2d_unbind_dst(cmd_buffer, &dst_temps);
+ radv_DestroyFramebuffer(radv_device_to_handle(device),
+ dst_temps.fb,
+ &cmd_buffer->pool->alloc);
}
}
}
@@ -369,8 +352,10 @@
unsigned num_rects,
struct radv_meta_blit2d_rect *rects)
{
+ bool use_3d = cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9 &&
+ (src_img && src_img->image->type == VK_IMAGE_TYPE_3D);
enum blit2d_src_type src_type = src_buf ? BLIT2D_SRC_TYPE_BUFFER :
- BLIT2D_SRC_TYPE_IMAGE;
+ use_3d ? BLIT2D_SRC_TYPE_IMAGE_3D : BLIT2D_SRC_TYPE_IMAGE;
radv_meta_blit2d_normal_dst(cmd_buffer, src_img, src_buf, dst,
num_rects, rects, src_type);
}
@@ -435,29 +420,46 @@
typedef nir_ssa_def* (*texel_fetch_build_func)(struct nir_builder *,
struct radv_device *,
- nir_ssa_def *);
+ nir_ssa_def *, bool);
static nir_ssa_def *
build_nir_texel_fetch(struct nir_builder *b, struct radv_device *device,
- nir_ssa_def *tex_pos)
+ nir_ssa_def *tex_pos, bool is_3d)
{
+ enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
const struct glsl_type *sampler_type =
- glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, false, GLSL_TYPE_UINT);
+ glsl_sampler_type(dim, false, false, GLSL_TYPE_UINT);
nir_variable *sampler = nir_variable_create(b->shader, nir_var_uniform,
sampler_type, "s_tex");
sampler->data.descriptor_set = 0;
sampler->data.binding = 0;
+ nir_ssa_def *tex_pos_3d = NULL;
+ if (is_3d) {
+ nir_intrinsic_instr *layer = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
+ nir_intrinsic_set_base(layer, 16);
+ nir_intrinsic_set_range(layer, 4);
+ layer->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
+ layer->num_components = 1;
+ nir_ssa_dest_init(&layer->instr, &layer->dest, 1, 32, "layer");
+ nir_builder_instr_insert(b, &layer->instr);
+
+ nir_ssa_def *chans[3];
+ chans[0] = nir_channel(b, tex_pos, 0);
+ chans[1] = nir_channel(b, tex_pos, 1);
+ chans[2] = &layer->dest.ssa;
+ tex_pos_3d = nir_vec(b, chans, 3);
+ }
nir_tex_instr *tex = nir_tex_instr_create(b->shader, 2);
- tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
+ tex->sampler_dim = dim;
tex->op = nir_texop_txf;
tex->src[0].src_type = nir_tex_src_coord;
- tex->src[0].src = nir_src_for_ssa(tex_pos);
+ tex->src[0].src = nir_src_for_ssa(is_3d ? tex_pos_3d : tex_pos);
tex->src[1].src_type = nir_tex_src_lod;
tex->src[1].src = nir_src_for_ssa(nir_imm_int(b, 0));
tex->dest_type = nir_type_uint;
tex->is_array = false;
- tex->coord_components = 2;
+ tex->coord_components = is_3d ? 3 : 2;
tex->texture = nir_deref_var_create(tex, sampler);
tex->sampler = NULL;
@@ -470,7 +472,7 @@
static nir_ssa_def *
build_nir_buffer_fetch(struct nir_builder *b, struct radv_device *device,
- nir_ssa_def *tex_pos)
+ nir_ssa_def *tex_pos, bool is_3d)
{
const struct glsl_type *sampler_type =
glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_UINT);
@@ -518,7 +520,7 @@
static nir_shader *
build_nir_copy_fragment_shader(struct radv_device *device,
- texel_fetch_build_func txf_func, const char* name)
+ texel_fetch_build_func txf_func, const char* name, bool is_3d)
{
const struct glsl_type *vec4 = glsl_vec4_type();
const struct glsl_type *vec2 = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
@@ -539,7 +541,7 @@
unsigned swiz[4] = { 0, 1 };
nir_ssa_def *tex_pos = nir_swizzle(&b, pos_int, swiz, 2, false);
- nir_ssa_def *color = txf_func(&b, device, tex_pos);
+ nir_ssa_def *color = txf_func(&b, device, tex_pos, is_3d);
nir_store_var(&b, color_out, color, 0xf);
return b.shader;
@@ -547,7 +549,7 @@
static nir_shader *
build_nir_copy_fragment_shader_depth(struct radv_device *device,
- texel_fetch_build_func txf_func, const char* name)
+ texel_fetch_build_func txf_func, const char* name, bool is_3d)
{
const struct glsl_type *vec4 = glsl_vec4_type();
const struct glsl_type *vec2 = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
@@ -568,7 +570,7 @@
unsigned swiz[4] = { 0, 1 };
nir_ssa_def *tex_pos = nir_swizzle(&b, pos_int, swiz, 2, false);
- nir_ssa_def *color = txf_func(&b, device, tex_pos);
+ nir_ssa_def *color = txf_func(&b, device, tex_pos, is_3d);
nir_store_var(&b, color_out, color, 0x1);
return b.shader;
@@ -576,7 +578,7 @@
static nir_shader *
build_nir_copy_fragment_shader_stencil(struct radv_device *device,
- texel_fetch_build_func txf_func, const char* name)
+ texel_fetch_build_func txf_func, const char* name, bool is_3d)
{
const struct glsl_type *vec4 = glsl_vec4_type();
const struct glsl_type *vec2 = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
@@ -597,7 +599,7 @@
unsigned swiz[4] = { 0, 1 };
nir_ssa_def *tex_pos = nir_swizzle(&b, pos_int, swiz, 2, false);
- nir_ssa_def *color = txf_func(&b, device, tex_pos);
+ nir_ssa_def *color = txf_func(&b, device, tex_pos, is_3d);
nir_store_var(&b, color_out, color, 0x1);
return b.shader;
@@ -606,48 +608,41 @@
void
radv_device_finish_meta_blit2d_state(struct radv_device *device)
{
+ struct radv_meta_state *state = &device->meta_state;
+
for(unsigned j = 0; j < NUM_META_FS_KEYS; ++j) {
- if (device->meta_state.blit2d.render_passes[j]) {
- radv_DestroyRenderPass(radv_device_to_handle(device),
- device->meta_state.blit2d.render_passes[j],
- &device->meta_state.alloc);
- }
+ radv_DestroyRenderPass(radv_device_to_handle(device),
+ state->blit2d.render_passes[j],
+ &state->alloc);
}
- radv_DestroyRenderPass(radv_device_to_handle(device),
- device->meta_state.blit2d.depth_only_rp,
- &device->meta_state.alloc);
- radv_DestroyRenderPass(radv_device_to_handle(device),
- device->meta_state.blit2d.stencil_only_rp,
- &device->meta_state.alloc);
+ for (enum radv_blit_ds_layout j = RADV_BLIT_DS_LAYOUT_TILE_ENABLE; j < RADV_BLIT_DS_LAYOUT_COUNT; j++) {
+ radv_DestroyRenderPass(radv_device_to_handle(device),
+ state->blit2d.depth_only_rp[j], &state->alloc);
+ radv_DestroyRenderPass(radv_device_to_handle(device),
+ state->blit2d.stencil_only_rp[j], &state->alloc);
+ }
for (unsigned src = 0; src < BLIT2D_NUM_SRC_TYPES; src++) {
- if (device->meta_state.blit2d.p_layouts[src]) {
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- device->meta_state.blit2d.p_layouts[src],
- &device->meta_state.alloc);
- }
-
- if (device->meta_state.blit2d.ds_layouts[src]) {
- radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
- device->meta_state.blit2d.ds_layouts[src],
- &device->meta_state.alloc);
- }
+ radv_DestroyPipelineLayout(radv_device_to_handle(device),
+ state->blit2d.p_layouts[src],
+ &state->alloc);
+ radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
+ state->blit2d.ds_layouts[src],
+ &state->alloc);
for (unsigned j = 0; j < NUM_META_FS_KEYS; ++j) {
- if (device->meta_state.blit2d.pipelines[src][j]) {
- radv_DestroyPipeline(radv_device_to_handle(device),
- device->meta_state.blit2d.pipelines[src][j],
- &device->meta_state.alloc);
- }
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->blit2d.pipelines[src][j],
+ &state->alloc);
}
radv_DestroyPipeline(radv_device_to_handle(device),
- device->meta_state.blit2d.depth_only_pipeline[src],
- &device->meta_state.alloc);
+ state->blit2d.depth_only_pipeline[src],
+ &state->alloc);
radv_DestroyPipeline(radv_device_to_handle(device),
- device->meta_state.blit2d.stencil_only_pipeline[src],
- &device->meta_state.alloc);
+ state->blit2d.stencil_only_pipeline[src],
+ &state->alloc);
}
}
@@ -666,6 +661,10 @@
src_func = build_nir_texel_fetch;
name = "meta_blit2d_image_fs";
break;
+ case BLIT2D_SRC_TYPE_IMAGE_3D:
+ src_func = build_nir_texel_fetch;
+ name = "meta_blit3d_image_fs";
+ break;
case BLIT2D_SRC_TYPE_BUFFER:
src_func = build_nir_buffer_fetch;
name = "meta_blit2d_buffer_fs";
@@ -679,7 +678,7 @@
struct radv_shader_module fs = { .nir = NULL };
- fs.nir = build_nir_copy_fragment_shader(device, src_func, name);
+ fs.nir = build_nir_copy_fragment_shader(device, src_func, name, src_type == BLIT2D_SRC_TYPE_IMAGE_3D);
vi_create_info = &normal_vi_create_info;
struct radv_shader_module vs = {
@@ -825,6 +824,10 @@
src_func = build_nir_texel_fetch;
name = "meta_blit2d_depth_image_fs";
break;
+ case BLIT2D_SRC_TYPE_IMAGE_3D:
+ src_func = build_nir_texel_fetch;
+ name = "meta_blit3d_depth_image_fs";
+ break;
case BLIT2D_SRC_TYPE_BUFFER:
src_func = build_nir_buffer_fetch;
name = "meta_blit2d_depth_buffer_fs";
@@ -837,7 +840,7 @@
const VkPipelineVertexInputStateCreateInfo *vi_create_info;
struct radv_shader_module fs = { .nir = NULL };
- fs.nir = build_nir_copy_fragment_shader_depth(device, src_func, name);
+ fs.nir = build_nir_copy_fragment_shader_depth(device, src_func, name, src_type == BLIT2D_SRC_TYPE_IMAGE_3D);
vi_create_info = &normal_vi_create_info;
struct radv_shader_module vs = {
@@ -860,34 +863,37 @@
},
};
- if (!device->meta_state.blit2d.depth_only_rp) {
- result = radv_CreateRenderPass(radv_device_to_handle(device),
- &(VkRenderPassCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+ for (enum radv_blit_ds_layout ds_layout = RADV_BLIT_DS_LAYOUT_TILE_ENABLE; ds_layout < RADV_BLIT_DS_LAYOUT_COUNT; ds_layout++) {
+ if (!device->meta_state.blit2d.depth_only_rp[ds_layout]) {
+ VkImageLayout layout = radv_meta_blit_ds_to_layout(ds_layout);
+ result = radv_CreateRenderPass(radv_device_to_handle(device),
+ &(VkRenderPassCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
.attachmentCount = 1,
.pAttachments = &(VkAttachmentDescription) {
- .format = VK_FORMAT_D32_SFLOAT,
- .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
- .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
- .initialLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
- .finalLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
- },
- .subpassCount = 1,
- .pSubpasses = &(VkSubpassDescription) {
- .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
- .inputAttachmentCount = 0,
- .colorAttachmentCount = 0,
- .pColorAttachments = NULL,
- .pResolveAttachments = NULL,
- .pDepthStencilAttachment = &(VkAttachmentReference) {
- .attachment = 0,
- .layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
- },
- .preserveAttachmentCount = 1,
- .pPreserveAttachments = (uint32_t[]) { 0 },
- },
- .dependencyCount = 0,
- }, &device->meta_state.alloc, &device->meta_state.blit2d.depth_only_rp);
+ .format = VK_FORMAT_D32_SFLOAT,
+ .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .initialLayout = layout,
+ .finalLayout = layout,
+ },
+ .subpassCount = 1,
+ .pSubpasses = &(VkSubpassDescription) {
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .inputAttachmentCount = 0,
+ .colorAttachmentCount = 0,
+ .pColorAttachments = NULL,
+ .pResolveAttachments = NULL,
+ .pDepthStencilAttachment = &(VkAttachmentReference) {
+ .attachment = 0,
+ .layout = layout,
+ },
+ .preserveAttachmentCount = 1,
+ .pPreserveAttachments = (uint32_t[]) { 0 },
+ },
+ .dependencyCount = 0,
+ }, &device->meta_state.alloc, &device->meta_state.blit2d.depth_only_rp[ds_layout]);
+ }
}
const VkGraphicsPipelineCreateInfo vk_pipeline_info = {
@@ -946,7 +952,7 @@
},
.flags = 0,
.layout = device->meta_state.blit2d.p_layouts[src_type],
- .renderPass = device->meta_state.blit2d.depth_only_rp,
+ .renderPass = device->meta_state.blit2d.depth_only_rp[0],
.subpass = 0,
};
@@ -980,6 +986,10 @@
src_func = build_nir_texel_fetch;
name = "meta_blit2d_stencil_image_fs";
break;
+ case BLIT2D_SRC_TYPE_IMAGE_3D:
+ src_func = build_nir_texel_fetch;
+ name = "meta_blit3d_stencil_image_fs";
+ break;
case BLIT2D_SRC_TYPE_BUFFER:
src_func = build_nir_buffer_fetch;
name = "meta_blit2d_stencil_buffer_fs";
@@ -992,7 +1002,7 @@
const VkPipelineVertexInputStateCreateInfo *vi_create_info;
struct radv_shader_module fs = { .nir = NULL };
- fs.nir = build_nir_copy_fragment_shader_stencil(device, src_func, name);
+ fs.nir = build_nir_copy_fragment_shader_stencil(device, src_func, name, src_type == BLIT2D_SRC_TYPE_IMAGE_3D);
vi_create_info = &normal_vi_create_info;
struct radv_shader_module vs = {
@@ -1015,34 +1025,37 @@
},
};
- if (!device->meta_state.blit2d.stencil_only_rp) {
- result = radv_CreateRenderPass(radv_device_to_handle(device),
- &(VkRenderPassCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+ for (enum radv_blit_ds_layout ds_layout = RADV_BLIT_DS_LAYOUT_TILE_ENABLE; ds_layout < RADV_BLIT_DS_LAYOUT_COUNT; ds_layout++) {
+ if (!device->meta_state.blit2d.stencil_only_rp[ds_layout]) {
+ VkImageLayout layout = radv_meta_blit_ds_to_layout(ds_layout);
+ result = radv_CreateRenderPass(radv_device_to_handle(device),
+ &(VkRenderPassCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
.attachmentCount = 1,
.pAttachments = &(VkAttachmentDescription) {
- .format = VK_FORMAT_S8_UINT,
- .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
- .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
- .initialLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
- .finalLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
- },
- .subpassCount = 1,
- .pSubpasses = &(VkSubpassDescription) {
- .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
- .inputAttachmentCount = 0,
- .colorAttachmentCount = 0,
- .pColorAttachments = NULL,
- .pResolveAttachments = NULL,
- .pDepthStencilAttachment = &(VkAttachmentReference) {
- .attachment = 0,
- .layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
- },
- .preserveAttachmentCount = 1,
- .pPreserveAttachments = (uint32_t[]) { 0 },
- },
- .dependencyCount = 0,
- }, &device->meta_state.alloc, &device->meta_state.blit2d.stencil_only_rp);
+ .format = VK_FORMAT_S8_UINT,
+ .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .initialLayout = layout,
+ .finalLayout = layout,
+ },
+ .subpassCount = 1,
+ .pSubpasses = &(VkSubpassDescription) {
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .inputAttachmentCount = 0,
+ .colorAttachmentCount = 0,
+ .pColorAttachments = NULL,
+ .pResolveAttachments = NULL,
+ .pDepthStencilAttachment = &(VkAttachmentReference) {
+ .attachment = 0,
+ .layout = layout,
+ },
+ .preserveAttachmentCount = 1,
+ .pPreserveAttachments = (uint32_t[]) { 0 },
+ },
+ .dependencyCount = 0,
+ }, &device->meta_state.alloc, &device->meta_state.blit2d.stencil_only_rp[ds_layout]);
+ }
}
const VkGraphicsPipelineCreateInfo vk_pipeline_info = {
@@ -1117,7 +1130,7 @@
},
.flags = 0,
.layout = device->meta_state.blit2d.p_layouts[src_type],
- .renderPass = device->meta_state.blit2d.stencil_only_rp,
+ .renderPass = device->meta_state.blit2d.stencil_only_rp[0],
.subpass = 0,
};
@@ -1157,8 +1170,7 @@
radv_device_init_meta_blit2d_state(struct radv_device *device)
{
VkResult result;
-
- zero(device->meta_state.blit2d);
+ bool create_3d = device->physical_device->rad_info.chip_class >= GFX9;
const VkPushConstantRange push_constant_ranges[] = {
{VK_SHADER_STAGE_VERTEX_BIT, 0, 16},
@@ -1194,6 +1206,37 @@
if (result != VK_SUCCESS)
goto fail;
+ if (create_3d) {
+ result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
+ &(VkDescriptorSetLayoutCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 1,
+ .pBindings = (VkDescriptorSetLayoutBinding[]) {
+ {
+ .binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .pImmutableSamplers = NULL
+ },
+ }
+ }, &device->meta_state.alloc, &device->meta_state.blit2d.ds_layouts[BLIT2D_SRC_TYPE_IMAGE_3D]);
+ if (result != VK_SUCCESS)
+ goto fail;
+ result = radv_CreatePipelineLayout(radv_device_to_handle(device),
+ &(VkPipelineLayoutCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = &device->meta_state.blit2d.ds_layouts[BLIT2D_SRC_TYPE_IMAGE_3D],
+ .pushConstantRangeCount = 2,
+ .pPushConstantRanges = push_constant_ranges,
+ },
+ &device->meta_state.alloc, &device->meta_state.blit2d.p_layouts[BLIT2D_SRC_TYPE_IMAGE_3D]);
+ if (result != VK_SUCCESS)
+ goto fail;
+ }
+
result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
&(VkDescriptorSetLayoutCreateInfo) {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
@@ -1226,6 +1269,8 @@
goto fail;
for (unsigned src = 0; src < BLIT2D_NUM_SRC_TYPES; src++) {
+ if (src == BLIT2D_SRC_TYPE_IMAGE_3D && !create_3d)
+ continue;
for (unsigned j = 0; j < ARRAY_SIZE(pipeline_formats); ++j) {
result = blit2d_init_color_pipeline(device, src, pipeline_formats[j]);
if (result != VK_SUCCESS)
diff -Nru mesa-17.2.4/src/amd/vulkan/radv_meta_blit.c mesa-17.3.3/src/amd/vulkan/radv_meta_blit.c
--- mesa-17.2.4/src/amd/vulkan/radv_meta_blit.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/vulkan/radv_meta_blit.c 2018-01-18 21:30:28.000000000 +0000
@@ -265,12 +265,14 @@
meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
struct radv_image *src_image,
struct radv_image_view *src_iview,
+ VkImageLayout src_image_layout,
VkOffset3D src_offset_0,
VkOffset3D src_offset_1,
struct radv_image *dest_image,
struct radv_image_view *dest_iview,
- VkOffset3D dest_offset_0,
- VkOffset3D dest_offset_1,
+ VkImageLayout dest_image_layout,
+ VkOffset2D dest_offset_0,
+ VkOffset2D dest_offset_1,
VkRect2D dest_box,
VkFilter blit_filter)
{
@@ -351,11 +353,12 @@
}
break;
}
- case VK_IMAGE_ASPECT_DEPTH_BIT:
+ case VK_IMAGE_ASPECT_DEPTH_BIT: {
+ enum radv_blit_ds_layout ds_layout = radv_meta_blit_ds_to_type(dest_image_layout);
radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
&(VkRenderPassBeginInfo) {
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
- .renderPass = device->meta_state.blit.depth_only_rp,
+ .renderPass = device->meta_state.blit.depth_only_rp[ds_layout],
.framebuffer = fb,
.renderArea = {
.offset = { dest_box.offset.x, dest_box.offset.y },
@@ -378,11 +381,13 @@
unreachable(!"bad VkImageType");
}
break;
- case VK_IMAGE_ASPECT_STENCIL_BIT:
+ }
+ case VK_IMAGE_ASPECT_STENCIL_BIT: {
+ enum radv_blit_ds_layout ds_layout = radv_meta_blit_ds_to_type(dest_image_layout);
radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
&(VkRenderPassBeginInfo) {
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
- .renderPass = device->meta_state.blit.stencil_only_rp,
+ .renderPass = device->meta_state.blit.stencil_only_rp[ds_layout],
.framebuffer = fb,
.renderArea = {
.offset = { dest_box.offset.x, dest_box.offset.y },
@@ -405,14 +410,13 @@
unreachable(!"bad VkImageType");
}
break;
+ }
default:
unreachable(!"bad VkImageType");
}
- if (cmd_buffer->state.pipeline != radv_pipeline_from_handle(pipeline)) {
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
- }
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
+ VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
device->meta_state.blit.pipeline_layout,
@@ -512,26 +516,14 @@
assert(src_image->info.samples == 1);
assert(dest_image->info.samples == 1);
- radv_meta_save_graphics_reset_vport_scissor_novertex(&saved_state, cmd_buffer);
+ radv_meta_save(&saved_state, cmd_buffer,
+ RADV_META_SAVE_GRAPHICS_PIPELINE |
+ RADV_META_SAVE_CONSTANTS |
+ RADV_META_SAVE_DESCRIPTORS);
for (unsigned r = 0; r < regionCount; r++) {
const VkImageSubresourceLayers *src_res = &pRegions[r].srcSubresource;
const VkImageSubresourceLayers *dst_res = &pRegions[r].dstSubresource;
- struct radv_image_view src_iview;
- radv_image_view_init(&src_iview, cmd_buffer->device,
- &(VkImageViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .image = srcImage,
- .viewType = radv_meta_get_view_type(src_image),
- .format = src_image->vk_format,
- .subresourceRange = {
- .aspectMask = src_res->aspectMask,
- .baseMipLevel = src_res->mipLevel,
- .levelCount = 1,
- .baseArrayLayer = src_res->baseArrayLayer,
- .layerCount = 1
- },
- });
unsigned dst_start, dst_end;
if (dest_image->type == VK_IMAGE_TYPE_3D) {
@@ -578,18 +570,17 @@
dest_box.extent.width = abs(dst_x1 - dst_x0);
dest_box.extent.height = abs(dst_y1 - dst_y0);
- struct radv_image_view dest_iview;
const unsigned num_layers = dst_end - dst_start;
for (unsigned i = 0; i < num_layers; i++) {
- const VkOffset3D dest_offset_0 = {
+ struct radv_image_view dest_iview, src_iview;
+
+ const VkOffset2D dest_offset_0 = {
.x = dst_x0,
.y = dst_y0,
- .z = dst_start + i ,
};
- const VkOffset3D dest_offset_1 = {
+ const VkOffset2D dest_offset_1 = {
.x = dst_x1,
.y = dst_y1,
- .z = dst_start + i ,
};
VkOffset3D src_offset_0 = {
.x = src_x0,
@@ -601,9 +592,10 @@
.y = src_y1,
.z = src_start + i * src_z_step,
};
- const uint32_t dest_array_slice =
- radv_meta_get_iview_layer(dest_image, dst_res,
- &dest_offset_0);
+ const uint32_t dest_array_slice = dst_start + i;
+
+ /* 3D images have just 1 layer */
+ const uint32_t src_array_slice = src_image->type == VK_IMAGE_TYPE_3D ? 0 : src_start + i;
radv_image_view_init(&dest_iview, cmd_buffer->device,
&(VkImageViewCreateInfo) {
@@ -619,10 +611,24 @@
.layerCount = 1
},
});
+ radv_image_view_init(&src_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = srcImage,
+ .viewType = radv_meta_get_view_type(src_image),
+ .format = src_image->vk_format,
+ .subresourceRange = {
+ .aspectMask = src_res->aspectMask,
+ .baseMipLevel = src_res->mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = src_array_slice,
+ .layerCount = 1
+ },
+ });
meta_emit_blit(cmd_buffer,
- src_image, &src_iview,
+ src_image, &src_iview, srcImageLayout,
src_offset_0, src_offset_1,
- dest_image, &dest_iview,
+ dest_image, &dest_iview, destImageLayout,
dest_offset_0, dest_offset_1,
dest_box,
filter);
@@ -635,65 +641,52 @@
void
radv_device_finish_meta_blit_state(struct radv_device *device)
{
- for (unsigned i = 0; i < NUM_META_FS_KEYS; ++i) {
- if (device->meta_state.blit.render_pass[i])
- radv_DestroyRenderPass(radv_device_to_handle(device),
- device->meta_state.blit.render_pass[i],
- &device->meta_state.alloc);
- if (device->meta_state.blit.pipeline_1d_src[i])
- radv_DestroyPipeline(radv_device_to_handle(device),
- device->meta_state.blit.pipeline_1d_src[i],
- &device->meta_state.alloc);
- if (device->meta_state.blit.pipeline_2d_src[i])
- radv_DestroyPipeline(radv_device_to_handle(device),
- device->meta_state.blit.pipeline_2d_src[i],
- &device->meta_state.alloc);
- if (device->meta_state.blit.pipeline_3d_src[i])
- radv_DestroyPipeline(radv_device_to_handle(device),
- device->meta_state.blit.pipeline_3d_src[i],
- &device->meta_state.alloc);
- }
+ struct radv_meta_state *state = &device->meta_state;
- if (device->meta_state.blit.depth_only_rp)
+ for (unsigned i = 0; i < NUM_META_FS_KEYS; ++i) {
radv_DestroyRenderPass(radv_device_to_handle(device),
- device->meta_state.blit.depth_only_rp,
- &device->meta_state.alloc);
- if (device->meta_state.blit.depth_only_1d_pipeline)
+ state->blit.render_pass[i],
+ &state->alloc);
radv_DestroyPipeline(radv_device_to_handle(device),
- device->meta_state.blit.depth_only_1d_pipeline,
- &device->meta_state.alloc);
- if (device->meta_state.blit.depth_only_2d_pipeline)
+ state->blit.pipeline_1d_src[i],
+ &state->alloc);
radv_DestroyPipeline(radv_device_to_handle(device),
- device->meta_state.blit.depth_only_2d_pipeline,
- &device->meta_state.alloc);
- if (device->meta_state.blit.depth_only_3d_pipeline)
+ state->blit.pipeline_2d_src[i],
+ &state->alloc);
radv_DestroyPipeline(radv_device_to_handle(device),
- device->meta_state.blit.depth_only_3d_pipeline,
- &device->meta_state.alloc);
- if (device->meta_state.blit.stencil_only_rp)
+ state->blit.pipeline_3d_src[i],
+ &state->alloc);
+ }
+
+ for (enum radv_blit_ds_layout i = RADV_BLIT_DS_LAYOUT_TILE_ENABLE; i < RADV_BLIT_DS_LAYOUT_COUNT; i++) {
radv_DestroyRenderPass(radv_device_to_handle(device),
- device->meta_state.blit.stencil_only_rp,
- &device->meta_state.alloc);
- if (device->meta_state.blit.stencil_only_1d_pipeline)
- radv_DestroyPipeline(radv_device_to_handle(device),
- device->meta_state.blit.stencil_only_1d_pipeline,
- &device->meta_state.alloc);
- if (device->meta_state.blit.stencil_only_2d_pipeline)
- radv_DestroyPipeline(radv_device_to_handle(device),
- device->meta_state.blit.stencil_only_2d_pipeline,
- &device->meta_state.alloc);
- if (device->meta_state.blit.stencil_only_3d_pipeline)
- radv_DestroyPipeline(radv_device_to_handle(device),
- device->meta_state.blit.stencil_only_3d_pipeline,
- &device->meta_state.alloc);
- if (device->meta_state.blit.pipeline_layout)
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- device->meta_state.blit.pipeline_layout,
- &device->meta_state.alloc);
- if (device->meta_state.blit.ds_layout)
- radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
- device->meta_state.blit.ds_layout,
- &device->meta_state.alloc);
+ state->blit.depth_only_rp[i], &state->alloc);
+ radv_DestroyRenderPass(radv_device_to_handle(device),
+ state->blit.stencil_only_rp[i], &state->alloc);
+ }
+
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->blit.depth_only_1d_pipeline, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->blit.depth_only_2d_pipeline, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->blit.depth_only_3d_pipeline, &state->alloc);
+
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->blit.stencil_only_1d_pipeline,
+ &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->blit.stencil_only_2d_pipeline,
+ &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->blit.stencil_only_3d_pipeline,
+ &state->alloc);
+
+
+ radv_DestroyPipelineLayout(radv_device_to_handle(device),
+ state->blit.pipeline_layout, &state->alloc);
+ radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
+ state->blit.ds_layout, &state->alloc);
}
static VkFormat pipeline_formats[] = {
@@ -883,35 +876,38 @@
fs_2d.nir = build_nir_copy_fragment_shader_depth(GLSL_SAMPLER_DIM_2D);
fs_3d.nir = build_nir_copy_fragment_shader_depth(GLSL_SAMPLER_DIM_3D);
- result = radv_CreateRenderPass(radv_device_to_handle(device),
- &(VkRenderPassCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+ for (enum radv_blit_ds_layout ds_layout = RADV_BLIT_DS_LAYOUT_TILE_ENABLE; ds_layout < RADV_BLIT_DS_LAYOUT_COUNT; ds_layout++) {
+ VkImageLayout layout = radv_meta_blit_ds_to_layout(ds_layout);
+ result = radv_CreateRenderPass(radv_device_to_handle(device),
+ &(VkRenderPassCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
.attachmentCount = 1,
.pAttachments = &(VkAttachmentDescription) {
- .format = VK_FORMAT_D32_SFLOAT,
- .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
- .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
- .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
- .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- .subpassCount = 1,
- .pSubpasses = &(VkSubpassDescription) {
- .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
- .inputAttachmentCount = 0,
- .colorAttachmentCount = 0,
- .pColorAttachments = NULL,
- .pResolveAttachments = NULL,
- .pDepthStencilAttachment = &(VkAttachmentReference) {
- .attachment = 0,
- .layout = VK_IMAGE_LAYOUT_GENERAL,
+ .format = VK_FORMAT_D32_SFLOAT,
+ .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .initialLayout = layout,
+ .finalLayout = layout,
},
- .preserveAttachmentCount = 1,
- .pPreserveAttachments = (uint32_t[]) { 0 },
- },
- .dependencyCount = 0,
- }, &device->meta_state.alloc, &device->meta_state.blit.depth_only_rp);
- if (result != VK_SUCCESS)
- goto fail;
+ .subpassCount = 1,
+ .pSubpasses = &(VkSubpassDescription) {
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .inputAttachmentCount = 0,
+ .colorAttachmentCount = 0,
+ .pColorAttachments = NULL,
+ .pResolveAttachments = NULL,
+ .pDepthStencilAttachment = &(VkAttachmentReference) {
+ .attachment = 0,
+ .layout = layout,
+ },
+ .preserveAttachmentCount = 1,
+ .pPreserveAttachments = (uint32_t[]) { 0 },
+ },
+ .dependencyCount = 0,
+ }, &device->meta_state.alloc, &device->meta_state.blit.depth_only_rp[ds_layout]);
+ if (result != VK_SUCCESS)
+ goto fail;
+ }
VkPipelineVertexInputStateCreateInfo vi_create_info = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
@@ -991,7 +987,7 @@
},
.flags = 0,
.layout = device->meta_state.blit.pipeline_layout,
- .renderPass = device->meta_state.blit.depth_only_rp,
+ .renderPass = device->meta_state.blit.depth_only_rp[0],
.subpass = 0,
};
@@ -1041,33 +1037,36 @@
fs_2d.nir = build_nir_copy_fragment_shader_stencil(GLSL_SAMPLER_DIM_2D);
fs_3d.nir = build_nir_copy_fragment_shader_stencil(GLSL_SAMPLER_DIM_3D);
- result = radv_CreateRenderPass(radv_device_to_handle(device),
- &(VkRenderPassCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+ for (enum radv_blit_ds_layout ds_layout = RADV_BLIT_DS_LAYOUT_TILE_ENABLE; ds_layout < RADV_BLIT_DS_LAYOUT_COUNT; ds_layout++) {
+ VkImageLayout layout = radv_meta_blit_ds_to_layout(ds_layout);
+ result = radv_CreateRenderPass(radv_device_to_handle(device),
+ &(VkRenderPassCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
.attachmentCount = 1,
.pAttachments = &(VkAttachmentDescription) {
- .format = VK_FORMAT_S8_UINT,
- .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
- .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
- .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
- .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
+ .format = VK_FORMAT_S8_UINT,
+ .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .initialLayout = layout,
+ .finalLayout = layout,
+ },
.subpassCount = 1,
- .pSubpasses = &(VkSubpassDescription) {
- .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
- .inputAttachmentCount = 0,
- .colorAttachmentCount = 0,
- .pColorAttachments = NULL,
- .pResolveAttachments = NULL,
- .pDepthStencilAttachment = &(VkAttachmentReference) {
- .attachment = 0,
- .layout = VK_IMAGE_LAYOUT_GENERAL,
+ .pSubpasses = &(VkSubpassDescription) {
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .inputAttachmentCount = 0,
+ .colorAttachmentCount = 0,
+ .pColorAttachments = NULL,
+ .pResolveAttachments = NULL,
+ .pDepthStencilAttachment = &(VkAttachmentReference) {
+ .attachment = 0,
+ .layout = layout,
+ },
+ .preserveAttachmentCount = 1,
+ .pPreserveAttachments = (uint32_t[]) { 0 },
},
- .preserveAttachmentCount = 1,
- .pPreserveAttachments = (uint32_t[]) { 0 },
- },
- .dependencyCount = 0,
- }, &device->meta_state.alloc, &device->meta_state.blit.stencil_only_rp);
+ .dependencyCount = 0,
+ }, &device->meta_state.alloc, &device->meta_state.blit.stencil_only_rp[ds_layout]);
+ }
if (result != VK_SUCCESS)
goto fail;
@@ -1151,7 +1150,6 @@
},
.depthCompareOp = VK_COMPARE_OP_ALWAYS,
},
-
.pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
.dynamicStateCount = 6,
@@ -1166,7 +1164,7 @@
},
.flags = 0,
.layout = device->meta_state.blit.pipeline_layout,
- .renderPass = device->meta_state.blit.stencil_only_rp,
+ .renderPass = device->meta_state.blit.stencil_only_rp[0],
.subpass = 0,
};
@@ -1198,6 +1196,7 @@
if (result != VK_SUCCESS)
goto fail;
+
fail:
ralloc_free(fs_1d.nir);
ralloc_free(fs_2d.nir);
@@ -1210,7 +1209,6 @@
{
VkResult result;
struct radv_shader_module vs = {0};
- zero(device->meta_state.blit);
VkDescriptorSetLayoutCreateInfo ds_layout_info = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
diff -Nru mesa-17.2.4/src/amd/vulkan/radv_meta_buffer.c mesa-17.3.3/src/amd/vulkan/radv_meta_buffer.c
--- mesa-17.2.4/src/amd/vulkan/radv_meta_buffer.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/vulkan/radv_meta_buffer.c 2018-01-18 21:30:28.000000000 +0000
@@ -121,8 +121,6 @@
struct radv_shader_module fill_cs = { .nir = NULL };
struct radv_shader_module copy_cs = { .nir = NULL };
- zero(device->meta_state.buffer);
-
fill_cs.nir = build_buffer_fill_shader(device);
copy_cs.nir = build_buffer_copy_shader(device);
@@ -263,35 +261,22 @@
void radv_device_finish_meta_buffer_state(struct radv_device *device)
{
- if (device->meta_state.buffer.copy_pipeline)
- radv_DestroyPipeline(radv_device_to_handle(device),
- device->meta_state.buffer.copy_pipeline,
- &device->meta_state.alloc);
-
- if (device->meta_state.buffer.fill_pipeline)
- radv_DestroyPipeline(radv_device_to_handle(device),
- device->meta_state.buffer.fill_pipeline,
- &device->meta_state.alloc);
-
- if (device->meta_state.buffer.copy_p_layout)
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- device->meta_state.buffer.copy_p_layout,
- &device->meta_state.alloc);
-
- if (device->meta_state.buffer.fill_p_layout)
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- device->meta_state.buffer.fill_p_layout,
- &device->meta_state.alloc);
-
- if (device->meta_state.buffer.copy_ds_layout)
- radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
- device->meta_state.buffer.copy_ds_layout,
- &device->meta_state.alloc);
-
- if (device->meta_state.buffer.fill_ds_layout)
- radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
- device->meta_state.buffer.fill_ds_layout,
- &device->meta_state.alloc);
+ struct radv_meta_state *state = &device->meta_state;
+
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->buffer.copy_pipeline, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->buffer.fill_pipeline, &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device),
+ state->buffer.copy_p_layout, &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device),
+ state->buffer.fill_p_layout, &state->alloc);
+ radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
+ state->buffer.copy_ds_layout,
+ &state->alloc);
+ radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
+ state->buffer.fill_ds_layout,
+ &state->alloc);
}
static void fill_buffer_shader(struct radv_cmd_buffer *cmd_buffer,
@@ -300,9 +285,12 @@
{
struct radv_device *device = cmd_buffer->device;
uint64_t block_count = round_up_u64(size, 1024);
- struct radv_meta_saved_compute_state saved_state;
+ struct radv_meta_saved_state saved_state;
- radv_meta_save_compute(&saved_state, cmd_buffer, 4);
+ radv_meta_save(&saved_state, cmd_buffer,
+ RADV_META_SAVE_COMPUTE_PIPELINE |
+ RADV_META_SAVE_CONSTANTS |
+ RADV_META_SAVE_DESCRIPTORS);
struct radv_buffer dst_buffer = {
.bo = bo,
@@ -340,7 +328,7 @@
radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);
- radv_meta_restore_compute(&saved_state, cmd_buffer, 4);
+ radv_meta_restore(&saved_state, cmd_buffer);
}
static void copy_buffer_shader(struct radv_cmd_buffer *cmd_buffer,
@@ -351,9 +339,11 @@
{
struct radv_device *device = cmd_buffer->device;
uint64_t block_count = round_up_u64(size, 1024);
- struct radv_meta_saved_compute_state saved_state;
+ struct radv_meta_saved_state saved_state;
- radv_meta_save_compute(&saved_state, cmd_buffer, 0);
+ radv_meta_save(&saved_state, cmd_buffer,
+ RADV_META_SAVE_COMPUTE_PIPELINE |
+ RADV_META_SAVE_DESCRIPTORS);
struct radv_buffer dst_buffer = {
.bo = dst_bo,
@@ -404,7 +394,7 @@
radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);
- radv_meta_restore_compute(&saved_state, cmd_buffer, 0);
+ radv_meta_restore(&saved_state, cmd_buffer);
}
@@ -418,7 +408,7 @@
if (size >= 4096)
fill_buffer_shader(cmd_buffer, bo, offset, size, value);
else if (size) {
- uint64_t va = cmd_buffer->device->ws->buffer_get_va(bo);
+ uint64_t va = radv_buffer_get_va(bo);
va += offset;
cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, bo, 8);
si_cp_dma_clear_buffer(cmd_buffer, va, size, value);
@@ -436,8 +426,8 @@
copy_buffer_shader(cmd_buffer, src_bo, dst_bo,
src_offset, dst_offset, size);
else if (size) {
- uint64_t src_va = cmd_buffer->device->ws->buffer_get_va(src_bo);
- uint64_t dst_va = cmd_buffer->device->ws->buffer_get_va(dst_bo);
+ uint64_t src_va = radv_buffer_get_va(src_bo);
+ uint64_t dst_va = radv_buffer_get_va(dst_bo);
src_va += src_offset;
dst_va += dst_offset;
@@ -497,7 +487,7 @@
RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
bool mec = radv_cmd_buffer_uses_mec(cmd_buffer);
uint64_t words = dataSize / 4;
- uint64_t va = cmd_buffer->device->ws->buffer_get_va(dst_buffer->bo);
+ uint64_t va = radv_buffer_get_va(dst_buffer->bo);
va += dstOffset + dst_buffer->offset;
assert(!(dataSize & 3));
diff -Nru mesa-17.2.4/src/amd/vulkan/radv_meta_bufimage.c mesa-17.3.3/src/amd/vulkan/radv_meta_bufimage.c
--- mesa-17.2.4/src/amd/vulkan/radv_meta_bufimage.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/vulkan/radv_meta_bufimage.c 2018-01-18 21:30:28.000000000 +0000
@@ -29,11 +29,15 @@
* Compute queue: implementation also of buffer->image, image->image, and image clear.
*/
+/* GFX9 needs to use a 3D sampler to access 3D resources, so the shader has the options
+ * for that.
+ */
static nir_shader *
-build_nir_itob_compute_shader(struct radv_device *dev)
+build_nir_itob_compute_shader(struct radv_device *dev, bool is_3d)
{
nir_builder b;
- const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
+ enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
+ const struct glsl_type *sampler_type = glsl_sampler_type(dim,
false,
false,
GLSL_TYPE_FLOAT);
@@ -42,7 +46,7 @@
false,
GLSL_TYPE_FLOAT);
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
- b.shader->info.name = ralloc_strdup(b.shader, "meta_itob_cs");
+ b.shader->info.name = ralloc_strdup(b.shader, is_3d ? "meta_itob_cs_3d" : "meta_itob_cs");
b.shader->info.cs.local_size[0] = 16;
b.shader->info.cs.local_size[1] = 16;
b.shader->info.cs.local_size[2] = 1;
@@ -69,32 +73,31 @@
nir_intrinsic_instr *offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
nir_intrinsic_set_base(offset, 0);
- nir_intrinsic_set_range(offset, 12);
+ nir_intrinsic_set_range(offset, 16);
offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
- offset->num_components = 2;
- nir_ssa_dest_init(&offset->instr, &offset->dest, 2, 32, "offset");
+ offset->num_components = is_3d ? 3 : 2;
+ nir_ssa_dest_init(&offset->instr, &offset->dest, is_3d ? 3 : 2, 32, "offset");
nir_builder_instr_insert(&b, &offset->instr);
nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
nir_intrinsic_set_base(stride, 0);
- nir_intrinsic_set_range(stride, 12);
- stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8));
+ nir_intrinsic_set_range(stride, 16);
+ stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
stride->num_components = 1;
nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
nir_builder_instr_insert(&b, &stride->instr);
nir_ssa_def *img_coord = nir_iadd(&b, global_id, &offset->dest.ssa);
-
nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2);
- tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
+ tex->sampler_dim = dim;
tex->op = nir_texop_txf;
tex->src[0].src_type = nir_tex_src_coord;
- tex->src[0].src = nir_src_for_ssa(nir_channels(&b, img_coord, 0x3));
+ tex->src[0].src = nir_src_for_ssa(nir_channels(&b, img_coord, is_3d ? 0x7 : 0x3));
tex->src[1].src_type = nir_tex_src_lod;
tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
tex->dest_type = nir_type_float;
tex->is_array = false;
- tex->coord_components = 2;
+ tex->coord_components = is_3d ? 3 : 2;
tex->texture = nir_deref_var_create(tex, input_img);
tex->sampler = NULL;
@@ -126,10 +129,11 @@
{
VkResult result;
struct radv_shader_module cs = { .nir = NULL };
+ struct radv_shader_module cs_3d = { .nir = NULL };
- zero(device->meta_state.itob);
-
- cs.nir = build_nir_itob_compute_shader(device);
+ cs.nir = build_nir_itob_compute_shader(device, false);
+ if (device->physical_device->rad_info.chip_class >= GFX9)
+ cs_3d.nir = build_nir_itob_compute_shader(device, true);
/*
* two descriptors one for the image being sampled
@@ -170,7 +174,7 @@
.setLayoutCount = 1,
.pSetLayouts = &device->meta_state.itob.img_ds_layout,
.pushConstantRangeCount = 1,
- .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 12},
+ .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
};
result = radv_CreatePipelineLayout(radv_device_to_handle(device),
@@ -204,47 +208,71 @@
if (result != VK_SUCCESS)
goto fail;
+ if (device->physical_device->rad_info.chip_class >= GFX9) {
+ VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = radv_shader_module_to_handle(&cs_3d),
+ .pName = "main",
+ .pSpecializationInfo = NULL,
+ };
+
+ VkComputePipelineCreateInfo vk_pipeline_info_3d = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = pipeline_shader_stage_3d,
+ .flags = 0,
+ .layout = device->meta_state.itob.img_p_layout,
+ };
+
+ result = radv_CreateComputePipelines(radv_device_to_handle(device),
+ radv_pipeline_cache_to_handle(&device->meta_state.cache),
+ 1, &vk_pipeline_info_3d, NULL,
+ &device->meta_state.itob.pipeline_3d);
+ if (result != VK_SUCCESS)
+ goto fail;
+ ralloc_free(cs_3d.nir);
+ }
ralloc_free(cs.nir);
+
return VK_SUCCESS;
fail:
ralloc_free(cs.nir);
+ ralloc_free(cs_3d.nir);
return result;
}
static void
radv_device_finish_meta_itob_state(struct radv_device *device)
{
- if (device->meta_state.itob.img_p_layout) {
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- device->meta_state.itob.img_p_layout,
- &device->meta_state.alloc);
- }
- if (device->meta_state.itob.img_ds_layout) {
- radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
- device->meta_state.itob.img_ds_layout,
- &device->meta_state.alloc);
- }
- if (device->meta_state.itob.pipeline) {
+ struct radv_meta_state *state = &device->meta_state;
+
+ radv_DestroyPipelineLayout(radv_device_to_handle(device),
+ state->itob.img_p_layout, &state->alloc);
+ radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
+ state->itob.img_ds_layout,
+ &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->itob.pipeline, &state->alloc);
+ if (device->physical_device->rad_info.chip_class >= GFX9)
radv_DestroyPipeline(radv_device_to_handle(device),
- device->meta_state.itob.pipeline,
- &device->meta_state.alloc);
- }
+ state->itob.pipeline_3d, &state->alloc);
}
static nir_shader *
-build_nir_btoi_compute_shader(struct radv_device *dev)
+build_nir_btoi_compute_shader(struct radv_device *dev, bool is_3d)
{
nir_builder b;
+ enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
false,
false,
GLSL_TYPE_FLOAT);
- const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
+ const struct glsl_type *img_type = glsl_sampler_type(dim,
false,
false,
GLSL_TYPE_FLOAT);
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
- b.shader->info.name = ralloc_strdup(b.shader, "meta_btoi_cs");
+ b.shader->info.name = ralloc_strdup(b.shader, is_3d ? "meta_btoi_cs_3d" : "meta_btoi_cs");
b.shader->info.cs.local_size[0] = 16;
b.shader->info.cs.local_size[1] = 16;
b.shader->info.cs.local_size[2] = 1;
@@ -269,16 +297,16 @@
nir_intrinsic_instr *offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
nir_intrinsic_set_base(offset, 0);
- nir_intrinsic_set_range(offset, 12);
+ nir_intrinsic_set_range(offset, 16);
offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
- offset->num_components = 2;
- nir_ssa_dest_init(&offset->instr, &offset->dest, 2, 32, "offset");
+ offset->num_components = is_3d ? 3 : 2;
+ nir_ssa_dest_init(&offset->instr, &offset->dest, is_3d ? 3 : 2, 32, "offset");
nir_builder_instr_insert(&b, &offset->instr);
nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
nir_intrinsic_set_base(stride, 0);
- nir_intrinsic_set_range(stride, 12);
- stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8));
+ nir_intrinsic_set_range(stride, 16);
+ stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
stride->num_components = 1;
nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
nir_builder_instr_insert(&b, &stride->instr);
@@ -326,11 +354,10 @@
{
VkResult result;
struct radv_shader_module cs = { .nir = NULL };
-
- zero(device->meta_state.btoi);
-
- cs.nir = build_nir_btoi_compute_shader(device);
-
+ struct radv_shader_module cs_3d = { .nir = NULL };
+ cs.nir = build_nir_btoi_compute_shader(device, false);
+ if (device->physical_device->rad_info.chip_class >= GFX9)
+ cs_3d.nir = build_nir_btoi_compute_shader(device, true);
/*
* two descriptors one for the image being sampled
* one for the buffer being written.
@@ -370,7 +397,7 @@
.setLayoutCount = 1,
.pSetLayouts = &device->meta_state.btoi.img_ds_layout,
.pushConstantRangeCount = 1,
- .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 12},
+ .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
};
result = radv_CreatePipelineLayout(radv_device_to_handle(device),
@@ -404,9 +431,33 @@
if (result != VK_SUCCESS)
goto fail;
+ if (device->physical_device->rad_info.chip_class >= GFX9) {
+ VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = radv_shader_module_to_handle(&cs_3d),
+ .pName = "main",
+ .pSpecializationInfo = NULL,
+ };
+
+ VkComputePipelineCreateInfo vk_pipeline_info_3d = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = pipeline_shader_stage_3d,
+ .flags = 0,
+ .layout = device->meta_state.btoi.img_p_layout,
+ };
+
+ result = radv_CreateComputePipelines(radv_device_to_handle(device),
+ radv_pipeline_cache_to_handle(&device->meta_state.cache),
+ 1, &vk_pipeline_info_3d, NULL,
+ &device->meta_state.btoi.pipeline_3d);
+ ralloc_free(cs_3d.nir);
+ }
ralloc_free(cs.nir);
+
return VK_SUCCESS;
fail:
+ ralloc_free(cs_3d.nir);
ralloc_free(cs.nir);
return result;
}
@@ -414,37 +465,34 @@
static void
radv_device_finish_meta_btoi_state(struct radv_device *device)
{
- if (device->meta_state.btoi.img_p_layout) {
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- device->meta_state.btoi.img_p_layout,
- &device->meta_state.alloc);
- }
- if (device->meta_state.btoi.img_ds_layout) {
- radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
- device->meta_state.btoi.img_ds_layout,
- &device->meta_state.alloc);
- }
- if (device->meta_state.btoi.pipeline) {
- radv_DestroyPipeline(radv_device_to_handle(device),
- device->meta_state.btoi.pipeline,
- &device->meta_state.alloc);
- }
+ struct radv_meta_state *state = &device->meta_state;
+
+ radv_DestroyPipelineLayout(radv_device_to_handle(device),
+ state->btoi.img_p_layout, &state->alloc);
+ radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
+ state->btoi.img_ds_layout,
+ &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->btoi.pipeline, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->btoi.pipeline_3d, &state->alloc);
}
static nir_shader *
-build_nir_itoi_compute_shader(struct radv_device *dev)
+build_nir_itoi_compute_shader(struct radv_device *dev, bool is_3d)
{
nir_builder b;
- const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
+ enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
+ const struct glsl_type *buf_type = glsl_sampler_type(dim,
false,
false,
GLSL_TYPE_FLOAT);
- const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
+ const struct glsl_type *img_type = glsl_sampler_type(dim,
false,
false,
GLSL_TYPE_FLOAT);
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
- b.shader->info.name = ralloc_strdup(b.shader, "meta_itoi_cs");
+ b.shader->info.name = ralloc_strdup(b.shader, is_3d ? "meta_itoi_cs_3d" : "meta_itoi_cs");
b.shader->info.cs.local_size[0] = 16;
b.shader->info.cs.local_size[1] = 16;
b.shader->info.cs.local_size[2] = 1;
@@ -469,18 +517,18 @@
nir_intrinsic_instr *src_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
nir_intrinsic_set_base(src_offset, 0);
- nir_intrinsic_set_range(src_offset, 16);
+ nir_intrinsic_set_range(src_offset, 24);
src_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
- src_offset->num_components = 2;
- nir_ssa_dest_init(&src_offset->instr, &src_offset->dest, 2, 32, "src_offset");
+ src_offset->num_components = is_3d ? 3 : 2;
+ nir_ssa_dest_init(&src_offset->instr, &src_offset->dest, is_3d ? 3 : 2, 32, "src_offset");
nir_builder_instr_insert(&b, &src_offset->instr);
nir_intrinsic_instr *dst_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
nir_intrinsic_set_base(dst_offset, 0);
- nir_intrinsic_set_range(dst_offset, 16);
- dst_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8));
- dst_offset->num_components = 2;
- nir_ssa_dest_init(&dst_offset->instr, &dst_offset->dest, 2, 32, "dst_offset");
+ nir_intrinsic_set_range(dst_offset, 24);
+ dst_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
+ dst_offset->num_components = is_3d ? 3 : 2;
+ nir_ssa_dest_init(&dst_offset->instr, &dst_offset->dest, is_3d ? 3 : 2, 32, "dst_offset");
nir_builder_instr_insert(&b, &dst_offset->instr);
nir_ssa_def *src_coord = nir_iadd(&b, global_id, &src_offset->dest.ssa);
@@ -488,15 +536,15 @@
nir_ssa_def *dst_coord = nir_iadd(&b, global_id, &dst_offset->dest.ssa);
nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2);
- tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
+ tex->sampler_dim = dim;
tex->op = nir_texop_txf;
tex->src[0].src_type = nir_tex_src_coord;
- tex->src[0].src = nir_src_for_ssa(nir_channels(&b, src_coord, 3));
+ tex->src[0].src = nir_src_for_ssa(nir_channels(&b, src_coord, is_3d ? 0x7 : 0x3));
tex->src[1].src_type = nir_tex_src_lod;
tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
tex->dest_type = nir_type_float;
tex->is_array = false;
- tex->coord_components = 2;
+ tex->coord_components = is_3d ? 3 : 2;
tex->texture = nir_deref_var_create(tex, input_img);
tex->sampler = NULL;
@@ -520,11 +568,10 @@
{
VkResult result;
struct radv_shader_module cs = { .nir = NULL };
-
- zero(device->meta_state.itoi);
-
- cs.nir = build_nir_itoi_compute_shader(device);
-
+ struct radv_shader_module cs_3d = { .nir = NULL };
+ cs.nir = build_nir_itoi_compute_shader(device, false);
+ if (device->physical_device->rad_info.chip_class >= GFX9)
+ cs_3d.nir = build_nir_itoi_compute_shader(device, true);
/*
* two descriptors one for the image being sampled
* one for the buffer being written.
@@ -564,7 +611,7 @@
.setLayoutCount = 1,
.pSetLayouts = &device->meta_state.itoi.img_ds_layout,
.pushConstantRangeCount = 1,
- .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
+ .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 24},
};
result = radv_CreatePipelineLayout(radv_device_to_handle(device),
@@ -598,43 +645,66 @@
if (result != VK_SUCCESS)
goto fail;
+ if (device->physical_device->rad_info.chip_class >= GFX9) {
+ VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+.stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = radv_shader_module_to_handle(&cs_3d),
+ .pName = "main",
+ .pSpecializationInfo = NULL,
+ };
+
+ VkComputePipelineCreateInfo vk_pipeline_info_3d = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = pipeline_shader_stage_3d,
+ .flags = 0,
+ .layout = device->meta_state.itoi.img_p_layout,
+ };
+
+ result = radv_CreateComputePipelines(radv_device_to_handle(device),
+ radv_pipeline_cache_to_handle(&device->meta_state.cache),
+ 1, &vk_pipeline_info_3d, NULL,
+ &device->meta_state.itoi.pipeline_3d);
+
+ ralloc_free(cs_3d.nir);
+ }
ralloc_free(cs.nir);
+
return VK_SUCCESS;
fail:
ralloc_free(cs.nir);
+ ralloc_free(cs_3d.nir);
return result;
}
static void
radv_device_finish_meta_itoi_state(struct radv_device *device)
{
- if (device->meta_state.itoi.img_p_layout) {
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- device->meta_state.itoi.img_p_layout,
- &device->meta_state.alloc);
- }
- if (device->meta_state.itoi.img_ds_layout) {
- radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
- device->meta_state.itoi.img_ds_layout,
- &device->meta_state.alloc);
- }
- if (device->meta_state.itoi.pipeline) {
+ struct radv_meta_state *state = &device->meta_state;
+
+ radv_DestroyPipelineLayout(radv_device_to_handle(device),
+ state->itoi.img_p_layout, &state->alloc);
+ radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
+ state->itoi.img_ds_layout,
+ &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->itoi.pipeline, &state->alloc);
+ if (device->physical_device->rad_info.chip_class >= GFX9)
radv_DestroyPipeline(radv_device_to_handle(device),
- device->meta_state.itoi.pipeline,
- &device->meta_state.alloc);
- }
+ state->itoi.pipeline_3d, &state->alloc);
}
static nir_shader *
-build_nir_cleari_compute_shader(struct radv_device *dev)
+build_nir_cleari_compute_shader(struct radv_device *dev, bool is_3d)
{
nir_builder b;
- const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
+ enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
+ const struct glsl_type *img_type = glsl_sampler_type(dim,
false,
false,
GLSL_TYPE_FLOAT);
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
- b.shader->info.name = ralloc_strdup(b.shader, "meta_cleari_cs");
+ b.shader->info.name = ralloc_strdup(b.shader, is_3d ? "meta_cleari_cs_3d" : "meta_cleari_cs");
b.shader->info.cs.local_size[0] = 16;
b.shader->info.cs.local_size[1] = 16;
b.shader->info.cs.local_size[2] = 1;
@@ -655,12 +725,29 @@
nir_intrinsic_instr *clear_val = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
nir_intrinsic_set_base(clear_val, 0);
- nir_intrinsic_set_range(clear_val, 16);
+ nir_intrinsic_set_range(clear_val, 20);
clear_val->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
clear_val->num_components = 4;
nir_ssa_dest_init(&clear_val->instr, &clear_val->dest, 4, 32, "clear_value");
nir_builder_instr_insert(&b, &clear_val->instr);
+ nir_intrinsic_instr *layer = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
+ nir_intrinsic_set_base(layer, 0);
+ nir_intrinsic_set_range(layer, 20);
+ layer->src[0] = nir_src_for_ssa(nir_imm_int(&b, 16));
+ layer->num_components = 1;
+ nir_ssa_dest_init(&layer->instr, &layer->dest, 1, 32, "layer");
+ nir_builder_instr_insert(&b, &layer->instr);
+
+ nir_ssa_def *global_z = nir_iadd(&b, nir_channel(&b, global_id, 2), &layer->dest.ssa);
+
+ nir_ssa_def *comps[4];
+ comps[0] = nir_channel(&b, global_id, 0);
+ comps[1] = nir_channel(&b, global_id, 1);
+ comps[2] = global_z;
+ comps[3] = nir_imm_int(&b, 0);
+ global_id = nir_vec(&b, comps, 4);
+
nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_store);
store->src[0] = nir_src_for_ssa(global_id);
store->src[1] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
@@ -676,10 +763,10 @@
{
VkResult result;
struct radv_shader_module cs = { .nir = NULL };
-
- zero(device->meta_state.cleari);
-
- cs.nir = build_nir_cleari_compute_shader(device);
+ struct radv_shader_module cs_3d = { .nir = NULL };
+ cs.nir = build_nir_cleari_compute_shader(device, false);
+ if (device->physical_device->rad_info.chip_class >= GFX9)
+ cs_3d.nir = build_nir_cleari_compute_shader(device, true);
/*
* two descriptors one for the image being sampled
@@ -713,7 +800,7 @@
.setLayoutCount = 1,
.pSetLayouts = &device->meta_state.cleari.img_ds_layout,
.pushConstantRangeCount = 1,
- .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
+ .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 20},
};
result = radv_CreatePipelineLayout(radv_device_to_handle(device),
@@ -747,31 +834,55 @@
if (result != VK_SUCCESS)
goto fail;
+
+ if (device->physical_device->rad_info.chip_class >= GFX9) {
+ /* compute shader */
+ VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = radv_shader_module_to_handle(&cs_3d),
+ .pName = "main",
+ .pSpecializationInfo = NULL,
+ };
+
+ VkComputePipelineCreateInfo vk_pipeline_info_3d = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = pipeline_shader_stage_3d,
+ .flags = 0,
+ .layout = device->meta_state.cleari.img_p_layout,
+ };
+
+ result = radv_CreateComputePipelines(radv_device_to_handle(device),
+ radv_pipeline_cache_to_handle(&device->meta_state.cache),
+ 1, &vk_pipeline_info_3d, NULL,
+ &device->meta_state.cleari.pipeline_3d);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ ralloc_free(cs_3d.nir);
+ }
ralloc_free(cs.nir);
return VK_SUCCESS;
fail:
ralloc_free(cs.nir);
+ ralloc_free(cs_3d.nir);
return result;
}
static void
radv_device_finish_meta_cleari_state(struct radv_device *device)
{
- if (device->meta_state.cleari.img_p_layout) {
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- device->meta_state.cleari.img_p_layout,
- &device->meta_state.alloc);
- }
- if (device->meta_state.cleari.img_ds_layout) {
- radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
- device->meta_state.cleari.img_ds_layout,
- &device->meta_state.alloc);
- }
- if (device->meta_state.cleari.pipeline) {
- radv_DestroyPipeline(radv_device_to_handle(device),
- device->meta_state.cleari.pipeline,
- &device->meta_state.alloc);
- }
+ struct radv_meta_state *state = &device->meta_state;
+
+ radv_DestroyPipelineLayout(radv_device_to_handle(device),
+ state->cleari.img_p_layout, &state->alloc);
+ radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
+ state->cleari.img_ds_layout,
+ &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->cleari.pipeline, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->cleari.pipeline_3d, &state->alloc);
}
void
@@ -814,59 +925,18 @@
return result;
}
-void
-radv_meta_begin_itoi(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_saved_compute_state *save)
-{
- radv_meta_save_compute(save, cmd_buffer, 16);
-}
-
-void
-radv_meta_end_itoi(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_saved_compute_state *save)
-{
- radv_meta_restore_compute(save, cmd_buffer, 16);
-}
-
-void
-radv_meta_begin_bufimage(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_saved_compute_state *save)
-{
- radv_meta_save_compute(save, cmd_buffer, 12);
-}
-
-void
-radv_meta_end_bufimage(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_saved_compute_state *save)
-{
- radv_meta_restore_compute(save, cmd_buffer, 12);
-}
-
-void
-radv_meta_begin_cleari(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_saved_compute_state *save)
-{
- radv_meta_save_compute(save, cmd_buffer, 16);
-}
-
-void
-radv_meta_end_cleari(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_saved_compute_state *save)
-{
- radv_meta_restore_compute(save, cmd_buffer, 16);
-}
-
static void
create_iview(struct radv_cmd_buffer *cmd_buffer,
struct radv_meta_blit2d_surf *surf,
struct radv_image_view *iview)
{
-
+ VkImageViewType view_type = cmd_buffer->device->physical_device->rad_info.chip_class < GFX9 ? VK_IMAGE_VIEW_TYPE_2D :
+ radv_meta_get_view_type(surf->image);
radv_image_view_init(iview, cmd_buffer->device,
&(VkImageViewCreateInfo) {
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.image = radv_image_to_handle(surf->image),
- .viewType = VK_IMAGE_VIEW_TYPE_2D,
+ .viewType = view_type,
.format = surf->format,
.subresourceRange = {
.aspectMask = surf->aspect_mask,
@@ -893,18 +963,14 @@
.format = format,
.offset = offset,
.range = VK_WHOLE_SIZE,
- }, cmd_buffer);
+ });
}
-struct itob_temps {
- struct radv_image_view src_iview;
- struct radv_buffer_view dst_bview;
-};
-
static void
itob_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
- struct itob_temps *tmp)
+ struct radv_image_view *src,
+ struct radv_buffer_view *dst)
{
struct radv_device *device = cmd_buffer->device;
@@ -923,7 +989,7 @@
.pImageInfo = (VkDescriptorImageInfo[]) {
{
.sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(&tmp->src_iview),
+ .imageView = radv_image_view_to_handle(src),
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
},
}
@@ -934,23 +1000,11 @@
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
- .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(&tmp->dst_bview) },
+ .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(dst) },
}
});
}
-static void
-itob_bind_pipeline(struct radv_cmd_buffer *cmd_buffer)
-{
- VkPipeline pipeline =
- cmd_buffer->device->meta_state.itob.pipeline;
-
- if (cmd_buffer->state.compute_pipeline != radv_pipeline_from_handle(pipeline)) {
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
- }
-}
-
void
radv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer,
struct radv_meta_blit2d_surf *src,
@@ -958,38 +1012,42 @@
unsigned num_rects,
struct radv_meta_blit2d_rect *rects)
{
+ VkPipeline pipeline = cmd_buffer->device->meta_state.itob.pipeline;
struct radv_device *device = cmd_buffer->device;
- struct itob_temps temps;
+ struct radv_image_view src_view;
+ struct radv_buffer_view dst_view;
- create_iview(cmd_buffer, src, &temps.src_iview);
- create_bview(cmd_buffer, dst->buffer, dst->offset, dst->format, &temps.dst_bview);
- itob_bind_descriptors(cmd_buffer, &temps);
+ create_iview(cmd_buffer, src, &src_view);
+ create_bview(cmd_buffer, dst->buffer, dst->offset, dst->format, &dst_view);
+ itob_bind_descriptors(cmd_buffer, &src_view, &dst_view);
+
+ if (device->physical_device->rad_info.chip_class >= GFX9 &&
+ src->image->type == VK_IMAGE_TYPE_3D)
+ pipeline = cmd_buffer->device->meta_state.itob.pipeline_3d;
- itob_bind_pipeline(cmd_buffer);
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
+ VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
for (unsigned r = 0; r < num_rects; ++r) {
- unsigned push_constants[3] = {
+ unsigned push_constants[4] = {
rects[r].src_x,
rects[r].src_y,
+ src->layer,
dst->pitch
};
radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
device->meta_state.itob.img_p_layout,
- VK_SHADER_STAGE_COMPUTE_BIT, 0, 12,
+ VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
push_constants);
radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
}
}
-struct btoi_temps {
- struct radv_buffer_view src_bview;
- struct radv_image_view dst_iview;
-};
-
static void
btoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
- struct btoi_temps *tmp)
+ struct radv_buffer_view *src,
+ struct radv_image_view *dst)
{
struct radv_device *device = cmd_buffer->device;
@@ -1005,7 +1063,7 @@
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
- .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(&tmp->src_bview) },
+ .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(src) },
},
{
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
@@ -1016,7 +1074,7 @@
.pImageInfo = (VkDescriptorImageInfo[]) {
{
.sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(&tmp->dst_iview),
+ .imageView = radv_image_view_to_handle(dst),
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
},
}
@@ -1024,18 +1082,6 @@
});
}
-static void
-btoi_bind_pipeline(struct radv_cmd_buffer *cmd_buffer)
-{
- VkPipeline pipeline =
- cmd_buffer->device->meta_state.btoi.pipeline;
-
- if (cmd_buffer->state.compute_pipeline != radv_pipeline_from_handle(pipeline)) {
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
- }
-}
-
void
radv_meta_buffer_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
struct radv_meta_blit2d_buffer *src,
@@ -1043,38 +1089,41 @@
unsigned num_rects,
struct radv_meta_blit2d_rect *rects)
{
+ VkPipeline pipeline = cmd_buffer->device->meta_state.btoi.pipeline;
struct radv_device *device = cmd_buffer->device;
- struct btoi_temps temps;
+ struct radv_buffer_view src_view;
+ struct radv_image_view dst_view;
- create_bview(cmd_buffer, src->buffer, src->offset, src->format, &temps.src_bview);
- create_iview(cmd_buffer, dst, &temps.dst_iview);
- btoi_bind_descriptors(cmd_buffer, &temps);
-
- btoi_bind_pipeline(cmd_buffer);
+ create_bview(cmd_buffer, src->buffer, src->offset, src->format, &src_view);
+ create_iview(cmd_buffer, dst, &dst_view);
+ btoi_bind_descriptors(cmd_buffer, &src_view, &dst_view);
+
+ if (device->physical_device->rad_info.chip_class >= GFX9 &&
+ dst->image->type == VK_IMAGE_TYPE_3D)
+ pipeline = cmd_buffer->device->meta_state.btoi.pipeline_3d;
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
+ VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
for (unsigned r = 0; r < num_rects; ++r) {
- unsigned push_constants[3] = {
+ unsigned push_constants[4] = {
rects[r].dst_x,
rects[r].dst_y,
- src->pitch
+ dst->layer,
+ src->pitch,
};
radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
device->meta_state.btoi.img_p_layout,
- VK_SHADER_STAGE_COMPUTE_BIT, 0, 12,
+ VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
push_constants);
radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
}
}
-struct itoi_temps {
- struct radv_image_view src_iview;
- struct radv_image_view dst_iview;
-};
-
static void
itoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
- struct itoi_temps *tmp)
+ struct radv_image_view *src,
+ struct radv_image_view *dst)
{
struct radv_device *device = cmd_buffer->device;
@@ -1093,7 +1142,7 @@
.pImageInfo = (VkDescriptorImageInfo[]) {
{
.sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(&tmp->src_iview),
+ .imageView = radv_image_view_to_handle(src),
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
},
}
@@ -1107,7 +1156,7 @@
.pImageInfo = (VkDescriptorImageInfo[]) {
{
.sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(&tmp->dst_iview),
+ .imageView = radv_image_view_to_handle(dst),
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
},
}
@@ -1115,18 +1164,6 @@
});
}
-static void
-itoi_bind_pipeline(struct radv_cmd_buffer *cmd_buffer)
-{
- VkPipeline pipeline =
- cmd_buffer->device->meta_state.itoi.pipeline;
-
- if (cmd_buffer->state.compute_pipeline != radv_pipeline_from_handle(pipeline)) {
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
- }
-}
-
void
radv_meta_image_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
struct radv_meta_blit2d_surf *src,
@@ -1134,26 +1171,33 @@
unsigned num_rects,
struct radv_meta_blit2d_rect *rects)
{
+ VkPipeline pipeline = cmd_buffer->device->meta_state.itoi.pipeline;
struct radv_device *device = cmd_buffer->device;
- struct itoi_temps temps;
+ struct radv_image_view src_view, dst_view;
- create_iview(cmd_buffer, src, &temps.src_iview);
- create_iview(cmd_buffer, dst, &temps.dst_iview);
+ create_iview(cmd_buffer, src, &src_view);
+ create_iview(cmd_buffer, dst, &dst_view);
- itoi_bind_descriptors(cmd_buffer, &temps);
+ itoi_bind_descriptors(cmd_buffer, &src_view, &dst_view);
- itoi_bind_pipeline(cmd_buffer);
+ if (device->physical_device->rad_info.chip_class >= GFX9 &&
+ src->image->type == VK_IMAGE_TYPE_3D)
+ pipeline = cmd_buffer->device->meta_state.itoi.pipeline_3d;
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
+ VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
for (unsigned r = 0; r < num_rects; ++r) {
- unsigned push_constants[4] = {
+ unsigned push_constants[6] = {
rects[r].src_x,
rects[r].src_y,
+ src->layer,
rects[r].dst_x,
rects[r].dst_y,
+ dst->layer,
};
radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
device->meta_state.itoi.img_p_layout,
- VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
+ VK_SHADER_STAGE_COMPUTE_BIT, 0, 24,
push_constants);
radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
@@ -1189,41 +1233,36 @@
});
}
-static void
-cleari_bind_pipeline(struct radv_cmd_buffer *cmd_buffer)
-{
- VkPipeline pipeline =
- cmd_buffer->device->meta_state.cleari.pipeline;
-
- if (cmd_buffer->state.compute_pipeline != radv_pipeline_from_handle(pipeline)) {
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
- }
-}
-
void
radv_meta_clear_image_cs(struct radv_cmd_buffer *cmd_buffer,
struct radv_meta_blit2d_surf *dst,
const VkClearColorValue *clear_color)
{
+ VkPipeline pipeline = cmd_buffer->device->meta_state.cleari.pipeline;
struct radv_device *device = cmd_buffer->device;
struct radv_image_view dst_iview;
create_iview(cmd_buffer, dst, &dst_iview);
cleari_bind_descriptors(cmd_buffer, &dst_iview);
- cleari_bind_pipeline(cmd_buffer);
+ if (device->physical_device->rad_info.chip_class >= GFX9 &&
+ dst->image->type == VK_IMAGE_TYPE_3D)
+ pipeline = cmd_buffer->device->meta_state.cleari.pipeline_3d;
+
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
+ VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
- unsigned push_constants[4] = {
+ unsigned push_constants[5] = {
clear_color->uint32[0],
clear_color->uint32[1],
clear_color->uint32[2],
clear_color->uint32[3],
+ dst->layer,
};
radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
device->meta_state.cleari.img_p_layout,
- VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
+ VK_SHADER_STAGE_COMPUTE_BIT, 0, 20,
push_constants);
radv_unaligned_dispatch(cmd_buffer, dst->image->info.width, dst->image->info.height, 1);
diff -Nru mesa-17.2.4/src/amd/vulkan/radv_meta.c mesa-17.3.3/src/amd/vulkan/radv_meta.c
--- mesa-17.2.4/src/amd/vulkan/radv_meta.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/vulkan/radv_meta.c 2018-01-18 21:30:28.000000000 +0000
@@ -30,94 +30,122 @@
#include
#include
-static void
-radv_meta_save_novertex(struct radv_meta_saved_state *state,
- const struct radv_cmd_buffer *cmd_buffer,
- uint32_t dynamic_mask)
-{
- state->old_pipeline = cmd_buffer->state.pipeline;
- state->old_descriptor_set0 = cmd_buffer->state.descriptors[0];
-
- state->dynamic_mask = dynamic_mask;
- radv_dynamic_state_copy(&state->dynamic, &cmd_buffer->state.dynamic,
- dynamic_mask);
-
- memcpy(state->push_constants, cmd_buffer->push_constants, MAX_PUSH_CONSTANTS_SIZE);
- state->vertex_saved = false;
-}
-
void
-radv_meta_restore(const struct radv_meta_saved_state *state,
- struct radv_cmd_buffer *cmd_buffer)
+radv_meta_save(struct radv_meta_saved_state *state,
+ struct radv_cmd_buffer *cmd_buffer, uint32_t flags)
{
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS,
- radv_pipeline_to_handle(state->old_pipeline));
- cmd_buffer->state.descriptors[0] = state->old_descriptor_set0;
- if (state->vertex_saved) {
- memcpy(cmd_buffer->state.vertex_bindings, state->old_vertex_bindings,
- sizeof(state->old_vertex_bindings));
- cmd_buffer->state.vb_dirty |= (1 << RADV_META_VERTEX_BINDING_COUNT) - 1;
- }
-
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_PIPELINE;
-
- radv_dynamic_state_copy(&cmd_buffer->state.dynamic, &state->dynamic,
- state->dynamic_mask);
- cmd_buffer->state.dirty |= state->dynamic_mask;
+ assert(flags & (RADV_META_SAVE_GRAPHICS_PIPELINE |
+ RADV_META_SAVE_COMPUTE_PIPELINE));
- memcpy(cmd_buffer->push_constants, state->push_constants, MAX_PUSH_CONSTANTS_SIZE);
- cmd_buffer->push_constant_stages |= VK_SHADER_STAGE_ALL_GRAPHICS | VK_SHADER_STAGE_COMPUTE_BIT;
-}
+ state->flags = flags;
-void
-radv_meta_save_pass(struct radv_meta_saved_pass_state *state,
- const struct radv_cmd_buffer *cmd_buffer)
-{
- state->pass = cmd_buffer->state.pass;
- state->subpass = cmd_buffer->state.subpass;
- state->framebuffer = cmd_buffer->state.framebuffer;
- state->attachments = cmd_buffer->state.attachments;
- state->render_area = cmd_buffer->state.render_area;
-}
+ if (state->flags & RADV_META_SAVE_GRAPHICS_PIPELINE) {
+ assert(!(state->flags & RADV_META_SAVE_COMPUTE_PIPELINE));
+
+ state->old_pipeline = cmd_buffer->state.pipeline;
+
+ /* Save all viewports. */
+ state->viewport.count = cmd_buffer->state.dynamic.viewport.count;
+ typed_memcpy(state->viewport.viewports,
+ cmd_buffer->state.dynamic.viewport.viewports,
+ MAX_VIEWPORTS);
+
+ /* Save all scissors. */
+ state->scissor.count = cmd_buffer->state.dynamic.scissor.count;
+ typed_memcpy(state->scissor.scissors,
+ cmd_buffer->state.dynamic.scissor.scissors,
+ MAX_SCISSORS);
+
+ /* The most common meta operations all want to have the
+ * viewport reset and any scissors disabled. The rest of the
+ * dynamic state should have no effect.
+ */
+ cmd_buffer->state.dynamic.viewport.count = 0;
+ cmd_buffer->state.dynamic.scissor.count = 0;
+ cmd_buffer->state.dirty |= 1 << VK_DYNAMIC_STATE_VIEWPORT |
+ 1 << VK_DYNAMIC_STATE_SCISSOR;
+ }
-void
-radv_meta_restore_pass(const struct radv_meta_saved_pass_state *state,
- struct radv_cmd_buffer *cmd_buffer)
-{
- cmd_buffer->state.pass = state->pass;
- cmd_buffer->state.subpass = state->subpass;
- cmd_buffer->state.framebuffer = state->framebuffer;
- cmd_buffer->state.attachments = state->attachments;
- cmd_buffer->state.render_area = state->render_area;
- if (state->subpass)
- radv_emit_framebuffer_state(cmd_buffer);
-}
+ if (state->flags & RADV_META_SAVE_COMPUTE_PIPELINE) {
+ assert(!(state->flags & RADV_META_SAVE_GRAPHICS_PIPELINE));
-void
-radv_meta_save_compute(struct radv_meta_saved_compute_state *state,
- const struct radv_cmd_buffer *cmd_buffer,
- unsigned push_constant_size)
-{
- state->old_pipeline = cmd_buffer->state.compute_pipeline;
- state->old_descriptor_set0 = cmd_buffer->state.descriptors[0];
+ state->old_pipeline = cmd_buffer->state.compute_pipeline;
+ }
+
+ if (state->flags & RADV_META_SAVE_DESCRIPTORS) {
+ state->old_descriptor_set0 = cmd_buffer->state.descriptors[0];
+ }
+
+ if (state->flags & RADV_META_SAVE_CONSTANTS) {
+ memcpy(state->push_constants, cmd_buffer->push_constants,
+ MAX_PUSH_CONSTANTS_SIZE);
+ }
- if (push_constant_size)
- memcpy(state->push_constants, cmd_buffer->push_constants, push_constant_size);
+ if (state->flags & RADV_META_SAVE_PASS) {
+ state->pass = cmd_buffer->state.pass;
+ state->subpass = cmd_buffer->state.subpass;
+ state->framebuffer = cmd_buffer->state.framebuffer;
+ state->attachments = cmd_buffer->state.attachments;
+ state->render_area = cmd_buffer->state.render_area;
+ }
}
void
-radv_meta_restore_compute(const struct radv_meta_saved_compute_state *state,
- struct radv_cmd_buffer *cmd_buffer,
- unsigned push_constant_size)
+radv_meta_restore(const struct radv_meta_saved_state *state,
+ struct radv_cmd_buffer *cmd_buffer)
{
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
- radv_pipeline_to_handle(state->old_pipeline));
+ if (state->flags & RADV_META_SAVE_GRAPHICS_PIPELINE) {
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
+ VK_PIPELINE_BIND_POINT_GRAPHICS,
+ radv_pipeline_to_handle(state->old_pipeline));
+
+ cmd_buffer->state.dirty |= RADV_CMD_DIRTY_PIPELINE;
+
+ /* Restore all viewports. */
+ cmd_buffer->state.dynamic.viewport.count = state->viewport.count;
+ typed_memcpy(cmd_buffer->state.dynamic.viewport.viewports,
+ state->viewport.viewports,
+ MAX_VIEWPORTS);
+
+ /* Restore all scissors. */
+ cmd_buffer->state.dynamic.scissor.count = state->scissor.count;
+ typed_memcpy(cmd_buffer->state.dynamic.scissor.scissors,
+ state->scissor.scissors,
+ MAX_SCISSORS);
- cmd_buffer->state.descriptors[0] = state->old_descriptor_set0;
+ cmd_buffer->state.dirty |= 1 << VK_DYNAMIC_STATE_VIEWPORT |
+ 1 << VK_DYNAMIC_STATE_SCISSOR;
+ }
- if (push_constant_size) {
- memcpy(cmd_buffer->push_constants, state->push_constants, push_constant_size);
+ if (state->flags & RADV_META_SAVE_COMPUTE_PIPELINE) {
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
+ VK_PIPELINE_BIND_POINT_COMPUTE,
+ radv_pipeline_to_handle(state->old_pipeline));
+ }
+
+ if (state->flags & RADV_META_SAVE_DESCRIPTORS) {
+ cmd_buffer->state.descriptors[0] = state->old_descriptor_set0;
+ cmd_buffer->state.descriptors_dirty |= (1 << 0);
+ }
+
+ if (state->flags & RADV_META_SAVE_CONSTANTS) {
+ memcpy(cmd_buffer->push_constants, state->push_constants,
+ MAX_PUSH_CONSTANTS_SIZE);
cmd_buffer->push_constant_stages |= VK_SHADER_STAGE_COMPUTE_BIT;
+
+ if (state->flags & RADV_META_SAVE_GRAPHICS_PIPELINE) {
+ cmd_buffer->push_constant_stages |= VK_SHADER_STAGE_ALL_GRAPHICS;
+ }
+ }
+
+ if (state->flags & RADV_META_SAVE_PASS) {
+ cmd_buffer->state.pass = state->pass;
+ cmd_buffer->state.subpass = state->subpass;
+ cmd_buffer->state.framebuffer = state->framebuffer;
+ cmd_buffer->state.attachments = state->attachments;
+ cmd_buffer->state.render_area = state->render_area;
+ if (state->subpass)
+ radv_emit_framebuffer_state(cmd_buffer);
}
}
@@ -388,22 +416,6 @@
radv_pipeline_cache_finish(&device->meta_state.cache);
}
-/*
- * The most common meta operations all want to have the viewport
- * reset and any scissors disabled. The rest of the dynamic state
- * should have no effect.
- */
-void
-radv_meta_save_graphics_reset_vport_scissor_novertex(struct radv_meta_saved_state *saved_state,
- struct radv_cmd_buffer *cmd_buffer)
-{
- uint32_t dirty_state = (1 << VK_DYNAMIC_STATE_VIEWPORT) | (1 << VK_DYNAMIC_STATE_SCISSOR);
- radv_meta_save_novertex(saved_state, cmd_buffer, dirty_state);
- cmd_buffer->state.dynamic.viewport.count = 0;
- cmd_buffer->state.dynamic.scissor.count = 0;
- cmd_buffer->state.dirty |= dirty_state;
-}
-
nir_ssa_def *radv_meta_gen_rect_vertices_comp2(nir_builder *vs_b, nir_ssa_def *comp2)
{
@@ -521,7 +533,7 @@
nir_ssa_dest_init(&tex_all_same->instr, &tex_all_same->dest, 1, 32, "tex");
nir_builder_instr_insert(b, &tex_all_same->instr);
- nir_ssa_def *all_same = nir_ine(b, &tex_all_same->dest.ssa, nir_imm_int(b, 0));
+ nir_ssa_def *all_same = nir_ieq(b, &tex_all_same->dest.ssa, nir_imm_int(b, 0));
nir_if *if_stmt = nir_if_create(b->shader);
if_stmt->condition = nir_src_for_ssa(all_same);
nir_cf_node_insert(b->cursor, &if_stmt->cf_node);
diff -Nru mesa-17.2.4/src/amd/vulkan/radv_meta_clear.c mesa-17.3.3/src/amd/vulkan/radv_meta_clear.c
--- mesa-17.2.4/src/amd/vulkan/radv_meta_clear.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/vulkan/radv_meta_clear.c 2018-01-18 21:30:28.000000000 +0000
@@ -21,6 +21,7 @@
* IN THE SOFTWARE.
*/
+#include "radv_debug.h"
#include "radv_meta.h"
#include "radv_private.h"
#include "nir/nir_builder.h"
@@ -102,7 +103,7 @@
const VkPipelineLayout layout,
const struct radv_graphics_pipeline_create_info *extra,
const VkAllocationCallbacks *alloc,
- struct radv_pipeline **pipeline)
+ VkPipeline *pipeline)
{
VkDevice device_h = radv_device_to_handle(device);
VkResult result;
@@ -110,7 +111,6 @@
struct radv_shader_module vs_m = { .nir = vs_nir };
struct radv_shader_module fs_m = { .nir = fs_nir };
- VkPipeline pipeline_h = VK_NULL_HANDLE;
result = radv_graphics_pipeline_create(device_h,
radv_pipeline_cache_to_handle(&device->meta_state.cache),
&(VkGraphicsPipelineCreateInfo) {
@@ -186,13 +186,11 @@
},
extra,
alloc,
- &pipeline_h);
+ pipeline);
ralloc_free(vs_nir);
ralloc_free(fs_nir);
- *pipeline = radv_pipeline_from_handle(pipeline_h);
-
return result;
}
@@ -239,7 +237,7 @@
create_color_pipeline(struct radv_device *device,
uint32_t samples,
uint32_t frag_output,
- struct radv_pipeline **pipeline,
+ VkPipeline *pipeline,
VkRenderPass pass)
{
struct nir_shader *vs_nir;
@@ -289,25 +287,6 @@
return result;
}
-static void
-destroy_pipeline(struct radv_device *device, struct radv_pipeline *pipeline)
-{
- if (!pipeline)
- return;
-
- radv_DestroyPipeline(radv_device_to_handle(device),
- radv_pipeline_to_handle(pipeline),
- &device->meta_state.alloc);
-
-}
-
-static void
-destroy_render_pass(struct radv_device *device, VkRenderPass renderpass)
-{
- radv_DestroyRenderPass(radv_device_to_handle(device), renderpass,
- &device->meta_state.alloc);
-}
-
void
radv_device_finish_meta_clear_state(struct radv_device *device)
{
@@ -315,16 +294,28 @@
for (uint32_t i = 0; i < ARRAY_SIZE(state->clear); ++i) {
for (uint32_t j = 0; j < ARRAY_SIZE(state->clear[i].color_pipelines); ++j) {
- destroy_pipeline(device, state->clear[i].color_pipelines[j]);
- destroy_render_pass(device, state->clear[i].render_pass[j]);
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->clear[i].color_pipelines[j],
+ &state->alloc);
+ radv_DestroyRenderPass(radv_device_to_handle(device),
+ state->clear[i].render_pass[j],
+ &state->alloc);
}
for (uint32_t j = 0; j < NUM_DEPTH_CLEAR_PIPELINES; j++) {
- destroy_pipeline(device, state->clear[i].depth_only_pipeline[j]);
- destroy_pipeline(device, state->clear[i].stencil_only_pipeline[j]);
- destroy_pipeline(device, state->clear[i].depthstencil_pipeline[j]);
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->clear[i].depth_only_pipeline[j],
+ &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->clear[i].stencil_only_pipeline[j],
+ &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->clear[i].depthstencil_pipeline[j],
+ &state->alloc);
}
- destroy_render_pass(device, state->clear[i].depthstencil_rp);
+ radv_DestroyRenderPass(radv_device_to_handle(device),
+ state->clear[i].depthstencil_rp,
+ &state->alloc);
}
radv_DestroyPipelineLayout(radv_device_to_handle(device),
state->clear_color_p_layout,
@@ -337,7 +328,8 @@
static void
emit_color_clear(struct radv_cmd_buffer *cmd_buffer,
const VkClearAttachment *clear_att,
- const VkClearRect *clear_rect)
+ const VkClearRect *clear_rect,
+ uint32_t view_mask)
{
struct radv_device *device = cmd_buffer->device;
const struct radv_subpass *subpass = cmd_buffer->state.subpass;
@@ -348,18 +340,16 @@
const uint32_t samples = iview->image->info.samples;
const uint32_t samples_log2 = ffs(samples) - 1;
unsigned fs_key = radv_format_meta_fs_key(iview->vk_format);
- struct radv_pipeline *pipeline;
VkClearColorValue clear_value = clear_att->clearValue.color;
VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
- VkPipeline pipeline_h;
+ VkPipeline pipeline;
if (fs_key == -1) {
radv_finishme("color clears incomplete");
return;
}
- pipeline = device->meta_state.clear[samples_log2].color_pipelines[fs_key];
- pipeline_h = radv_pipeline_to_handle(pipeline);
+ pipeline = device->meta_state.clear[samples_log2].color_pipelines[fs_key];
if (!pipeline) {
radv_finishme("color clears incomplete");
return;
@@ -384,10 +374,8 @@
radv_cmd_buffer_set_subpass(cmd_buffer, &clear_subpass, false);
- if (cmd_buffer->state.pipeline != pipeline) {
- radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
- pipeline_h);
- }
+ radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
+ pipeline);
radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) {
.x = clear_rect->rect.offset.x,
@@ -400,7 +388,13 @@
radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &clear_rect->rect);
- radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, clear_rect->baseArrayLayer);
+ if (view_mask) {
+ unsigned i;
+ for_each_bit(i, view_mask)
+ radv_CmdDraw(cmd_buffer_h, 3, 1, 0, i);
+ } else {
+ radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, clear_rect->baseArrayLayer);
+ }
radv_cmd_buffer_set_subpass(cmd_buffer, subpass, false);
}
@@ -490,7 +484,7 @@
VkImageAspectFlags aspects,
uint32_t samples,
int index,
- struct radv_pipeline **pipeline,
+ VkPipeline *pipeline,
VkRenderPass render_pass)
{
struct nir_shader *vs_nir, *fs_nir;
@@ -547,8 +541,10 @@
static bool depth_view_can_fast_clear(struct radv_cmd_buffer *cmd_buffer,
const struct radv_image_view *iview,
+ VkImageAspectFlags aspects,
VkImageLayout layout,
- const VkClearRect *clear_rect)
+ const VkClearRect *clear_rect,
+ VkClearDepthStencilValue clear_value)
{
uint32_t queue_mask = radv_image_queue_family_mask(iview->image,
cmd_buffer->queue_family_index,
@@ -557,6 +553,11 @@
clear_rect->rect.extent.width != iview->extent.width ||
clear_rect->rect.extent.height != iview->extent.height)
return false;
+ if (iview->image->tc_compatible_htile &&
+ (((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && clear_value.depth != 0.0 &&
+ clear_value.depth != 1.0) ||
+ ((aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && clear_value.stencil != 0)))
+ return false;
if (iview->image->surface.htile_size &&
iview->base_mip == 0 &&
iview->base_layer == 0 &&
@@ -566,7 +567,7 @@
return false;
}
-static struct radv_pipeline *
+static VkPipeline
pick_depthstencil_pipeline(struct radv_cmd_buffer *cmd_buffer,
struct radv_meta_state *meta_state,
const struct radv_image_view *iview,
@@ -576,7 +577,7 @@
const VkClearRect *clear_rect,
VkClearDepthStencilValue clear_value)
{
- bool fast = depth_view_can_fast_clear(cmd_buffer, iview, layout, clear_rect);
+ bool fast = depth_view_can_fast_clear(cmd_buffer, iview, aspects, layout, clear_rect, clear_value);
int index = DEPTH_CLEAR_SLOW;
if (fast) {
@@ -632,20 +633,21 @@
clear_value.stencil);
}
- struct radv_pipeline *pipeline = pick_depthstencil_pipeline(cmd_buffer,
- meta_state,
- iview,
- samples_log2,
- aspects,
- subpass->depth_stencil_attachment.layout,
- clear_rect,
- clear_value);
- if (cmd_buffer->state.pipeline != pipeline) {
- radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
- radv_pipeline_to_handle(pipeline));
- }
-
- if (depth_view_can_fast_clear(cmd_buffer, iview, subpass->depth_stencil_attachment.layout, clear_rect))
+ VkPipeline pipeline = pick_depthstencil_pipeline(cmd_buffer,
+ meta_state,
+ iview,
+ samples_log2,
+ aspects,
+ subpass->depth_stencil_attachment.layout,
+ clear_rect,
+ clear_value);
+
+ radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
+ pipeline);
+
+ if (depth_view_can_fast_clear(cmd_buffer, iview, aspects,
+ subpass->depth_stencil_attachment.layout,
+ clear_rect, clear_value))
radv_set_depth_clear_regs(cmd_buffer, iview->image, clear_value, aspects);
radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) {
@@ -681,7 +683,7 @@
if (!iview->image->surface.htile_size)
return false;
- if (cmd_buffer->device->debug_flags & RADV_DEBUG_NO_FAST_CLEARS)
+ if (cmd_buffer->device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS)
return false;
if (!radv_layout_is_htile_compressed(iview->image, image_layout, radv_image_queue_family_mask(iview->image, cmd_buffer->queue_family_index, cmd_buffer->queue_family_index)))
@@ -697,9 +699,6 @@
if (iview->image->info.array_size != iview->layer_count)
goto fail;
- if (iview->image->info.levels > 1)
- goto fail;
-
if (!radv_image_extent_compare(iview->image, &iview->extent))
goto fail;
@@ -771,8 +770,6 @@
VkResult res;
struct radv_meta_state *state = &device->meta_state;
- memset(&device->meta_state.clear, 0, sizeof(device->meta_state.clear));
-
VkPipelineLayoutCreateInfo pl_color_create_info = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.setLayoutCount = 0,
@@ -945,7 +942,8 @@
const VkClearAttachment *clear_att,
const VkClearRect *clear_rect,
enum radv_cmd_flush_bits *pre_flush,
- enum radv_cmd_flush_bits *post_flush)
+ enum radv_cmd_flush_bits *post_flush,
+ uint32_t view_mask)
{
const struct radv_subpass *subpass = cmd_buffer->state.subpass;
const uint32_t subpass_att = clear_att->colorAttachment;
@@ -960,7 +958,7 @@
if (!iview->image->cmask.size && !iview->image->surface.dcc_size)
return false;
- if (cmd_buffer->device->debug_flags & RADV_DEBUG_NO_FAST_CLEARS)
+ if (cmd_buffer->device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS)
return false;
if (!radv_layout_can_fast_clear(iview->image, image_layout, radv_image_queue_family_mask(iview->image, cmd_buffer->queue_family_index, cmd_buffer->queue_family_index)))
@@ -989,9 +987,12 @@
clear_rect->rect.extent.height != iview->image->info.height)
goto fail;
- if (clear_rect->baseArrayLayer != 0)
+ if (view_mask && (iview->image->info.array_size >= 32 ||
+ (1u << iview->image->info.array_size) - 1u != view_mask))
goto fail;
- if (clear_rect->layerCount != iview->image->info.array_size)
+ if (!view_mask && clear_rect->baseArrayLayer != 0)
+ goto fail;
+ if (!view_mask && clear_rect->layerCount != iview->image->info.array_size)
goto fail;
/* RB+ doesn't work with CMASK fast clear on Stoney. */
@@ -1026,11 +1027,6 @@
radv_set_dcc_need_cmask_elim_pred(cmd_buffer, iview->image,
!can_avoid_fast_clear_elim);
} else {
-
- if (iview->image->surface.bpe > 8) {
- /* 128 bit formats not supported */
- return false;
- }
radv_fill_buffer(cmd_buffer, iview->image->bo,
iview->image->offset + iview->image->cmask.offset,
iview->image->cmask.size, 0);
@@ -1060,13 +1056,13 @@
const VkClearAttachment *clear_att,
const VkClearRect *clear_rect,
enum radv_cmd_flush_bits *pre_flush,
- enum radv_cmd_flush_bits *post_flush)
+ enum radv_cmd_flush_bits *post_flush,
+ uint32_t view_mask)
{
if (clear_att->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
-
if (!emit_fast_color_clear(cmd_buffer, clear_att, clear_rect,
- pre_flush, post_flush))
- emit_color_clear(cmd_buffer, clear_att, clear_rect);
+ pre_flush, post_flush, view_mask))
+ emit_color_clear(cmd_buffer, clear_att, clear_rect, view_mask);
} else {
assert(clear_att->aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT |
VK_IMAGE_ASPECT_STENCIL_BIT));
@@ -1076,29 +1072,56 @@
}
}
+static inline bool
+radv_attachment_needs_clear(struct radv_cmd_state *cmd_state, uint32_t a)
+{
+ uint32_t view_mask = cmd_state->subpass->view_mask;
+ return (a != VK_ATTACHMENT_UNUSED &&
+ cmd_state->attachments[a].pending_clear_aspects &&
+ (!view_mask || (view_mask & ~cmd_state->attachments[a].cleared_views)));
+}
+
static bool
-subpass_needs_clear(const struct radv_cmd_buffer *cmd_buffer)
+radv_subpass_needs_clear(struct radv_cmd_buffer *cmd_buffer)
{
- const struct radv_cmd_state *cmd_state = &cmd_buffer->state;
- uint32_t ds;
+ struct radv_cmd_state *cmd_state = &cmd_buffer->state;
+ uint32_t a;
if (!cmd_state->subpass)
return false;
- ds = cmd_state->subpass->depth_stencil_attachment.attachment;
+
for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) {
- uint32_t a = cmd_state->subpass->color_attachments[i].attachment;
- if (a != VK_ATTACHMENT_UNUSED &&
- cmd_state->attachments[a].pending_clear_aspects) {
+ a = cmd_state->subpass->color_attachments[i].attachment;
+ if (radv_attachment_needs_clear(cmd_state, a))
return true;
- }
}
- if (ds != VK_ATTACHMENT_UNUSED &&
- cmd_state->attachments[ds].pending_clear_aspects) {
- return true;
- }
+ a = cmd_state->subpass->depth_stencil_attachment.attachment;
+ return radv_attachment_needs_clear(cmd_state, a);
+}
- return false;
+static void
+radv_subpass_clear_attachment(struct radv_cmd_buffer *cmd_buffer,
+ struct radv_attachment_state *attachment,
+ const VkClearAttachment *clear_att,
+ enum radv_cmd_flush_bits *pre_flush,
+ enum radv_cmd_flush_bits *post_flush)
+{
+ struct radv_cmd_state *cmd_state = &cmd_buffer->state;
+ uint32_t view_mask = cmd_state->subpass->view_mask;
+
+ VkClearRect clear_rect = {
+ .rect = cmd_state->render_area,
+ .baseArrayLayer = 0,
+ .layerCount = cmd_state->framebuffer->layers,
+ };
+
+ emit_clear(cmd_buffer, clear_att, &clear_rect, pre_flush, post_flush,
+ view_mask & ~attachment->cleared_views);
+ if (view_mask)
+ attachment->cleared_views |= view_mask;
+ else
+ attachment->pending_clear_aspects = 0;
}
/**
@@ -1114,22 +1137,17 @@
enum radv_cmd_flush_bits pre_flush = 0;
enum radv_cmd_flush_bits post_flush = 0;
- if (!subpass_needs_clear(cmd_buffer))
+ if (!radv_subpass_needs_clear(cmd_buffer))
return;
- radv_meta_save_graphics_reset_vport_scissor_novertex(&saved_state, cmd_buffer);
-
- VkClearRect clear_rect = {
- .rect = cmd_state->render_area,
- .baseArrayLayer = 0,
- .layerCount = cmd_state->framebuffer->layers,
- };
+ radv_meta_save(&saved_state, cmd_buffer,
+ RADV_META_SAVE_GRAPHICS_PIPELINE |
+ RADV_META_SAVE_CONSTANTS);
for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) {
uint32_t a = cmd_state->subpass->color_attachments[i].attachment;
- if (a == VK_ATTACHMENT_UNUSED ||
- !cmd_state->attachments[a].pending_clear_aspects)
+ if (!radv_attachment_needs_clear(cmd_state, a))
continue;
assert(cmd_state->attachments[a].pending_clear_aspects ==
@@ -1141,25 +1159,23 @@
.clearValue = cmd_state->attachments[a].clear_value,
};
- emit_clear(cmd_buffer, &clear_att, &clear_rect, &pre_flush, &post_flush);
- cmd_state->attachments[a].pending_clear_aspects = 0;
+ radv_subpass_clear_attachment(cmd_buffer,
+ &cmd_state->attachments[a],
+ &clear_att, &pre_flush,
+ &post_flush);
}
uint32_t ds = cmd_state->subpass->depth_stencil_attachment.attachment;
+ if (radv_attachment_needs_clear(cmd_state, ds)) {
+ VkClearAttachment clear_att = {
+ .aspectMask = cmd_state->attachments[ds].pending_clear_aspects,
+ .clearValue = cmd_state->attachments[ds].clear_value,
+ };
- if (ds != VK_ATTACHMENT_UNUSED) {
-
- if (cmd_state->attachments[ds].pending_clear_aspects) {
-
- VkClearAttachment clear_att = {
- .aspectMask = cmd_state->attachments[ds].pending_clear_aspects,
- .clearValue = cmd_state->attachments[ds].clear_value,
- };
-
- emit_clear(cmd_buffer, &clear_att, &clear_rect,
- &pre_flush, &post_flush);
- cmd_state->attachments[ds].pending_clear_aspects = 0;
- }
+ radv_subpass_clear_attachment(cmd_buffer,
+ &cmd_state->attachments[ds],
+ &clear_att, &pre_flush,
+ &post_flush);
}
radv_meta_restore(&saved_state, cmd_buffer);
@@ -1286,7 +1302,7 @@
.layerCount = 1, /* FINISHME: clear multi-layer framebuffer */
};
- emit_clear(cmd_buffer, &clear_att, &clear_rect, NULL, NULL);
+ emit_clear(cmd_buffer, &clear_att, &clear_rect, NULL, NULL, 0);
radv_CmdEndRenderPass(radv_cmd_buffer_to_handle(cmd_buffer));
radv_DestroyRenderPass(device_h, pass,
@@ -1347,11 +1363,6 @@
}
}
-union meta_saved_state {
- struct radv_meta_saved_state gfx;
- struct radv_meta_saved_compute_state compute;
-};
-
void radv_CmdClearColorImage(
VkCommandBuffer commandBuffer,
VkImage image_h,
@@ -1362,22 +1373,25 @@
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_image, image, image_h);
- union meta_saved_state saved_state;
+ struct radv_meta_saved_state saved_state;
bool cs = cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE;
- if (cs)
- radv_meta_begin_cleari(cmd_buffer, &saved_state.compute);
- else
- radv_meta_save_graphics_reset_vport_scissor_novertex(&saved_state.gfx, cmd_buffer);
+ if (cs) {
+ radv_meta_save(&saved_state, cmd_buffer,
+ RADV_META_SAVE_COMPUTE_PIPELINE |
+ RADV_META_SAVE_CONSTANTS |
+ RADV_META_SAVE_DESCRIPTORS);
+ } else {
+ radv_meta_save(&saved_state, cmd_buffer,
+ RADV_META_SAVE_GRAPHICS_PIPELINE |
+ RADV_META_SAVE_CONSTANTS);
+ }
radv_cmd_clear_image(cmd_buffer, image, imageLayout,
(const VkClearValue *) pColor,
rangeCount, pRanges, cs);
- if (cs)
- radv_meta_end_cleari(cmd_buffer, &saved_state.compute);
- else
- radv_meta_restore(&saved_state.gfx, cmd_buffer);
+ radv_meta_restore(&saved_state, cmd_buffer);
}
void radv_CmdClearDepthStencilImage(
@@ -1392,7 +1406,9 @@
RADV_FROM_HANDLE(radv_image, image, image_h);
struct radv_meta_saved_state saved_state;
- radv_meta_save_graphics_reset_vport_scissor_novertex(&saved_state, cmd_buffer);
+ radv_meta_save(&saved_state, cmd_buffer,
+ RADV_META_SAVE_GRAPHICS_PIPELINE |
+ RADV_META_SAVE_CONSTANTS);
radv_cmd_clear_image(cmd_buffer, image, imageLayout,
(const VkClearValue *) pDepthStencil,
@@ -1416,14 +1432,17 @@
if (!cmd_buffer->state.subpass)
return;
- radv_meta_save_graphics_reset_vport_scissor_novertex(&saved_state, cmd_buffer);
+ radv_meta_save(&saved_state, cmd_buffer,
+ RADV_META_SAVE_GRAPHICS_PIPELINE |
+ RADV_META_SAVE_CONSTANTS);
/* FINISHME: We can do better than this dumb loop. It thrashes too much
* state.
*/
for (uint32_t a = 0; a < attachmentCount; ++a) {
for (uint32_t r = 0; r < rectCount; ++r) {
- emit_clear(cmd_buffer, &pAttachments[a], &pRects[r], &pre_flush, &post_flush);
+ emit_clear(cmd_buffer, &pAttachments[a], &pRects[r], &pre_flush, &post_flush,
+ cmd_buffer->state.subpass->view_mask);
}
}
diff -Nru mesa-17.2.4/src/amd/vulkan/radv_meta_copy.c mesa-17.3.3/src/amd/vulkan/radv_meta_copy.c
--- mesa-17.2.4/src/amd/vulkan/radv_meta_copy.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/vulkan/radv_meta_copy.c 2018-01-18 21:30:28.000000000 +0000
@@ -79,6 +79,7 @@
static struct radv_meta_blit2d_surf
blit_surf_for_image_level_layer(struct radv_image *image,
+ VkImageLayout layout,
const VkImageSubresourceLayers *subres)
{
VkFormat format = image->vk_format;
@@ -87,7 +88,8 @@
else if (subres->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT)
format = vk_format_stencil_only(format);
- if (!image->surface.dcc_size)
+ if (!image->surface.dcc_size &&
+ !(image->surface.htile_size && image->tc_compatible_htile))
format = vk_format_for_size(vk_format_get_blocksize(format));
return (struct radv_meta_blit2d_surf) {
@@ -97,33 +99,31 @@
.layer = subres->baseArrayLayer,
.image = image,
.aspect_mask = subres->aspectMask,
+ .current_layout = layout,
};
}
-union meta_saved_state {
- struct radv_meta_saved_state gfx;
- struct radv_meta_saved_compute_state compute;
-};
-
static void
meta_copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer,
struct radv_buffer* buffer,
struct radv_image* image,
+ VkImageLayout layout,
uint32_t regionCount,
const VkBufferImageCopy* pRegions)
{
bool cs = cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE;
- union meta_saved_state saved_state;
+ struct radv_meta_saved_state saved_state;
/* The Vulkan 1.0 spec says "dstImage must have a sample count equal to
* VK_SAMPLE_COUNT_1_BIT."
*/
assert(image->info.samples == 1);
- if (cs)
- radv_meta_begin_bufimage(cmd_buffer, &saved_state.compute);
- else
- radv_meta_save_graphics_reset_vport_scissor_novertex(&saved_state.gfx, cmd_buffer);
+ radv_meta_save(&saved_state, cmd_buffer,
+ (cs ? RADV_META_SAVE_COMPUTE_PIPELINE :
+ RADV_META_SAVE_GRAPHICS_PIPELINE) |
+ RADV_META_SAVE_CONSTANTS |
+ RADV_META_SAVE_DESCRIPTORS);
for (unsigned r = 0; r < regionCount; r++) {
@@ -159,6 +159,7 @@
/* Create blit surfaces */
struct radv_meta_blit2d_surf img_bsurf =
blit_surf_for_image_level_layer(image,
+ layout,
&pRegions[r].imageSubresource);
struct radv_meta_blit2d_buffer buf_bsurf = {
@@ -202,10 +203,8 @@
slice_array++;
}
}
- if (cs)
- radv_meta_end_bufimage(cmd_buffer, &saved_state.compute);
- else
- radv_meta_restore(&saved_state.gfx, cmd_buffer);
+
+ radv_meta_restore(&saved_state, cmd_buffer);
}
void radv_CmdCopyBufferToImage(
@@ -220,7 +219,7 @@
RADV_FROM_HANDLE(radv_image, dest_image, destImage);
RADV_FROM_HANDLE(radv_buffer, src_buffer, srcBuffer);
- meta_copy_buffer_to_image(cmd_buffer, src_buffer, dest_image,
+ meta_copy_buffer_to_image(cmd_buffer, src_buffer, dest_image, destImageLayout,
regionCount, pRegions);
}
@@ -228,12 +227,17 @@
meta_copy_image_to_buffer(struct radv_cmd_buffer *cmd_buffer,
struct radv_buffer* buffer,
struct radv_image* image,
+ VkImageLayout layout,
uint32_t regionCount,
const VkBufferImageCopy* pRegions)
{
- struct radv_meta_saved_compute_state saved_state;
+ struct radv_meta_saved_state saved_state;
+
+ radv_meta_save(&saved_state, cmd_buffer,
+ RADV_META_SAVE_COMPUTE_PIPELINE |
+ RADV_META_SAVE_CONSTANTS |
+ RADV_META_SAVE_DESCRIPTORS);
- radv_meta_begin_bufimage(cmd_buffer, &saved_state);
for (unsigned r = 0; r < regionCount; r++) {
/**
@@ -268,6 +272,7 @@
/* Create blit surfaces */
struct radv_meta_blit2d_surf img_info =
blit_surf_for_image_level_layer(image,
+ layout,
&pRegions[r].imageSubresource);
struct radv_meta_blit2d_buffer buf_info = {
@@ -303,7 +308,8 @@
slice_array++;
}
}
- radv_meta_end_bufimage(cmd_buffer, &saved_state);
+
+ radv_meta_restore(&saved_state, cmd_buffer);
}
void radv_CmdCopyImageToBuffer(
@@ -319,18 +325,21 @@
RADV_FROM_HANDLE(radv_buffer, dst_buffer, destBuffer);
meta_copy_image_to_buffer(cmd_buffer, dst_buffer, src_image,
+ srcImageLayout,
regionCount, pRegions);
}
static void
meta_copy_image(struct radv_cmd_buffer *cmd_buffer,
struct radv_image *src_image,
+ VkImageLayout src_image_layout,
struct radv_image *dest_image,
+ VkImageLayout dest_image_layout,
uint32_t regionCount,
const VkImageCopy *pRegions)
{
bool cs = cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE;
- union meta_saved_state saved_state;
+ struct radv_meta_saved_state saved_state;
/* From the Vulkan 1.0 spec:
*
@@ -338,10 +347,12 @@
* images, but both images must have the same number of samples.
*/
assert(src_image->info.samples == dest_image->info.samples);
- if (cs)
- radv_meta_begin_itoi(cmd_buffer, &saved_state.compute);
- else
- radv_meta_save_graphics_reset_vport_scissor_novertex(&saved_state.gfx, cmd_buffer);
+
+ radv_meta_save(&saved_state, cmd_buffer,
+ (cs ? RADV_META_SAVE_COMPUTE_PIPELINE :
+ RADV_META_SAVE_GRAPHICS_PIPELINE) |
+ RADV_META_SAVE_CONSTANTS |
+ RADV_META_SAVE_DESCRIPTORS);
for (unsigned r = 0; r < regionCount; r++) {
assert(pRegions[r].srcSubresource.aspectMask ==
@@ -350,10 +361,12 @@
/* Create blit surfaces */
struct radv_meta_blit2d_surf b_src =
blit_surf_for_image_level_layer(src_image,
+ src_image_layout,
&pRegions[r].srcSubresource);
struct radv_meta_blit2d_surf b_dst =
blit_surf_for_image_level_layer(dest_image,
+ dest_image_layout,
&pRegions[r].dstSubresource);
/* for DCC */
@@ -412,10 +425,7 @@
}
}
- if (cs)
- radv_meta_end_itoi(cmd_buffer, &saved_state.compute);
- else
- radv_meta_restore(&saved_state.gfx, cmd_buffer);
+ radv_meta_restore(&saved_state, cmd_buffer);
}
void radv_CmdCopyImage(
@@ -431,7 +441,9 @@
RADV_FROM_HANDLE(radv_image, src_image, srcImage);
RADV_FROM_HANDLE(radv_image, dest_image, destImage);
- meta_copy_image(cmd_buffer, src_image, dest_image,
+ meta_copy_image(cmd_buffer,
+ src_image, srcImageLayout,
+ dest_image, destImageLayout,
regionCount, pRegions);
}
@@ -451,6 +463,7 @@
image_copy.extent.height = image->info.height;
image_copy.extent.depth = 1;
- meta_copy_image(cmd_buffer, image, linear_image,
+ meta_copy_image(cmd_buffer, image, VK_IMAGE_LAYOUT_GENERAL, linear_image,
+ VK_IMAGE_LAYOUT_GENERAL,
1, &image_copy);
}
diff -Nru mesa-17.2.4/src/amd/vulkan/radv_meta_decompress.c mesa-17.3.3/src/amd/vulkan/radv_meta_decompress.c
--- mesa-17.2.4/src/amd/vulkan/radv_meta_decompress.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/vulkan/radv_meta_decompress.c 2018-01-18 21:30:28.000000000 +0000
@@ -38,10 +38,13 @@
const VkAllocationCallbacks *alloc = &device->meta_state.alloc;
VkAttachmentDescription attachment;
+ attachment.flags = 0;
attachment.format = VK_FORMAT_D32_SFLOAT_S8_UINT;
attachment.samples = samples;
attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
+ attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
+ attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
attachment.initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
attachment.finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
@@ -204,22 +207,17 @@
radv_device_finish_meta_depth_decomp_state(struct radv_device *device)
{
struct radv_meta_state *state = &device->meta_state;
- VkDevice device_h = radv_device_to_handle(device);
- const VkAllocationCallbacks *alloc = &device->meta_state.alloc;
for (uint32_t i = 0; i < ARRAY_SIZE(state->depth_decomp); ++i) {
- VkRenderPass pass_h = state->depth_decomp[i].pass;
- if (pass_h) {
- radv_DestroyRenderPass(device_h, pass_h, alloc);
- }
- VkPipeline pipeline_h = state->depth_decomp[i].decompress_pipeline;
- if (pipeline_h) {
- radv_DestroyPipeline(device_h, pipeline_h, alloc);
- }
- pipeline_h = state->depth_decomp[i].resummarize_pipeline;
- if (pipeline_h) {
- radv_DestroyPipeline(device_h, pipeline_h, alloc);
- }
+ radv_DestroyRenderPass(radv_device_to_handle(device),
+ state->depth_decomp[i].pass,
+ &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->depth_decomp[i].decompress_pipeline,
+ &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->depth_decomp[i].resummarize_pipeline,
+ &state->alloc);
}
}
@@ -229,8 +227,6 @@
struct radv_meta_state *state = &device->meta_state;
VkResult res = VK_SUCCESS;
- zero(state->depth_decomp);
-
struct radv_shader_module vs_module = { .nir = radv_meta_build_nir_vs_generate_vertices() };
if (!vs_module.nir) {
/* XXX: Need more accurate error */
@@ -268,22 +264,17 @@
static void
emit_depth_decomp(struct radv_cmd_buffer *cmd_buffer,
- const VkOffset2D *dest_offset,
const VkExtent2D *depth_decomp_extent,
VkPipeline pipeline_h)
{
VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
- RADV_FROM_HANDLE(radv_pipeline, pipeline, pipeline_h);
-
- if (cmd_buffer->state.pipeline != pipeline) {
- radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
- pipeline_h);
- }
+ radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
+ pipeline_h);
radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) {
- .x = dest_offset->x,
- .y = dest_offset->y,
+ .x = 0,
+ .y = 0,
.width = depth_decomp_extent->width,
.height = depth_decomp_extent->height,
.minDepth = 0.0f,
@@ -291,7 +282,7 @@
});
radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkRect2D) {
- .offset = *dest_offset,
+ .offset = { 0, 0 },
.extent = *depth_decomp_extent,
});
@@ -310,7 +301,6 @@
enum radv_depth_op op)
{
struct radv_meta_saved_state saved_state;
- struct radv_meta_saved_pass_state saved_pass_state;
VkDevice device_h = radv_device_to_handle(cmd_buffer->device);
VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
uint32_t width = radv_minify(image->info.width,
@@ -320,12 +310,25 @@
uint32_t samples = image->info.samples;
uint32_t samples_log2 = ffs(samples) - 1;
struct radv_meta_state *meta_state = &cmd_buffer->device->meta_state;
+ VkPipeline pipeline_h;
if (!image->surface.htile_size)
return;
- radv_meta_save_pass(&saved_pass_state, cmd_buffer);
- radv_meta_save_graphics_reset_vport_scissor_novertex(&saved_state, cmd_buffer);
+ radv_meta_save(&saved_state, cmd_buffer,
+ RADV_META_SAVE_GRAPHICS_PIPELINE |
+ RADV_META_SAVE_PASS);
+
+ switch (op) {
+ case DEPTH_DECOMPRESS:
+ pipeline_h = meta_state->depth_decomp[samples_log2].decompress_pipeline;
+ break;
+ case DEPTH_RESUMMARIZE:
+ pipeline_h = meta_state->depth_decomp[samples_log2].resummarize_pipeline;
+ break;
+ default:
+ unreachable("unknown operation");
+ }
for (uint32_t layer = 0; layer < radv_get_layerCount(image, subresourceRange); layer++) {
struct radv_image_view iview;
@@ -334,6 +337,7 @@
&(VkImageViewCreateInfo) {
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.image = radv_image_to_handle(image),
+ .viewType = radv_meta_get_view_type(image),
.format = image->vk_format,
.subresourceRange = {
.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT,
@@ -380,26 +384,13 @@
},
VK_SUBPASS_CONTENTS_INLINE);
- VkPipeline pipeline_h;
- switch (op) {
- case DEPTH_DECOMPRESS:
- pipeline_h = meta_state->depth_decomp[samples_log2].decompress_pipeline;
- break;
- case DEPTH_RESUMMARIZE:
- pipeline_h = meta_state->depth_decomp[samples_log2].resummarize_pipeline;
- break;
- default:
- unreachable("unknown operation");
- }
-
- emit_depth_decomp(cmd_buffer, &(VkOffset2D){0, 0 }, &(VkExtent2D){width, height}, pipeline_h);
+ emit_depth_decomp(cmd_buffer, &(VkExtent2D){width, height}, pipeline_h);
radv_CmdEndRenderPass(cmd_buffer_h);
radv_DestroyFramebuffer(device_h, fb_h,
&cmd_buffer->pool->alloc);
}
radv_meta_restore(&saved_state, cmd_buffer);
- radv_meta_restore_pass(&saved_pass_state, cmd_buffer);
}
void radv_decompress_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
diff -Nru mesa-17.2.4/src/amd/vulkan/radv_meta_fast_clear.c mesa-17.3.3/src/amd/vulkan/radv_meta_fast_clear.c
--- mesa-17.2.4/src/amd/vulkan/radv_meta_fast_clear.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/vulkan/radv_meta_fast_clear.c 2018-01-18 21:30:28.000000000 +0000
@@ -242,23 +242,15 @@
radv_device_finish_meta_fast_clear_flush_state(struct radv_device *device)
{
struct radv_meta_state *state = &device->meta_state;
- VkDevice device_h = radv_device_to_handle(device);
- VkRenderPass pass_h = device->meta_state.fast_clear_flush.pass;
- const VkAllocationCallbacks *alloc = &device->meta_state.alloc;
-
- if (pass_h)
- radv_DestroyRenderPass(device_h, pass_h,
- &device->meta_state.alloc);
-
- VkPipeline pipeline_h = state->fast_clear_flush.cmask_eliminate_pipeline;
- if (pipeline_h) {
- radv_DestroyPipeline(device_h, pipeline_h, alloc);
- }
-
- pipeline_h = state->fast_clear_flush.fmask_decompress_pipeline;
- if (pipeline_h) {
- radv_DestroyPipeline(device_h, pipeline_h, alloc);
- }
+
+ radv_DestroyRenderPass(radv_device_to_handle(device),
+ state->fast_clear_flush.pass, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->fast_clear_flush.cmask_eliminate_pipeline,
+ &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->fast_clear_flush.fmask_decompress_pipeline,
+ &state->alloc);
}
VkResult
@@ -266,8 +258,6 @@
{
VkResult res = VK_SUCCESS;
- zero(device->meta_state.fast_clear_flush);
-
struct radv_shader_module vs_module = { .nir = radv_meta_build_nir_vs_generate_vertices() };
if (!vs_module.nir) {
/* XXX: Need more accurate error */
@@ -298,22 +288,12 @@
static void
emit_fast_clear_flush(struct radv_cmd_buffer *cmd_buffer,
const VkExtent2D *resolve_extent,
- bool fmask_decompress)
+ VkPipeline pipeline)
{
- struct radv_device *device = cmd_buffer->device;
VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
- VkPipeline pipeline_h;
- if (fmask_decompress)
- pipeline_h = device->meta_state.fast_clear_flush.fmask_decompress_pipeline;
- else
- pipeline_h = device->meta_state.fast_clear_flush.cmask_eliminate_pipeline;
- RADV_FROM_HANDLE(radv_pipeline, pipeline, pipeline_h);
-
- if (cmd_buffer->state.pipeline != pipeline) {
- radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
- pipeline_h);
- }
+ radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
+ pipeline);
radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) {
.x = 0,
@@ -341,7 +321,7 @@
uint64_t va = 0;
if (value) {
- va = cmd_buffer->device->ws->buffer_get_va(image->bo) + image->offset;
+ va = radv_buffer_get_va(image->bo) + image->offset;
va += image->dcc_pred_offset;
}
@@ -356,14 +336,22 @@
const VkImageSubresourceRange *subresourceRange)
{
struct radv_meta_saved_state saved_state;
- struct radv_meta_saved_pass_state saved_pass_state;
VkDevice device_h = radv_device_to_handle(cmd_buffer->device);
VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
uint32_t layer_count = radv_get_layerCount(image, subresourceRange);
+ VkPipeline pipeline;
assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
- radv_meta_save_pass(&saved_pass_state, cmd_buffer);
- radv_meta_save_graphics_reset_vport_scissor_novertex(&saved_state, cmd_buffer);
+
+ radv_meta_save(&saved_state, cmd_buffer,
+ RADV_META_SAVE_GRAPHICS_PIPELINE |
+ RADV_META_SAVE_PASS);
+
+ if (image->fmask.size > 0) {
+ pipeline = cmd_buffer->device->meta_state.fast_clear_flush.fmask_decompress_pipeline;
+ } else {
+ pipeline = cmd_buffer->device->meta_state.fast_clear_flush.cmask_eliminate_pipeline;
+ }
if (image->surface.dcc_size) {
radv_emit_set_predication_state_from_image(cmd_buffer, image, true);
@@ -424,7 +412,7 @@
emit_fast_clear_flush(cmd_buffer,
&(VkExtent2D) { image->info.width, image->info.height },
- image->fmask.size > 0);
+ pipeline);
radv_CmdEndRenderPass(cmd_buffer_h);
radv_DestroyFramebuffer(device_h, fb_h,
@@ -436,5 +424,4 @@
radv_emit_set_predication_state_from_image(cmd_buffer, image, false);
}
radv_meta_restore(&saved_state, cmd_buffer);
- radv_meta_restore_pass(&saved_pass_state, cmd_buffer);
}
diff -Nru mesa-17.2.4/src/amd/vulkan/radv_meta.h mesa-17.3.3/src/amd/vulkan/radv_meta.h
--- mesa-17.2.4/src/amd/vulkan/radv_meta.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/vulkan/radv_meta.h 2018-01-18 21:30:28.000000000 +0000
@@ -27,30 +27,30 @@
#define RADV_META_H
#include "radv_private.h"
+#include "radv_shader.h"
#ifdef __cplusplus
extern "C" {
#endif
-#define RADV_META_VERTEX_BINDING_COUNT 2
+enum radv_meta_save_flags {
+ RADV_META_SAVE_PASS = (1 << 0),
+ RADV_META_SAVE_CONSTANTS = (1 << 1),
+ RADV_META_SAVE_DESCRIPTORS = (1 << 2),
+ RADV_META_SAVE_GRAPHICS_PIPELINE = (1 << 3),
+ RADV_META_SAVE_COMPUTE_PIPELINE = (1 << 4),
+};
struct radv_meta_saved_state {
- bool vertex_saved;
- struct radv_vertex_binding old_vertex_bindings[RADV_META_VERTEX_BINDING_COUNT];
+ uint32_t flags;
+
struct radv_descriptor_set *old_descriptor_set0;
struct radv_pipeline *old_pipeline;
-
- /**
- * Bitmask of (1 << VK_DYNAMIC_STATE_*). Defines the set of saved dynamic
- * state.
- */
- uint32_t dynamic_mask;
- struct radv_dynamic_state dynamic;
+ struct radv_viewport_state viewport;
+ struct radv_scissor_state scissor;
char push_constants[128];
-};
-struct radv_meta_saved_pass_state {
struct radv_render_pass *pass;
const struct radv_subpass *subpass;
struct radv_attachment_state *attachments;
@@ -58,13 +58,6 @@
VkRect2D render_area;
};
-struct radv_meta_saved_compute_state {
- struct radv_descriptor_set *old_descriptor_set0;
- struct radv_pipeline *old_pipeline;
-
- char push_constants[128];
-};
-
VkResult radv_device_init_meta_clear_state(struct radv_device *device);
void radv_device_finish_meta_clear_state(struct radv_device *device);
@@ -95,23 +88,12 @@
VkResult radv_device_init_meta_resolve_fragment_state(struct radv_device *device);
void radv_device_finish_meta_resolve_fragment_state(struct radv_device *device);
+void radv_meta_save(struct radv_meta_saved_state *saved_state,
+ struct radv_cmd_buffer *cmd_buffer, uint32_t flags);
+
void radv_meta_restore(const struct radv_meta_saved_state *state,
struct radv_cmd_buffer *cmd_buffer);
-void radv_meta_save_pass(struct radv_meta_saved_pass_state *state,
- const struct radv_cmd_buffer *cmd_buffer);
-
-void radv_meta_restore_pass(const struct radv_meta_saved_pass_state *state,
- struct radv_cmd_buffer *cmd_buffer);
-
-void radv_meta_save_compute(struct radv_meta_saved_compute_state *state,
- const struct radv_cmd_buffer *cmd_buffer,
- unsigned push_constant_size);
-
-void radv_meta_restore_compute(const struct radv_meta_saved_compute_state *state,
- struct radv_cmd_buffer *cmd_buffer,
- unsigned push_constant_size);
-
VkImageViewType radv_meta_get_view_type(const struct radv_image *image);
uint32_t radv_meta_get_iview_layer(const struct radv_image *dest_image,
@@ -127,6 +109,7 @@
unsigned level;
unsigned layer;
VkImageAspectFlags aspect_mask;
+ VkImageLayout current_layout;
};
struct radv_meta_blit2d_buffer {
@@ -159,18 +142,6 @@
VkResult radv_device_init_meta_bufimage_state(struct radv_device *device);
void radv_device_finish_meta_bufimage_state(struct radv_device *device);
-void radv_meta_begin_bufimage(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_saved_compute_state *save);
-void radv_meta_end_bufimage(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_saved_compute_state *save);
-void radv_meta_begin_itoi(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_saved_compute_state *save);
-void radv_meta_end_itoi(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_saved_compute_state *save);
-void radv_meta_begin_cleari(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_saved_compute_state *save);
-void radv_meta_end_cleari(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_saved_compute_state *save);
void radv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer,
struct radv_meta_blit2d_surf *src,
struct radv_meta_blit2d_buffer *dst,
@@ -201,9 +172,6 @@
struct radv_image *image,
const VkImageSubresourceRange *subresourceRange);
-void radv_meta_save_graphics_reset_vport_scissor_novertex(struct radv_meta_saved_state *saved_state,
- struct radv_cmd_buffer *cmd_buffer);
-
void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer,
struct radv_image *src_image,
VkImageLayout src_image_layout,
diff -Nru mesa-17.2.4/src/amd/vulkan/radv_meta_resolve.c mesa-17.3.3/src/amd/vulkan/radv_meta_resolve.c
--- mesa-17.2.4/src/amd/vulkan/radv_meta_resolve.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/vulkan/radv_meta_resolve.c 2018-01-18 21:30:28.000000000 +0000
@@ -219,18 +219,11 @@
radv_device_finish_meta_resolve_state(struct radv_device *device)
{
struct radv_meta_state *state = &device->meta_state;
- VkDevice device_h = radv_device_to_handle(device);
- VkRenderPass pass_h = device->meta_state.resolve.pass;
- const VkAllocationCallbacks *alloc = &device->meta_state.alloc;
- if (pass_h)
- radv_DestroyRenderPass(device_h, pass_h,
- &device->meta_state.alloc);
-
- VkPipeline pipeline_h = state->resolve.pipeline;
- if (pipeline_h) {
- radv_DestroyPipeline(device_h, pipeline_h, alloc);
- }
+ radv_DestroyRenderPass(radv_device_to_handle(device),
+ state->resolve.pass, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->resolve.pipeline, &state->alloc);
}
VkResult
@@ -238,8 +231,6 @@
{
VkResult res = VK_SUCCESS;
- zero(device->meta_state.resolve);
-
struct radv_shader_module vs_module = { .nir = radv_meta_build_nir_vs_generate_vertices() };
if (!vs_module.nir) {
/* XXX: Need more accurate error */
@@ -277,13 +268,8 @@
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
- VkPipeline pipeline_h = device->meta_state.resolve.pipeline;
- RADV_FROM_HANDLE(radv_pipeline, pipeline, pipeline_h);
-
- if (cmd_buffer->state.pipeline != pipeline) {
- radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
- pipeline_h);
- }
+ radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
+ device->meta_state.resolve.pipeline);
radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) {
.x = dest_offset->x,
@@ -314,11 +300,10 @@
enum radv_resolve_method *method)
{
- if (dest_image->surface.micro_tile_mode != src_image->surface.micro_tile_mode) {
- if (dest_image->surface.num_dcc_levels > 0)
- *method = RESOLVE_FRAGMENT;
- else
- *method = RESOLVE_COMPUTE;
+ if (dest_image->surface.num_dcc_levels > 0) {
+ *method = RESOLVE_FRAGMENT;
+ } else if (dest_image->surface.micro_tile_mode != src_image->surface.micro_tile_mode) {
+ *method = RESOLVE_COMPUTE;
}
}
@@ -379,7 +364,8 @@
return;
}
- radv_meta_save_graphics_reset_vport_scissor_novertex(&saved_state, cmd_buffer);
+ radv_meta_save(&saved_state, cmd_buffer,
+ RADV_META_SAVE_GRAPHICS_PIPELINE);
assert(src_image->info.samples > 1);
if (src_image->info.samples <= 1) {
@@ -587,7 +573,8 @@
return;
}
- radv_meta_save_graphics_reset_vport_scissor_novertex(&saved_state, cmd_buffer);
+ radv_meta_save(&saved_state, cmd_buffer,
+ RADV_META_SAVE_GRAPHICS_PIPELINE);
for (uint32_t i = 0; i < subpass->color_count; ++i) {
VkAttachmentReference src_att = subpass->color_attachments[i];
diff -Nru mesa-17.2.4/src/amd/vulkan/radv_meta_resolve_cs.c mesa-17.3.3/src/amd/vulkan/radv_meta_resolve_cs.c
--- mesa-17.2.4/src/amd/vulkan/radv_meta_resolve_cs.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/vulkan/radv_meta_resolve_cs.c 2018-01-18 21:30:28.000000000 +0000
@@ -250,7 +250,6 @@
{
struct radv_meta_state *state = &device->meta_state;
VkResult res;
- memset(&device->meta_state.resolve_compute, 0, sizeof(device->meta_state.resolve_compute));
res = create_layout(device);
if (res != VK_SUCCESS)
@@ -352,10 +351,9 @@
pipeline = device->meta_state.resolve_compute.rc[samples_log2].srgb_pipeline;
else
pipeline = device->meta_state.resolve_compute.rc[samples_log2].pipeline;
- if (cmd_buffer->state.compute_pipeline != radv_pipeline_from_handle(pipeline)) {
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
- }
+
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
+ VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
unsigned push_constants[4] = {
src_offset->x,
@@ -379,7 +377,7 @@
uint32_t region_count,
const VkImageResolve *regions)
{
- struct radv_meta_saved_compute_state saved_state;
+ struct radv_meta_saved_state saved_state;
for (uint32_t r = 0; r < region_count; ++r) {
const VkImageResolve *region = &regions[r];
@@ -395,7 +393,10 @@
radv_fast_clear_flush_image_inplace(cmd_buffer, src_image, &range);
}
- radv_meta_save_compute(&saved_state, cmd_buffer, 16);
+ radv_meta_save(&saved_state, cmd_buffer,
+ RADV_META_SAVE_COMPUTE_PIPELINE |
+ RADV_META_SAVE_CONSTANTS |
+ RADV_META_SAVE_DESCRIPTORS);
for (uint32_t r = 0; r < region_count; ++r) {
const VkImageResolve *region = &regions[r];
@@ -462,7 +463,7 @@
&(VkExtent2D) {extent.width, extent.height });
}
}
- radv_meta_restore_compute(&saved_state, cmd_buffer, 16);
+ radv_meta_restore(&saved_state, cmd_buffer);
}
/**
@@ -473,7 +474,7 @@
{
struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
const struct radv_subpass *subpass = cmd_buffer->state.subpass;
- struct radv_meta_saved_compute_state saved_state;
+ struct radv_meta_saved_state saved_state;
/* FINISHME(perf): Skip clears for resolve attachments.
*
* From the Vulkan 1.0 spec:
@@ -486,6 +487,14 @@
if (!subpass->has_resolve)
return;
+ /* Resolves happen before the end-of-subpass barriers get executed,
+ * so we have to make the attachment shader-readable */
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
+ RADV_CMD_FLAG_FLUSH_AND_INV_CB |
+ RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
+ RADV_CMD_FLAG_INV_GLOBAL_L2 |
+ RADV_CMD_FLAG_INV_VMEM_L1;
+
for (uint32_t i = 0; i < subpass->color_count; ++i) {
VkAttachmentReference src_att = subpass->color_attachments[i];
VkAttachmentReference dest_att = subpass->resolve_attachments[i];
@@ -511,7 +520,10 @@
radv_fast_clear_flush_image_inplace(cmd_buffer, src_iview->image, &range);
}
- radv_meta_save_compute(&saved_state, cmd_buffer, 16);
+ radv_meta_save(&saved_state, cmd_buffer,
+ RADV_META_SAVE_COMPUTE_PIPELINE |
+ RADV_META_SAVE_CONSTANTS |
+ RADV_META_SAVE_DESCRIPTORS);
for (uint32_t i = 0; i < subpass->color_count; ++i) {
VkAttachmentReference src_att = subpass->color_attachments[i];
@@ -529,7 +541,7 @@
&(VkExtent2D) { fb->width, fb->height });
}
- radv_meta_restore_compute(&saved_state, cmd_buffer, 16);
+ radv_meta_restore(&saved_state, cmd_buffer);
for (uint32_t i = 0; i < subpass->color_count; ++i) {
VkAttachmentReference dest_att = subpass->resolve_attachments[i];
diff -Nru mesa-17.2.4/src/amd/vulkan/radv_meta_resolve_fs.c mesa-17.3.3/src/amd/vulkan/radv_meta_resolve_fs.c
--- mesa-17.2.4/src/amd/vulkan/radv_meta_resolve_fs.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/vulkan/radv_meta_resolve_fs.c 2018-01-18 21:30:28.000000000 +0000
@@ -332,9 +332,7 @@
VkResult
radv_device_init_meta_resolve_fragment_state(struct radv_device *device)
{
- struct radv_meta_state *state = &device->meta_state;
VkResult res;
- memset(&state->resolve_fragment, 0, sizeof(state->resolve_fragment));
res = create_layout(device);
if (res != VK_SUCCESS)
@@ -409,8 +407,8 @@
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
unsigned push_constants[2] = {
- src_offset->x,
- src_offset->y,
+ src_offset->x - dest_offset->x,
+ src_offset->y - dest_offset->y,
};
radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
device->meta_state.resolve_fragment.p_layout,
@@ -470,7 +468,11 @@
}
rp = device->meta_state.resolve_fragment.rc[samples_log2].render_pass[fs_key];
- radv_meta_save_graphics_reset_vport_scissor_novertex(&saved_state, cmd_buffer);
+
+ radv_meta_save(&saved_state, cmd_buffer,
+ RADV_META_SAVE_GRAPHICS_PIPELINE |
+ RADV_META_SAVE_CONSTANTS |
+ RADV_META_SAVE_DESCRIPTORS);
for (uint32_t r = 0; r < region_count; ++r) {
const VkImageResolve *region = &regions[r];
@@ -538,8 +540,8 @@
.pAttachments = (VkImageView[]) {
radv_image_view_to_handle(&dest_iview),
},
- .width = extent.width,
- .height = extent.height,
+ .width = extent.width + dstOffset.x,
+ .height = extent.height + dstOffset.y,
.layers = 1
}, &cmd_buffer->pool->alloc, &fb);
@@ -597,7 +599,20 @@
if (!subpass->has_resolve)
return;
- radv_meta_save_graphics_reset_vport_scissor_novertex(&saved_state, cmd_buffer);
+ radv_meta_save(&saved_state, cmd_buffer,
+ RADV_META_SAVE_GRAPHICS_PIPELINE |
+ RADV_META_SAVE_CONSTANTS |
+ RADV_META_SAVE_DESCRIPTORS);
+
+ /* Resolves happen before the end-of-subpass barriers get executed,
+ * so we have to make the attachment shader-readable */
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
+ RADV_CMD_FLAG_FLUSH_AND_INV_CB |
+ RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
+ RADV_CMD_FLAG_FLUSH_AND_INV_DB |
+ RADV_CMD_FLAG_FLUSH_AND_INV_DB_META |
+ RADV_CMD_FLAG_INV_GLOBAL_L2 |
+ RADV_CMD_FLAG_INV_VMEM_L1;
for (uint32_t i = 0; i < subpass->color_count; ++i) {
VkAttachmentReference src_att = subpass->color_attachments[i];
diff -Nru mesa-17.2.4/src/amd/vulkan/radv_pass.c mesa-17.3.3/src/amd/vulkan/radv_pass.c
--- mesa-17.2.4/src/amd/vulkan/radv_pass.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/vulkan/radv_pass.c 2018-01-18 21:30:28.000000000 +0000
@@ -26,6 +26,8 @@
*/
#include "radv_private.h"
+#include "vk_util.h"
+
VkResult radv_CreateRenderPass(
VkDevice _device,
const VkRenderPassCreateInfo* pCreateInfo,
@@ -36,6 +38,7 @@
struct radv_render_pass *pass;
size_t size;
size_t attachments_offset;
+ VkRenderPassMultiviewCreateInfoKHX *multiview_info = NULL;
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO);
@@ -54,6 +57,16 @@
pass->subpass_count = pCreateInfo->subpassCount;
pass->attachments = (void *) pass + attachments_offset;
+ vk_foreach_struct(ext, pCreateInfo->pNext) {
+ switch(ext->sType) {
+ case VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO_KHX:
+ multiview_info = ( VkRenderPassMultiviewCreateInfoKHX*)ext;
+ break;
+ default:
+ break;
+ }
+ }
+
for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
struct radv_render_pass_attachment *att = &pass->attachments[i];
@@ -97,6 +110,8 @@
subpass->input_count = desc->inputAttachmentCount;
subpass->color_count = desc->colorAttachmentCount;
+ if (multiview_info)
+ subpass->view_mask = multiview_info->pViewMasks[i];
if (desc->inputAttachmentCount > 0) {
subpass->input_attachments = p;
@@ -105,6 +120,8 @@
for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {
subpass->input_attachments[j]
= desc->pInputAttachments[j];
+ if (desc->pInputAttachments[j].attachment != VK_ATTACHMENT_UNUSED)
+ pass->attachments[desc->pInputAttachments[j].attachment].view_mask |= subpass->view_mask;
}
}
@@ -115,6 +132,8 @@
for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
subpass->color_attachments[j]
= desc->pColorAttachments[j];
+ if (desc->pColorAttachments[j].attachment != VK_ATTACHMENT_UNUSED)
+ pass->attachments[desc->pColorAttachments[j].attachment].view_mask |= subpass->view_mask;
}
}
@@ -127,14 +146,18 @@
uint32_t a = desc->pResolveAttachments[j].attachment;
subpass->resolve_attachments[j]
= desc->pResolveAttachments[j];
- if (a != VK_ATTACHMENT_UNUSED)
+ if (a != VK_ATTACHMENT_UNUSED) {
subpass->has_resolve = true;
+ pass->attachments[desc->pResolveAttachments[j].attachment].view_mask |= subpass->view_mask;
+ }
}
}
if (desc->pDepthStencilAttachment) {
subpass->depth_stencil_attachment =
*desc->pDepthStencilAttachment;
+ if (desc->pDepthStencilAttachment->attachment != VK_ATTACHMENT_UNUSED)
+ pass->attachments[desc->pDepthStencilAttachment->attachment].view_mask |= subpass->view_mask;
} else {
subpass->depth_stencil_attachment.attachment = VK_ATTACHMENT_UNUSED;
}
diff -Nru mesa-17.2.4/src/amd/vulkan/radv_pipeline.c mesa-17.3.3/src/amd/vulkan/radv_pipeline.c
--- mesa-17.2.4/src/amd/vulkan/radv_pipeline.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/vulkan/radv_pipeline.c 2018-01-18 21:30:28.000000000 +0000
@@ -27,17 +27,19 @@
#include "util/mesa-sha1.h"
#include "util/u_atomic.h"
+#include "radv_debug.h"
#include "radv_private.h"
+#include "radv_shader.h"
#include "nir/nir.h"
#include "nir/nir_builder.h"
#include "spirv/nir_spirv.h"
+#include "vk_util.h"
#include
#include
#include "sid.h"
#include "gfx9d.h"
-#include "r600d_common.h"
#include "ac_binary.h"
#include "ac_llvm_util.h"
#include "ac_nir_to_llvm.h"
@@ -45,74 +47,6 @@
#include "util/debug.h"
#include "ac_exp_param.h"
-void radv_shader_variant_destroy(struct radv_device *device,
- struct radv_shader_variant *variant);
-
-static const struct nir_shader_compiler_options nir_options = {
- .vertex_id_zero_based = true,
- .lower_scmp = true,
- .lower_flrp32 = true,
- .lower_fsat = true,
- .lower_fdiv = true,
- .lower_sub = true,
- .lower_pack_snorm_2x16 = true,
- .lower_pack_snorm_4x8 = true,
- .lower_pack_unorm_2x16 = true,
- .lower_pack_unorm_4x8 = true,
- .lower_unpack_snorm_2x16 = true,
- .lower_unpack_snorm_4x8 = true,
- .lower_unpack_unorm_2x16 = true,
- .lower_unpack_unorm_4x8 = true,
- .lower_extract_byte = true,
- .lower_extract_word = true,
- .lower_ffma = true,
- .max_unroll_iterations = 32
-};
-
-VkResult radv_CreateShaderModule(
- VkDevice _device,
- const VkShaderModuleCreateInfo* pCreateInfo,
- const VkAllocationCallbacks* pAllocator,
- VkShaderModule* pShaderModule)
-{
- RADV_FROM_HANDLE(radv_device, device, _device);
- struct radv_shader_module *module;
-
- assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO);
- assert(pCreateInfo->flags == 0);
-
- module = vk_alloc2(&device->alloc, pAllocator,
- sizeof(*module) + pCreateInfo->codeSize, 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (module == NULL)
- return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
-
- module->nir = NULL;
- module->size = pCreateInfo->codeSize;
- memcpy(module->data, pCreateInfo->pCode, module->size);
-
- _mesa_sha1_compute(module->data, module->size, module->sha1);
-
- *pShaderModule = radv_shader_module_to_handle(module);
-
- return VK_SUCCESS;
-}
-
-void radv_DestroyShaderModule(
- VkDevice _device,
- VkShaderModule _module,
- const VkAllocationCallbacks* pAllocator)
-{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_shader_module, module, _module);
-
- if (!module)
- return;
-
- vk_free2(&device->alloc, pAllocator, module);
-}
-
-
static void
radv_pipeline_destroy(struct radv_device *device,
struct radv_pipeline *pipeline,
@@ -142,610 +76,27 @@
radv_pipeline_destroy(device, pipeline, pAllocator);
}
-
-static void
-radv_optimize_nir(struct nir_shader *shader)
-{
- bool progress;
-
- do {
- progress = false;
-
- NIR_PASS_V(shader, nir_lower_vars_to_ssa);
- NIR_PASS_V(shader, nir_lower_64bit_pack);
- NIR_PASS_V(shader, nir_lower_alu_to_scalar);
- NIR_PASS_V(shader, nir_lower_phis_to_scalar);
-
- NIR_PASS(progress, shader, nir_copy_prop);
- NIR_PASS(progress, shader, nir_opt_remove_phis);
- NIR_PASS(progress, shader, nir_opt_dce);
- if (nir_opt_trivial_continues(shader)) {
- progress = true;
- NIR_PASS(progress, shader, nir_copy_prop);
- NIR_PASS(progress, shader, nir_opt_remove_phis);
- NIR_PASS(progress, shader, nir_opt_dce);
- }
- NIR_PASS(progress, shader, nir_opt_if);
- NIR_PASS(progress, shader, nir_opt_dead_cf);
- NIR_PASS(progress, shader, nir_opt_cse);
- NIR_PASS(progress, shader, nir_opt_peephole_select, 8);
- NIR_PASS(progress, shader, nir_opt_algebraic);
- NIR_PASS(progress, shader, nir_opt_constant_folding);
- NIR_PASS(progress, shader, nir_opt_undef);
- NIR_PASS(progress, shader, nir_opt_conditional_discard);
- if (shader->options->max_unroll_iterations) {
- NIR_PASS(progress, shader, nir_opt_loop_unroll, 0);
- }
- } while (progress);
-}
-
-static nir_shader *
-radv_shader_compile_to_nir(struct radv_device *device,
- struct radv_shader_module *module,
- const char *entrypoint_name,
- gl_shader_stage stage,
- const VkSpecializationInfo *spec_info,
- bool dump)
-{
- if (strcmp(entrypoint_name, "main") != 0) {
- radv_finishme("Multiple shaders per module not really supported");
- }
-
- nir_shader *nir;
- nir_function *entry_point;
- if (module->nir) {
- /* Some things such as our meta clear/blit code will give us a NIR
- * shader directly. In that case, we just ignore the SPIR-V entirely
- * and just use the NIR shader */
- nir = module->nir;
- nir->options = &nir_options;
- nir_validate_shader(nir);
-
- assert(exec_list_length(&nir->functions) == 1);
- struct exec_node *node = exec_list_get_head(&nir->functions);
- entry_point = exec_node_data(nir_function, node, node);
- } else {
- uint32_t *spirv = (uint32_t *) module->data;
- assert(module->size % 4 == 0);
-
- uint32_t num_spec_entries = 0;
- struct nir_spirv_specialization *spec_entries = NULL;
- if (spec_info && spec_info->mapEntryCount > 0) {
- num_spec_entries = spec_info->mapEntryCount;
- spec_entries = malloc(num_spec_entries * sizeof(*spec_entries));
- for (uint32_t i = 0; i < num_spec_entries; i++) {
- VkSpecializationMapEntry entry = spec_info->pMapEntries[i];
- const void *data = spec_info->pData + entry.offset;
- assert(data + entry.size <= spec_info->pData + spec_info->dataSize);
-
- spec_entries[i].id = spec_info->pMapEntries[i].constantID;
- if (spec_info->dataSize == 8)
- spec_entries[i].data64 = *(const uint64_t *)data;
- else
- spec_entries[i].data32 = *(const uint32_t *)data;
- }
- }
- const struct nir_spirv_supported_extensions supported_ext = {
- .draw_parameters = true,
- .float64 = true,
- .image_read_without_format = true,
- .image_write_without_format = true,
- .tessellation = true,
- .int64 = true,
- .variable_pointers = true,
- };
- entry_point = spirv_to_nir(spirv, module->size / 4,
- spec_entries, num_spec_entries,
- stage, entrypoint_name, &supported_ext, &nir_options);
- nir = entry_point->shader;
- assert(nir->stage == stage);
- nir_validate_shader(nir);
-
- free(spec_entries);
-
- /* We have to lower away local constant initializers right before we
- * inline functions. That way they get properly initialized at the top
- * of the function and not at the top of its caller.
- */
- NIR_PASS_V(nir, nir_lower_constant_initializers, nir_var_local);
- NIR_PASS_V(nir, nir_lower_returns);
- NIR_PASS_V(nir, nir_inline_functions);
-
- /* Pick off the single entrypoint that we want */
- foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
- if (func != entry_point)
- exec_node_remove(&func->node);
- }
- assert(exec_list_length(&nir->functions) == 1);
- entry_point->name = ralloc_strdup(entry_point, "main");
-
- NIR_PASS_V(nir, nir_remove_dead_variables,
- nir_var_shader_in | nir_var_shader_out | nir_var_system_value);
-
- /* Now that we've deleted all but the main function, we can go ahead and
- * lower the rest of the constant initializers.
- */
- NIR_PASS_V(nir, nir_lower_constant_initializers, ~0);
- NIR_PASS_V(nir, nir_lower_system_values);
- NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);
- }
-
- /* Vulkan uses the separate-shader linking model */
- nir->info.separate_shader = true;
-
- nir_shader_gather_info(nir, entry_point->impl);
-
- nir_variable_mode indirect_mask = 0;
- indirect_mask |= nir_var_shader_in;
- indirect_mask |= nir_var_local;
-
- nir_lower_indirect_derefs(nir, indirect_mask);
-
- static const nir_lower_tex_options tex_options = {
- .lower_txp = ~0,
- };
-
- nir_lower_tex(nir, &tex_options);
-
- nir_lower_vars_to_ssa(nir);
- nir_lower_var_copies(nir);
- nir_lower_global_vars_to_local(nir);
- nir_remove_dead_variables(nir, nir_var_local);
- radv_optimize_nir(nir);
-
- if (dump)
- nir_print_shader(nir, stderr);
-
- return nir;
-}
-
-static const char *radv_get_shader_name(struct radv_shader_variant *var,
- gl_shader_stage stage)
-{
- switch (stage) {
- case MESA_SHADER_VERTEX: return var->info.vs.as_ls ? "Vertex Shader as LS" : var->info.vs.as_es ? "Vertex Shader as ES" : "Vertex Shader as VS";
- case MESA_SHADER_GEOMETRY: return "Geometry Shader";
- case MESA_SHADER_FRAGMENT: return "Pixel Shader";
- case MESA_SHADER_COMPUTE: return "Compute Shader";
- case MESA_SHADER_TESS_CTRL: return "Tessellation Control Shader";
- case MESA_SHADER_TESS_EVAL: return var->info.tes.as_es ? "Tessellation Evaluation Shader as ES" : "Tessellation Evaluation Shader as VS";
- default:
- return "Unknown shader";
- };
-
-}
static void radv_dump_pipeline_stats(struct radv_device *device, struct radv_pipeline *pipeline)
{
- unsigned lds_increment = device->physical_device->rad_info.chip_class >= CIK ? 512 : 256;
- struct radv_shader_variant *var;
- struct ac_shader_config *conf;
int i;
- FILE *file = stderr;
- unsigned max_simd_waves = 10;
- unsigned lds_per_wave = 0;
for (i = 0; i < MESA_SHADER_STAGES; i++) {
if (!pipeline->shaders[i])
continue;
- var = pipeline->shaders[i];
-
- conf = &var->config;
- if (i == MESA_SHADER_FRAGMENT) {
- lds_per_wave = conf->lds_size * lds_increment +
- align(var->info.fs.num_interp * 48, lds_increment);
- }
-
- if (conf->num_sgprs) {
- if (device->physical_device->rad_info.chip_class >= VI)
- max_simd_waves = MIN2(max_simd_waves, 800 / conf->num_sgprs);
- else
- max_simd_waves = MIN2(max_simd_waves, 512 / conf->num_sgprs);
- }
-
- if (conf->num_vgprs)
- max_simd_waves = MIN2(max_simd_waves, 256 / conf->num_vgprs);
-
- /* LDS is 64KB per CU (4 SIMDs), divided into 16KB blocks per SIMD
- * that PS can use.
- */
- if (lds_per_wave)
- max_simd_waves = MIN2(max_simd_waves, 16384 / lds_per_wave);
-
- fprintf(file, "\n%s:\n",
- radv_get_shader_name(var, i));
- if (i == MESA_SHADER_FRAGMENT) {
- fprintf(file, "*** SHADER CONFIG ***\n"
- "SPI_PS_INPUT_ADDR = 0x%04x\n"
- "SPI_PS_INPUT_ENA = 0x%04x\n",
- conf->spi_ps_input_addr, conf->spi_ps_input_ena);
- }
- fprintf(file, "*** SHADER STATS ***\n"
- "SGPRS: %d\n"
- "VGPRS: %d\n"
- "Spilled SGPRs: %d\n"
- "Spilled VGPRs: %d\n"
- "Code Size: %d bytes\n"
- "LDS: %d blocks\n"
- "Scratch: %d bytes per wave\n"
- "Max Waves: %d\n"
- "********************\n\n\n",
- conf->num_sgprs, conf->num_vgprs,
- conf->spilled_sgprs, conf->spilled_vgprs, var->code_size,
- conf->lds_size, conf->scratch_bytes_per_wave,
- max_simd_waves);
+ radv_shader_dump_stats(device, pipeline->shaders[i], i, stderr);
}
}
-void radv_shader_variant_destroy(struct radv_device *device,
- struct radv_shader_variant *variant)
+static uint32_t get_hash_flags(struct radv_device *device)
{
- if (!p_atomic_dec_zero(&variant->ref_count))
- return;
-
- device->ws->buffer_destroy(variant->bo);
- free(variant);
-}
-
-static void radv_fill_shader_variant(struct radv_device *device,
- struct radv_shader_variant *variant,
- struct ac_shader_binary *binary,
- gl_shader_stage stage)
-{
- bool scratch_enabled = variant->config.scratch_bytes_per_wave > 0;
- unsigned vgpr_comp_cnt = 0;
-
- if (scratch_enabled && !device->llvm_supports_spill)
- radv_finishme("shader scratch support only available with LLVM 4.0");
-
- variant->code_size = binary->code_size;
- variant->rsrc2 = S_00B12C_USER_SGPR(variant->info.num_user_sgprs) |
- S_00B12C_SCRATCH_EN(scratch_enabled);
-
- switch (stage) {
- case MESA_SHADER_TESS_EVAL:
- vgpr_comp_cnt = 3;
- /* fallthrough */
- case MESA_SHADER_TESS_CTRL:
- variant->rsrc2 |= S_00B42C_OC_LDS_EN(1);
- break;
- case MESA_SHADER_VERTEX:
- case MESA_SHADER_GEOMETRY:
- vgpr_comp_cnt = variant->info.vs.vgpr_comp_cnt;
- break;
- case MESA_SHADER_FRAGMENT:
- break;
- case MESA_SHADER_COMPUTE:
- variant->rsrc2 |=
- S_00B84C_TGID_X_EN(1) | S_00B84C_TGID_Y_EN(1) |
- S_00B84C_TGID_Z_EN(1) | S_00B84C_TIDIG_COMP_CNT(2) |
- S_00B84C_TG_SIZE_EN(1) |
- S_00B84C_LDS_SIZE(variant->config.lds_size);
- break;
- default:
- unreachable("unsupported shader type");
- break;
- }
-
- variant->rsrc1 = S_00B848_VGPRS((variant->config.num_vgprs - 1) / 4) |
- S_00B848_SGPRS((variant->config.num_sgprs - 1) / 8) |
- S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt) |
- S_00B848_DX10_CLAMP(1) |
- S_00B848_FLOAT_MODE(variant->config.float_mode);
-
- variant->bo = device->ws->buffer_create(device->ws, binary->code_size, 256,
- RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS);
-
- void *ptr = device->ws->buffer_map(variant->bo);
- memcpy(ptr, binary->code, binary->code_size);
- device->ws->buffer_unmap(variant->bo);
-
-
-}
+ uint32_t hash_flags = 0;
-static struct radv_shader_variant *radv_shader_variant_create(struct radv_device *device,
- struct nir_shader *shader,
- struct radv_pipeline_layout *layout,
- const union ac_shader_variant_key *key,
- void** code_out,
- unsigned *code_size_out,
- bool dump)
-{
- struct radv_shader_variant *variant = calloc(1, sizeof(struct radv_shader_variant));
- enum radeon_family chip_family = device->physical_device->rad_info.family;
- LLVMTargetMachineRef tm;
- if (!variant)
- return NULL;
-
- struct ac_nir_compiler_options options = {0};
- options.layout = layout;
- if (key)
- options.key = *key;
-
- struct ac_shader_binary binary;
- enum ac_target_machine_options tm_options = 0;
- options.unsafe_math = !!(device->debug_flags & RADV_DEBUG_UNSAFE_MATH);
- options.family = chip_family;
- options.chip_class = device->physical_device->rad_info.chip_class;
- options.supports_spill = device->llvm_supports_spill;
- if (options.supports_spill)
- tm_options |= AC_TM_SUPPORTS_SPILL;
+ if (device->instance->debug_flags & RADV_DEBUG_UNSAFE_MATH)
+ hash_flags |= RADV_HASH_SHADER_UNSAFE_MATH;
if (device->instance->perftest_flags & RADV_PERFTEST_SISCHED)
- tm_options |= AC_TM_SISCHED;
- tm = ac_create_target_machine(chip_family, tm_options);
- ac_compile_nir_shader(tm, &binary, &variant->config,
- &variant->info, shader, &options, dump);
- LLVMDisposeTargetMachine(tm);
-
- radv_fill_shader_variant(device, variant, &binary, shader->stage);
-
- if (code_out) {
- *code_out = binary.code;
- *code_size_out = binary.code_size;
- } else
- free(binary.code);
- free(binary.config);
- free(binary.rodata);
- free(binary.global_symbol_offsets);
- free(binary.relocs);
- free(binary.disasm_string);
- variant->ref_count = 1;
- return variant;
-}
-
-static struct radv_shader_variant *
-radv_pipeline_create_gs_copy_shader(struct radv_pipeline *pipeline,
- struct nir_shader *nir,
- void** code_out,
- unsigned *code_size_out,
- bool dump_shader)
-{
- struct radv_shader_variant *variant = calloc(1, sizeof(struct radv_shader_variant));
- enum radeon_family chip_family = pipeline->device->physical_device->rad_info.family;
- LLVMTargetMachineRef tm;
- if (!variant)
- return NULL;
-
- struct ac_nir_compiler_options options = {0};
- struct ac_shader_binary binary;
- enum ac_target_machine_options tm_options = 0;
- options.family = chip_family;
- options.chip_class = pipeline->device->physical_device->rad_info.chip_class;
- if (options.supports_spill)
- tm_options |= AC_TM_SUPPORTS_SPILL;
- if (pipeline->device->instance->perftest_flags & RADV_PERFTEST_SISCHED)
- tm_options |= AC_TM_SISCHED;
- tm = ac_create_target_machine(chip_family, tm_options);
- ac_create_gs_copy_shader(tm, nir, &binary, &variant->config, &variant->info, &options, dump_shader);
- LLVMDisposeTargetMachine(tm);
-
- radv_fill_shader_variant(pipeline->device, variant, &binary, MESA_SHADER_VERTEX);
-
- if (code_out) {
- *code_out = binary.code;
- *code_size_out = binary.code_size;
- } else
- free(binary.code);
- free(binary.config);
- free(binary.rodata);
- free(binary.global_symbol_offsets);
- free(binary.relocs);
- free(binary.disasm_string);
- variant->ref_count = 1;
- return variant;
-}
-
-static struct radv_shader_variant *
-radv_pipeline_compile(struct radv_pipeline *pipeline,
- struct radv_pipeline_cache *cache,
- struct radv_shader_module *module,
- const char *entrypoint,
- gl_shader_stage stage,
- const VkSpecializationInfo *spec_info,
- struct radv_pipeline_layout *layout,
- const union ac_shader_variant_key *key)
-{
- unsigned char sha1[20];
- unsigned char gs_copy_sha1[20];
- struct radv_shader_variant *variant;
- nir_shader *nir;
- void *code = NULL;
- unsigned code_size = 0;
- bool dump = (pipeline->device->debug_flags & RADV_DEBUG_DUMP_SHADERS);
-
- if (module->nir)
- _mesa_sha1_compute(module->nir->info.name,
- strlen(module->nir->info.name),
- module->sha1);
-
- radv_hash_shader(sha1, module, entrypoint, spec_info, layout, key, 0);
- if (stage == MESA_SHADER_GEOMETRY)
- radv_hash_shader(gs_copy_sha1, module, entrypoint, spec_info,
- layout, key, 1);
-
- variant = radv_create_shader_variant_from_pipeline_cache(pipeline->device,
- cache,
- sha1);
-
- if (stage == MESA_SHADER_GEOMETRY) {
- pipeline->gs_copy_shader =
- radv_create_shader_variant_from_pipeline_cache(
- pipeline->device,
- cache,
- gs_copy_sha1);
- }
-
- if (variant &&
- (stage != MESA_SHADER_GEOMETRY || pipeline->gs_copy_shader))
- return variant;
-
- nir = radv_shader_compile_to_nir(pipeline->device,
- module, entrypoint, stage,
- spec_info, dump);
- if (nir == NULL)
- return NULL;
-
- if (!variant) {
- variant = radv_shader_variant_create(pipeline->device, nir,
- layout, key, &code,
- &code_size, dump);
- }
-
- if (stage == MESA_SHADER_GEOMETRY && !pipeline->gs_copy_shader) {
- void *gs_copy_code = NULL;
- unsigned gs_copy_code_size = 0;
- pipeline->gs_copy_shader = radv_pipeline_create_gs_copy_shader(
- pipeline, nir, &gs_copy_code, &gs_copy_code_size, dump);
-
- if (pipeline->gs_copy_shader) {
- pipeline->gs_copy_shader =
- radv_pipeline_cache_insert_shader(cache,
- gs_copy_sha1,
- pipeline->gs_copy_shader,
- gs_copy_code,
- gs_copy_code_size);
- }
- }
- if (!module->nir)
- ralloc_free(nir);
-
- if (variant)
- variant = radv_pipeline_cache_insert_shader(cache, sha1, variant,
- code, code_size);
-
- if (code)
- free(code);
- return variant;
-}
-
-static union ac_shader_variant_key
-radv_compute_tes_key(bool as_es, bool export_prim_id)
-{
- union ac_shader_variant_key key;
- memset(&key, 0, sizeof(key));
- key.tes.as_es = as_es;
- /* export prim id only happens when no geom shader */
- if (!as_es)
- key.tes.export_prim_id = export_prim_id;
- return key;
-}
-
-static union ac_shader_variant_key
-radv_compute_tcs_key(unsigned primitive_mode, unsigned input_vertices)
-{
- union ac_shader_variant_key key;
- memset(&key, 0, sizeof(key));
- key.tcs.primitive_mode = primitive_mode;
- key.tcs.input_vertices = input_vertices;
- return key;
-}
-
-static void
-radv_tess_pipeline_compile(struct radv_pipeline *pipeline,
- struct radv_pipeline_cache *cache,
- struct radv_shader_module *tcs_module,
- struct radv_shader_module *tes_module,
- const char *tcs_entrypoint,
- const char *tes_entrypoint,
- const VkSpecializationInfo *tcs_spec_info,
- const VkSpecializationInfo *tes_spec_info,
- struct radv_pipeline_layout *layout,
- unsigned input_vertices)
-{
- unsigned char tcs_sha1[20], tes_sha1[20];
- struct radv_shader_variant *tes_variant = NULL, *tcs_variant = NULL;
- nir_shader *tes_nir, *tcs_nir;
- void *tes_code = NULL, *tcs_code = NULL;
- unsigned tes_code_size = 0, tcs_code_size = 0;
- union ac_shader_variant_key tes_key;
- union ac_shader_variant_key tcs_key;
- bool dump = (pipeline->device->debug_flags & RADV_DEBUG_DUMP_SHADERS);
-
- tes_key = radv_compute_tes_key(radv_pipeline_has_gs(pipeline),
- pipeline->shaders[MESA_SHADER_FRAGMENT]->info.fs.prim_id_input);
- if (tes_module->nir)
- _mesa_sha1_compute(tes_module->nir->info.name,
- strlen(tes_module->nir->info.name),
- tes_module->sha1);
- radv_hash_shader(tes_sha1, tes_module, tes_entrypoint, tes_spec_info, layout, &tes_key, 0);
-
- tes_variant = radv_create_shader_variant_from_pipeline_cache(pipeline->device,
- cache,
- tes_sha1);
-
- if (tes_variant) {
- tcs_key = radv_compute_tcs_key(tes_variant->info.tes.primitive_mode, input_vertices);
-
- if (tcs_module->nir)
- _mesa_sha1_compute(tcs_module->nir->info.name,
- strlen(tcs_module->nir->info.name),
- tcs_module->sha1);
-
- radv_hash_shader(tcs_sha1, tcs_module, tcs_entrypoint, tcs_spec_info, layout, &tcs_key, 0);
-
- tcs_variant = radv_create_shader_variant_from_pipeline_cache(pipeline->device,
- cache,
- tcs_sha1);
- }
-
- if (tcs_variant && tes_variant) {
- pipeline->shaders[MESA_SHADER_TESS_CTRL] = tcs_variant;
- pipeline->shaders[MESA_SHADER_TESS_EVAL] = tes_variant;
- return;
- }
-
- tes_nir = radv_shader_compile_to_nir(pipeline->device,
- tes_module, tes_entrypoint, MESA_SHADER_TESS_EVAL,
- tes_spec_info, dump);
- if (tes_nir == NULL)
- return;
-
- tcs_nir = radv_shader_compile_to_nir(pipeline->device,
- tcs_module, tcs_entrypoint, MESA_SHADER_TESS_CTRL,
- tcs_spec_info, dump);
- if (tcs_nir == NULL)
- return;
-
- nir_lower_tes_patch_vertices(tes_nir,
- tcs_nir->info.tess.tcs_vertices_out);
-
- tes_variant = radv_shader_variant_create(pipeline->device, tes_nir,
- layout, &tes_key, &tes_code,
- &tes_code_size, dump);
-
- tcs_key = radv_compute_tcs_key(tes_nir->info.tess.primitive_mode, input_vertices);
- if (tcs_module->nir)
- _mesa_sha1_compute(tcs_module->nir->info.name,
- strlen(tcs_module->nir->info.name),
- tcs_module->sha1);
-
- radv_hash_shader(tcs_sha1, tcs_module, tcs_entrypoint, tcs_spec_info, layout, &tcs_key, 0);
-
- tcs_variant = radv_shader_variant_create(pipeline->device, tcs_nir,
- layout, &tcs_key, &tcs_code,
- &tcs_code_size, dump);
-
- if (!tes_module->nir)
- ralloc_free(tes_nir);
-
- if (!tcs_module->nir)
- ralloc_free(tcs_nir);
-
- if (tes_variant)
- tes_variant = radv_pipeline_cache_insert_shader(cache, tes_sha1, tes_variant,
- tes_code, tes_code_size);
-
- if (tcs_variant)
- tcs_variant = radv_pipeline_cache_insert_shader(cache, tcs_sha1, tcs_variant,
- tcs_code, tcs_code_size);
-
- if (tes_code)
- free(tes_code);
- if (tcs_code)
- free(tcs_code);
- pipeline->shaders[MESA_SHADER_TESS_CTRL] = tcs_variant;
- pipeline->shaders[MESA_SHADER_TESS_EVAL] = tes_variant;
- return;
+ hash_flags |= RADV_HASH_SHADER_SISCHED;
+ return hash_flags;
}
static VkResult
@@ -1195,6 +546,7 @@
const struct radv_graphics_pipeline_create_info *extra)
{
const VkPipelineColorBlendStateCreateInfo *vkblend = pCreateInfo->pColorBlendState;
+ const VkPipelineMultisampleStateCreateInfo *vkms = pCreateInfo->pMultisampleState;
struct radv_blend_state *blend = &pipeline->graphics.blend;
unsigned mode = V_028808_CB_NORMAL;
uint32_t blend_enable = 0, blend_need_alpha = 0;
@@ -1220,6 +572,10 @@
S_028B70_ALPHA_TO_MASK_OFFSET2(2) |
S_028B70_ALPHA_TO_MASK_OFFSET3(2);
+ if (vkms && vkms->alphaToCoverageEnable) {
+ blend->db_alpha_to_mask |= S_028B70_ALPHA_TO_MASK_ENABLE(1);
+ }
+
blend->cb_target_mask = 0;
for (i = 0; i < vkblend->attachmentCount; i++) {
const VkPipelineColorBlendAttachmentState *att = &vkblend->pAttachments[i];
@@ -1372,7 +728,6 @@
const VkPipelineDepthStencilStateCreateInfo *vkds = pCreateInfo->pDepthStencilState;
struct radv_depth_stencil_state *ds = &pipeline->graphics.ds;
- memset(ds, 0, sizeof(*ds));
if (!vkds)
return;
@@ -1439,8 +794,6 @@
const VkPipelineRasterizationStateCreateInfo *vkraster = pCreateInfo->pRasterizationState;
struct radv_raster_state *raster = &pipeline->graphics.raster;
- memset(raster, 0, sizeof(*raster));
-
raster->spi_interp_control =
S_0286D4_FLAT_SHADE_ENA(1) |
S_0286D4_PNT_SPRITE_ENA(1) |
@@ -1481,7 +834,6 @@
const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
const VkPipelineMultisampleStateCreateInfo *vkms = pCreateInfo->pMultisampleState;
- struct radv_blend_state *blend = &pipeline->graphics.blend;
struct radv_multisample_state *ms = &pipeline->graphics.ms;
unsigned num_tile_pipes = pipeline->device->physical_device->rad_info.num_tile_pipes;
int ps_iter_samples = 1;
@@ -1494,7 +846,7 @@
if (vkms && vkms->sampleShadingEnable) {
ps_iter_samples = ceil(vkms->minSampleShading * ms->num_samples);
- } else if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.fs.force_persample) {
+ } else if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.info.ps.force_persample) {
ps_iter_samples = ms->num_samples;
}
@@ -1510,8 +862,8 @@
S_028A4C_SUPERTILE_WALK_ORDER_ENABLE(1) |
S_028A4C_TILE_WALK_ORDER_ENABLE(1) |
S_028A4C_MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE(1) |
- EG_S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
- EG_S_028A4C_FORCE_EOV_REZ_ENABLE(1);
+ S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
+ S_028A4C_FORCE_EOV_REZ_ENABLE(1);
ms->pa_sc_mode_cntl_0 = S_028A48_ALTERNATE_RBS_PER_TILE(pipeline->device->physical_device->rad_info.chip_class >= GFX9);
if (ms->num_samples > 1) {
@@ -1526,15 +878,18 @@
ms->pa_sc_aa_config |= S_028BE0_MSAA_NUM_SAMPLES(log_samples) |
S_028BE0_MAX_SAMPLE_DIST(radv_cayman_get_maxdist(log_samples)) |
S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples); /* CM_R_028BE0_PA_SC_AA_CONFIG */
- ms->pa_sc_mode_cntl_1 |= EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1);
+ ms->pa_sc_mode_cntl_1 |= S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1);
}
- if (vkms) {
- if (vkms->alphaToCoverageEnable)
- blend->db_alpha_to_mask |= S_028B70_ALPHA_TO_MASK_ENABLE(1);
+ const struct VkPipelineRasterizationStateRasterizationOrderAMD *raster_order =
+ vk_find_struct_const(pCreateInfo->pRasterizationState->pNext, PIPELINE_RASTERIZATION_STATE_RASTERIZATION_ORDER_AMD);
+ if (raster_order && raster_order->rasterizationOrder == VK_RASTERIZATION_ORDER_RELAXED_AMD) {
+ ms->pa_sc_mode_cntl_1 |= S_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE(1) |
+ S_028A4C_OUT_OF_ORDER_WATER_MARK(0x7);
+ }
- if (vkms->pSampleMask)
- mask = vkms->pSampleMask[0] & 0xffff;
+ if (vkms && vkms->pSampleMask) {
+ mask = vkms->pSampleMask[0] & 0xffff;
}
ms->pa_sc_aa_mask[0] = mask | (mask << 16);
@@ -1785,10 +1140,10 @@
pipeline->dynamic_state_mask = states;
}
-static union ac_shader_variant_key
-radv_compute_vs_key(const VkGraphicsPipelineCreateInfo *pCreateInfo, bool as_es, bool as_ls, bool export_prim_id)
+static struct ac_shader_variant_key
+radv_compute_vs_key(const VkGraphicsPipelineCreateInfo *pCreateInfo, bool as_es, bool as_ls)
{
- union ac_shader_variant_key key;
+ struct ac_shader_variant_key key;
const VkPipelineVertexInputStateCreateInfo *input_state =
pCreateInfo->pVertexInputState;
@@ -1796,7 +1151,6 @@
key.vs.instance_rate_inputs = 0;
key.vs.as_es = as_es;
key.vs.as_ls = as_ls;
- key.vs.export_prim_id = export_prim_id;
for (unsigned i = 0; i < input_state->vertexAttributeDescriptionCount; ++i) {
unsigned binding;
@@ -1807,6 +1161,121 @@
return key;
}
+
+static void calculate_gfx9_gs_info(const VkGraphicsPipelineCreateInfo *pCreateInfo,
+ struct radv_pipeline *pipeline)
+{
+ struct ac_shader_variant_info *gs_info = &pipeline->shaders[MESA_SHADER_GEOMETRY]->info;
+ struct ac_es_output_info *es_info = radv_pipeline_has_tess(pipeline) ?
+ &gs_info->tes.es_info : &gs_info->vs.es_info;
+ unsigned gs_num_invocations = MAX2(gs_info->gs.invocations, 1);
+ bool uses_adjacency;
+ switch(pCreateInfo->pInputAssemblyState->topology) {
+ case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
+ case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
+ uses_adjacency = true;
+ break;
+ default:
+ uses_adjacency = false;
+ break;
+ }
+
+ /* All these are in dwords: */
+ /* We can't allow using the whole LDS, because GS waves compete with
+ * other shader stages for LDS space. */
+ const unsigned max_lds_size = 8 * 1024;
+ const unsigned esgs_itemsize = es_info->esgs_itemsize / 4;
+ unsigned esgs_lds_size;
+
+ /* All these are per subgroup: */
+ const unsigned max_out_prims = 32 * 1024;
+ const unsigned max_es_verts = 255;
+ const unsigned ideal_gs_prims = 64;
+ unsigned max_gs_prims, gs_prims;
+ unsigned min_es_verts, es_verts, worst_case_es_verts;
+
+ if (uses_adjacency || gs_num_invocations > 1)
+ max_gs_prims = 127 / gs_num_invocations;
+ else
+ max_gs_prims = 255;
+
+ /* MAX_PRIMS_PER_SUBGROUP = gs_prims * max_vert_out * gs_invocations.
+ * Make sure we don't go over the maximum value.
+ */
+ if (gs_info->gs.vertices_out > 0) {
+ max_gs_prims = MIN2(max_gs_prims,
+ max_out_prims /
+ (gs_info->gs.vertices_out * gs_num_invocations));
+ }
+ assert(max_gs_prims > 0);
+
+ /* If the primitive has adjacency, halve the number of vertices
+ * that will be reused in multiple primitives.
+ */
+ min_es_verts = gs_info->gs.vertices_in / (uses_adjacency ? 2 : 1);
+
+ gs_prims = MIN2(ideal_gs_prims, max_gs_prims);
+ worst_case_es_verts = MIN2(min_es_verts * gs_prims, max_es_verts);
+
+ /* Compute ESGS LDS size based on the worst case number of ES vertices
+ * needed to create the target number of GS prims per subgroup.
+ */
+ esgs_lds_size = esgs_itemsize * worst_case_es_verts;
+
+ /* If total LDS usage is too big, refactor partitions based on ratio
+ * of ESGS item sizes.
+ */
+ if (esgs_lds_size > max_lds_size) {
+ /* Our target GS Prims Per Subgroup was too large. Calculate
+ * the maximum number of GS Prims Per Subgroup that will fit
+ * into LDS, capped by the maximum that the hardware can support.
+ */
+ gs_prims = MIN2((max_lds_size / (esgs_itemsize * min_es_verts)),
+ max_gs_prims);
+ assert(gs_prims > 0);
+ worst_case_es_verts = MIN2(min_es_verts * gs_prims,
+ max_es_verts);
+
+ esgs_lds_size = esgs_itemsize * worst_case_es_verts;
+ assert(esgs_lds_size <= max_lds_size);
+ }
+
+ /* Now calculate remaining ESGS information. */
+ if (esgs_lds_size)
+ es_verts = MIN2(esgs_lds_size / esgs_itemsize, max_es_verts);
+ else
+ es_verts = max_es_verts;
+
+ /* Vertices for adjacency primitives are not always reused, so restore
+ * it for ES_VERTS_PER_SUBGRP.
+ */
+ min_es_verts = gs_info->gs.vertices_in;
+
+ /* For normal primitives, the VGT only checks if they are past the ES
+ * verts per subgroup after allocating a full GS primitive and if they
+ * are, kick off a new subgroup. But if those additional ES verts are
+ * unique (e.g. not reused) we need to make sure there is enough LDS
+ * space to account for those ES verts beyond ES_VERTS_PER_SUBGRP.
+ */
+ es_verts -= min_es_verts - 1;
+
+ uint32_t es_verts_per_subgroup = es_verts;
+ uint32_t gs_prims_per_subgroup = gs_prims;
+ uint32_t gs_inst_prims_in_subgroup = gs_prims * gs_num_invocations;
+ uint32_t max_prims_per_subgroup = gs_inst_prims_in_subgroup * gs_info->gs.vertices_out;
+ pipeline->graphics.gs.lds_size = align(esgs_lds_size, 128) / 128;
+ pipeline->graphics.gs.vgt_gs_onchip_cntl =
+ S_028A44_ES_VERTS_PER_SUBGRP(es_verts_per_subgroup) |
+ S_028A44_GS_PRIMS_PER_SUBGRP(gs_prims_per_subgroup) |
+ S_028A44_GS_INST_PRIMS_IN_SUBGRP(gs_inst_prims_in_subgroup);
+ pipeline->graphics.gs.vgt_gs_max_prims_per_subgroup =
+ S_028A94_MAX_PRIMS_PER_SUBGROUP(max_prims_per_subgroup);
+ pipeline->graphics.gs.vgt_esgs_ring_itemsize = esgs_itemsize;
+ assert(max_prims_per_subgroup <= max_out_prims);
+}
+
static void
calculate_gs_ring_sizes(struct radv_pipeline *pipeline)
{
@@ -1819,9 +1288,13 @@
/* The maximum size is 63.999 MB per SE. */
unsigned max_size = ((unsigned)(63.999 * 1024 * 1024) & ~255) * num_se;
struct ac_shader_variant_info *gs_info = &pipeline->shaders[MESA_SHADER_GEOMETRY]->info;
- struct ac_es_output_info *es_info = radv_pipeline_has_tess(pipeline) ?
- &pipeline->shaders[MESA_SHADER_TESS_EVAL]->info.tes.es_info :
- &pipeline->shaders[MESA_SHADER_VERTEX]->info.vs.es_info;
+ struct ac_es_output_info *es_info;
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX9)
+ es_info = radv_pipeline_has_tess(pipeline) ? &gs_info->tes.es_info : &gs_info->vs.es_info;
+ else
+ es_info = radv_pipeline_has_tess(pipeline) ?
+ &pipeline->shaders[MESA_SHADER_TESS_EVAL]->info.tes.es_info :
+ &pipeline->shaders[MESA_SHADER_VERTEX]->info.vs.es_info;
/* Calculate the minimum size. */
unsigned min_esgs_ring_size = align(es_info->esgs_itemsize * gs_vertex_reuse *
@@ -1836,7 +1309,9 @@
esgs_ring_size = align(esgs_ring_size, alignment);
gsvs_ring_size = align(gsvs_ring_size, alignment);
- pipeline->graphics.esgs_ring_size = CLAMP(esgs_ring_size, min_esgs_ring_size, max_size);
+ if (pipeline->device->physical_device->rad_info.chip_class <= VI)
+ pipeline->graphics.esgs_ring_size = CLAMP(esgs_ring_size, min_esgs_ring_size, max_size);
+
pipeline->graphics.gsvs_ring_size = MIN2(gsvs_ring_size, max_size);
}
@@ -1853,6 +1328,24 @@
*lds_size = MAX2(*lds_size, 8);
}
+struct radv_shader_variant *
+radv_get_vertex_shader(struct radv_pipeline *pipeline)
+{
+ if (pipeline->shaders[MESA_SHADER_VERTEX])
+ return pipeline->shaders[MESA_SHADER_VERTEX];
+ if (pipeline->shaders[MESA_SHADER_TESS_CTRL])
+ return pipeline->shaders[MESA_SHADER_TESS_CTRL];
+ return pipeline->shaders[MESA_SHADER_GEOMETRY];
+}
+
+static struct radv_shader_variant *
+radv_get_tess_eval_shader(struct radv_pipeline *pipeline)
+{
+ if (pipeline->shaders[MESA_SHADER_TESS_EVAL])
+ return pipeline->shaders[MESA_SHADER_TESS_EVAL];
+ return pipeline->shaders[MESA_SHADER_GEOMETRY];
+}
+
static void
calculate_tess_state(struct radv_pipeline *pipeline,
const VkGraphicsPipelineCreateInfo *pCreateInfo)
@@ -1869,7 +1362,7 @@
/* This calculates how shader inputs and outputs among VS, TCS, and TES
* are laid out in LDS. */
- num_tcs_inputs = util_last_bit64(pipeline->shaders[MESA_SHADER_VERTEX]->info.vs.outputs_written);
+ num_tcs_inputs = util_last_bit64(radv_get_vertex_shader(pipeline)->info.vs.outputs_written);
num_tcs_outputs = util_last_bit64(pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.outputs_written); //tcs->outputs_written
num_tcs_output_cp = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.tcs_vertices_out; //TCS VERTICES OUT
@@ -1945,7 +1438,7 @@
tess->num_patches = num_patches;
tess->num_tcs_input_cp = num_tcs_input_cp;
- struct radv_shader_variant *tes = pipeline->shaders[MESA_SHADER_TESS_EVAL];
+ struct radv_shader_variant *tes = radv_get_tess_eval_shader(pipeline);
unsigned type = 0, partitioning = 0, topology = 0, distribution_mode = 0;
switch (tes->info.tes.primitive_mode) {
@@ -1974,14 +1467,22 @@
break;
}
+ bool ccw = tes->info.tes.ccw;
+ const VkPipelineTessellationDomainOriginStateCreateInfoKHR *domain_origin_state =
+ vk_find_struct_const(pCreateInfo->pTessellationState,
+ PIPELINE_TESSELLATION_DOMAIN_ORIGIN_STATE_CREATE_INFO_KHR);
+
+ if (domain_origin_state && domain_origin_state->domainOrigin != VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT_KHR)
+ ccw = !ccw;
+
if (tes->info.tes.point_mode)
topology = V_028B6C_OUTPUT_POINT;
else if (tes->info.tes.primitive_mode == GL_ISOLINES)
topology = V_028B6C_OUTPUT_LINE;
- else if (tes->info.tes.ccw)
- topology = V_028B6C_OUTPUT_TRIANGLE_CW;
- else
+ else if (ccw)
topology = V_028B6C_OUTPUT_TRIANGLE_CCW;
+ else
+ topology = V_028B6C_OUTPUT_TRIANGLE_CW;
if (pipeline->device->has_distributed_tess) {
if (pipeline->device->physical_device->rad_info.family == CHIP_FIJI ||
@@ -2016,7 +1517,8 @@
[V_008958_DI_PT_2D_TRI_STRIP] = {0, 0},
};
-static uint32_t si_vgt_gs_mode(struct radv_shader_variant *gs)
+static uint32_t si_vgt_gs_mode(struct radv_shader_variant *gs,
+ enum chip_class chip_class)
{
unsigned gs_max_vert_out = gs->info.gs.vertices_out;
unsigned cut_mode;
@@ -2034,22 +1536,31 @@
return S_028A40_MODE(V_028A40_GS_SCENARIO_G) |
S_028A40_CUT_MODE(cut_mode)|
- S_028A40_ES_WRITE_OPTIMIZE(1) |
- S_028A40_GS_WRITE_OPTIMIZE(1);
+ S_028A40_ES_WRITE_OPTIMIZE(chip_class <= VI) |
+ S_028A40_GS_WRITE_OPTIMIZE(1) |
+ S_028A40_ONCHIP(chip_class >= GFX9 ? 1 : 0);
}
-static void calculate_vgt_gs_mode(struct radv_pipeline *pipeline)
+static struct ac_vs_output_info *get_vs_output_info(struct radv_pipeline *pipeline)
{
- struct radv_shader_variant *vs;
- vs = radv_pipeline_has_gs(pipeline) ? pipeline->gs_copy_shader : (radv_pipeline_has_tess(pipeline) ? pipeline->shaders[MESA_SHADER_TESS_EVAL] : pipeline->shaders[MESA_SHADER_VERTEX]);
+ if (radv_pipeline_has_gs(pipeline))
+ return &pipeline->gs_copy_shader->info.vs.outinfo;
+ else if (radv_pipeline_has_tess(pipeline))
+ return &pipeline->shaders[MESA_SHADER_TESS_EVAL]->info.tes.outinfo;
+ else
+ return &pipeline->shaders[MESA_SHADER_VERTEX]->info.vs.outinfo;
+}
- struct ac_vs_output_info *outinfo = &vs->info.vs.outinfo;
+static void calculate_vgt_gs_mode(struct radv_pipeline *pipeline)
+{
+ struct ac_vs_output_info *outinfo = get_vs_output_info(pipeline);
pipeline->graphics.vgt_primitiveid_en = false;
pipeline->graphics.vgt_gs_mode = 0;
if (radv_pipeline_has_gs(pipeline)) {
- pipeline->graphics.vgt_gs_mode = si_vgt_gs_mode(pipeline->shaders[MESA_SHADER_GEOMETRY]);
+ pipeline->graphics.vgt_gs_mode = si_vgt_gs_mode(pipeline->shaders[MESA_SHADER_GEOMETRY],
+ pipeline->device->physical_device->rad_info.chip_class);
} else if (outinfo->export_prim_id) {
pipeline->graphics.vgt_gs_mode = S_028A40_MODE(V_028A40_GS_SCENARIO_A);
pipeline->graphics.vgt_primitiveid_en = true;
@@ -2058,10 +1569,7 @@
static void calculate_pa_cl_vs_out_cntl(struct radv_pipeline *pipeline)
{
- struct radv_shader_variant *vs;
- vs = radv_pipeline_has_gs(pipeline) ? pipeline->gs_copy_shader : (radv_pipeline_has_tess(pipeline) ? pipeline->shaders[MESA_SHADER_TESS_EVAL] : pipeline->shaders[MESA_SHADER_VERTEX]);
-
- struct ac_vs_output_info *outinfo = &vs->info.vs.outinfo;
+ struct ac_vs_output_info *outinfo = get_vs_output_info(pipeline);
unsigned clip_dist_mask, cull_dist_mask, total_mask;
clip_dist_mask = outinfo->clip_dist_mask;
@@ -2104,13 +1612,10 @@
static void calculate_ps_inputs(struct radv_pipeline *pipeline)
{
- struct radv_shader_variant *ps, *vs;
- struct ac_vs_output_info *outinfo;
+ struct radv_shader_variant *ps;
+ struct ac_vs_output_info *outinfo = get_vs_output_info(pipeline);
ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
- vs = radv_pipeline_has_gs(pipeline) ? pipeline->gs_copy_shader : (radv_pipeline_has_tess(pipeline) ? pipeline->shaders[MESA_SHADER_TESS_EVAL] : pipeline->shaders[MESA_SHADER_VERTEX]);
-
- outinfo = &vs->info.vs.outinfo;
unsigned ps_offset = 0;
@@ -2124,10 +1629,11 @@
if (ps->info.fs.layer_input) {
unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_LAYER];
- if (vs_offset != AC_EXP_PARAM_UNDEFINED) {
+ if (vs_offset != AC_EXP_PARAM_UNDEFINED)
pipeline->graphics.ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true);
- ++ps_offset;
- }
+ else
+ pipeline->graphics.ps_input_cntl[ps_offset] = offset_to_ps_input(AC_EXP_PARAM_DEFAULT_VAL_0000, true);
+ ++ps_offset;
}
if (ps->info.fs.has_pcoord) {
@@ -2159,7 +1665,268 @@
pipeline->graphics.ps_input_cntl_num = ps_offset;
}
-VkResult
+static void
+radv_link_shaders(struct radv_pipeline *pipeline, nir_shader **shaders)
+{
+ nir_shader* ordered_shaders[MESA_SHADER_STAGES];
+ int shader_count = 0;
+
+ if(shaders[MESA_SHADER_FRAGMENT]) {
+ ordered_shaders[shader_count++] = shaders[MESA_SHADER_FRAGMENT];
+ }
+ if(shaders[MESA_SHADER_GEOMETRY]) {
+ ordered_shaders[shader_count++] = shaders[MESA_SHADER_GEOMETRY];
+ }
+ if(shaders[MESA_SHADER_TESS_EVAL]) {
+ ordered_shaders[shader_count++] = shaders[MESA_SHADER_TESS_EVAL];
+ }
+ if(shaders[MESA_SHADER_TESS_CTRL]) {
+ ordered_shaders[shader_count++] = shaders[MESA_SHADER_TESS_CTRL];
+ }
+ if(shaders[MESA_SHADER_VERTEX]) {
+ ordered_shaders[shader_count++] = shaders[MESA_SHADER_VERTEX];
+ }
+
+ for (int i = 1; i < shader_count; ++i) {
+ nir_remove_dead_variables(ordered_shaders[i],
+ nir_var_shader_out);
+ nir_remove_dead_variables(ordered_shaders[i - 1],
+ nir_var_shader_in);
+
+ bool progress = nir_remove_unused_varyings(ordered_shaders[i],
+ ordered_shaders[i - 1]);
+
+ if (progress) {
+ nir_lower_global_vars_to_local(ordered_shaders[i]);
+ radv_optimize_nir(ordered_shaders[i]);
+ nir_lower_global_vars_to_local(ordered_shaders[i - 1]);
+ radv_optimize_nir(ordered_shaders[i - 1]);
+ }
+ }
+}
+
+static void
+merge_tess_info(struct shader_info *tes_info,
+ const struct shader_info *tcs_info)
+{
+ /* The Vulkan 1.0.38 spec, section 21.1 Tessellator says:
+ *
+ * "PointMode. Controls generation of points rather than triangles
+ * or lines. This functionality defaults to disabled, and is
+ * enabled if either shader stage includes the execution mode.
+ *
+ * and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw,
+ * PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd,
+ * and OutputVertices, it says:
+ *
+ * "One mode must be set in at least one of the tessellation
+ * shader stages."
+ *
+ * So, the fields can be set in either the TCS or TES, but they must
+ * agree if set in both. Our backend looks at TES, so bitwise-or in
+ * the values from the TCS.
+ */
+ assert(tcs_info->tess.tcs_vertices_out == 0 ||
+ tes_info->tess.tcs_vertices_out == 0 ||
+ tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out);
+ tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out;
+
+ assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
+ tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
+ tcs_info->tess.spacing == tes_info->tess.spacing);
+ tes_info->tess.spacing |= tcs_info->tess.spacing;
+
+ assert(tcs_info->tess.primitive_mode == 0 ||
+ tes_info->tess.primitive_mode == 0 ||
+ tcs_info->tess.primitive_mode == tes_info->tess.primitive_mode);
+ tes_info->tess.primitive_mode |= tcs_info->tess.primitive_mode;
+ tes_info->tess.ccw |= tcs_info->tess.ccw;
+ tes_info->tess.point_mode |= tcs_info->tess.point_mode;
+}
+
+static
+void radv_create_shaders(struct radv_pipeline *pipeline,
+ struct radv_device *device,
+ struct radv_pipeline_cache *cache,
+ struct ac_shader_variant_key *keys,
+ const VkPipelineShaderStageCreateInfo **pStages)
+{
+ struct radv_shader_module fs_m = {0};
+ struct radv_shader_module *modules[MESA_SHADER_STAGES] = { 0, };
+ nir_shader *nir[MESA_SHADER_STAGES] = {0};
+ void *codes[MESA_SHADER_STAGES] = {0};
+ unsigned code_sizes[MESA_SHADER_STAGES] = {0};
+ unsigned char hash[20], gs_copy_hash[20];
+
+ for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i) {
+ if (pStages[i]) {
+ modules[i] = radv_shader_module_from_handle(pStages[i]->module);
+ if (modules[i]->nir)
+ _mesa_sha1_compute(modules[i]->nir->info.name,
+ strlen(modules[i]->nir->info.name),
+ modules[i]->sha1);
+ }
+ }
+
+ radv_hash_shaders(hash, pStages, pipeline->layout, keys, get_hash_flags(device));
+ memcpy(gs_copy_hash, hash, 20);
+ gs_copy_hash[0] ^= 1;
+
+ if (modules[MESA_SHADER_GEOMETRY]) {
+ struct radv_shader_variant *variants[MESA_SHADER_STAGES] = {0};
+ radv_create_shader_variants_from_pipeline_cache(device, cache, gs_copy_hash, variants);
+ pipeline->gs_copy_shader = variants[MESA_SHADER_GEOMETRY];
+ }
+
+ if (radv_create_shader_variants_from_pipeline_cache(device, cache, hash, pipeline->shaders) &&
+ (!modules[MESA_SHADER_GEOMETRY] || pipeline->gs_copy_shader)) {
+ for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i) {
+ if (pipeline->shaders[i])
+ pipeline->active_stages |= mesa_to_vk_shader_stage(i);
+ }
+ return;
+ }
+
+ if (!modules[MESA_SHADER_FRAGMENT] && !modules[MESA_SHADER_COMPUTE]) {
+ nir_builder fs_b;
+ nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, NULL);
+ fs_b.shader->info.name = ralloc_strdup(fs_b.shader, "noop_fs");
+ fs_m.nir = fs_b.shader;
+ modules[MESA_SHADER_FRAGMENT] = &fs_m;
+ }
+
+ for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i) {
+ const VkPipelineShaderStageCreateInfo *stage = pStages[i];
+
+ if (!modules[i])
+ continue;
+
+ nir[i] = radv_shader_compile_to_nir(device, modules[i],
+ stage ? stage->pName : "main", i,
+ stage ? stage->pSpecializationInfo : NULL);
+ pipeline->active_stages |= mesa_to_vk_shader_stage(i);
+ /* We don't want to alter meta shaders IR directly so clone it
+ * first.
+ */
+ if (nir[i]->info.name) {
+ nir[i] = nir_shader_clone(NULL, nir[i]);
+ }
+
+ }
+
+ if (nir[MESA_SHADER_TESS_CTRL]) {
+ /* TODO: This is no longer used as a key we should refactor this */
+ if (keys)
+ keys[MESA_SHADER_TESS_CTRL].tcs.primitive_mode = nir[MESA_SHADER_TESS_EVAL]->info.tess.primitive_mode;
+
+ keys[MESA_SHADER_TESS_CTRL].tcs.tes_reads_tess_factors = !!(nir[MESA_SHADER_TESS_EVAL]->info.inputs_read & (VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER));
+ nir_lower_tes_patch_vertices(nir[MESA_SHADER_TESS_EVAL], nir[MESA_SHADER_TESS_CTRL]->info.tess.tcs_vertices_out);
+ merge_tess_info(&nir[MESA_SHADER_TESS_EVAL]->info, &nir[MESA_SHADER_TESS_CTRL]->info);
+ }
+
+ radv_link_shaders(pipeline, nir);
+
+ for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
+ if (!(device->instance->debug_flags & RADV_DEBUG_DUMP_SHADERS))
+ continue;
+
+ if (modules[i])
+ nir_print_shader(nir[i], stderr);
+ }
+
+ if (nir[MESA_SHADER_FRAGMENT]) {
+ if (!pipeline->shaders[MESA_SHADER_FRAGMENT]) {
+ pipeline->shaders[MESA_SHADER_FRAGMENT] =
+ radv_shader_variant_create(device, modules[MESA_SHADER_FRAGMENT], &nir[MESA_SHADER_FRAGMENT], 1,
+ pipeline->layout, keys ? keys + MESA_SHADER_FRAGMENT : 0,
+ &codes[MESA_SHADER_FRAGMENT], &code_sizes[MESA_SHADER_FRAGMENT]);
+ }
+
+ /* TODO: These are no longer used as keys we should refactor this */
+ if (keys) {
+ keys[MESA_SHADER_VERTEX].vs.export_prim_id =
+ pipeline->shaders[MESA_SHADER_FRAGMENT]->info.fs.prim_id_input;
+ keys[MESA_SHADER_TESS_EVAL].tes.export_prim_id =
+ pipeline->shaders[MESA_SHADER_FRAGMENT]->info.fs.prim_id_input;
+ }
+ }
+
+ if (device->physical_device->rad_info.chip_class >= GFX9 && modules[MESA_SHADER_TESS_CTRL]) {
+ if (!pipeline->shaders[MESA_SHADER_TESS_CTRL]) {
+ struct nir_shader *combined_nir[] = {nir[MESA_SHADER_VERTEX], nir[MESA_SHADER_TESS_CTRL]};
+ struct ac_shader_variant_key key = keys[MESA_SHADER_TESS_CTRL];
+ key.tcs.vs_key = keys[MESA_SHADER_VERTEX].vs;
+ pipeline->shaders[MESA_SHADER_TESS_CTRL] = radv_shader_variant_create(device, modules[MESA_SHADER_TESS_CTRL], combined_nir, 2,
+ pipeline->layout,
+ &key, &codes[MESA_SHADER_TESS_CTRL],
+ &code_sizes[MESA_SHADER_TESS_CTRL]);
+ }
+ modules[MESA_SHADER_VERTEX] = NULL;
+ }
+
+ if (device->physical_device->rad_info.chip_class >= GFX9 && modules[MESA_SHADER_GEOMETRY]) {
+ gl_shader_stage pre_stage = modules[MESA_SHADER_TESS_EVAL] ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
+ if (!pipeline->shaders[MESA_SHADER_GEOMETRY]) {
+ struct nir_shader *combined_nir[] = {nir[pre_stage], nir[MESA_SHADER_GEOMETRY]};
+ pipeline->shaders[MESA_SHADER_GEOMETRY] = radv_shader_variant_create(device, modules[MESA_SHADER_GEOMETRY], combined_nir, 2,
+ pipeline->layout,
+ &keys[pre_stage] , &codes[MESA_SHADER_GEOMETRY],
+ &code_sizes[MESA_SHADER_GEOMETRY]);
+ }
+ modules[pre_stage] = NULL;
+ }
+
+ for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
+ if(modules[i] && !pipeline->shaders[i]) {
+ pipeline->shaders[i] = radv_shader_variant_create(device, modules[i], &nir[i], 1,
+ pipeline->layout,
+ keys ? keys + i : 0, &codes[i],
+ &code_sizes[i]);
+ }
+ }
+
+ if(modules[MESA_SHADER_GEOMETRY]) {
+ void *gs_copy_code = NULL;
+ unsigned gs_copy_code_size = 0;
+ if (!pipeline->gs_copy_shader) {
+ pipeline->gs_copy_shader = radv_create_gs_copy_shader(
+ device, nir[MESA_SHADER_GEOMETRY], &gs_copy_code,
+ &gs_copy_code_size,
+ keys[MESA_SHADER_GEOMETRY].has_multiview_view_index);
+ }
+
+ if (pipeline->gs_copy_shader) {
+ void *code[MESA_SHADER_STAGES] = {0};
+ unsigned code_size[MESA_SHADER_STAGES] = {0};
+ struct radv_shader_variant *variants[MESA_SHADER_STAGES] = {0};
+
+ code[MESA_SHADER_GEOMETRY] = gs_copy_code;
+ code_size[MESA_SHADER_GEOMETRY] = gs_copy_code_size;
+ variants[MESA_SHADER_GEOMETRY] = pipeline->gs_copy_shader;
+
+ radv_pipeline_cache_insert_shaders(device, cache,
+ gs_copy_hash,
+ variants,
+ (const void**)code,
+ code_size);
+ }
+ free(gs_copy_code);
+ }
+
+ radv_pipeline_cache_insert_shaders(device, cache, hash, pipeline->shaders,
+ (const void**)codes, code_sizes);
+
+ for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
+ free(codes[i]);
+ if (modules[i] && !pipeline->device->trace_bo)
+ ralloc_free(nir[i]);
+ }
+
+ if (fs_m.nir)
+ ralloc_free(fs_m.nir);
+}
+
+static VkResult
radv_pipeline_init(struct radv_pipeline *pipeline,
struct radv_device *device,
struct radv_pipeline_cache *cache,
@@ -2167,9 +1934,13 @@
const struct radv_graphics_pipeline_create_info *extra,
const VkAllocationCallbacks *alloc)
{
- struct radv_shader_module fs_m = {0};
VkResult result;
+ bool has_view_index = false;
+ RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
+ struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
+ if (subpass->view_mask)
+ has_view_index = true;
if (alloc == NULL)
alloc = &device->alloc;
@@ -2177,97 +1948,53 @@
pipeline->layout = radv_pipeline_layout_from_handle(pCreateInfo->layout);
radv_pipeline_init_dynamic_state(pipeline, pCreateInfo);
+ radv_pipeline_init_blend_state(pipeline, pCreateInfo, extra);
+
const VkPipelineShaderStageCreateInfo *pStages[MESA_SHADER_STAGES] = { 0, };
- struct radv_shader_module *modules[MESA_SHADER_STAGES] = { 0, };
for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
gl_shader_stage stage = ffs(pCreateInfo->pStages[i].stage) - 1;
pStages[stage] = &pCreateInfo->pStages[i];
- modules[stage] = radv_shader_module_from_handle(pStages[stage]->module);
}
- radv_pipeline_init_blend_state(pipeline, pCreateInfo, extra);
-
- if (!modules[MESA_SHADER_FRAGMENT]) {
- nir_builder fs_b;
- nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, NULL);
- fs_b.shader->info.name = ralloc_strdup(fs_b.shader, "noop_fs");
- fs_m.nir = fs_b.shader;
- modules[MESA_SHADER_FRAGMENT] = &fs_m;
- }
-
- if (modules[MESA_SHADER_FRAGMENT]) {
- union ac_shader_variant_key key = {0};
- key.fs.col_format = pipeline->graphics.blend.spi_shader_col_format;
-
- if (pipeline->device->physical_device->rad_info.chip_class < VI)
- radv_pipeline_compute_get_int_clamp(pCreateInfo, &key.fs.is_int8, &key.fs.is_int10);
-
- const VkPipelineShaderStageCreateInfo *stage = pStages[MESA_SHADER_FRAGMENT];
-
- pipeline->shaders[MESA_SHADER_FRAGMENT] =
- radv_pipeline_compile(pipeline, cache, modules[MESA_SHADER_FRAGMENT],
- stage ? stage->pName : "main",
- MESA_SHADER_FRAGMENT,
- stage ? stage->pSpecializationInfo : NULL,
- pipeline->layout, &key);
- pipeline->active_stages |= mesa_to_vk_shader_stage(MESA_SHADER_FRAGMENT);
- }
+ struct ac_shader_variant_key keys[MESA_SHADER_STAGES];
+ memset(keys, 0, sizeof(keys));
- if (fs_m.nir)
- ralloc_free(fs_m.nir);
-
- if (modules[MESA_SHADER_VERTEX]) {
+ if (pStages[MESA_SHADER_VERTEX]) {
bool as_es = false;
bool as_ls = false;
- bool export_prim_id = false;
- if (modules[MESA_SHADER_TESS_CTRL])
+ if (pStages[MESA_SHADER_TESS_CTRL])
as_ls = true;
- else if (modules[MESA_SHADER_GEOMETRY])
+ else if (pStages[MESA_SHADER_GEOMETRY])
as_es = true;
- else if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.fs.prim_id_input)
- export_prim_id = true;
- union ac_shader_variant_key key = radv_compute_vs_key(pCreateInfo, as_es, as_ls, export_prim_id);
-
- pipeline->shaders[MESA_SHADER_VERTEX] =
- radv_pipeline_compile(pipeline, cache, modules[MESA_SHADER_VERTEX],
- pStages[MESA_SHADER_VERTEX]->pName,
- MESA_SHADER_VERTEX,
- pStages[MESA_SHADER_VERTEX]->pSpecializationInfo,
- pipeline->layout, &key);
- pipeline->active_stages |= mesa_to_vk_shader_stage(MESA_SHADER_VERTEX);
+ keys[MESA_SHADER_VERTEX] = radv_compute_vs_key(pCreateInfo, as_es, as_ls);
+ keys[MESA_SHADER_VERTEX].has_multiview_view_index = has_view_index;
}
- if (modules[MESA_SHADER_GEOMETRY]) {
- union ac_shader_variant_key key = radv_compute_vs_key(pCreateInfo, false, false, false);
+ if (pStages[MESA_SHADER_TESS_EVAL]) {
+ keys[MESA_SHADER_TESS_EVAL].has_multiview_view_index = has_view_index;
+ if (pStages[MESA_SHADER_GEOMETRY])
+ keys[MESA_SHADER_TESS_EVAL].tes.as_es = true;
+ }
- pipeline->shaders[MESA_SHADER_GEOMETRY] =
- radv_pipeline_compile(pipeline, cache, modules[MESA_SHADER_GEOMETRY],
- pStages[MESA_SHADER_GEOMETRY]->pName,
- MESA_SHADER_GEOMETRY,
- pStages[MESA_SHADER_GEOMETRY]->pSpecializationInfo,
- pipeline->layout, &key);
-
- pipeline->active_stages |= mesa_to_vk_shader_stage(MESA_SHADER_GEOMETRY);
- }
-
- if (modules[MESA_SHADER_TESS_EVAL]) {
- assert(modules[MESA_SHADER_TESS_CTRL]);
-
- radv_tess_pipeline_compile(pipeline,
- cache,
- modules[MESA_SHADER_TESS_CTRL],
- modules[MESA_SHADER_TESS_EVAL],
- pStages[MESA_SHADER_TESS_CTRL]->pName,
- pStages[MESA_SHADER_TESS_EVAL]->pName,
- pStages[MESA_SHADER_TESS_CTRL]->pSpecializationInfo,
- pStages[MESA_SHADER_TESS_EVAL]->pSpecializationInfo,
- pipeline->layout,
- pCreateInfo->pTessellationState->patchControlPoints);
- pipeline->active_stages |= mesa_to_vk_shader_stage(MESA_SHADER_TESS_EVAL) |
- mesa_to_vk_shader_stage(MESA_SHADER_TESS_CTRL);
+ if (pCreateInfo->pTessellationState)
+ keys[MESA_SHADER_TESS_CTRL].tcs.input_vertices = pCreateInfo->pTessellationState->patchControlPoints;
+
+ if (pStages[MESA_SHADER_GEOMETRY]) {
+ keys[MESA_SHADER_GEOMETRY] = radv_compute_vs_key(pCreateInfo, false, false);
+ keys[MESA_SHADER_GEOMETRY].has_multiview_view_index = has_view_index;
}
+ if (pCreateInfo->pMultisampleState &&
+ pCreateInfo->pMultisampleState->rasterizationSamples > 1)
+ keys[MESA_SHADER_FRAGMENT].fs.multisample = true;
+
+ keys[MESA_SHADER_FRAGMENT].fs.col_format = pipeline->graphics.blend.spi_shader_col_format;
+ if (pipeline->device->physical_device->rad_info.chip_class < VI)
+ radv_pipeline_compute_get_int_clamp(pCreateInfo, &keys[MESA_SHADER_FRAGMENT].fs.is_int8, &keys[MESA_SHADER_FRAGMENT].fs.is_int10);
+
+ radv_create_shaders(pipeline, device, cache, keys, pStages);
+
radv_pipeline_init_depth_stencil_state(pipeline, pCreateInfo, extra);
radv_pipeline_init_raster_state(pipeline, pCreateInfo);
radv_pipeline_init_multisample_state(pipeline, pCreateInfo);
@@ -2307,7 +2034,7 @@
!ps->info.fs.writes_sample_mask)
pipeline->graphics.blend.spi_shader_col_format = V_028714_SPI_SHADER_32_R;
}
-
+
unsigned z_order;
pipeline->graphics.db_shader_control = 0;
if (ps->info.fs.early_fragment_test || !ps->info.fs.writes_memory)
@@ -2366,8 +2093,11 @@
pipeline->graphics.vgt_shader_stages_en = stages;
- if (radv_pipeline_has_gs(pipeline))
+ if (radv_pipeline_has_gs(pipeline)) {
calculate_gs_ring_sizes(pipeline);
+ if (device->physical_device->rad_info.chip_class >= GFX9)
+ calculate_gfx9_gs_info(pCreateInfo, pipeline);
+ }
if (radv_pipeline_has_tess(pipeline)) {
if (pipeline->graphics.prim == V_008958_DI_PT_PATCH) {
@@ -2377,8 +2107,90 @@
calculate_tess_state(pipeline, pCreateInfo);
}
+ if (radv_pipeline_has_tess(pipeline))
+ pipeline->graphics.primgroup_size = pipeline->graphics.tess.num_patches;
+ else if (radv_pipeline_has_gs(pipeline))
+ pipeline->graphics.primgroup_size = 64;
+ else
+ pipeline->graphics.primgroup_size = 128; /* recommended without a GS */
+
+ pipeline->graphics.partial_es_wave = false;
+ if (pipeline->device->has_distributed_tess) {
+ if (radv_pipeline_has_gs(pipeline)) {
+ if (device->physical_device->rad_info.chip_class <= VI)
+ pipeline->graphics.partial_es_wave = true;
+ }
+ }
+ /* GS requirement. */
+ if (SI_GS_PER_ES / pipeline->graphics.primgroup_size >= pipeline->device->gs_table_depth - 3)
+ pipeline->graphics.partial_es_wave = true;
+
+ pipeline->graphics.wd_switch_on_eop = false;
+ if (device->physical_device->rad_info.chip_class >= CIK) {
+ unsigned prim = pipeline->graphics.prim;
+ /* WD_SWITCH_ON_EOP has no effect on GPUs with less than
+ * 4 shader engines. Set 1 to pass the assertion below.
+ * The other cases are hardware requirements. */
+ if (device->physical_device->rad_info.max_se < 4 ||
+ prim == V_008958_DI_PT_POLYGON ||
+ prim == V_008958_DI_PT_LINELOOP ||
+ prim == V_008958_DI_PT_TRIFAN ||
+ prim == V_008958_DI_PT_TRISTRIP_ADJ ||
+ (pipeline->graphics.prim_restart_enable &&
+ (device->physical_device->rad_info.family < CHIP_POLARIS10 ||
+ (prim != V_008958_DI_PT_POINTLIST &&
+ prim != V_008958_DI_PT_LINESTRIP &&
+ prim != V_008958_DI_PT_TRISTRIP))))
+ pipeline->graphics.wd_switch_on_eop = true;
+ }
+
+ pipeline->graphics.ia_switch_on_eoi = false;
+ if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.fs.prim_id_input)
+ pipeline->graphics.ia_switch_on_eoi = true;
+ if (radv_pipeline_has_gs(pipeline) &&
+ pipeline->shaders[MESA_SHADER_GEOMETRY]->info.gs.uses_prim_id)
+ pipeline->graphics.ia_switch_on_eoi = true;
+ if (radv_pipeline_has_tess(pipeline)) {
+ /* SWITCH_ON_EOI must be set if PrimID is used. */
+ if (pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.uses_prim_id ||
+ radv_get_tess_eval_shader(pipeline)->info.tes.uses_prim_id)
+ pipeline->graphics.ia_switch_on_eoi = true;
+ }
+
+ pipeline->graphics.partial_vs_wave = false;
+ if (radv_pipeline_has_tess(pipeline)) {
+ /* Bug with tessellation and GS on Bonaire and older 2 SE chips. */
+ if ((device->physical_device->rad_info.family == CHIP_TAHITI ||
+ device->physical_device->rad_info.family == CHIP_PITCAIRN ||
+ device->physical_device->rad_info.family == CHIP_BONAIRE) &&
+ radv_pipeline_has_gs(pipeline))
+ pipeline->graphics.partial_vs_wave = true;
+ /* Needed for 028B6C_DISTRIBUTION_MODE != 0 */
+ if (device->has_distributed_tess) {
+ if (radv_pipeline_has_gs(pipeline)) {
+ if (device->physical_device->rad_info.family == CHIP_TONGA ||
+ device->physical_device->rad_info.family == CHIP_FIJI ||
+ device->physical_device->rad_info.family == CHIP_POLARIS10 ||
+ device->physical_device->rad_info.family == CHIP_POLARIS11 ||
+ device->physical_device->rad_info.family == CHIP_POLARIS12)
+ pipeline->graphics.partial_vs_wave = true;
+ } else {
+ pipeline->graphics.partial_vs_wave = true;
+ }
+ }
+ }
+
+ pipeline->graphics.base_ia_multi_vgt_param =
+ S_028AA8_PRIMGROUP_SIZE(pipeline->graphics.primgroup_size - 1) |
+ /* The following field was moved to VGT_SHADER_STAGES_EN in GFX9. */
+ S_028AA8_MAX_PRIMGRP_IN_WAVE(device->physical_device->rad_info.chip_class == VI ? 2 : 0) |
+ S_030960_EN_INST_OPT_BASIC(device->physical_device->rad_info.chip_class >= GFX9) |
+ S_030960_EN_INST_OPT_ADV(device->physical_device->rad_info.chip_class >= GFX9);
+
const VkPipelineVertexInputStateCreateInfo *vi_info =
pCreateInfo->pVertexInputState;
+ struct radv_vertex_elements_info *velems = &pipeline->vertex_elements;
+
for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
const VkVertexInputAttributeDescription *desc =
&vi_info->pVertexAttributeDescriptions[i];
@@ -2392,16 +2204,16 @@
num_format = radv_translate_buffer_numformat(format_desc, first_non_void);
data_format = radv_translate_buffer_dataformat(format_desc, first_non_void);
- pipeline->va_rsrc_word3[loc] = S_008F0C_DST_SEL_X(si_map_swizzle(format_desc->swizzle[0])) |
+ velems->rsrc_word3[loc] = S_008F0C_DST_SEL_X(si_map_swizzle(format_desc->swizzle[0])) |
S_008F0C_DST_SEL_Y(si_map_swizzle(format_desc->swizzle[1])) |
S_008F0C_DST_SEL_Z(si_map_swizzle(format_desc->swizzle[2])) |
S_008F0C_DST_SEL_W(si_map_swizzle(format_desc->swizzle[3])) |
S_008F0C_NUM_FORMAT(num_format) |
S_008F0C_DATA_FORMAT(data_format);
- pipeline->va_format_size[loc] = format_desc->block.bits / 8;
- pipeline->va_offset[loc] = desc->offset;
- pipeline->va_binding[loc] = desc->binding;
- pipeline->num_vertex_attribs = MAX2(pipeline->num_vertex_attribs, loc + 1);
+ velems->format_size[loc] = format_desc->block.bits / 8;
+ velems->offset[loc] = desc->offset;
+ velems->binding[loc] = desc->binding;
+ velems->count = MAX2(velems->count, loc + 1);
}
for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) {
@@ -2414,14 +2226,21 @@
struct ac_userdata_info *loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_VERTEX,
AC_UD_VS_BASE_VERTEX_START_INSTANCE);
if (loc->sgpr_idx != -1) {
- pipeline->graphics.vtx_base_sgpr = radv_shader_stage_to_user_data_0(MESA_SHADER_VERTEX, radv_pipeline_has_gs(pipeline), radv_pipeline_has_tess(pipeline));
+ pipeline->graphics.vtx_base_sgpr = radv_shader_stage_to_user_data_0(MESA_SHADER_VERTEX, device->physical_device->rad_info.chip_class, radv_pipeline_has_gs(pipeline), radv_pipeline_has_tess(pipeline));
pipeline->graphics.vtx_base_sgpr += loc->sgpr_idx * 4;
- if (pipeline->shaders[MESA_SHADER_VERTEX]->info.info.vs.needs_draw_id)
+ if (radv_get_vertex_shader(pipeline)->info.info.vs.needs_draw_id)
pipeline->graphics.vtx_emit_num = 3;
else
pipeline->graphics.vtx_emit_num = 2;
}
- if (device->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS) {
+
+ pipeline->graphics.vtx_reuse_depth = 30;
+ if (radv_pipeline_has_tess(pipeline) &&
+ radv_get_tess_eval_shader(pipeline)->info.tes.spacing == TESS_SPACING_FRACTIONAL_ODD) {
+ pipeline->graphics.vtx_reuse_depth = 14;
+ }
+
+ if (device->instance->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS) {
radv_dump_pipeline_stats(device, pipeline);
}
@@ -2496,7 +2315,7 @@
{
RADV_FROM_HANDLE(radv_device, device, _device);
RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
- RADV_FROM_HANDLE(radv_shader_module, module, pCreateInfo->stage.module);
+ const VkPipelineShaderStageCreateInfo *pStages[MESA_SHADER_STAGES] = { 0, };
struct radv_pipeline *pipeline;
VkResult result;
@@ -2509,12 +2328,8 @@
pipeline->device = device;
pipeline->layout = radv_pipeline_layout_from_handle(pCreateInfo->layout);
- pipeline->shaders[MESA_SHADER_COMPUTE] =
- radv_pipeline_compile(pipeline, cache, module,
- pCreateInfo->stage.pName,
- MESA_SHADER_COMPUTE,
- pCreateInfo->stage.pSpecializationInfo,
- pipeline->layout, NULL);
+ pStages[MESA_SHADER_COMPUTE] = &pCreateInfo->stage;
+ radv_create_shaders(pipeline, device, cache, NULL, pStages);
pipeline->need_indirect_descriptor_sets |= pipeline->shaders[MESA_SHADER_COMPUTE]->info.need_indirect_descriptor_sets;
@@ -2526,7 +2341,7 @@
*pPipeline = radv_pipeline_to_handle(pipeline);
- if (device->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS) {
+ if (device->instance->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS) {
radv_dump_pipeline_stats(device, pipeline);
}
return VK_SUCCESS;
diff -Nru mesa-17.2.4/src/amd/vulkan/radv_pipeline_cache.c mesa-17.3.3/src/amd/vulkan/radv_pipeline_cache.c
--- mesa-17.2.4/src/amd/vulkan/radv_pipeline_cache.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/vulkan/radv_pipeline_cache.c 2018-01-18 21:30:28.000000000 +0000
@@ -23,22 +23,28 @@
#include "util/mesa-sha1.h"
#include "util/debug.h"
+#include "util/disk_cache.h"
#include "util/u_atomic.h"
+#include "radv_debug.h"
#include "radv_private.h"
+#include "radv_shader.h"
#include "ac_nir_to_llvm.h"
+struct cache_entry_variant_info { /* per-stage metadata record; packed into cache_entry.code directly in front of that stage's code bytes */
+ struct ac_shader_variant_info variant_info; /* mirrors radv_shader_variant.info */
+ struct ac_shader_config config; /* mirrors radv_shader_variant.config */
+ uint32_t rsrc1, rsrc2; /* mirror radv_shader_variant.rsrc1 / rsrc2 */
+};
+
struct cache_entry {
union {
unsigned char sha1[20];
uint32_t sha1_dw[5];
};
- uint32_t code_size;
- struct ac_shader_variant_info variant_info;
- struct ac_shader_config config;
- uint32_t rsrc1, rsrc2;
- struct radv_shader_variant *variant;
- uint32_t code[0];
+ uint32_t code_sizes[MESA_SHADER_STAGES]; /* per-stage code size in bytes; 0 means the entry holds no record for that stage */
+ struct radv_shader_variant *variants[MESA_SHADER_STAGES]; /* runtime-only pointers: NULL in the copy written to the disk cache and reset to NULL when cache data is loaded or exported */
+ char code[0]; /* packed payload: for each stage with a non-zero code size, in stage order, a struct cache_entry_variant_info followed by the code bytes */
};
void
@@ -58,7 +64,7 @@
/* We don't consider allocation failure fatal, we just start with a 0-sized
* cache. */
if (cache->hash_table == NULL ||
- (device->debug_flags & RADV_DEBUG_NO_CACHE))
+ (device->instance->debug_flags & RADV_DEBUG_NO_CACHE))
cache->table_size = 0;
else
memset(cache->hash_table, 0, byte_size);
@@ -69,9 +75,11 @@
{
for (unsigned i = 0; i < cache->table_size; ++i)
if (cache->hash_table[i]) {
- if (cache->hash_table[i]->variant)
- radv_shader_variant_destroy(cache->device,
- cache->hash_table[i]->variant);
+ for(int j = 0; j < MESA_SHADER_STAGES; ++j) {
+ if (cache->hash_table[i]->variants[j])
+ radv_shader_variant_destroy(cache->device,
+ cache->hash_table[i]->variants[j]);
+ }
vk_free(&cache->alloc, cache->hash_table[i]);
}
pthread_mutex_destroy(&cache->mutex);
@@ -81,32 +89,43 @@
static uint32_t
entry_size(struct cache_entry *entry)
{
- return sizeof(*entry) + entry->code_size;
+ size_t ret = sizeof(*entry); /* fixed header; the packed payload in entry->code is added per stage below */
+ for (int i = 0; i < MESA_SHADER_STAGES; ++i)
+ if (entry->code_sizes[i]) /* stages with a zero code size contribute nothing */
+ ret += sizeof(struct cache_entry_variant_info) + entry->code_sizes[i];
+ return ret; /* NOTE(review): accumulated as size_t but returned as uint32_t */
}
void
-radv_hash_shader(unsigned char *hash, struct radv_shader_module *module,
- const char *entrypoint,
- const VkSpecializationInfo *spec_info,
- const struct radv_pipeline_layout *layout,
- const union ac_shader_variant_key *key,
- uint32_t is_geom_copy_shader)
+radv_hash_shaders(unsigned char *hash, /* out: SHA-1 over keys, layout, every present stage, then flags */
+ const VkPipelineShaderStageCreateInfo **stages, /* MESA_SHADER_STAGES entries indexed by stage; NULL entries are skipped */
+ const struct radv_pipeline_layout *layout, /* optional; only layout->sha1 is hashed */
+ const struct ac_shader_variant_key *keys, /* optional; when set, MESA_SHADER_STAGES keys are hashed as one blob */
+ uint32_t flags) /* hashed last as 4 raw bytes; presumably RADV_HASH_SHADER_* bits - confirm against callers */
{
struct mesa_sha1 ctx;
_mesa_sha1_init(&ctx);
- if (key)
- _mesa_sha1_update(&ctx, key, sizeof(*key));
- _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
- _mesa_sha1_update(&ctx, entrypoint, strlen(entrypoint));
+ if (keys)
+ _mesa_sha1_update(&ctx, keys, sizeof(*keys) * MESA_SHADER_STAGES);
if (layout)
_mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
- if (spec_info) {
- _mesa_sha1_update(&ctx, spec_info->pMapEntries,
- spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]);
- _mesa_sha1_update(&ctx, spec_info->pData, spec_info->dataSize);
+
+ for (int i = 0; i < MESA_SHADER_STAGES; ++i) { /* mix in each present stage, in stage order */
+ if (stages[i]) {
+ RADV_FROM_HANDLE(radv_shader_module, module, stages[i]->module);
+ const VkSpecializationInfo *spec_info = stages[i]->pSpecializationInfo;
+
+ _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
+ _mesa_sha1_update(&ctx, stages[i]->pName, strlen(stages[i]->pName)); /* entry point name, without its terminating NUL */
+ if (spec_info) { /* specialization constants: map entries first, then the raw data */
+ _mesa_sha1_update(&ctx, spec_info->pMapEntries,
+ spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]);
+ _mesa_sha1_update(&ctx, spec_info->pData, spec_info->dataSize);
+ }
+ }
}
- _mesa_sha1_update(&ctx, &is_geom_copy_shader, 4);
+ _mesa_sha1_update(&ctx, &flags, 4);
_mesa_sha1_final(&ctx, hash);
}
@@ -151,48 +170,6 @@
return entry;
}
-struct radv_shader_variant *
-radv_create_shader_variant_from_pipeline_cache(struct radv_device *device,
- struct radv_pipeline_cache *cache,
- const unsigned char *sha1)
-{
- struct cache_entry *entry = NULL;
-
- if (cache)
- entry = radv_pipeline_cache_search(cache, sha1);
-
- if (!entry)
- return NULL;
-
- if (!entry->variant) {
- struct radv_shader_variant *variant;
-
- variant = calloc(1, sizeof(struct radv_shader_variant));
- if (!variant)
- return NULL;
-
- variant->config = entry->config;
- variant->info = entry->variant_info;
- variant->rsrc1 = entry->rsrc1;
- variant->rsrc2 = entry->rsrc2;
- variant->code_size = entry->code_size;
- variant->ref_count = 1;
-
- variant->bo = device->ws->buffer_create(device->ws, entry->code_size, 256,
- RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS);
-
- void *ptr = device->ws->buffer_map(variant->bo);
- memcpy(ptr, entry->code, entry->code_size);
- device->ws->buffer_unmap(variant->bo);
-
- entry->variant = variant;
- }
-
- p_atomic_inc(&entry->variant->ref_count);
- return entry->variant;
-}
-
-
static void
radv_pipeline_cache_set_entry(struct radv_pipeline_cache *cache,
struct cache_entry *entry)
@@ -262,51 +239,188 @@
radv_pipeline_cache_set_entry(cache, entry);
}
-struct radv_shader_variant *
-radv_pipeline_cache_insert_shader(struct radv_pipeline_cache *cache,
- const unsigned char *sha1,
- struct radv_shader_variant *variant,
- const void *code, unsigned code_size)
+bool /* true when an entry for sha1 was found and variants[] was filled in; false on a miss or allocation failure */
+radv_create_shader_variants_from_pipeline_cache(struct radv_device *device,
+ struct radv_pipeline_cache *cache, /* may be NULL: device->mem_cache is used instead */
+ const unsigned char *sha1, /* 20-byte lookup key */
+ struct radv_shader_variant **variants) /* out: copy of entry->variants (MESA_SHADER_STAGES pointers), non-NULL ones with ref_count incremented */
{
+ struct cache_entry *entry;
+
if (!cache)
- return variant;
+ cache = device->mem_cache;
+
+ pthread_mutex_lock(&cache->mutex); /* held until every return path below */
+
+ entry = radv_pipeline_cache_search_unlocked(cache, sha1);
+
+ if (!entry) { /* miss in the in-memory table: fall back to the on-disk cache */
+ if (!device->physical_device->disk_cache ||
+ (device->instance->debug_flags & RADV_DEBUG_NO_CACHE)) { /* no disk cache, or caching disabled by debug flag */
+ pthread_mutex_unlock(&cache->mutex);
+ return false;
+ }
+
+ uint8_t disk_sha1[20];
+ disk_cache_compute_key(device->physical_device->disk_cache,
+ sha1, 20, disk_sha1);
+ entry = (struct cache_entry *)
+ disk_cache_get(device->physical_device->disk_cache,
+ disk_sha1, NULL); /* NOTE(review): blob size is not requested, so entry_size() below relies on the blob's own code_sizes[] */
+ if (!entry) {
+ pthread_mutex_unlock(&cache->mutex);
+ return false;
+ } else {
+ size_t size = entry_size(entry);
+ struct cache_entry *new_entry = vk_alloc(&cache->alloc, size, 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_CACHE); /* re-home the blob in memory from the cache's own allocator */
+ if (!new_entry) {
+ free(entry);
+ pthread_mutex_unlock(&cache->mutex);
+ return false;
+ }
+
+ memcpy(new_entry, entry, entry_size(entry));
+ free(entry); /* the buffer returned by disk_cache_get() is released with free() */
+ entry = new_entry;
+
+ radv_pipeline_cache_add_entry(cache, new_entry);
+ }
+ }
+
+ char *p = entry->code; /* cursor over the packed (variant info, code) records */
+ for(int i = 0; i < MESA_SHADER_STAGES; ++i) {
+ if (!entry->variants[i] && entry->code_sizes[i]) { /* stage has code but no live variant yet: build one */
+ struct radv_shader_variant *variant;
+ struct cache_entry_variant_info info;
+
+ variant = calloc(1, sizeof(struct radv_shader_variant));
+ if (!variant) { /* variants already built for earlier stages stay attached to the entry */
+ pthread_mutex_unlock(&cache->mutex);
+ return false;
+ }
+
+ memcpy(&info, p, sizeof(struct cache_entry_variant_info)); /* copied out through memcpy rather than read via a cast of p */
+ p += sizeof(struct cache_entry_variant_info);
+
+ variant->config = info.config;
+ variant->info = info.variant_info;
+ variant->rsrc1 = info.rsrc1;
+ variant->rsrc2 = info.rsrc2;
+ variant->code_size = entry->code_sizes[i];
+ variant->ref_count = 1; /* initial reference; the pointer is stored in the entry below */
+
+ void *ptr = radv_alloc_shader_memory(device, variant); /* NOTE(review): result is used without a NULL check */
+ memcpy(ptr, p, entry->code_sizes[i]);
+ p += entry->code_sizes[i];
+
+ entry->variants[i] = variant;
+ } else if (entry->code_sizes[i]) { /* variant already exists: only step over this stage's record */
+ p += sizeof(struct cache_entry_variant_info) + entry->code_sizes[i];
+ }
+
+ }
+
+ for (int i = 0; i < MESA_SHADER_STAGES; ++i) /* one extra reference per variant for the copy handed to the caller */
+ if (entry->variants[i])
+ p_atomic_inc(&entry->variants[i]->ref_count);
+
+ memcpy(variants, entry->variants, sizeof(entry->variants));
+ pthread_mutex_unlock(&cache->mutex);
+ return true;
+}
+
+void /* inserts the variants under sha1, or swaps in the variants of an already-present entry */
+radv_pipeline_cache_insert_shaders(struct radv_device *device,
+ struct radv_pipeline_cache *cache, /* may be NULL: device->mem_cache is used instead */
+ const unsigned char *sha1, /* 20-byte key */
+ struct radv_shader_variant **variants, /* in/out, MESA_SHADER_STAGES entries; slots may be replaced by the cached variant */
+ const void *const *codes, /* per-stage code; read only for stages whose variants[i] is non-NULL */
+ const unsigned *code_sizes) /* per-stage code size in bytes, same indexing as codes */
+{
+ if (!cache)
+ cache = device->mem_cache;
pthread_mutex_lock(&cache->mutex);
struct cache_entry *entry = radv_pipeline_cache_search_unlocked(cache, sha1);
if (entry) {
- if (entry->variant) {
- radv_shader_variant_destroy(cache->device, variant);
- variant = entry->variant;
- } else {
- entry->variant = variant;
+ for (int i = 0; i < MESA_SHADER_STAGES; ++i) { /* entry already cached: reconcile stage by stage */
+ if (entry->variants[i]) { /* cached variant wins; the caller's copy is destroyed and replaced */
+ radv_shader_variant_destroy(cache->device, variants[i]);
+ variants[i] = entry->variants[i];
+ } else { /* entry has no live variant for this stage: adopt the caller's (may be NULL) */
+ entry->variants[i] = variants[i];
+ }
+ if (variants[i])
+ p_atomic_inc(&variants[i]->ref_count);
}
- p_atomic_inc(&variant->ref_count);
pthread_mutex_unlock(&cache->mutex);
- return variant;
+ return;
}
+ size_t size = sizeof(*entry); /* header plus one (variant info, code) record per non-NULL variant */
+ for (int i = 0; i < MESA_SHADER_STAGES; ++i)
+ if (variants[i])
+ size += sizeof(struct cache_entry_variant_info) + code_sizes[i];
+
- entry = vk_alloc(&cache->alloc, sizeof(*entry) + code_size, 8,
+ entry = vk_alloc(&cache->alloc, size, 8,
VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
if (!entry) {
pthread_mutex_unlock(&cache->mutex);
- return variant;
+ return;
}
+ memset(entry, 0, sizeof(*entry)); /* zeroes code_sizes[] and variants[]; only the header, not the payload */
memcpy(entry->sha1, sha1, 20);
- memcpy(entry->code, code, code_size);
- entry->config = variant->config;
- entry->variant_info = variant->info;
- entry->rsrc1 = variant->rsrc1;
- entry->rsrc2 = variant->rsrc2;
- entry->code_size = code_size;
- entry->variant = variant;
- p_atomic_inc(&variant->ref_count);
+
+ char* p = entry->code; /* write cursor into the packed payload */
+ struct cache_entry_variant_info info;
+
+ for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
+ if (!variants[i])
+ continue;
+
+ entry->code_sizes[i] = code_sizes[i]; /* NOTE(review): a record is written even if this is 0, but entry_size() and the reader skip stages whose code size is 0 */
+
+ info.config = variants[i]->config;
+ info.variant_info = variants[i]->info;
+ info.rsrc1 = variants[i]->rsrc1;
+ info.rsrc2 = variants[i]->rsrc2;
+ memcpy(p, &info, sizeof(struct cache_entry_variant_info));
+ p += sizeof(struct cache_entry_variant_info);
+
+ memcpy(p, codes[i], code_sizes[i]);
+ p += code_sizes[i];
+ }
+
+ /* Always add cache items to disk. This will allow collection of
+ * compiled shaders by third parties such as Steam, even if the app
+ * implements its own pipeline cache.
+ */
+ if (device->physical_device->disk_cache) {
+ uint8_t disk_sha1[20];
+ disk_cache_compute_key(device->physical_device->disk_cache, sha1, 20,
+ disk_sha1);
+ disk_cache_put(device->physical_device->disk_cache,
+ disk_sha1, entry, entry_size(entry), NULL); /* entry->variants[] is still all NULL at this point */
+ }
+
+ /* We delay setting the variant so we have reproducible disk cache
+ * items.
+ */
+ for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
+ if (!variants[i])
+ continue;
+
+ entry->variants[i] = variants[i];
+ p_atomic_inc(&variants[i]->ref_count); /* reference taken for the pointer now stored in the entry */
+ }
radv_pipeline_cache_add_entry(cache, entry);
cache->modified = true;
pthread_mutex_unlock(&cache->mutex);
- return variant;
+ return;
}
struct cache_header {
@@ -331,11 +445,11 @@
return;
if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
return;
- if (header.vendor_id != 0x1002)
+ if (header.vendor_id != ATI_VENDOR_ID)
return;
if (header.device_id != device->physical_device->rad_info.pci_id)
return;
- if (memcmp(header.uuid, device->physical_device->uuid, VK_UUID_SIZE) != 0)
+ if (memcmp(header.uuid, device->physical_device->cache_uuid, VK_UUID_SIZE) != 0)
return;
char *end = (void *) data + size;
@@ -344,17 +458,19 @@
while (end - p >= sizeof(struct cache_entry)) {
struct cache_entry *entry = (struct cache_entry*)p;
struct cache_entry *dest_entry;
- if(end - p < sizeof(*entry) + entry->code_size)
+ size_t size = entry_size(entry);
+ if(end - p < size)
break;
- dest_entry = vk_alloc(&cache->alloc, sizeof(*entry) + entry->code_size,
+ dest_entry = vk_alloc(&cache->alloc, size,
8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
if (dest_entry) {
- memcpy(dest_entry, entry, sizeof(*entry) + entry->code_size);
- dest_entry->variant = NULL;
+ memcpy(dest_entry, entry, size);
+ for (int i = 0; i < MESA_SHADER_STAGES; ++i)
+ dest_entry->variants[i] = NULL;
radv_pipeline_cache_add_entry(cache, dest_entry);
}
- p += sizeof (*entry) + entry->code_size;
+ p += size;
}
}
@@ -419,12 +535,17 @@
RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
struct cache_header *header;
VkResult result = VK_SUCCESS;
+
+ pthread_mutex_lock(&cache->mutex);
+
const size_t size = sizeof(*header) + cache->total_size;
if (pData == NULL) {
+ pthread_mutex_unlock(&cache->mutex);
*pDataSize = size;
return VK_SUCCESS;
}
if (*pDataSize < sizeof(*header)) {
+ pthread_mutex_unlock(&cache->mutex);
*pDataSize = 0;
return VK_INCOMPLETE;
}
@@ -432,9 +553,9 @@
header = p;
header->header_size = sizeof(*header);
header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
- header->vendor_id = 0x1002;
+ header->vendor_id = ATI_VENDOR_ID;
header->device_id = device->physical_device->rad_info.pci_id;
- memcpy(header->uuid, device->physical_device->uuid, VK_UUID_SIZE);
+ memcpy(header->uuid, device->physical_device->cache_uuid, VK_UUID_SIZE);
p += header->header_size;
struct cache_entry *entry;
@@ -449,11 +570,13 @@
}
memcpy(p, entry, size);
- ((struct cache_entry*)p)->variant = NULL;
+ for(int j = 0; j < MESA_SHADER_STAGES; ++j)
+ ((struct cache_entry*)p)->variants[j] = NULL;
p += size;
}
*pDataSize = p - pData;
+ pthread_mutex_unlock(&cache->mutex);
return result;
}
diff -Nru mesa-17.2.4/src/amd/vulkan/radv_private.h mesa-17.3.3/src/amd/vulkan/radv_private.h
--- mesa-17.2.4/src/amd/vulkan/radv_private.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/vulkan/radv_private.h 2018-01-18 21:30:28.000000000 +0000
@@ -55,7 +55,6 @@
#include "ac_nir_to_llvm.h"
#include "ac_gpu_info.h"
#include "ac_surface.h"
-#include "radv_debug.h"
#include "radv_descriptor_set.h"
#include
@@ -75,6 +74,8 @@
#include "wsi_common.h"
+#define ATI_VENDOR_ID 0x1002
+
#define MAX_VBS 32
#define MAX_VERTEX_ATTRIBS 32
#define MAX_RTS 8
@@ -86,6 +87,7 @@
#define MAX_SAMPLES_LOG2 4
#define NUM_META_FS_KEYS 13
#define RADV_MAX_DRM_DEVICES 8
+#define MAX_VIEWS 8
#define NUM_DEPTH_CLEAR_PIPELINES 3
@@ -104,6 +106,11 @@
RADV_MEM_TYPE_COUNT
};
+enum radv_mem_flags_bits {
+ /* enable implicit synchronization when accessing the underlying bo */
+ RADV_MEM_IMPLICIT_SYNC = 1 << 0,
+};
+
#define radv_printflike(a, b) __attribute__((__format__(__printf__, a, b)))
static inline uint32_t
@@ -195,8 +202,6 @@
memcpy((dest), (src), (count) * sizeof(*(src))); \
})
-#define zero(x) (memset(&(x), 0, sizeof(x)))
-
/* Whenever we generate an error, pass it through this function. Useful for
* debugging, where we can break on it. Only call at error site, not when
* propagating errors. Might be useful to plug in a stack trace here.
@@ -253,11 +258,6 @@
void *radv_lookup_entrypoint(const char *name);
-struct radv_extensions {
- VkExtensionProperties *ext_array;
- uint32_t num_ext;
-};
-
struct radv_physical_device {
VK_LOADER_DATA _loader_data;
@@ -266,16 +266,25 @@
struct radeon_winsys *ws;
struct radeon_info rad_info;
char path[20];
- const char * name;
- uint8_t uuid[VK_UUID_SIZE];
+ char name[VK_MAX_PHYSICAL_DEVICE_NAME_SIZE];
+ uint8_t driver_uuid[VK_UUID_SIZE];
uint8_t device_uuid[VK_UUID_SIZE];
+ uint8_t cache_uuid[VK_UUID_SIZE];
int local_fd;
struct wsi_device wsi_device;
- struct radv_extensions extensions;
bool has_rbplus; /* if RB+ register exist */
bool rbplus_allowed; /* if RB+ is allowed */
+ bool has_clear_state;
+
+ /* This is the driver's on-disk cache, used as a fallback as opposed to
+ * the pipeline cache defined by apps.
+ */
+ struct disk_cache * disk_cache;
+
+ VkPhysicalDeviceMemoryProperties memory_properties;
+ enum radv_mem_type mem_type_indices[RADV_MEM_TYPE_COUNT];
};
struct radv_instance {
@@ -294,6 +303,11 @@
VkResult radv_init_wsi(struct radv_physical_device *physical_device);
void radv_finish_wsi(struct radv_physical_device *physical_device);
+bool radv_instance_extension_supported(const char *name);
+uint32_t radv_physical_device_api_version(struct radv_physical_device *dev);
+bool radv_physical_device_extension_supported(struct radv_physical_device *dev,
+ const char *name);
+
struct cache_entry;
struct radv_pipeline_cache {
@@ -318,19 +332,37 @@
radv_pipeline_cache_load(struct radv_pipeline_cache *cache,
const void *data, size_t size);
-struct radv_shader_variant *
-radv_create_shader_variant_from_pipeline_cache(struct radv_device *device,
- struct radv_pipeline_cache *cache,
- const unsigned char *sha1);
-
-struct radv_shader_variant *
-radv_pipeline_cache_insert_shader(struct radv_pipeline_cache *cache,
- const unsigned char *sha1,
- struct radv_shader_variant *variant,
- const void *code, unsigned code_size);
+struct radv_shader_variant;
+
+bool
+radv_create_shader_variants_from_pipeline_cache(struct radv_device *device,
+ struct radv_pipeline_cache *cache,
+ const unsigned char *sha1,
+ struct radv_shader_variant **variants);
+
+void
+radv_pipeline_cache_insert_shaders(struct radv_device *device,
+ struct radv_pipeline_cache *cache,
+ const unsigned char *sha1,
+ struct radv_shader_variant **variants,
+ const void *const *codes,
+ const unsigned *code_sizes);
+
+enum radv_blit_ds_layout { /* selects one of the per-layout depth/stencil render passes (arrays sized RADV_BLIT_DS_LAYOUT_COUNT) */
+ RADV_BLIT_DS_LAYOUT_TILE_ENABLE, /* chosen for every image layout other than VK_IMAGE_LAYOUT_GENERAL */
+ RADV_BLIT_DS_LAYOUT_TILE_DISABLE, /* chosen for VK_IMAGE_LAYOUT_GENERAL */
+ RADV_BLIT_DS_LAYOUT_COUNT, /* number of values above; used as an array dimension */
+};
+
+static inline enum radv_blit_ds_layout radv_meta_blit_ds_to_type(VkImageLayout layout) /* classify an image layout for the depth/stencil blit paths */
+{
+ return (layout == VK_IMAGE_LAYOUT_GENERAL) ? RADV_BLIT_DS_LAYOUT_TILE_DISABLE : RADV_BLIT_DS_LAYOUT_TILE_ENABLE; /* only GENERAL maps to TILE_DISABLE */
+}
-void radv_shader_variant_destroy(struct radv_device *device,
- struct radv_shader_variant *variant);
+static inline VkImageLayout radv_meta_blit_ds_to_layout(enum radv_blit_ds_layout ds_layout) /* pick the VkImageLayout that represents a blit DS layout class */
+{
+ return ds_layout == RADV_BLIT_DS_LAYOUT_TILE_ENABLE ? VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL : VK_IMAGE_LAYOUT_GENERAL; /* any value other than TILE_ENABLE yields GENERAL */
+}
struct radv_meta_state {
VkAllocationCallbacks alloc;
@@ -342,12 +374,12 @@
*/
struct {
VkRenderPass render_pass[NUM_META_FS_KEYS];
- struct radv_pipeline *color_pipelines[NUM_META_FS_KEYS];
+ VkPipeline color_pipelines[NUM_META_FS_KEYS];
VkRenderPass depthstencil_rp;
- struct radv_pipeline *depth_only_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
- struct radv_pipeline *stencil_only_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
- struct radv_pipeline *depthstencil_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
+ VkPipeline depth_only_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
+ VkPipeline stencil_only_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
+ VkPipeline depthstencil_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
} clear[1 + MAX_SAMPLES_LOG2];
VkPipelineLayout clear_color_p_layout;
@@ -364,12 +396,12 @@
/** Pipeline that blits from a 3D image. */
VkPipeline pipeline_3d_src[NUM_META_FS_KEYS];
- VkRenderPass depth_only_rp;
+ VkRenderPass depth_only_rp[RADV_BLIT_DS_LAYOUT_COUNT];
VkPipeline depth_only_1d_pipeline;
VkPipeline depth_only_2d_pipeline;
VkPipeline depth_only_3d_pipeline;
- VkRenderPass stencil_only_rp;
+ VkRenderPass stencil_only_rp[RADV_BLIT_DS_LAYOUT_COUNT];
VkPipeline stencil_only_1d_pipeline;
VkPipeline stencil_only_2d_pipeline;
VkPipeline stencil_only_3d_pipeline;
@@ -380,37 +412,40 @@
struct {
VkRenderPass render_passes[NUM_META_FS_KEYS];
- VkPipelineLayout p_layouts[2];
- VkDescriptorSetLayout ds_layouts[2];
- VkPipeline pipelines[2][NUM_META_FS_KEYS];
+ VkPipelineLayout p_layouts[3];
+ VkDescriptorSetLayout ds_layouts[3];
+ VkPipeline pipelines[3][NUM_META_FS_KEYS];
- VkRenderPass depth_only_rp;
- VkPipeline depth_only_pipeline[2];
+ VkRenderPass depth_only_rp[RADV_BLIT_DS_LAYOUT_COUNT];
+ VkPipeline depth_only_pipeline[3];
- VkRenderPass stencil_only_rp;
- VkPipeline stencil_only_pipeline[2];
+ VkRenderPass stencil_only_rp[RADV_BLIT_DS_LAYOUT_COUNT];
+ VkPipeline stencil_only_pipeline[3];
} blit2d;
struct {
VkPipelineLayout img_p_layout;
VkDescriptorSetLayout img_ds_layout;
VkPipeline pipeline;
+ VkPipeline pipeline_3d;
} itob;
struct {
- VkRenderPass render_pass;
VkPipelineLayout img_p_layout;
VkDescriptorSetLayout img_ds_layout;
VkPipeline pipeline;
+ VkPipeline pipeline_3d;
} btoi;
struct {
VkPipelineLayout img_p_layout;
VkDescriptorSetLayout img_ds_layout;
VkPipeline pipeline;
+ VkPipeline pipeline_3d;
} itoi;
struct {
VkPipelineLayout img_p_layout;
VkDescriptorSetLayout img_ds_layout;
VkPipeline pipeline;
+ VkPipeline pipeline_3d;
} cleari;
struct {
@@ -480,6 +515,7 @@
VK_LOADER_DATA _loader_data;
struct radv_device * device;
struct radeon_winsys_ctx *hw_ctx;
+ enum radeon_ctx_priority priority;
int queue_family_index;
int queue_idx;
@@ -498,6 +534,7 @@
struct radeon_winsys_bo *tess_factor_ring_bo;
struct radeon_winsys_bo *tess_offchip_ring_bo;
struct radeon_winsys_cs *initial_preamble_cs;
+ struct radeon_winsys_cs *initial_full_flush_preamble_cs;
struct radeon_winsys_cs *continue_preamble_cs;
};
@@ -514,9 +551,6 @@
struct radv_queue *queues[RADV_MAX_QUEUE_FAMILIES];
int queue_count[RADV_MAX_QUEUE_FAMILIES];
struct radeon_winsys_cs *empty_cs[RADV_MAX_QUEUE_FAMILIES];
- struct radeon_winsys_cs *flush_cs[RADV_MAX_QUEUE_FAMILIES];
- struct radeon_winsys_cs *flush_shader_cs[RADV_MAX_QUEUE_FAMILIES];
- uint64_t debug_flags;
bool llvm_supports_spill;
bool has_distributed_tess;
@@ -546,7 +580,17 @@
/* Backup in-memory cache to be used if the app doesn't provide one */
struct radv_pipeline_cache * mem_cache;
+ /*
+ * use different counters so MSAA MRTs get consecutive surface indices,
+ * even if MASK is allocated in between.
+ */
uint32_t image_mrt_offset_counter;
+ uint32_t fmask_mrt_offset_counter;
+ struct list_head shader_slabs;
+ mtx_t shader_slab_mutex;
+
+ /* For detecting VM faults reported by dmesg. */
+ uint64_t dmesg_timestamp;
};
struct radv_device_memory {
@@ -654,7 +698,7 @@
RADV_CMD_DIRTY_DYNAMIC_ALL = (1 << 9) - 1,
RADV_CMD_DIRTY_PIPELINE = 1 << 9,
RADV_CMD_DIRTY_INDEX_BUFFER = 1 << 10,
- RADV_CMD_DIRTY_RENDER_TARGETS = 1 << 11,
+ RADV_CMD_DIRTY_FRAMEBUFFER = 1 << 11,
};
typedef uint32_t radv_cmd_dirty_mask_t;
@@ -690,16 +734,20 @@
VkDeviceSize offset;
};
+struct radv_viewport_state {
+ uint32_t count;
+ VkViewport viewports[MAX_VIEWPORTS];
+};
+
+struct radv_scissor_state {
+ uint32_t count;
+ VkRect2D scissors[MAX_SCISSORS];
+};
+
struct radv_dynamic_state {
- struct {
- uint32_t count;
- VkViewport viewports[MAX_VIEWPORTS];
- } viewport;
+ struct radv_viewport_state viewport;
- struct {
- uint32_t count;
- VkRect2D scissors[MAX_SCISSORS];
- } scissor;
+ struct radv_scissor_state scissor;
float line_width;
@@ -734,9 +782,12 @@
extern const struct radv_dynamic_state default_dynamic_state;
-void radv_dynamic_state_copy(struct radv_dynamic_state *dest,
- const struct radv_dynamic_state *src,
- uint32_t copy_mask);
+const char *
+radv_get_debug_option_name(int id);
+
+const char *
+radv_get_perftest_option_name(int id);
+
/**
* Attachment state when recording a renderpass instance.
*
@@ -744,14 +795,16 @@
*/
struct radv_attachment_state {
VkImageAspectFlags pending_clear_aspects;
+ uint32_t cleared_views;
VkClearValue clear_value;
VkImageLayout current_layout;
};
struct radv_cmd_state {
- uint32_t vb_dirty;
+ bool vb_dirty;
radv_cmd_dirty_mask_t dirty;
bool push_descriptors_dirty;
+ bool predicating;
struct radv_pipeline * pipeline;
struct radv_pipeline * emitted_pipeline;
@@ -766,8 +819,8 @@
struct radv_attachment_state * attachments;
VkRect2D render_area;
uint32_t index_type;
- uint64_t index_va;
uint32_t max_index_count;
+ uint64_t index_va;
int32_t last_primitive_reset_en;
uint32_t last_primitive_reset_index;
enum radv_cmd_flush_bits flush_bits;
@@ -776,7 +829,6 @@
uint32_t descriptors_dirty;
uint32_t trace_id;
uint32_t last_ia_multi_vgt_param;
- bool predicating;
};
struct radv_cmd_pool {
@@ -823,7 +875,7 @@
bool tess_rings_needed;
bool sample_positions_needed;
- bool record_fail;
+ VkResult record_result;
int ring_offsets_idx; /* just used for verification */
uint32_t gfx9_fence_offset;
@@ -922,6 +974,12 @@
bool radv_get_memory_fd(struct radv_device *device,
struct radv_device_memory *memory,
int *pFD);
+VkResult radv_alloc_memory(VkDevice _device,
+ const VkMemoryAllocateInfo* pAllocateInfo,
+ const VkAllocationCallbacks* pAllocator,
+ enum radv_mem_flags_bits flags,
+ VkDeviceMemory* pMem);
+
/*
* Takes x,y,z as exact numbers of invocations, instead of blocks.
*
@@ -939,24 +997,18 @@
uint64_t *map;
};
-struct nir_shader;
-
-struct radv_shader_module {
- struct nir_shader * nir;
- unsigned char sha1[20];
- uint32_t size;
- char data[0];
-};
-
-union ac_shader_variant_key;
+struct radv_shader_module;
+struct ac_shader_variant_key;
+#define RADV_HASH_SHADER_IS_GEOM_COPY_SHADER (1 << 0)
+#define RADV_HASH_SHADER_SISCHED (1 << 1)
+#define RADV_HASH_SHADER_UNSAFE_MATH (1 << 2)
void
-radv_hash_shader(unsigned char *hash, struct radv_shader_module *module,
- const char *entrypoint,
- const VkSpecializationInfo *spec_info,
- const struct radv_pipeline_layout *layout,
- const union ac_shader_variant_key *key,
- uint32_t is_geom_copy_shader);
+radv_hash_shaders(unsigned char *hash,
+ const VkPipelineShaderStageCreateInfo **stages,
+ const struct radv_pipeline_layout *layout,
+ const struct ac_shader_variant_key *keys,
+ uint32_t flags);
static inline gl_shader_stage
vk_to_mesa_shader_stage(VkShaderStageFlagBits vk_stage)
@@ -979,17 +1031,6 @@
stage = __builtin_ffs(__tmp) - 1, __tmp; \
__tmp &= ~(1 << (stage)))
-struct radv_shader_variant {
- uint32_t ref_count;
-
- struct radeon_winsys_bo *bo;
- struct ac_shader_config config;
- struct ac_shader_variant_info info;
- unsigned rsrc1;
- unsigned rsrc2;
- uint32_t code_size;
-};
-
struct radv_depth_stencil_state {
uint32_t db_depth_control;
uint32_t db_stencil_control;
@@ -1013,9 +1054,6 @@
struct radv_raster_state {
uint32_t pa_cl_clip_cntl;
uint32_t spi_interp_control;
- uint32_t pa_su_point_size;
- uint32_t pa_su_point_minmax;
- uint32_t pa_su_line_cntl;
uint32_t pa_su_vtx_cntl;
uint32_t pa_su_sc_mode_cntl;
};
@@ -1047,6 +1085,23 @@
uint32_t tf_param;
};
+struct radv_gs_state {
+ uint32_t vgt_gs_onchip_cntl;
+ uint32_t vgt_gs_max_prims_per_subgroup;
+ uint32_t vgt_esgs_ring_itemsize;
+ uint32_t lds_size;
+};
+
+struct radv_vertex_elements_info {
+ uint32_t rsrc_word3[MAX_VERTEX_ATTRIBS];
+ uint32_t format_size[MAX_VERTEX_ATTRIBS];
+ uint32_t binding[MAX_VERTEX_ATTRIBS];
+ uint32_t offset[MAX_VERTEX_ATTRIBS];
+ uint32_t count;
+};
+
+#define SI_GS_PER_ES 128
+
struct radv_pipeline {
struct radv_device * device;
uint32_t dynamic_state_mask;
@@ -1060,11 +1115,8 @@
struct radv_shader_variant *gs_copy_shader;
VkShaderStageFlags active_stages;
- uint32_t va_rsrc_word3[MAX_VERTEX_ATTRIBS];
- uint32_t va_format_size[MAX_VERTEX_ATTRIBS];
- uint32_t va_binding[MAX_VERTEX_ATTRIBS];
- uint32_t va_offset[MAX_VERTEX_ATTRIBS];
- uint32_t num_vertex_attribs;
+ struct radv_vertex_elements_info vertex_elements;
+
uint32_t binding_stride[MAX_VBS];
union {
@@ -1074,6 +1126,7 @@
struct radv_raster_state raster;
struct radv_multisample_state ms;
struct radv_tessellation_state tess;
+ struct radv_gs_state gs;
uint32_t db_shader_control;
uint32_t shader_z_format;
unsigned prim;
@@ -1081,6 +1134,8 @@
uint32_t vgt_gs_mode;
bool vgt_primitiveid_en;
bool prim_restart_enable;
+ bool partial_es_wave;
+ uint8_t primgroup_size;
unsigned esgs_ring_size;
unsigned gsvs_ring_size;
uint32_t ps_input_cntl[32];
@@ -1088,7 +1143,12 @@
uint32_t pa_cl_vs_out_cntl;
uint32_t vgt_shader_stages_en;
uint32_t vtx_base_sgpr;
+ uint32_t base_ia_multi_vgt_param;
+ bool wd_switch_on_eop;
+ bool ia_switch_on_eoi;
+ bool partial_vs_wave;
uint8_t vtx_emit_num;
+ uint32_t vtx_reuse_depth;
struct radv_prim_vertex_count prim_vertex_count;
bool can_use_guardband;
} graphics;
@@ -1105,14 +1165,15 @@
static inline bool radv_pipeline_has_tess(struct radv_pipeline *pipeline)
{
- return pipeline->shaders[MESA_SHADER_TESS_EVAL] ? true : false;
+ return pipeline->shaders[MESA_SHADER_TESS_CTRL] ? true : false;
}
-uint32_t radv_shader_stage_to_user_data_0(gl_shader_stage stage, bool has_gs, bool has_tess);
struct ac_userdata_info *radv_lookup_user_sgpr(struct radv_pipeline *pipeline,
gl_shader_stage stage,
int idx);
+struct radv_shader_variant *radv_get_vertex_shader(struct radv_pipeline *pipeline);
+
struct radv_graphics_pipeline_create_info {
bool use_rectlist;
bool db_depth_clear;
@@ -1126,13 +1187,6 @@
};
VkResult
-radv_pipeline_init(struct radv_pipeline *pipeline, struct radv_device *device,
- struct radv_pipeline_cache *cache,
- const VkGraphicsPipelineCreateInfo *pCreateInfo,
- const struct radv_graphics_pipeline_create_info *extra,
- const VkAllocationCallbacks *alloc);
-
-VkResult
radv_graphics_pipeline_create(VkDevice device,
VkPipelineCache cache,
const VkGraphicsPipelineCreateInfo *pCreateInfo,
@@ -1162,6 +1216,8 @@
uint32_t clear_vals[2],
VkClearColorValue *value);
bool radv_is_colorbuffer_format_supported(VkFormat format, bool *blendable);
+bool radv_dcc_formats_compatible(VkFormat format1,
+ VkFormat format2);
struct radv_fmask_info {
uint64_t offset;
@@ -1171,6 +1227,7 @@
unsigned bank_height;
unsigned slice_tile_max;
unsigned tile_mode_index;
+ unsigned tile_swizzle;
};
struct radv_cmask_info {
@@ -1181,15 +1238,6 @@
unsigned base_address_reg;
};
-struct r600_htile_info {
- uint64_t offset;
- uint64_t size;
- unsigned pitch;
- unsigned height;
- unsigned xalign;
- unsigned yalign;
-};
-
struct radv_image {
VkImageType type;
/* The original VkFormat provided by the client. This may not match any
@@ -1197,17 +1245,16 @@
*/
VkFormat vk_format;
VkImageAspectFlags aspects;
- struct ac_surf_info info;
VkImageUsageFlags usage; /**< Superset of VkImageCreateInfo::usage. */
+ struct ac_surf_info info;
VkImageTiling tiling; /** VkImageCreateInfo::tiling */
VkImageCreateFlags flags; /** VkImageCreateInfo::flags */
VkDeviceSize size;
uint32_t alignment;
- bool exclusive;
unsigned queue_family_mask;
-
+ bool exclusive;
bool shareable;
/* Set when bound */
@@ -1215,6 +1262,7 @@
VkDeviceSize offset;
uint64_t dcc_offset;
uint64_t htile_offset;
+ bool tc_compatible_htile;
struct radeon_surf surface;
struct radv_fmask_info fmask;
@@ -1243,6 +1291,17 @@
VkImageLayout layout,
unsigned queue_mask);
+static inline bool
+radv_vi_dcc_enabled(const struct radv_image *image, unsigned level)
+{
+ return image->surface.dcc_size && level < image->surface.num_dcc_levels; /* true only when the surface has a non-zero dcc_size and `level` is below num_dcc_levels */
+}
+
+static inline bool
+radv_htile_enabled(const struct radv_image *image, unsigned level)
+{
+ return image->surface.htile_size && level == 0; /* true only for mip level 0 of a surface with a non-zero htile_size */
+}
unsigned radv_image_queue_family_mask(const struct radv_image *image, uint32_t family, uint32_t queue_family);
@@ -1313,8 +1372,7 @@
};
void radv_buffer_view_init(struct radv_buffer_view *view,
struct radv_device *device,
- const VkBufferViewCreateInfo* pCreateInfo,
- struct radv_cmd_buffer *cmd_buffer);
+ const VkBufferViewCreateInfo* pCreateInfo);
static inline struct VkExtent3D
radv_sanitize_image_extent(const VkImageType imageType,
@@ -1437,6 +1495,8 @@
bool has_resolve;
struct radv_subpass_barrier start_barrier;
+
+ uint32_t view_mask;
};
struct radv_render_pass_attachment {
@@ -1446,6 +1506,7 @@
VkAttachmentLoadOp stencil_load_op;
VkImageLayout initial_layout;
VkImageLayout final_layout;
+ uint32_t view_mask;
};
struct radv_render_pass {
diff -Nru mesa-17.2.4/src/amd/vulkan/radv_query.c mesa-17.3.3/src/amd/vulkan/radv_query.c
--- mesa-17.2.4/src/amd/vulkan/radv_query.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/vulkan/radv_query.c 2018-01-18 21:30:28.000000000 +0000
@@ -519,8 +519,6 @@
struct radv_shader_module occlusion_cs = { .nir = NULL };
struct radv_shader_module pipeline_statistics_cs = { .nir = NULL };
- zero(device->meta_state.query);
-
occlusion_cs.nir = build_occlusion_query_shader(device);
pipeline_statistics_cs.nir = build_pipeline_statistics_query_shader(device);
@@ -651,9 +649,12 @@
uint32_t pipeline_stats_mask, uint32_t avail_offset)
{
struct radv_device *device = cmd_buffer->device;
- struct radv_meta_saved_compute_state saved_state;
+ struct radv_meta_saved_state saved_state;
- radv_meta_save_compute(&saved_state, cmd_buffer, 16);
+ radv_meta_save(&saved_state, cmd_buffer,
+ RADV_META_SAVE_COMPUTE_PIPELINE |
+ RADV_META_SAVE_CONSTANTS |
+ RADV_META_SAVE_DESCRIPTORS);
struct radv_buffer dst_buffer = {
.bo = dst_bo,
@@ -737,7 +738,7 @@
RADV_CMD_FLAG_INV_VMEM_L1 |
RADV_CMD_FLAG_CS_PARTIAL_FLUSH;
- radv_meta_restore_compute(&saved_state, cmd_buffer, 16);
+ radv_meta_restore(&saved_state, cmd_buffer);
}
VkResult radv_CreateQueryPool(
@@ -952,8 +953,8 @@
RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
struct radeon_winsys_cs *cs = cmd_buffer->cs;
unsigned elem_size = (flags & VK_QUERY_RESULT_64_BIT) ? 8 : 4;
- uint64_t va = cmd_buffer->device->ws->buffer_get_va(pool->bo);
- uint64_t dest_va = cmd_buffer->device->ws->buffer_get_va(dst_buffer->bo);
+ uint64_t va = radv_buffer_get_va(pool->bo);
+ uint64_t dest_va = radv_buffer_get_va(dst_buffer->bo);
dest_va += dst_buffer->offset + dstOffset;
cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, pool->bo, 8);
@@ -1057,7 +1058,7 @@
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
- uint64_t va = cmd_buffer->device->ws->buffer_get_va(pool->bo);
+ uint64_t va = radv_buffer_get_va(pool->bo);
cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, pool->bo, 8);
@@ -1078,7 +1079,7 @@
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
struct radeon_winsys_cs *cs = cmd_buffer->cs;
- uint64_t va = cmd_buffer->device->ws->buffer_get_va(pool->bo);
+ uint64_t va = radv_buffer_get_va(pool->bo);
va += pool->stride * query;
cmd_buffer->device->ws->cs_add_buffer(cs, pool->bo, 8);
@@ -1118,7 +1119,7 @@
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
struct radeon_winsys_cs *cs = cmd_buffer->cs;
- uint64_t va = cmd_buffer->device->ws->buffer_get_va(pool->bo);
+ uint64_t va = radv_buffer_get_va(pool->bo);
uint64_t avail_va = va + pool->availability_offset + 4 * query;
va += pool->stride * query;
@@ -1151,8 +1152,8 @@
si_cs_emit_write_event_eop(cs,
false,
cmd_buffer->device->physical_device->rad_info.chip_class,
- false,
- EVENT_TYPE_BOTTOM_OF_PIPE_TS, 0,
+ radv_cmd_buffer_uses_mec(cmd_buffer),
+ V_028A90_BOTTOM_OF_PIPE_TS, 0,
1, avail_va, 0, 1);
break;
default:
@@ -1170,7 +1171,7 @@
RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
bool mec = radv_cmd_buffer_uses_mec(cmd_buffer);
struct radeon_winsys_cs *cs = cmd_buffer->cs;
- uint64_t va = cmd_buffer->device->ws->buffer_get_va(pool->bo);
+ uint64_t va = radv_buffer_get_va(pool->bo);
uint64_t avail_va = va + pool->availability_offset + 4 * query;
uint64_t query_va = va + pool->stride * query;
diff -Nru mesa-17.2.4/src/amd/vulkan/radv_radeon_winsys.h mesa-17.3.3/src/amd/vulkan/radv_radeon_winsys.h
--- mesa-17.2.4/src/amd/vulkan/radv_radeon_winsys.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/vulkan/radv_radeon_winsys.h 2018-01-18 21:30:28.000000000 +0000
@@ -53,6 +53,7 @@
RADEON_FLAG_NO_CPU_ACCESS = (1 << 2),
RADEON_FLAG_VIRTUAL = (1 << 3),
RADEON_FLAG_VA_UNCACHED = (1 << 4),
+ RADEON_FLAG_IMPLICIT_SYNC = (1 << 5),
};
enum radeon_bo_usage { /* bitfield */
@@ -70,6 +71,14 @@
RING_LAST,
};
+enum radeon_ctx_priority {
+ RADEON_CTX_PRIORITY_INVALID = -1,
+ RADEON_CTX_PRIORITY_LOW = 0,
+ RADEON_CTX_PRIORITY_MEDIUM,
+ RADEON_CTX_PRIORITY_HIGH,
+ RADEON_CTX_PRIORITY_REALTIME,
+};
+
struct radeon_winsys_cs {
unsigned cdw; /* Number of used dwords. */
unsigned max_dw; /* Maximum number of dwords. */
@@ -133,9 +142,11 @@
};
uint32_t syncobj_handle;
-struct radeon_winsys_bo;
struct radeon_winsys_fence;
+struct radeon_winsys_bo {
+ uint64_t va;
+};
struct radv_winsys_sem_counts {
uint32_t syncobj_count;
uint32_t sem_count;
@@ -156,6 +167,11 @@
void (*query_info)(struct radeon_winsys *ws,
struct radeon_info *info);
+ bool (*read_registers)(struct radeon_winsys *ws, unsigned reg_offset,
+ unsigned num_registers, uint32_t *out);
+
+ const char *(*get_chip_name)(struct radeon_winsys *ws);
+
struct radeon_winsys_bo *(*buffer_create)(struct radeon_winsys *ws,
uint64_t size,
unsigned alignment,
@@ -175,15 +191,14 @@
void (*buffer_unmap)(struct radeon_winsys_bo *bo);
- uint64_t (*buffer_get_va)(struct radeon_winsys_bo *bo);
-
void (*buffer_set_metadata)(struct radeon_winsys_bo *bo,
struct radeon_bo_metadata *md);
void (*buffer_virtual_bind)(struct radeon_winsys_bo *parent,
uint64_t offset, uint64_t size,
struct radeon_winsys_bo *bo, uint64_t bo_offset);
- struct radeon_winsys_ctx *(*ctx_create)(struct radeon_winsys *ws);
+ struct radeon_winsys_ctx *(*ctx_create)(struct radeon_winsys *ws,
+ enum radeon_ctx_priority priority);
void (*ctx_destroy)(struct radeon_winsys_ctx *ctx);
bool (*ctx_wait_idle)(struct radeon_winsys_ctx *ctx,
@@ -217,7 +232,7 @@
void (*cs_execute_secondary)(struct radeon_winsys_cs *parent,
struct radeon_winsys_cs *child);
- void (*cs_dump)(struct radeon_winsys_cs *cs, FILE* file, uint32_t trace_id);
+ void (*cs_dump)(struct radeon_winsys_cs *cs, FILE* file, const int *trace_ids, int trace_id_count);
int (*surface_init)(struct radeon_winsys *ws,
const struct ac_surf_info *surf_info,
@@ -258,4 +273,9 @@
cs->cdw += count;
}
+static inline uint64_t radv_buffer_get_va(struct radeon_winsys_bo *bo)
+{
+ return bo->va; /* plain field read of the `va` member of struct radeon_winsys_bo; no winsys callback involved */
+}
+
#endif /* RADV_RADEON_WINSYS_H */
diff -Nru mesa-17.2.4/src/amd/vulkan/radv_shader.c mesa-17.3.3/src/amd/vulkan/radv_shader.c
--- mesa-17.2.4/src/amd/vulkan/radv_shader.c 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/amd/vulkan/radv_shader.c 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,665 @@
+/*
+ * Copyright © 2016 Red Hat.
+ * Copyright © 2016 Bas Nieuwenhuizen
+ *
+ * based in part on anv driver which is:
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "util/mesa-sha1.h"
+#include "util/u_atomic.h"
+#include "radv_debug.h"
+#include "radv_private.h"
+#include "radv_shader.h"
+#include "nir/nir.h"
+#include "nir/nir_builder.h"
+#include "spirv/nir_spirv.h"
+
+#include <llvm-c/Core.h>
+#include <llvm-c/TargetMachine.h>
+
+#include "sid.h"
+#include "gfx9d.h"
+#include "ac_binary.h"
+#include "ac_llvm_util.h"
+#include "ac_nir_to_llvm.h"
+#include "vk_format.h"
+#include "util/debug.h"
+#include "ac_exp_param.h"
+/* NIR compiler options for this file: installed on pre-built NIR modules and passed to spirv_to_nir() in radv_shader_compile_to_nir(). */
+static const struct nir_shader_compiler_options nir_options = {
+ .vertex_id_zero_based = true,
+ .lower_scmp = true,
+ .lower_flrp32 = true,
+ .lower_fsat = true,
+ .lower_fdiv = true,
+ .lower_sub = true,
+ .lower_pack_snorm_2x16 = true,
+ .lower_pack_snorm_4x8 = true,
+ .lower_pack_unorm_2x16 = true,
+ .lower_pack_unorm_4x8 = true,
+ .lower_unpack_snorm_2x16 = true,
+ .lower_unpack_snorm_4x8 = true,
+ .lower_unpack_unorm_2x16 = true,
+ .lower_unpack_unorm_4x8 = true,
+ .lower_extract_byte = true,
+ .lower_extract_word = true,
+ .lower_ffma = true,
+ .max_unroll_iterations = 32 /* being non-zero is what lets radv_optimize_nir() run nir_opt_loop_unroll */
+};
+
+VkResult radv_CreateShaderModule(
+	VkDevice                                    _device,
+	const VkShaderModuleCreateInfo*             pCreateInfo,
+	const VkAllocationCallbacks*                pAllocator,
+	VkShaderModule*                             pShaderModule)
+{
+	/* Copies the caller's code blob into a driver-owned module and hashes it. */
+	RADV_FROM_HANDLE(radv_device, device, _device);
+	const size_t code_bytes = pCreateInfo->codeSize;
+	struct radv_shader_module *mod;
+
+	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO);
+	assert(pCreateInfo->flags == 0);
+
+	/* Header and code share one allocation; data[] trails the struct. */
+	mod = vk_alloc2(&device->alloc, pAllocator, sizeof(*mod) + code_bytes, 8,
+			VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+	if (!mod)
+		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+	mod->nir = NULL;
+	mod->size = code_bytes;
+	memcpy(mod->data, pCreateInfo->pCode, mod->size);
+	_mesa_sha1_compute(mod->data, mod->size, mod->sha1);
+
+	*pShaderModule = radv_shader_module_to_handle(mod);
+	return VK_SUCCESS;
+}
+
+void radv_DestroyShaderModule(
+	VkDevice                                    _device,
+	VkShaderModule                              _module,
+	const VkAllocationCallbacks*                pAllocator)
+{
+	RADV_FROM_HANDLE(radv_device, device, _device);
+	RADV_FROM_HANDLE(radv_shader_module, module, _module);
+
+	/* A null handle is a no-op; otherwise release the module through
+	 * vk_free2 with the device and caller-supplied allocators. */
+	if (module != NULL)
+		vk_free2(&device->alloc, pAllocator, module);
+}
+/* Runs the lowering/optimization pass list on `shader` repeatedly until one full iteration leaves `progress` false. */
+void
+radv_optimize_nir(struct nir_shader *shader)
+{
+ bool progress;
+
+ do {
+ progress = false;
+ /* These four are invoked through NIR_PASS_V, which is not given `progress`. */
+ NIR_PASS_V(shader, nir_lower_vars_to_ssa);
+ NIR_PASS_V(shader, nir_lower_64bit_pack);
+ NIR_PASS_V(shader, nir_lower_alu_to_scalar);
+ NIR_PASS_V(shader, nir_lower_phis_to_scalar);
+ /* Everything below is handed `progress`, the flag that decides whether the loop repeats. */
+ NIR_PASS(progress, shader, nir_copy_prop);
+ NIR_PASS(progress, shader, nir_opt_remove_phis);
+ NIR_PASS(progress, shader, nir_opt_dce);
+ if (nir_opt_trivial_continues(shader)) { /* called directly, so progress is set by hand and the cleanup passes are re-run */
+ progress = true;
+ NIR_PASS(progress, shader, nir_copy_prop);
+ NIR_PASS(progress, shader, nir_opt_remove_phis);
+ NIR_PASS(progress, shader, nir_opt_dce);
+ }
+ NIR_PASS(progress, shader, nir_opt_if);
+ NIR_PASS(progress, shader, nir_opt_dead_cf);
+ NIR_PASS(progress, shader, nir_opt_cse);
+ NIR_PASS(progress, shader, nir_opt_peephole_select, 8);
+ NIR_PASS(progress, shader, nir_opt_algebraic);
+ NIR_PASS(progress, shader, nir_opt_constant_folding);
+ NIR_PASS(progress, shader, nir_opt_undef);
+ NIR_PASS(progress, shader, nir_opt_conditional_discard);
+ if (shader->options->max_unroll_iterations) { /* unrolling is skipped when the shader's options set the limit to 0 */
+ NIR_PASS(progress, shader, nir_opt_loop_unroll, 0);
+ }
+ } while (progress);
+}
+/* Builds the NIR for one stage: reuses module->nir when set, otherwise translates the module's SPIR-V (applying spec_info), then lowers and optimizes it. */
+nir_shader *
+radv_shader_compile_to_nir(struct radv_device *device,
+ struct radv_shader_module *module,
+ const char *entrypoint_name,
+ gl_shader_stage stage,
+ const VkSpecializationInfo *spec_info)
+{
+ if (strcmp(entrypoint_name, "main") != 0) {
+ radv_finishme("Multiple shaders per module not really supported");
+ }
+
+ nir_shader *nir;
+ nir_function *entry_point;
+ if (module->nir) {
+ /* Some things such as our meta clear/blit code will give us a NIR
+ * shader directly. In that case, we just ignore the SPIR-V entirely
+ * and just use the NIR shader */
+ nir = module->nir;
+ nir->options = &nir_options;
+ nir_validate_shader(nir);
+
+ assert(exec_list_length(&nir->functions) == 1);
+ struct exec_node *node = exec_list_get_head(&nir->functions);
+ entry_point = exec_node_data(nir_function, node, node);
+ } else {
+ uint32_t *spirv = (uint32_t *) module->data;
+ assert(module->size % 4 == 0);
+
+ if (device->instance->debug_flags & RADV_DEBUG_DUMP_SPIRV)
+ radv_print_spirv(spirv, module->size, stderr);
+
+ uint32_t num_spec_entries = 0;
+ struct nir_spirv_specialization *spec_entries = NULL;
+ if (spec_info && spec_info->mapEntryCount > 0) {
+ num_spec_entries = spec_info->mapEntryCount;
+ spec_entries = malloc(num_spec_entries * sizeof(*spec_entries)); /* NOTE(review): result is not checked for NULL */
+ for (uint32_t i = 0; i < num_spec_entries; i++) {
+ VkSpecializationMapEntry entry = spec_info->pMapEntries[i];
+ const void *data = spec_info->pData + entry.offset;
+ assert(data + entry.size <= spec_info->pData + spec_info->dataSize);
+ /* The width of a constant is this entry's own size, not the size of the whole data blob. */
+ spec_entries[i].id = spec_info->pMapEntries[i].constantID;
+ if (entry.size == 8)
+ spec_entries[i].data64 = *(const uint64_t *)data;
+ else
+ spec_entries[i].data32 = *(const uint32_t *)data;
+ }
+ }
+ const struct nir_spirv_supported_extensions supported_ext = {
+ .draw_parameters = true,
+ .float64 = true,
+ .image_read_without_format = true,
+ .image_write_without_format = true,
+ .tessellation = true,
+ .int64 = true,
+ .multiview = true,
+ .variable_pointers = true,
+ };
+ entry_point = spirv_to_nir(spirv, module->size / 4,
+ spec_entries, num_spec_entries,
+ stage, entrypoint_name, &supported_ext, &nir_options);
+ nir = entry_point->shader;
+ assert(nir->info.stage == stage);
+ nir_validate_shader(nir);
+
+ free(spec_entries);
+
+ /* We have to lower away local constant initializers right before we
+ * inline functions. That way they get properly initialized at the top
+ * of the function and not at the top of its caller.
+ */
+ NIR_PASS_V(nir, nir_lower_constant_initializers, nir_var_local);
+ NIR_PASS_V(nir, nir_lower_returns);
+ NIR_PASS_V(nir, nir_inline_functions);
+
+ /* Pick off the single entrypoint that we want */
+ foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
+ if (func != entry_point)
+ exec_node_remove(&func->node);
+ }
+ assert(exec_list_length(&nir->functions) == 1);
+ entry_point->name = ralloc_strdup(entry_point, "main");
+
+ NIR_PASS_V(nir, nir_remove_dead_variables,
+ nir_var_shader_in | nir_var_shader_out | nir_var_system_value);
+
+ /* Now that we've deleted all but the main function, we can go ahead and
+ * lower the rest of the constant initializers.
+ */
+ NIR_PASS_V(nir, nir_lower_constant_initializers, ~0);
+ NIR_PASS_V(nir, nir_lower_system_values);
+ NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);
+ }
+
+ /* Vulkan uses the separate-shader linking model */
+ nir->info.separate_shader = true;
+
+ nir_shader_gather_info(nir, entry_point->impl);
+
+ /* While it would be nice not to have this flag, we are constrained
+ * by the reality that LLVM 5.0 doesn't have working VGPR indexing
+ * on GFX9.
+ */
+ bool llvm_has_working_vgpr_indexing =
+ device->physical_device->rad_info.chip_class <= VI;
+
+ /* TODO: Indirect indexing of GS inputs is unimplemented.
+ *
+ * TCS and TES load inputs directly from LDS or offchip memory, so
+ * indirect indexing is trivial.
+ */
+ nir_variable_mode indirect_mask = 0;
+ if (nir->info.stage == MESA_SHADER_GEOMETRY ||
+ (nir->info.stage != MESA_SHADER_TESS_CTRL &&
+ nir->info.stage != MESA_SHADER_TESS_EVAL &&
+ !llvm_has_working_vgpr_indexing)) {
+ indirect_mask |= nir_var_shader_in;
+ }
+ if (!llvm_has_working_vgpr_indexing &&
+ nir->info.stage != MESA_SHADER_TESS_CTRL)
+ indirect_mask |= nir_var_shader_out;
+
+ /* TODO: We shouldn't need to do this, however LLVM isn't currently
+ * smart enough to handle indirects without causing excess spilling
+ * causing the gpu to hang.
+ *
+ * See the following thread for more details of the problem:
+ * https://lists.freedesktop.org/archives/mesa-dev/2017-July/162106.html
+ */
+ indirect_mask |= nir_var_local;
+
+ nir_lower_indirect_derefs(nir, indirect_mask);
+
+ static const nir_lower_tex_options tex_options = {
+ .lower_txp = ~0,
+ };
+
+ nir_lower_tex(nir, &tex_options);
+
+ nir_lower_vars_to_ssa(nir);
+ nir_lower_var_copies(nir);
+ nir_lower_global_vars_to_local(nir);
+ nir_remove_dead_variables(nir, nir_var_local);
+ radv_optimize_nir(nir);
+
+ return nir;
+}
+/* First-fit allocator for shader code: sets shader->bo / bo_offset, links the shader into its slab's offset-ordered list, and returns the CPU pointer to write the code to. */
+void *
+radv_alloc_shader_memory(struct radv_device *device,
+ struct radv_shader_variant *shader)
+{
+ mtx_lock(&device->shader_slab_mutex);
+ list_for_each_entry(struct radv_shader_slab, slab, &device->shader_slabs, slabs) {
+ uint64_t offset = 0;
+ list_for_each_entry(struct radv_shader_variant, s, &slab->shaders, slab_list) {
+ if (s->bo_offset - offset >= shader->code_size) { /* gap in front of `s` is large enough */
+ shader->bo = slab->bo;
+ shader->bo_offset = offset;
+ list_addtail(&shader->slab_list, &s->slab_list);
+ mtx_unlock(&device->shader_slab_mutex);
+ return slab->ptr + offset;
+ }
+ offset = align_u64(s->bo_offset + s->code_size, 256);
+ }
+ if (offset <= slab->size && slab->size - offset >= shader->code_size) { /* offset check keeps the unsigned subtraction from wrapping */
+ shader->bo = slab->bo;
+ shader->bo_offset = offset;
+ list_addtail(&shader->slab_list, &slab->shaders);
+ mtx_unlock(&device->shader_slab_mutex);
+ return slab->ptr + offset;
+ }
+ }
+ /* No existing slab has room: build a new one with the mutex released. */
+ mtx_unlock(&device->shader_slab_mutex);
+ struct radv_shader_slab *slab = calloc(1, sizeof(struct radv_shader_slab));
+ /* NOTE(review): neither calloc nor buffer_create is checked; the caller memcpy()s into the result unconditionally. */
+ slab->size = MAX2(256 * 1024, shader->code_size); /* a shader larger than the default slab gets a slab of its own size */
+ slab->bo = device->ws->buffer_create(device->ws, slab->size, 256,
+ RADEON_DOMAIN_VRAM, 0);
+ slab->ptr = (char*)device->ws->buffer_map(slab->bo);
+ list_inithead(&slab->shaders);
+ /* Publish the slab and place the shader at its start. */
+ mtx_lock(&device->shader_slab_mutex);
+ list_add(&slab->slabs, &device->shader_slabs);
+
+ shader->bo = slab->bo;
+ shader->bo_offset = 0;
+ list_add(&shader->slab_list, &slab->shaders);
+ mtx_unlock(&device->shader_slab_mutex);
+ return slab->ptr;
+}
+/* Destroys every slab's buffer object, frees the slab records, then destroys the slab mutex. */
+void
+radv_destroy_shader_slabs(struct radv_device *device)
+{
+ list_for_each_entry_safe(struct radv_shader_slab, slab, &device->shader_slabs, slabs) { /* NOTE(review): walked without taking shader_slab_mutex - presumably only called at device teardown; confirm */
+ device->ws->buffer_destroy(slab->bo);
+ free(slab);
+ }
+ mtx_destroy(&device->shader_slab_mutex);
+}
+/* Fills variant->code_size, rsrc1 and rsrc2 from the compiled config for `stage`, then copies binary->code into shader slab memory. */
+static void
+radv_fill_shader_variant(struct radv_device *device,
+ struct radv_shader_variant *variant,
+ struct ac_shader_binary *binary,
+ gl_shader_stage stage)
+{
+ bool scratch_enabled = variant->config.scratch_bytes_per_wave > 0;
+ unsigned vgpr_comp_cnt = 0;
+
+ if (scratch_enabled && !device->llvm_supports_spill)
+ radv_finishme("shader scratch support only available with LLVM 4.0");
+
+ variant->code_size = binary->code_size; /* must be set first: radv_alloc_shader_memory() below reads it */
+ variant->rsrc2 = S_00B12C_USER_SGPR(variant->info.num_user_sgprs) |
+ S_00B12C_SCRATCH_EN(scratch_enabled);
+
+ variant->rsrc1 = S_00B848_VGPRS((variant->config.num_vgprs - 1) / 4) |
+ S_00B848_SGPRS((variant->config.num_sgprs - 1) / 8) |
+ S_00B848_DX10_CLAMP(1) |
+ S_00B848_FLOAT_MODE(variant->config.float_mode);
+ /* Per-stage additions to rsrc2, and the VGPR component count that is merged into rsrc1 further down. */
+ switch (stage) {
+ case MESA_SHADER_TESS_EVAL:
+ vgpr_comp_cnt = 3;
+ variant->rsrc2 |= S_00B12C_OC_LDS_EN(1);
+ break;
+ case MESA_SHADER_TESS_CTRL:
+ if (device->physical_device->rad_info.chip_class >= GFX9)
+ vgpr_comp_cnt = variant->info.vs.vgpr_comp_cnt;
+ else
+ variant->rsrc2 |= S_00B12C_OC_LDS_EN(1);
+ break;
+ case MESA_SHADER_VERTEX:
+ case MESA_SHADER_GEOMETRY:
+ vgpr_comp_cnt = variant->info.vs.vgpr_comp_cnt;
+ break;
+ case MESA_SHADER_FRAGMENT:
+ break;
+ case MESA_SHADER_COMPUTE:
+ variant->rsrc2 |=
+ S_00B84C_TGID_X_EN(1) | S_00B84C_TGID_Y_EN(1) |
+ S_00B84C_TGID_Z_EN(1) | S_00B84C_TIDIG_COMP_CNT(2) |
+ S_00B84C_TG_SIZE_EN(1) |
+ S_00B84C_LDS_SIZE(variant->config.lds_size);
+ break;
+ default:
+ unreachable("unsupported shader type");
+ break;
+ }
+ /* GFX9 geometry and tess-control shaders use their own component-count fields; every other case uses the generic one. */
+ if (device->physical_device->rad_info.chip_class >= GFX9 &&
+ stage == MESA_SHADER_GEOMETRY) {
+ /* TODO: Figure out how many we actually need. */
+ variant->rsrc1 |= S_00B228_GS_VGPR_COMP_CNT(3);
+ variant->rsrc2 |= S_00B22C_ES_VGPR_COMP_CNT(3) |
+ S_00B22C_OC_LDS_EN(1);
+ } else if (device->physical_device->rad_info.chip_class >= GFX9 &&
+ stage == MESA_SHADER_TESS_CTRL)
+ variant->rsrc1 |= S_00B428_LS_VGPR_COMP_CNT(vgpr_comp_cnt);
+ else
+ variant->rsrc1 |= S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt);
+ /* Reserve space in a shader slab and copy the machine code into it. */
+ void *ptr = radv_alloc_shader_memory(device, variant);
+ memcpy(ptr, binary->code, binary->code_size);
+}
+/* Compiles `shaders` (or builds a GS copy shader) with a temporary LLVM target machine and returns a new variant with ref_count 1, or NULL if the variant cannot be allocated. */
+static struct radv_shader_variant *
+shader_variant_create(struct radv_device *device,
+ struct radv_shader_module *module,
+ struct nir_shader * const *shaders,
+ int shader_count,
+ gl_shader_stage stage,
+ struct ac_nir_compiler_options *options,
+ bool gs_copy_shader,
+ void **code_out,
+ unsigned *code_size_out)
+{
+ enum radeon_family chip_family = device->physical_device->rad_info.family;
+ bool dump_shaders = device->instance->debug_flags & RADV_DEBUG_DUMP_SHADERS;
+ enum ac_target_machine_options tm_options = 0;
+ struct radv_shader_variant *variant;
+ struct ac_shader_binary binary;
+ LLVMTargetMachineRef tm;
+
+ variant = calloc(1, sizeof(struct radv_shader_variant));
+ if (!variant)
+ return NULL;
+ /* The caller's options struct is completed in place with the chip identification. */
+ options->family = chip_family;
+ options->chip_class = device->physical_device->rad_info.chip_class;
+
+ if (options->supports_spill)
+ tm_options |= AC_TM_SUPPORTS_SPILL;
+ if (device->instance->perftest_flags & RADV_PERFTEST_SISCHED)
+ tm_options |= AC_TM_SISCHED;
+ tm = ac_create_target_machine(chip_family, tm_options);
+
+ if (gs_copy_shader) {
+ assert(shader_count == 1);
+ ac_create_gs_copy_shader(tm, *shaders, &binary, &variant->config,
+ &variant->info, options, dump_shaders);
+ } else {
+ ac_compile_nir_shader(tm, &binary, &variant->config,
+ &variant->info, shaders, shader_count, options,
+ dump_shaders);
+ }
+
+ LLVMDisposeTargetMachine(tm);
+
+ radv_fill_shader_variant(device, variant, &binary, stage);
+ /* binary.code is handed to the caller when code_out is given (code_size_out must then be valid too); otherwise it is freed here. */
+ if (code_out) {
+ *code_out = binary.code;
+ *code_size_out = binary.code_size;
+ } else
+ free(binary.code);
+ free(binary.config);
+ free(binary.rodata);
+ free(binary.global_symbol_offsets);
+ free(binary.relocs);
+ variant->ref_count = 1;
+ /* Debug data is kept only when the device has a trace BO; `module` is dereferenced only for non-copy shaders, so the GS copy path may pass NULL. */
+ if (device->trace_bo) {
+ variant->disasm_string = binary.disasm_string;
+ if (!gs_copy_shader && !module->nir) {
+ variant->nir = *shaders;
+ variant->spirv = (uint32_t *)module->data;
+ variant->spirv_size = module->size;
+ }
+ } else {
+ free(binary.disasm_string);
+ }
+
+ return variant;
+}
+
+struct radv_shader_variant *
+radv_shader_variant_create(struct radv_device *device,
+			   struct radv_shader_module *module,
+			   struct nir_shader *const *shaders,
+			   int shader_count,
+			   struct radv_pipeline_layout *layout,
+			   const struct ac_shader_variant_key *key,
+			   void **code_out,
+			   unsigned *code_size_out)
+{
+	/* The last shader in the array determines the stage of the variant. */
+	const gl_shader_stage last_stage = shaders[shader_count - 1]->info.stage;
+	struct ac_nir_compiler_options opts = {0};
+
+	opts.layout = layout;
+	if (key != NULL)
+		opts.key = *key;
+	opts.unsafe_math = (device->instance->debug_flags & RADV_DEBUG_UNSAFE_MATH) != 0;
+	opts.supports_spill = device->llvm_supports_spill;
+	return shader_variant_create(device, module, shaders, shader_count, last_stage, &opts,
+				     false, code_out, code_size_out);
+}
+
+struct radv_shader_variant *
+radv_create_gs_copy_shader(struct radv_device *device,
+			   struct nir_shader *shader,
+			   void **code_out,
+			   unsigned *code_size_out,
+			   bool multiview)
+{
+	/* Only the multiview bit of the key is set; every other option stays zero. */
+	struct ac_nir_compiler_options copy_opts = {
+		.key.has_multiview_view_index = multiview,
+	};
+	return shader_variant_create(device, NULL, &shader, 1, MESA_SHADER_VERTEX,
+				     &copy_opts, true, code_out, code_size_out);
+}
+
+void
+radv_shader_variant_destroy(struct radv_device *device,
+			    struct radv_shader_variant *variant)
+{
+	/* Drop one reference; only the drop that reaches zero tears down. */
+	if (p_atomic_dec_zero(&variant->ref_count)) {
+		/* Unlink from the slab's shader list under the slab mutex. */
+		mtx_lock(&device->shader_slab_mutex);
+		list_del(&variant->slab_list);
+		mtx_unlock(&device->shader_slab_mutex);
+		ralloc_free(variant->nir);
+		free(variant->disasm_string);
+		free(variant);
+	}
+}
+
+uint32_t
+radv_shader_stage_to_user_data_0(gl_shader_stage stage, enum chip_class chip_class,
+				 bool has_gs, bool has_tess)
+{
+	/* Returns the first USER_DATA register for `stage`, given the chip class and which optional stages exist. */
+	const bool is_gfx9 = chip_class >= GFX9;
+	switch (stage) {
+	case MESA_SHADER_FRAGMENT:
+		return R_00B030_SPI_SHADER_USER_DATA_PS_0;
+	case MESA_SHADER_COMPUTE:
+		return R_00B900_COMPUTE_USER_DATA_0;
+	case MESA_SHADER_VERTEX:
+		/* Tessellation takes priority over geometry for the vertex stage. */
+		if (has_tess) {
+			if (is_gfx9)
+				return R_00B430_SPI_SHADER_USER_DATA_LS_0;
+			return R_00B530_SPI_SHADER_USER_DATA_LS_0;
+		}
+		if (has_gs)
+			return R_00B330_SPI_SHADER_USER_DATA_ES_0;
+		return R_00B130_SPI_SHADER_USER_DATA_VS_0;
+	case MESA_SHADER_TESS_EVAL:
+		/* Same answer on every chip class; only has_gs matters. */
+		if (has_gs)
+			return R_00B330_SPI_SHADER_USER_DATA_ES_0;
+		return R_00B130_SPI_SHADER_USER_DATA_VS_0;
+	case MESA_SHADER_TESS_CTRL:
+		if (is_gfx9)
+			return R_00B430_SPI_SHADER_USER_DATA_LS_0;
+		return R_00B430_SPI_SHADER_USER_DATA_HS_0;
+	case MESA_SHADER_GEOMETRY:
+		if (is_gfx9)
+			return R_00B330_SPI_SHADER_USER_DATA_ES_0;
+		return R_00B230_SPI_SHADER_USER_DATA_GS_0;
+	default:
+		unreachable("unknown shader");
+	}
+}
+
+const char *
+radv_get_shader_name(struct radv_shader_variant *var, gl_shader_stage stage)
+{
+	/* Human-readable stage label; VS and TES labels also say which HW stage they run as. */
+	if (stage == MESA_SHADER_VERTEX)
+		return var->info.vs.as_ls ? "Vertex Shader as LS" :
+		       var->info.vs.as_es ? "Vertex Shader as ES" : "Vertex Shader as VS";
+	if (stage == MESA_SHADER_TESS_EVAL)
+		return var->info.tes.as_es ? "Tessellation Evaluation Shader as ES" : "Tessellation Evaluation Shader as VS";
+	if (stage == MESA_SHADER_TESS_CTRL) return "Tessellation Control Shader";
+	if (stage == MESA_SHADER_GEOMETRY) return "Geometry Shader";
+	if (stage == MESA_SHADER_FRAGMENT) return "Pixel Shader";
+	return stage == MESA_SHADER_COMPUTE ? "Compute Shader" : "Unknown shader";
+}
+/* Writes a text report for `variant` to `file`: register counts, spills, code size, LDS, scratch and an estimated wave limit per SIMD. */
+void
+radv_shader_dump_stats(struct radv_device *device,
+ struct radv_shader_variant *variant,
+ gl_shader_stage stage,
+ FILE *file)
+{
+ unsigned lds_increment = device->physical_device->rad_info.chip_class >= CIK ? 512 : 256;
+ struct ac_shader_config *conf;
+ unsigned max_simd_waves;
+ unsigned lds_per_wave = 0;
+ /* Start from the per-family wave limit; each resource check below can only lower it. */
+ switch (device->physical_device->rad_info.family) {
+ /* These always have 8 waves: */
+ case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
+ max_simd_waves = 8;
+ break;
+ default:
+ max_simd_waves = 10;
+ }
+
+ conf = &variant->config;
+ /* Only fragment shaders get an LDS estimate: lds_size blocks plus 48 bytes per interpolant, rounded up to lds_increment. */
+ if (stage == MESA_SHADER_FRAGMENT) {
+ lds_per_wave = conf->lds_size * lds_increment +
+ align(variant->info.fs.num_interp * 48,
+ lds_increment);
+ }
+ /* SGPR clamp uses a budget of 800 on VI and newer, 512 otherwise; skipped when num_sgprs is 0 to avoid dividing by zero. */
+ if (conf->num_sgprs) {
+ if (device->physical_device->rad_info.chip_class >= VI)
+ max_simd_waves = MIN2(max_simd_waves, 800 / conf->num_sgprs);
+ else
+ max_simd_waves = MIN2(max_simd_waves, 512 / conf->num_sgprs);
+ }
+ /* VGPR clamp uses a budget of 256. */
+ if (conf->num_vgprs)
+ max_simd_waves = MIN2(max_simd_waves, 256 / conf->num_vgprs);
+
+ /* LDS is 64KB per CU (4 SIMDs), divided into 16KB blocks per SIMD
+ * that PS can use.
+ */
+ if (lds_per_wave)
+ max_simd_waves = MIN2(max_simd_waves, 16384 / lds_per_wave);
+
+ fprintf(file, "\n%s:\n", radv_get_shader_name(variant, stage));
+
+ if (stage == MESA_SHADER_FRAGMENT) {
+ fprintf(file, "*** SHADER CONFIG ***\n"
+ "SPI_PS_INPUT_ADDR = 0x%04x\n"
+ "SPI_PS_INPUT_ENA = 0x%04x\n",
+ conf->spi_ps_input_addr, conf->spi_ps_input_ena);
+ }
+
+ fprintf(file, "*** SHADER STATS ***\n"
+ "SGPRS: %d\n"
+ "VGPRS: %d\n"
+ "Spilled SGPRs: %d\n"
+ "Spilled VGPRs: %d\n"
+ "Code Size: %d bytes\n"
+ "LDS: %d blocks\n"
+ "Scratch: %d bytes per wave\n"
+ "Max Waves: %d\n"
+ "********************\n\n\n",
+ conf->num_sgprs, conf->num_vgprs,
+ conf->spilled_sgprs, conf->spilled_vgprs, variant->code_size,
+ conf->lds_size, conf->scratch_bytes_per_wave,
+ max_simd_waves);
+}
diff -Nru mesa-17.2.4/src/amd/vulkan/radv_shader.h mesa-17.3.3/src/amd/vulkan/radv_shader.h
--- mesa-17.2.4/src/amd/vulkan/radv_shader.h 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/amd/vulkan/radv_shader.h 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,119 @@
+/*
+ * Copyright © 2016 Red Hat.
+ * Copyright © 2016 Bas Nieuwenhuizen
+ *
+ * based in part on anv driver which is:
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef RADV_SHADER_H
+#define RADV_SHADER_H
+
+#include "radv_private.h"
+
+#include "nir/nir.h"
+
+/* Shader module container.
+ * NOTE(review): presumably `data` holds `size` bytes of shader input and
+ * `sha1` is a digest of that payload -- confirm against the code that
+ * creates modules.
+ */
+struct radv_shader_module {
+ struct nir_shader *nir;
+ unsigned char sha1[20]; /* 20-byte digest */
+ uint32_t size;
+ char data[0]; /* zero-length trailing array (GNU C): storage follows the struct */
+};
+
+/* A compiled shader variant.
+ * radv_shader_dump_stats() reads `config` (SGPR/VGPR counts, spill counts,
+ * LDS size, scratch bytes per wave, SPI_PS_INPUT_* values), `code_size`
+ * (reported in bytes) and `info` (e.g. info.fs.num_interp) from this struct.
+ */
+struct radv_shader_variant {
+ uint32_t ref_count;
+
+ struct radeon_winsys_bo *bo;
+ uint64_t bo_offset; /* presumably the shader's offset inside `bo` -- confirm */
+ struct ac_shader_config config;
+ uint32_t code_size;
+ struct ac_shader_variant_info info;
+ unsigned rsrc1;
+ unsigned rsrc2;
+
+ /* debug only */
+ uint32_t *spirv;
+ uint32_t spirv_size;
+ struct nir_shader *nir;
+ char *disasm_string;
+
+ struct list_head slab_list; /* presumably links into radv_shader_slab.shaders -- confirm */
+};
+
+/* NOTE(review): presumably one buffer object from which several shader
+ * variants are sub-allocated (see radv_alloc_shader_memory() and
+ * radv_destroy_shader_slabs() declared in this header) -- confirm against
+ * radv_shader.c.
+ */
+struct radv_shader_slab {
+ struct list_head slabs;
+ struct list_head shaders;
+ struct radeon_winsys_bo *bo;
+ uint64_t size;
+ char *ptr;
+};
+
+void
+radv_optimize_nir(struct nir_shader *shader);
+
+nir_shader *
+radv_shader_compile_to_nir(struct radv_device *device,
+ struct radv_shader_module *module,
+ const char *entrypoint_name,
+ gl_shader_stage stage,
+ const VkSpecializationInfo *spec_info);
+
+void *
+radv_alloc_shader_memory(struct radv_device *device,
+ struct radv_shader_variant *shader);
+
+void
+radv_destroy_shader_slabs(struct radv_device *device);
+
+struct radv_shader_variant *
+radv_shader_variant_create(struct radv_device *device,
+ struct radv_shader_module *module,
+ struct nir_shader *const *shaders,
+ int shader_count,
+ struct radv_pipeline_layout *layout,
+ const struct ac_shader_variant_key *key,
+ void **code_out,
+ unsigned *code_size_out);
+
+struct radv_shader_variant *
+radv_create_gs_copy_shader(struct radv_device *device, struct nir_shader *nir,
+ void **code_out, unsigned *code_size_out,
+ bool multiview);
+
+void
+radv_shader_variant_destroy(struct radv_device *device,
+ struct radv_shader_variant *variant);
+
+uint32_t
+radv_shader_stage_to_user_data_0(gl_shader_stage stage, enum chip_class chip_class,
+ bool has_gs, bool has_tess);
+
+/* Returns the name string for the given variant/stage;
+ * radv_shader_dump_stats() prints it as the heading of its output.
+ */
+const char *
+radv_get_shader_name(struct radv_shader_variant *var, gl_shader_stage stage);
+
+/* Writes a human-readable summary of a compiled shader to `file`: the shader
+ * name, the SPI_PS_INPUT_ADDR/ENA values (fragment stage only), and the
+ * SGPR/VGPR counts, spilled SGPR/VGPR counts, code size, LDS blocks, scratch
+ * bytes per wave and an estimated maximum number of waves per SIMD.
+ */
+void
+radv_shader_dump_stats(struct radv_device *device,
+ struct radv_shader_variant *variant,
+ gl_shader_stage stage,
+ FILE *file);
+
+#endif
diff -Nru mesa-17.2.4/src/amd/vulkan/radv_wsi.c mesa-17.3.3/src/amd/vulkan/radv_wsi.c
--- mesa-17.2.4/src/amd/vulkan/radv_wsi.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/vulkan/radv_wsi.c 2018-01-18 21:30:28.000000000 +0000
@@ -27,8 +27,9 @@
#include "radv_meta.h"
#include "wsi_common.h"
#include "vk_util.h"
+#include "util/macros.h"
-static const struct wsi_callbacks wsi_cbs = {
+MAYBE_UNUSED static const struct wsi_callbacks wsi_cbs = {
.get_phys_device_format_properties = radv_GetPhysicalDeviceFormatProperties,
};
@@ -193,14 +194,29 @@
.image = image_h
};
- result = radv_AllocateMemory(device_h,
+ /* Find the first VRAM memory type, or GART for PRIME images:
+ * when `linear` is set, pick the first memory type WITHOUT
+ * DEVICE_LOCAL; otherwise pick the first memory type WITH it.
+ */
+ int memory_type_index = -1;
+ for (int i = 0; i < device->physical_device->memory_properties.memoryTypeCount; ++i) {
+ bool is_local = !!(device->physical_device->memory_properties.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
+ if ((linear && !is_local) || (!linear && is_local)) {
+ memory_type_index = i;
+ break;
+ }
+ }
+
+ /* fallback: no memory type matched, use memory type 0 */
+ if (memory_type_index == -1)
+ memory_type_index = 0;
+
+ result = radv_alloc_memory(device_h,
&(VkMemoryAllocateInfo) {
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
.pNext = &ded_alloc,
.allocationSize = image->size,
- .memoryTypeIndex = linear ? 1 : 0,
+ .memoryTypeIndex = memory_type_index,
},
NULL /* XXX: pAllocator */,
+ RADV_MEM_IMPLICIT_SYNC,
&memory_h);
if (result != VK_SUCCESS)
goto fail_create_image;
diff -Nru mesa-17.2.4/src/amd/vulkan/si_cmd_buffer.c mesa-17.3.3/src/amd/vulkan/si_cmd_buffer.c
--- mesa-17.2.4/src/amd/vulkan/si_cmd_buffer.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/vulkan/si_cmd_buffer.c 2018-01-18 21:30:28.000000000 +0000
@@ -28,14 +28,13 @@
/* command buffer handling for SI */
#include "radv_private.h"
+#include "radv_shader.h"
#include "radv_cs.h"
#include "sid.h"
#include "gfx9d.h"
#include "radv_util.h"
#include "main/macros.h"
-#define SI_GS_PER_ES 128
-
static void
si_write_harvested_raster_configs(struct radv_physical_device *physical_device,
struct radeon_winsys_cs *cs,
@@ -180,7 +179,8 @@
radeon_emit(cs, 0);
radeon_emit(cs, 0);
- radeon_set_sh_reg_seq(cs, R_00B854_COMPUTE_RESOURCE_LIMITS, 3);
+ /* NOTE(review): the last argument of radeon_set_sh_reg_seq() was the
+ * literal 3 before this change, and the other *_reg_seq() calls in
+ * this file pass a register count in that position. Here a register
+ * field value, S_00B854_WAVES_PER_SH(0x3), is passed instead, while
+ * the dword emitted for COMPUTE_RESOURCE_LIMITS right below is
+ * still 0. Presumably the macro happens to evaluate to 3 so the
+ * packet length is unchanged, but WAVES_PER_SH is never actually
+ * programmed -- confirm the intent.
+ */
+ radeon_set_sh_reg_seq(cs, R_00B854_COMPUTE_RESOURCE_LIMITS,
+ S_00B854_WAVES_PER_SH(0x3));
radeon_emit(cs, 0);
/* R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0 / SE1 */
radeon_emit(cs, S_00B858_SH0_CU_EN(0xffff) | S_00B858_SH1_CU_EN(0xffff));
@@ -217,48 +217,20 @@
si_emit_compute(physical_device, cmd_buffer->cs);
}
+/* Convert x to unsigned 12.4 fixed-point (12 integer bits, 4 fraction bits):
+ * x <= 0 yields 0, x >= 4096 saturates to 0xffff, anything in between
+ * yields x * 16, truncated by the float-to-unsigned conversion on return.
+ */
+static unsigned radv_pack_float_12p4(float x)
+{
+ return x <= 0 ? 0 :
+ x >= 4096 ? 0xffff : x * 16;
+}
+
static void
-si_emit_config(struct radv_physical_device *physical_device,
- struct radeon_winsys_cs *cs)
+si_set_raster_config(struct radv_physical_device *physical_device,
+ struct radeon_winsys_cs *cs)
{
unsigned num_rb = MIN2(physical_device->rad_info.num_render_backends, 16);
unsigned rb_mask = physical_device->rad_info.enabled_rb_mask;
unsigned raster_config, raster_config_1;
- int i;
-
- radeon_emit(cs, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
- radeon_emit(cs, CONTEXT_CONTROL_LOAD_ENABLE(1));
- radeon_emit(cs, CONTEXT_CONTROL_SHADOW_ENABLE(1));
-
- radeon_set_context_reg(cs, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64));
- radeon_set_context_reg(cs, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0));
-
- /* FIXME calculate these values somehow ??? */
- radeon_set_context_reg(cs, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES);
- radeon_set_context_reg(cs, R_028A58_VGT_ES_PER_GS, 0x40);
- radeon_set_context_reg(cs, R_028A5C_VGT_GS_PER_VS, 0x2);
-
- radeon_set_context_reg(cs, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0);
- radeon_set_context_reg(cs, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
-
- radeon_set_context_reg(cs, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
- radeon_set_context_reg(cs, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 1);
- if (physical_device->rad_info.chip_class >= GFX9)
- radeon_set_context_reg(cs, R_028AB4_VGT_REUSE_OFF, 0);
- radeon_set_context_reg(cs, R_028AB8_VGT_VTX_CNT_EN, 0x0);
- if (physical_device->rad_info.chip_class < CIK)
- radeon_set_config_reg(cs, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) |
- S_008A14_CLIP_VTX_REORDER_ENA(1));
-
- radeon_set_context_reg(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210);
- radeon_set_context_reg(cs, R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98);
-
- radeon_set_context_reg(cs, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0);
-
- for (i = 0; i < 16; i++) {
- radeon_set_context_reg(cs, R_0282D0_PA_SC_VPORT_ZMIN_0 + i*8, 0);
- radeon_set_context_reg(cs, R_0282D4_PA_SC_VPORT_ZMAX_0 + i*8, fui(1.0));
- }
switch (physical_device->rad_info.family) {
case CHIP_TAHITI:
@@ -332,47 +304,114 @@
raster_config_1 = 0x00000000;
break;
default:
- if (physical_device->rad_info.chip_class <= VI) {
- fprintf(stderr,
- "radeonsi: Unknown GPU, using 0 for raster_config\n");
- raster_config = 0x00000000;
- raster_config_1 = 0x00000000;
- }
+ fprintf(stderr,
+ "radv: Unknown GPU, using 0 for raster_config\n");
+ raster_config = 0x00000000;
+ raster_config_1 = 0x00000000;
break;
}
/* Always use the default config when all backends are enabled
* (or when we failed to determine the enabled backends).
*/
+ if (!rb_mask || util_bitcount(rb_mask) >= num_rb) {
+ radeon_set_context_reg(cs, R_028350_PA_SC_RASTER_CONFIG,
+ raster_config);
+ if (physical_device->rad_info.chip_class >= CIK)
+ radeon_set_context_reg(cs, R_028354_PA_SC_RASTER_CONFIG_1,
+ raster_config_1);
+ } else {
+ si_write_harvested_raster_configs(physical_device, cs,
+ raster_config,
+ raster_config_1);
+ }
+}
+
+static void
+si_emit_config(struct radv_physical_device *physical_device,
+ struct radeon_winsys_cs *cs)
+{
+ int i;
+
+ /* Only SI can disable CLEAR_STATE for now. */
+ assert(physical_device->has_clear_state ||
+ physical_device->rad_info.chip_class == SI);
+
+ radeon_emit(cs, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
+ radeon_emit(cs, CONTEXT_CONTROL_LOAD_ENABLE(1));
+ radeon_emit(cs, CONTEXT_CONTROL_SHADOW_ENABLE(1));
+
+ if (physical_device->has_clear_state) {
+ radeon_emit(cs, PKT3(PKT3_CLEAR_STATE, 0, 0));
+ radeon_emit(cs, 0);
+ }
+
+ if (physical_device->rad_info.chip_class <= VI)
+ si_set_raster_config(physical_device, cs);
+
+ radeon_set_context_reg(cs, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64));
+ if (!physical_device->has_clear_state)
+ radeon_set_context_reg(cs, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0));
+
+ /* FIXME calculate these values somehow ??? */
if (physical_device->rad_info.chip_class <= VI) {
- if (!rb_mask || util_bitcount(rb_mask) >= num_rb) {
- radeon_set_context_reg(cs, R_028350_PA_SC_RASTER_CONFIG,
- raster_config);
- if (physical_device->rad_info.chip_class >= CIK)
- radeon_set_context_reg(cs, R_028354_PA_SC_RASTER_CONFIG_1,
- raster_config_1);
- } else {
- si_write_harvested_raster_configs(physical_device, cs, raster_config, raster_config_1);
+ radeon_set_context_reg(cs, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES);
+ radeon_set_context_reg(cs, R_028A58_VGT_ES_PER_GS, 0x40);
+ }
+
+ if (!physical_device->has_clear_state) {
+ radeon_set_context_reg(cs, R_028A5C_VGT_GS_PER_VS, 0x2);
+ radeon_set_context_reg(cs, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0);
+ radeon_set_context_reg(cs, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
+ }
+
+ radeon_set_context_reg(cs, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 1);
+ if (!physical_device->has_clear_state)
+ radeon_set_context_reg(cs, R_028AB8_VGT_VTX_CNT_EN, 0x0);
+ if (physical_device->rad_info.chip_class < CIK)
+ radeon_set_config_reg(cs, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) |
+ S_008A14_CLIP_VTX_REORDER_ENA(1));
+
+ radeon_set_context_reg(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210);
+ radeon_set_context_reg(cs, R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98);
+
+ if (!physical_device->has_clear_state)
+ radeon_set_context_reg(cs, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0);
+
+ /* CLEAR_STATE doesn't clear these correctly on certain generations.
+ * I don't know why. Deduced by trial and error.
+ */
+ if (physical_device->rad_info.chip_class <= CIK) {
+ radeon_set_context_reg(cs, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
+ radeon_set_context_reg(cs, R_028204_PA_SC_WINDOW_SCISSOR_TL,
+ S_028204_WINDOW_OFFSET_DISABLE(1));
+ radeon_set_context_reg(cs, R_028240_PA_SC_GENERIC_SCISSOR_TL,
+ S_028240_WINDOW_OFFSET_DISABLE(1));
+ radeon_set_context_reg(cs, R_028244_PA_SC_GENERIC_SCISSOR_BR,
+ S_028244_BR_X(16384) | S_028244_BR_Y(16384));
+ radeon_set_context_reg(cs, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0);
+ radeon_set_context_reg(cs, R_028034_PA_SC_SCREEN_SCISSOR_BR,
+ S_028034_BR_X(16384) | S_028034_BR_Y(16384));
+ }
+
+ if (!physical_device->has_clear_state) {
+ for (i = 0; i < 16; i++) {
+ radeon_set_context_reg(cs, R_0282D0_PA_SC_VPORT_ZMIN_0 + i*8, 0);
+ radeon_set_context_reg(cs, R_0282D4_PA_SC_VPORT_ZMAX_0 + i*8, fui(1.0));
}
}
- radeon_set_context_reg(cs, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1));
- radeon_set_context_reg(cs, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1));
- radeon_set_context_reg(cs, R_028244_PA_SC_GENERIC_SCISSOR_BR,
- S_028244_BR_X(16384) | S_028244_BR_Y(16384));
- radeon_set_context_reg(cs, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0);
- radeon_set_context_reg(cs, R_028034_PA_SC_SCREEN_SCISSOR_BR,
- S_028034_BR_X(16384) | S_028034_BR_Y(16384));
-
- radeon_set_context_reg(cs, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF);
- radeon_set_context_reg(cs, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA);
- /* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on SI */
- radeon_set_context_reg(cs, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0);
- radeon_set_context_reg(cs, R_028820_PA_CL_NANINF_CNTL, 0);
-
- radeon_set_context_reg(cs, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
- radeon_set_context_reg(cs, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
- radeon_set_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, 0x0);
+ if (!physical_device->has_clear_state) {
+ radeon_set_context_reg(cs, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF);
+ radeon_set_context_reg(cs, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA);
+ /* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on SI */
+ radeon_set_context_reg(cs, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0);
+ radeon_set_context_reg(cs, R_028820_PA_CL_NANINF_CNTL, 0);
+ radeon_set_context_reg(cs, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
+ radeon_set_context_reg(cs, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
+ radeon_set_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, 0x0);
+ }
+
radeon_set_context_reg(cs, R_02800C_DB_RENDER_OVERRIDE,
S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE));
@@ -382,6 +421,11 @@
radeon_set_uconfig_reg(cs, R_030924_VGT_MIN_VTX_INDX, 0);
radeon_set_uconfig_reg(cs, R_030928_VGT_INDX_OFFSET, 0);
} else {
+ /* These registers, when written, also overwrite the
+ * CLEAR_STATE context, so we can't rely on CLEAR_STATE setting
+ * them. It would be an issue if there was another UMD
+ * changing them.
+ */
radeon_set_context_reg(cs, R_028400_VGT_MAX_VTX_INDX, ~0);
radeon_set_context_reg(cs, R_028404_VGT_MIN_VTX_INDX, 0);
radeon_set_context_reg(cs, R_028408_VGT_INDX_OFFSET, 0);
@@ -389,11 +433,15 @@
if (physical_device->rad_info.chip_class >= CIK) {
if (physical_device->rad_info.chip_class >= GFX9) {
- radeon_set_sh_reg(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, S_00B41C_CU_EN(0xffff));
+ radeon_set_sh_reg(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
+ S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F));
} else {
- radeon_set_sh_reg(cs, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xffff));
- radeon_set_sh_reg(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 0);
- radeon_set_sh_reg(cs, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, S_00B31C_CU_EN(0xffff));
+ radeon_set_sh_reg(cs, R_00B51C_SPI_SHADER_PGM_RSRC3_LS,
+ S_00B51C_CU_EN(0xffff) | S_00B51C_WAVE_LIMIT(0x3F));
+ radeon_set_sh_reg(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
+ S_00B41C_WAVE_LIMIT(0x3F));
+ radeon_set_sh_reg(cs, R_00B31C_SPI_SHADER_PGM_RSRC3_ES,
+ S_00B31C_CU_EN(0xffff) | S_00B31C_WAVE_LIMIT(0x3F));
/* If this is 0, Bonaire can hang even if GS isn't being used.
* Other chips are unaffected. These are suboptimal values,
* but we don't use on-chip GS.
@@ -402,7 +450,8 @@
S_028A44_ES_VERTS_PER_SUBGRP(64) |
S_028A44_GS_PRIMS_PER_SUBGRP(4));
}
- radeon_set_sh_reg(cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, S_00B21C_CU_EN(0xffff));
+ radeon_set_sh_reg(cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
+ S_00B21C_CU_EN(0xffff) | S_00B21C_WAVE_LIMIT(0x3F));
if (physical_device->rad_info.num_good_compute_units /
(physical_device->rad_info.max_se * physical_device->rad_info.max_sh_per_se) <= 4) {
@@ -412,7 +461,8 @@
*
* LATE_ALLOC_VS = 2 is the highest safe number.
*/
- radeon_set_sh_reg(cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff));
+ radeon_set_sh_reg(cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS,
+ S_00B118_CU_EN(0xffff) | S_00B118_WAVE_LIMIT(0x3F) );
radeon_set_sh_reg(cs, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(2));
} else {
/* Set LATE_ALLOC_VS == 31. It should be less than
@@ -420,11 +470,13 @@
* - VS can't execute on CU0.
* - If HS writes outputs to LDS, LS can't execute on CU0.
*/
- radeon_set_sh_reg(cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xfffe));
+ radeon_set_sh_reg(cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS,
+ S_00B118_CU_EN(0xfffe) | S_00B118_WAVE_LIMIT(0x3F));
radeon_set_sh_reg(cs, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(31));
}
- radeon_set_sh_reg(cs, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, S_00B01C_CU_EN(0xffff));
+ radeon_set_sh_reg(cs, R_00B01C_SPI_SHADER_PGM_RSRC3_PS,
+ S_00B01C_CU_EN(0xffff) | S_00B01C_WAVE_LIMIT(0x3F));
}
if (physical_device->rad_info.chip_class >= VI) {
@@ -432,9 +484,6 @@
radeon_set_context_reg(cs, R_028424_CB_DCC_CONTROL,
S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1) |
S_028424_OVERWRITE_COMBINER_WATERMARK(4));
- if (physical_device->rad_info.family < CHIP_POLARIS10)
- radeon_set_context_reg(cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 30);
- radeon_set_context_reg(cs, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32);
vgt_tess_distribution = S_028B50_ACCUM_ISOLINE(32) |
S_028B50_ACCUM_TRI(11) |
@@ -447,14 +496,11 @@
radeon_set_context_reg(cs, R_028B50_VGT_TESS_DISTRIBUTION,
vgt_tess_distribution);
- } else {
+ } else if (!physical_device->has_clear_state) {
radeon_set_context_reg(cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14);
radeon_set_context_reg(cs, R_028C5C_VGT_OUT_DEALLOC_CNTL, 16);
}
- if (physical_device->has_rbplus)
- radeon_set_context_reg(cs, R_028C40_PA_SC_SHADER_CONTROL, 0);
-
if (physical_device->rad_info.chip_class >= GFX9) {
unsigned num_se = physical_device->rad_info.max_se;
unsigned pc_lines = 0;
@@ -472,10 +518,6 @@
radeon_set_context_reg(cs, R_028060_DB_DFSM_CONTROL,
S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF));
- radeon_set_context_reg(cs, R_028064_DB_RENDER_FILTER, 0);
- /* TODO: We can use this to disable RBs for rendering to GART: */
- radeon_set_context_reg(cs, R_02835C_PA_SC_TILE_STEERING_OVERRIDE, 0);
- radeon_set_context_reg(cs, R_02883C_PA_SU_OVER_RASTERIZATION_CNTL, 0);
/* TODO: Enable the binner: */
radeon_set_context_reg(cs, R_028C44_PA_SC_BINNER_CNTL_0,
S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_LEGACY_SC) |
@@ -487,6 +529,19 @@
S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1));
radeon_set_uconfig_reg(cs, R_030968_VGT_INSTANCE_BASE_ID, 0);
}
+
+ unsigned tmp = (unsigned)(1.0 * 8.0);
+ radeon_set_context_reg_seq(cs, R_028A00_PA_SU_POINT_SIZE, 1);
+ radeon_emit(cs, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp));
+ radeon_set_context_reg_seq(cs, R_028A04_PA_SU_POINT_MINMAX, 1);
+ radeon_emit(cs, S_028A04_MIN_SIZE(radv_pack_float_12p4(0)) |
+ S_028A04_MAX_SIZE(radv_pack_float_12p4(8192/2)));
+
+ if (!physical_device->has_clear_state) {
+ radeon_set_context_reg(cs, R_028004_DB_COUNT_CONTROL,
+ S_028004_ZPASS_INCREMENT_DISABLE(1));
+ }
+
si_emit_compute(physical_device, cs);
}
@@ -621,7 +676,8 @@
int i;
float scale[3], translate[3], guardband_x = INFINITY, guardband_y = INFINITY;
const float max_range = 32767.0f;
- assert(count);
+ if (!count)
+ return;
radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL + first * 4 * 2, count * 2);
for (i = 0; i < count; i++) {
@@ -681,75 +737,27 @@
enum chip_class chip_class = cmd_buffer->device->physical_device->rad_info.chip_class;
enum radeon_family family = cmd_buffer->device->physical_device->rad_info.family;
struct radeon_info *info = &cmd_buffer->device->physical_device->rad_info;
- unsigned prim = cmd_buffer->state.pipeline->graphics.prim;
- unsigned primgroup_size = 128; /* recommended without a GS */
- unsigned max_primgroup_in_wave = 2;
+ const unsigned max_primgroup_in_wave = 2;
/* SWITCH_ON_EOP(0) is always preferable. */
bool wd_switch_on_eop = false;
bool ia_switch_on_eop = false;
bool ia_switch_on_eoi = false;
bool partial_vs_wave = false;
- bool partial_es_wave = false;
- uint32_t num_prims = radv_prims_for_vertices(&cmd_buffer->state.pipeline->graphics.prim_vertex_count, draw_vertex_count);
+ bool partial_es_wave = cmd_buffer->state.pipeline->graphics.partial_es_wave;
bool multi_instances_smaller_than_primgroup;
- if (radv_pipeline_has_tess(cmd_buffer->state.pipeline))
- primgroup_size = cmd_buffer->state.pipeline->graphics.tess.num_patches;
- else if (radv_pipeline_has_gs(cmd_buffer->state.pipeline))
- primgroup_size = 64; /* recommended with a GS */
-
- multi_instances_smaller_than_primgroup = indirect_draw || (instanced_draw &&
- num_prims < primgroup_size);
- if (cmd_buffer->state.pipeline->shaders[MESA_SHADER_FRAGMENT]->info.fs.prim_id_input)
- ia_switch_on_eoi = true;
-
- if (radv_pipeline_has_tess(cmd_buffer->state.pipeline)) {
- /* SWITCH_ON_EOI must be set if PrimID is used. */
- if (cmd_buffer->state.pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.uses_prim_id ||
- cmd_buffer->state.pipeline->shaders[MESA_SHADER_TESS_EVAL]->info.tes.uses_prim_id)
- ia_switch_on_eoi = true;
-
- /* Bug with tessellation and GS on Bonaire and older 2 SE chips. */
- if ((family == CHIP_TAHITI ||
- family == CHIP_PITCAIRN ||
- family == CHIP_BONAIRE) &&
- radv_pipeline_has_gs(cmd_buffer->state.pipeline))
- partial_vs_wave = true;
-
- /* Needed for 028B6C_DISTRIBUTION_MODE != 0 */
- if (cmd_buffer->device->has_distributed_tess) {
- if (radv_pipeline_has_gs(cmd_buffer->state.pipeline)) {
- if (chip_class <= VI)
- partial_es_wave = true;
-
- if (family == CHIP_TONGA ||
- family == CHIP_FIJI ||
- family == CHIP_POLARIS10 ||
- family == CHIP_POLARIS11 ||
- family == CHIP_POLARIS12)
- partial_vs_wave = true;
- } else {
- partial_vs_wave = true;
- }
- }
+ multi_instances_smaller_than_primgroup = indirect_draw;
+ if (!multi_instances_smaller_than_primgroup && instanced_draw) {
+ uint32_t num_prims = radv_prims_for_vertices(&cmd_buffer->state.pipeline->graphics.prim_vertex_count, draw_vertex_count);
+ if (num_prims < cmd_buffer->state.pipeline->graphics.primgroup_size)
+ multi_instances_smaller_than_primgroup = true;
}
- /* TODO linestipple */
+
+ ia_switch_on_eoi = cmd_buffer->state.pipeline->graphics.ia_switch_on_eoi;
+ partial_vs_wave = cmd_buffer->state.pipeline->graphics.partial_vs_wave;
if (chip_class >= CIK) {
- /* WD_SWITCH_ON_EOP has no effect on GPUs with less than
- * 4 shader engines. Set 1 to pass the assertion below.
- * The other cases are hardware requirements. */
- if (info->max_se < 4 ||
- prim == V_008958_DI_PT_POLYGON ||
- prim == V_008958_DI_PT_LINELOOP ||
- prim == V_008958_DI_PT_TRIFAN ||
- prim == V_008958_DI_PT_TRISTRIP_ADJ ||
- (cmd_buffer->state.pipeline->graphics.prim_restart_enable &&
- (family < CHIP_POLARIS10 ||
- (prim != V_008958_DI_PT_POINTLIST &&
- prim != V_008958_DI_PT_LINESTRIP &&
- prim != V_008958_DI_PT_TRISTRIP))))
- wd_switch_on_eop = true;
+ wd_switch_on_eop = cmd_buffer->state.pipeline->graphics.wd_switch_on_eop;
/* Hawaii hangs if instancing is enabled and WD_SWITCH_ON_EOP is 0.
* We don't know that for indirect drawing, so treat it as
@@ -776,6 +784,7 @@
if (ia_switch_on_eoi &&
(family == CHIP_HAWAII ||
(chip_class == VI &&
+ /* max primgroup in wave is always 2 - leave this for documentation */
(radv_pipeline_has_gs(cmd_buffer->state.pipeline) || max_primgroup_in_wave != 2))))
partial_vs_wave = true;
@@ -792,34 +801,28 @@
partial_es_wave = true;
if (radv_pipeline_has_gs(cmd_buffer->state.pipeline)) {
-
- if (radv_pipeline_has_gs(cmd_buffer->state.pipeline) &&
- cmd_buffer->state.pipeline->shaders[MESA_SHADER_GEOMETRY]->info.gs.uses_prim_id)
- ia_switch_on_eoi = true;
-
- /* GS requirement. */
- if (SI_GS_PER_ES / primgroup_size >= cmd_buffer->device->gs_table_depth - 3)
- partial_es_wave = true;
-
- /* Hw bug with single-primitive instances and SWITCH_ON_EOI
- * on multi-SE chips. */
- if (info->max_se >= 2 && ia_switch_on_eoi &&
- ((instanced_draw || indirect_draw) &&
- num_prims <= 1))
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VGT_FLUSH;
+ /* GS hw bug with single-primitive instances and SWITCH_ON_EOI.
+ * The hw doc says all multi-SE chips are affected, but amdgpu-pro Vulkan
+ * only applies it to Hawaii. Do what amdgpu-pro Vulkan does.
+ */
+ if (family == CHIP_HAWAII && ia_switch_on_eoi) {
+ bool set_vgt_flush = indirect_draw;
+ if (!set_vgt_flush && instanced_draw) {
+ uint32_t num_prims = radv_prims_for_vertices(&cmd_buffer->state.pipeline->graphics.prim_vertex_count, draw_vertex_count);
+ if (num_prims <= 1)
+ set_vgt_flush = true;
+ }
+ if (set_vgt_flush)
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VGT_FLUSH;
+ }
}
- return S_028AA8_SWITCH_ON_EOP(ia_switch_on_eop) |
+ return cmd_buffer->state.pipeline->graphics.base_ia_multi_vgt_param |
+ S_028AA8_SWITCH_ON_EOP(ia_switch_on_eop) |
S_028AA8_SWITCH_ON_EOI(ia_switch_on_eoi) |
S_028AA8_PARTIAL_VS_WAVE_ON(partial_vs_wave) |
S_028AA8_PARTIAL_ES_WAVE_ON(partial_es_wave) |
- S_028AA8_PRIMGROUP_SIZE(primgroup_size - 1) |
- S_028AA8_WD_SWITCH_ON_EOP(chip_class >= CIK ? wd_switch_on_eop : 0) |
- /* The following field was moved to VGT_SHADER_STAGES_EN in GFX9. */
- S_028AA8_MAX_PRIMGRP_IN_WAVE(chip_class == VI ?
- max_primgroup_in_wave : 0) |
- S_030960_EN_INST_OPT_BASIC(chip_class >= GFX9) |
- S_030960_EN_INST_OPT_ADV(chip_class >= GFX9);
+ S_028AA8_WD_SWITCH_ON_EOP(chip_class >= CIK ? wd_switch_on_eop : 0);
}
@@ -970,14 +973,12 @@
radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_DB_META) | EVENT_INDEX(0));
}
- if (!flush_cb_db) {
- if (flush_bits & RADV_CMD_FLAG_PS_PARTIAL_FLUSH) {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, predicated));
- radeon_emit(cs, EVENT_TYPE(V_028A90_PS_PARTIAL_FLUSH) | EVENT_INDEX(4));
- } else if (flush_bits & RADV_CMD_FLAG_VS_PARTIAL_FLUSH) {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, predicated));
- radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
- }
+ if (flush_bits & RADV_CMD_FLAG_PS_PARTIAL_FLUSH) {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_PS_PARTIAL_FLUSH) | EVENT_INDEX(4));
+ } else if (flush_bits & RADV_CMD_FLAG_VS_PARTIAL_FLUSH) {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
}
if (flush_bits & RADV_CMD_FLAG_CS_PARTIAL_FLUSH) {
@@ -988,6 +989,11 @@
if (chip_class >= GFX9 && flush_cb_db) {
unsigned cb_db_event, tc_flags;
+#if 0
+ /* This breaks a bunch of:
+ dEQP-VK.renderpass.dedicated_allocation.formats.d32_sfloat_s8_uint.input*.
+ use the big hammer always.
+ */
/* Set the CB/DB flush event. */
switch (flush_cb_db) {
case RADV_CMD_FLAG_FLUSH_AND_INV_CB:
@@ -1000,7 +1006,9 @@
/* both CB & DB */
cb_db_event = V_028A90_CACHE_FLUSH_AND_INV_TS_EVENT;
}
-
+#else
+ cb_db_event = V_028A90_CACHE_FLUSH_AND_INV_TS_EVENT;
+#endif
/* TC | TC_WB = invalidate L2 data
* TC_MD | TC_WB = invalidate L2 metadata
* TC | TC_WB | TC_MD = invalidate L2 data & metadata
@@ -1114,7 +1122,7 @@
uint32_t *ptr = NULL;
uint64_t va = 0;
if (chip_class == GFX9) {
- va = cmd_buffer->device->ws->buffer_get_va(cmd_buffer->gfx9_fence_bo) + cmd_buffer->gfx9_fence_offset;
+ va = radv_buffer_get_va(cmd_buffer->gfx9_fence_bo) + cmd_buffer->gfx9_fence_offset;
ptr = &cmd_buffer->gfx9_fence_idx;
}
si_cs_emit_cache_flush(cmd_buffer->cs,
@@ -1293,7 +1301,7 @@
radv_cmd_buffer_upload_alloc(cmd_buffer, buf_size, SI_CPDMA_ALIGNMENT, &offset, &ptr);
- va = cmd_buffer->device->ws->buffer_get_va(cmd_buffer->upload.upload_bo);
+ va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
va += offset;
si_cp_dma_prepare(cmd_buffer, size, size, &dma_flags);
@@ -1464,25 +1472,25 @@
switch (nr_samples) {
default:
case 1:
- radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 0);
- radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, 0);
- radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, 0);
- radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, 0);
+ radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 0);
+ radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, 0);
+ radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, 0);
+ radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, 0);
break;
case 2:
- radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_2x[0]);
- radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_2x[1]);
- radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, eg_sample_locs_2x[2]);
- radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, eg_sample_locs_2x[3]);
+ radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_2x[0]);
+ radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_2x[1]);
+ radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, eg_sample_locs_2x[2]);
+ radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, eg_sample_locs_2x[3]);
break;
case 4:
- radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_4x[0]);
- radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_4x[1]);
- radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, eg_sample_locs_4x[2]);
- radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, eg_sample_locs_4x[3]);
+ radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_4x[0]);
+ radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_4x[1]);
+ radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, eg_sample_locs_4x[2]);
+ radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, eg_sample_locs_4x[3]);
break;
case 8:
- radeon_set_context_reg_seq(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 14);
+ radeon_set_context_reg_seq(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 14);
radeon_emit(cs, cm_sample_locs_8x[0]);
radeon_emit(cs, cm_sample_locs_8x[4]);
radeon_emit(cs, 0);
@@ -1499,7 +1507,7 @@
radeon_emit(cs, cm_sample_locs_8x[7]);
break;
case 16:
- radeon_set_context_reg_seq(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 16);
+ radeon_set_context_reg_seq(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 16);
radeon_emit(cs, cm_sample_locs_16x[0]);
radeon_emit(cs, cm_sample_locs_16x[4]);
radeon_emit(cs, cm_sample_locs_16x[8]);
diff -Nru mesa-17.2.4/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c mesa-17.3.3/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c
--- mesa-17.2.4/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c 2018-01-18 21:30:28.000000000 +0000
@@ -67,7 +67,7 @@
p_atomic_inc(&range->bo->ref_count);
int r = radv_amdgpu_bo_va_op(bo->ws->dev, range->bo->bo, range->bo_offset, range->size,
- range->offset + bo->va, 0, AMDGPU_VA_OP_MAP);
+ range->offset + bo->base.va, 0, AMDGPU_VA_OP_MAP);
if (r)
abort();
}
@@ -82,7 +82,7 @@
return; /* TODO: PRT mapping */
int r = radv_amdgpu_bo_va_op(bo->ws->dev, range->bo->bo, range->bo_offset, range->size,
- range->offset + bo->va, 0, AMDGPU_VA_OP_UNMAP);
+ range->offset + bo->base.va, 0, AMDGPU_VA_OP_UNMAP);
if (r)
abort();
radv_amdgpu_winsys_bo_destroy((struct radeon_winsys_bo *)range->bo);
@@ -252,7 +252,7 @@
bo->ws->num_buffers--;
pthread_mutex_unlock(&bo->ws->global_bo_list_lock);
}
- radv_amdgpu_bo_va_op(bo->ws->dev, bo->bo, 0, bo->size, bo->va, 0, AMDGPU_VA_OP_UNMAP);
+ radv_amdgpu_bo_va_op(bo->ws->dev, bo->bo, 0, bo->size, bo->base.va, 0, AMDGPU_VA_OP_UNMAP);
amdgpu_bo_free(bo->bo);
}
amdgpu_va_range_free(bo->va_handle);
@@ -295,7 +295,7 @@
if (r)
goto error_va_alloc;
- bo->va = va;
+ bo->base.va = va;
bo->va_handle = va_handle;
bo->size = size;
bo->ws = ws;
@@ -330,7 +330,12 @@
request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
if (flags & RADEON_FLAG_GTT_WC)
request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
+ if (!(flags & RADEON_FLAG_IMPLICIT_SYNC) && ws->info.drm_minor >= 22)
+ request.flags |= AMDGPU_GEM_CREATE_EXPLICIT_SYNC;
+ /* this won't do anything on pre 4.9 kernels */
+ if (ws->zero_all_vram_allocs && (initial_domain & RADEON_DOMAIN_VRAM))
+ request.flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED;
r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
if (r) {
fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n");
@@ -364,12 +369,6 @@
return NULL;
}
-static uint64_t radv_amdgpu_winsys_bo_get_va(struct radeon_winsys_bo *_bo)
-{
- struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
- return bo->va;
-}
-
static void *
radv_amdgpu_winsys_bo_map(struct radeon_winsys_bo *_bo)
{
@@ -430,7 +429,7 @@
initial |= RADEON_DOMAIN_GTT;
bo->bo = result.buf_handle;
- bo->va = va;
+ bo->base.va = va;
bo->va_handle = va_handle;
bo->initial_domain = initial;
bo->size = result.alloc_size;
@@ -524,7 +523,6 @@
{
ws->base.buffer_create = radv_amdgpu_winsys_bo_create;
ws->base.buffer_destroy = radv_amdgpu_winsys_bo_destroy;
- ws->base.buffer_get_va = radv_amdgpu_winsys_bo_get_va;
ws->base.buffer_map = radv_amdgpu_winsys_bo_map;
ws->base.buffer_unmap = radv_amdgpu_winsys_bo_unmap;
ws->base.buffer_from_fd = radv_amdgpu_winsys_bo_from_fd;
diff -Nru mesa-17.2.4/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h mesa-17.3.3/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h
--- mesa-17.2.4/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h 2018-01-18 21:30:28.000000000 +0000
@@ -40,8 +40,8 @@
};
struct radv_amdgpu_winsys_bo {
+ struct radeon_winsys_bo base;
amdgpu_va_handle va_handle;
- uint64_t va;
uint64_t size;
struct radv_amdgpu_winsys *ws;
bool is_virtual;
diff -Nru mesa-17.2.4/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c mesa-17.3.3/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
--- mesa-17.2.4/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c 2018-01-18 21:30:28.000000000 +0000
@@ -215,7 +215,7 @@
return NULL;
}
- cs->ib.ib_mc_address = radv_amdgpu_winsys_bo(cs->ib_buffer)->va;
+ cs->ib.ib_mc_address = radv_amdgpu_winsys_bo(cs->ib_buffer)->base.va;
cs->base.buf = (uint32_t *)cs->ib_mapped;
cs->base.max_dw = ib_size / 4 - 4;
cs->ib_size_ptr = &cs->ib.size;
@@ -306,8 +306,8 @@
cs->ws->base.cs_add_buffer(&cs->base, cs->ib_buffer, 8);
cs->base.buf[cs->base.cdw++] = PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0);
- cs->base.buf[cs->base.cdw++] = radv_amdgpu_winsys_bo(cs->ib_buffer)->va;
- cs->base.buf[cs->base.cdw++] = radv_amdgpu_winsys_bo(cs->ib_buffer)->va >> 32;
+ cs->base.buf[cs->base.cdw++] = radv_amdgpu_winsys_bo(cs->ib_buffer)->base.va;
+ cs->base.buf[cs->base.cdw++] = radv_amdgpu_winsys_bo(cs->ib_buffer)->base.va >> 32;
cs->ib_size_ptr = cs->base.buf + cs->base.cdw;
cs->base.buf[cs->base.cdw++] = S_3F2_CHAIN(1) | S_3F2_VALID(1);
@@ -360,7 +360,7 @@
cs->ws->base.buffer_destroy(cs->old_ib_buffers[i]);
cs->num_old_ib_buffers = 0;
- cs->ib.ib_mc_address = radv_amdgpu_winsys_bo(cs->ib_buffer)->va;
+ cs->ib.ib_mc_address = radv_amdgpu_winsys_bo(cs->ib_buffer)->base.va;
cs->ib_size_ptr = &cs->ib.size;
cs->ib.size = 0;
}
@@ -841,7 +841,7 @@
uint32_t *ptr;
unsigned cnt = 0;
unsigned size = 0;
-
+ unsigned pad_words = 0;
if (preamble_cs)
size += preamble_cs->cdw;
@@ -850,6 +850,10 @@
++cnt;
}
+ while(!size || (size & 7)) {
+ size++;
+ pad_words++;
+ }
assert(cnt);
bo = ws->buffer_create(ws, 4 * size, 4096, RADEON_DOMAIN_GTT, RADEON_FLAG_CPU_ACCESS);
@@ -867,10 +871,8 @@
}
- while(!size || (size & 7)) {
+ for (unsigned j = 0; j < pad_words; ++j)
*ptr++ = pad_word;
- ++size;
- }
memset(&request, 0, sizeof(request));
@@ -884,7 +886,7 @@
}
ib.size = size;
- ib.ib_mc_address = ws->buffer_get_va(bo);
+ ib.ib_mc_address = radv_buffer_get_va(bo);
request.ip_type = cs0->hw_ip;
request.ring = queue_idx;
@@ -949,7 +951,6 @@
return ret;
}
-
static void *radv_amdgpu_winsys_get_cpu_addr(void *_cs, uint64_t addr)
{
struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)_cs;
@@ -962,17 +963,30 @@
bo = (struct radv_amdgpu_winsys_bo*)
(i == cs->num_old_ib_buffers ? cs->ib_buffer : cs->old_ib_buffers[i]);
- if (addr >= bo->va && addr - bo->va < bo->size) {
+ if (addr >= bo->base.va && addr - bo->base.va < bo->size) {
if (amdgpu_bo_cpu_map(bo->bo, &ret) == 0)
- return (char *)ret + (addr - bo->va);
+ return (char *)ret + (addr - bo->base.va);
}
}
+ if(cs->ws->debug_all_bos) {
+ pthread_mutex_lock(&cs->ws->global_bo_list_lock);
+ list_for_each_entry(struct radv_amdgpu_winsys_bo, bo,
+ &cs->ws->global_bo_list, global_list_item) {
+ if (addr >= bo->base.va && addr - bo->base.va < bo->size) {
+ if (amdgpu_bo_cpu_map(bo->bo, &ret) == 0) {
+ pthread_mutex_unlock(&cs->ws->global_bo_list_lock);
+ return (char *)ret + (addr - bo->base.va);
+ }
+ }
+ }
+ pthread_mutex_unlock(&cs->ws->global_bo_list_lock);
+ }
return ret;
}
static void radv_amdgpu_winsys_cs_dump(struct radeon_winsys_cs *_cs,
FILE* file,
- uint32_t trace_id)
+ const int *trace_ids, int trace_id_count)
{
struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)_cs;
void *ib = cs->base.buf;
@@ -983,21 +997,40 @@
num_dw = cs->ib.size;
}
assert(ib);
- ac_parse_ib(file, ib, num_dw, trace_id, "main IB", cs->ws->info.chip_class,
- radv_amdgpu_winsys_get_cpu_addr, cs);
+ ac_parse_ib(file, ib, num_dw, trace_ids, trace_id_count, "main IB",
+ cs->ws->info.chip_class, radv_amdgpu_winsys_get_cpu_addr, cs);
}
-static struct radeon_winsys_ctx *radv_amdgpu_ctx_create(struct radeon_winsys *_ws)
+static uint32_t radv_to_amdgpu_priority(enum radeon_ctx_priority radv_priority)
+{
+ switch (radv_priority) {
+ case RADEON_CTX_PRIORITY_REALTIME:
+ return AMDGPU_CTX_PRIORITY_VERY_HIGH;
+ case RADEON_CTX_PRIORITY_HIGH:
+ return AMDGPU_CTX_PRIORITY_HIGH;
+ case RADEON_CTX_PRIORITY_MEDIUM:
+ return AMDGPU_CTX_PRIORITY_NORMAL;
+ case RADEON_CTX_PRIORITY_LOW:
+ return AMDGPU_CTX_PRIORITY_LOW;
+ default:
+ unreachable("Invalid context priority");
+ }
+}
+
+static struct radeon_winsys_ctx *radv_amdgpu_ctx_create(struct radeon_winsys *_ws,
+ enum radeon_ctx_priority priority)
{
struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
struct radv_amdgpu_ctx *ctx = CALLOC_STRUCT(radv_amdgpu_ctx);
+ uint32_t amdgpu_priority = radv_to_amdgpu_priority(priority);
int r;
if (!ctx)
return NULL;
- r = amdgpu_cs_ctx_create(ws->dev, &ctx->ctx);
+
+ r = amdgpu_cs_ctx_create2(ws->dev, amdgpu_priority, &ctx->ctx);
if (r) {
- fprintf(stderr, "amdgpu: radv_amdgpu_cs_ctx_create failed. (%i)\n", r);
+ fprintf(stderr, "amdgpu: radv_amdgpu_cs_ctx_create2 failed. (%i)\n", r);
goto error_create;
}
ctx->ws = ws;
diff -Nru mesa-17.2.4/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.h mesa-17.3.3/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.h
--- mesa-17.2.4/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.h 2018-01-18 21:30:28.000000000 +0000
@@ -32,7 +32,6 @@
#include
#include
#include
-#include "r600d_common.h"
#include
#include "radv_radeon_winsys.h"
diff -Nru mesa-17.2.4/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c mesa-17.3.3/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c
--- mesa-17.2.4/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c 2018-01-18 21:30:28.000000000 +0000
@@ -42,9 +42,6 @@
{
unsigned type = RADEON_SURF_GET(surf->flags, TYPE);
- if (!(surf->flags & RADEON_SURF_HAS_TILE_MODE_INDEX))
- return -EINVAL;
-
if (!surf->blk_w || !surf->blk_h)
return -EINVAL;
diff -Nru mesa-17.2.4/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c mesa-17.3.3/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c
--- mesa-17.2.4/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c 2018-01-18 21:30:28.000000000 +0000
@@ -72,6 +72,23 @@
*info = ((struct radv_amdgpu_winsys *)rws)->info;
}
+static bool radv_amdgpu_winsys_read_registers(struct radeon_winsys *rws,
+ unsigned reg_offset,
+ unsigned num_registers, uint32_t *out)
+{
+ struct radv_amdgpu_winsys *ws = (struct radv_amdgpu_winsys*)rws;
+
+ return amdgpu_read_mm_registers(ws->dev, reg_offset / 4, num_registers,
+ 0xffffffff, 0, out) == 0;
+}
+
+static const char *radv_amdgpu_winsys_get_chip_name(struct radeon_winsys *rws)
+{
+ amdgpu_device_handle dev = ((struct radv_amdgpu_winsys *)rws)->dev;
+
+ return amdgpu_get_marketing_name(dev);
+}
+
static void radv_amdgpu_winsys_destroy(struct radeon_winsys *rws)
{
struct radv_amdgpu_winsys *ws = (struct radv_amdgpu_winsys*)rws;
@@ -106,10 +123,13 @@
if (debug_flags & RADV_DEBUG_NO_IBS)
ws->use_ib_bos = false;
- ws->batchchain = !!(perftest_flags & RADV_PERFTEST_BATCHCHAIN);
+ ws->zero_all_vram_allocs = debug_flags & RADV_DEBUG_ZERO_VRAM;
+ ws->batchchain = !(perftest_flags & RADV_PERFTEST_NO_BATCHCHAIN);
LIST_INITHEAD(&ws->global_bo_list);
pthread_mutex_init(&ws->global_bo_list_lock, NULL);
ws->base.query_info = radv_amdgpu_winsys_query_info;
+ ws->base.read_registers = radv_amdgpu_winsys_read_registers;
+ ws->base.get_chip_name = radv_amdgpu_winsys_get_chip_name;
ws->base.destroy = radv_amdgpu_winsys_destroy;
radv_amdgpu_bo_init_functions(ws);
radv_amdgpu_cs_init_functions(ws);
diff -Nru mesa-17.2.4/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h mesa-17.3.3/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h
--- mesa-17.2.4/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h 2018-01-18 21:30:28.000000000 +0000
@@ -44,11 +44,12 @@
bool debug_all_bos;
bool batchchain;
- pthread_mutex_t global_bo_list_lock;
- struct list_head global_bo_list;
+ bool use_ib_bos;
+ bool zero_all_vram_allocs;
unsigned num_buffers;
- bool use_ib_bos;
+ pthread_mutex_t global_bo_list_lock;
+ struct list_head global_bo_list;
};
static inline struct radv_amdgpu_winsys *
diff -Nru mesa-17.2.4/src/broadcom/cle/gen_pack_header.py mesa-17.3.3/src/broadcom/cle/gen_pack_header.py
--- mesa-17.2.4/src/broadcom/cle/gen_pack_header.py 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/broadcom/cle/gen_pack_header.py 2018-01-18 21:30:28.000000000 +0000
@@ -152,6 +152,8 @@
type = 'uint32_t'
elif self.type in self.parser.structs:
type = 'struct ' + self.parser.gen_prefix(safe_name(self.type))
+ elif self.type in self.parser.enums:
+ type = 'enum ' + self.parser.gen_prefix(safe_name(self.type))
elif self.type == 'mbo':
return
else:
@@ -283,6 +285,9 @@
elif field.type == "uint":
s = "__gen_uint(values->%s, %d, %d)" % \
(name, start, end)
+ elif field.type in self.parser.enums:
+ s = "__gen_uint(values->%s, %d, %d)" % \
+ (name, start, end)
elif field.type == "int":
s = "__gen_sint(values->%s, %d, %d)" % \
(name, start, end)
@@ -335,6 +340,8 @@
convert = "__gen_unpack_address"
elif field.type == "uint":
convert = "__gen_unpack_uint"
+ elif field.type in self.parser.enums:
+ convert = "__gen_unpack_uint"
elif field.type == "int":
convert = "__gen_unpack_sint"
elif field.type == "bool":
@@ -370,6 +377,8 @@
self.packet = None
self.struct = None
self.structs = {}
+ # Set of enum names we've seen.
+ self.enums = set()
self.registers = {}
def gen_prefix(self, name):
@@ -423,6 +432,7 @@
elif name == "enum":
self.values = []
self.enum = safe_name(attrs["name"])
+ self.enums.add(attrs["name"])
if "prefix" in attrs:
self.prefix = safe_name(attrs["prefix"])
else:
@@ -478,13 +488,7 @@
print("}\n#endif\n")
- def emit_packet(self):
- name = self.packet
-
- assert(self.group.fields[0].name == "opcode")
- print('#define %-33s %6d' %
- (name + "_opcode", self.group.fields[0].default))
-
+ def emit_header(self, name):
default_fields = []
for field in self.group.fields:
if not type(field) is Field:
@@ -493,11 +497,18 @@
continue
default_fields.append(" .%-35s = %6d" % (field.name, field.default))
- if default_fields:
- print('#define %-40s\\' % (name + '_header'))
- print(", \\\n".join(default_fields))
- print('')
+ print('#define %-40s\\' % (name + '_header'))
+ print(", \\\n".join(default_fields))
+ print('')
+ def emit_packet(self):
+ name = self.packet
+
+ assert(self.group.fields[0].name == "opcode")
+ print('#define %-33s %6d' %
+ (name + "_opcode", self.group.fields[0].default))
+
+ self.emit_header(name)
self.emit_template_struct(self.packet, self.group)
self.emit_pack_function(self.packet, self.group)
self.emit_unpack_function(self.packet, self.group)
@@ -516,10 +527,8 @@
def emit_struct(self):
name = self.struct
- # Emit an empty header define so that we can use the CL pack functions
- # with structs.
- print('#define ' + name + '_header')
+ self.emit_header(name)
self.emit_template_struct(self.struct, self.group)
self.emit_pack_function(self.struct, self.group)
self.emit_unpack_function(self.struct, self.group)
@@ -527,14 +536,14 @@
print('')
def emit_enum(self):
- print('/* enum %s */' % self.gen_prefix(self.enum))
+ print('enum %s {' % self.gen_prefix(self.enum))
for value in self.values:
if self.prefix:
name = self.prefix + "_" + value.name
else:
name = value.name
- print('#define %-36s %6d' % (name.upper(), value.value))
- print('')
+ print(' % -36s = %6d,' % (name.upper(), value.value))
+ print('};\n')
def parse(self, filename):
file = open(filename, "rb")
diff -Nru mesa-17.2.4/src/broadcom/cle/meson.build mesa-17.3.3/src/broadcom/cle/meson.build
--- mesa-17.2.4/src/broadcom/cle/meson.build 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/broadcom/cle/meson.build 2018-02-01 16:17:32.000000000 +0000
@@ -0,0 +1,59 @@
+# Copyright © 2017 Broadcom
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+v3d_versions = [
+ 21,
+ 33
+]
+
+v3d_xml_files = []
+foreach v: v3d_versions
+ v3d_xml_files += 'v3d_packet_v@0@.xml'.format(v)
+endforeach
+
+v3d_xml_h = custom_target(
+ 'v3d_xml.h',
+ input : ['../../intel/genxml/gen_zipped_file.py', v3d_xml_files],
+ output : 'v3d_xml.h',
+ command : [prog_python2, '@INPUT@'],
+ capture : true,
+)
+
+v3d_xml_pack = []
+foreach f : v3d_xml_files
+ _name = '@0@_pack.h'.format(f.split('.')[0])
+ _xml = custom_target(
+ _name,
+ input : ['gen_pack_header.py', f],
+ output : _name,
+ command : [prog_python2, '@INPUT@'],
+ capture : true,
+ )
+ v3d_xml_pack += _xml
+endforeach
+
+libbroadcom_cle = static_library(
+ ['broadcom_cle', v3d_xml_h],
+ 'v3d_decoder.c',
+ include_directories : [inc_common, inc_broadcom],
+ c_args : [c_vis_args, no_override_init_args],
+ dependencies : [dep_libdrm, dep_valgrind],
+ build_by_default : false,
+)
diff -Nru mesa-17.2.4/src/broadcom/cle/v3d_decoder.c mesa-17.3.3/src/broadcom/cle/v3d_decoder.c
--- mesa-17.2.4/src/broadcom/cle/v3d_decoder.c 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/broadcom/cle/v3d_decoder.c 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,876 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ * Copyright © 2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#include "v3d_decoder.h"
+#include "v3d_packet_helpers.h"
+#include "v3d_xml.h"
+
+struct v3d_spec {
+ uint32_t ver;
+
+ int ncommands;
+ struct v3d_group *commands[256];
+ int nstructs;
+ struct v3d_group *structs[256];
+ int nregisters;
+ struct v3d_group *registers[256];
+ int nenums;
+ struct v3d_enum *enums[256];
+};
+
+struct location {
+ const char *filename;
+ int line_number;
+};
+
+struct parser_context {
+ XML_Parser parser;
+ int foo;
+ struct location loc;
+
+ struct v3d_group *group;
+ struct v3d_enum *enoom;
+
+ int nvalues;
+ struct v3d_value *values[256];
+
+ struct v3d_spec *spec;
+};
+
+const char *
+v3d_group_get_name(struct v3d_group *group)
+{
+ return group->name;
+}
+
+uint8_t
+v3d_group_get_opcode(struct v3d_group *group)
+{
+ return group->opcode;
+}
+
+struct v3d_group *
+v3d_spec_find_struct(struct v3d_spec *spec, const char *name)
+{
+ for (int i = 0; i < spec->nstructs; i++)
+ if (strcmp(spec->structs[i]->name, name) == 0)
+ return spec->structs[i];
+
+ return NULL;
+}
+
+struct v3d_group *
+v3d_spec_find_register(struct v3d_spec *spec, uint32_t offset)
+{
+ for (int i = 0; i < spec->nregisters; i++)
+ if (spec->registers[i]->register_offset == offset)
+ return spec->registers[i];
+
+ return NULL;
+}
+
+struct v3d_group *
+v3d_spec_find_register_by_name(struct v3d_spec *spec, const char *name)
+{
+ for (int i = 0; i < spec->nregisters; i++) {
+ if (strcmp(spec->registers[i]->name, name) == 0)
+ return spec->registers[i];
+ }
+
+ return NULL;
+}
+
+struct v3d_enum *
+v3d_spec_find_enum(struct v3d_spec *spec, const char *name)
+{
+ for (int i = 0; i < spec->nenums; i++)
+ if (strcmp(spec->enums[i]->name, name) == 0)
+ return spec->enums[i];
+
+ return NULL;
+}
+
+/* Report a parse error at loc on stderr and exit; loc->filename may be NULL. */
+static void __attribute__((noreturn))
+fail(struct location *loc, const char *msg, ...)
+{
+        const char *file = loc->filename ? loc->filename : "<v3d xml>";
+        va_list ap;
+        va_start(ap, msg);
+        fprintf(stderr, "%s:%d: error: ", file, loc->line_number);
+        vfprintf(stderr, msg, ap);
+        fprintf(stderr, "\n");
+        va_end(ap);
+        exit(EXIT_FAILURE);
+}
+
+/* Returns p unchanged, or exits with an out-of-memory error if p is NULL. */
+static void *
+fail_on_null(void *p)
+{
+        if (p == NULL) {
+                fprintf(stderr, "v3d_decoder: out of memory\n");
+                exit(EXIT_FAILURE);
+        }
+        return p;
+}
+
+static char *
+xstrdup(const char *s)
+{
+ return fail_on_null(strdup(s));
+}
+
+static void *
+zalloc(size_t s)
+{
+ return calloc(s, 1);
+}
+
+static void *
+xzalloc(size_t s)
+{
+ return fail_on_null(zalloc(s));
+}
+
+/* Field positions are bit indices unless suffixed with "b" (byte index).
+ * An empty attribute has no suffix, so it is treated as a bit index. */
+static bool
+is_byte_offset(const char *value)
+{
+        return value[0] != '\0' && value[strlen(value) - 1] == 'b';
+}
+
+static void
+get_group_offset_count(const char **atts, uint32_t *offset, uint32_t *count,
+ uint32_t *size, bool *variable)
+{
+ char *p;
+ int i;
+
+ for (i = 0; atts[i]; i += 2) {
+ if (strcmp(atts[i], "count") == 0) {
+ *count = strtoul(atts[i + 1], &p, 0);
+ if (*count == 0)
+ *variable = true;
+ } else if (strcmp(atts[i], "start") == 0) {
+ *offset = strtoul(atts[i + 1], &p, 0);
+ } else if (strcmp(atts[i], "size") == 0) {
+ *size = strtoul(atts[i + 1], &p, 0);
+ }
+ }
+ return;
+}
+
+static struct v3d_group *
+create_group(struct parser_context *ctx,
+ const char *name,
+ const char **atts,
+ struct v3d_group *parent)
+{
+ struct v3d_group *group;
+
+ group = xzalloc(sizeof(*group));
+ if (name)
+ group->name = xstrdup(name);
+
+ group->spec = ctx->spec;
+ group->group_offset = 0;
+ group->group_count = 0;
+ group->variable = false;
+
+ if (parent) {
+ group->parent = parent;
+ get_group_offset_count(atts,
+ &group->group_offset,
+ &group->group_count,
+ &group->group_size,
+ &group->variable);
+ }
+
+ return group;
+}
+
+static struct v3d_enum *
+create_enum(struct parser_context *ctx, const char *name, const char **atts)
+{
+ struct v3d_enum *e;
+
+ e = xzalloc(sizeof(*e));
+ if (name)
+ e->name = xstrdup(name);
+
+ e->nvalues = 0;
+
+ return e;
+}
+
+static void
+get_register_offset(const char **atts, uint32_t *offset)
+{
+ char *p;
+ int i;
+
+ for (i = 0; atts[i]; i += 2) {
+ if (strcmp(atts[i], "num") == 0)
+ *offset = strtoul(atts[i + 1], &p, 0);
+ }
+ return;
+}
+
+static void
+get_start_end_pos(int *start, int *end)
+{
+ /* Reduce start modulo 32 so it is relative to the first DWord. For
+ * fields wider than 32 bits, end becomes start plus the field length
+ * (a position within the 64-bit value); otherwise end is reduced too.
+ * NOTE(review): a <=32-bit field straddling a DWord boundary then gets
+ * end < start, which mask() does not handle -- confirm callers. */
+ if (*end - *start > 32) {
+ int len = *end - *start;
+ *start = *start % 32;
+ *end = *start + len;
+ } else {
+ *start = *start % 32;
+ *end = *end % 32;
+ }
+
+ return;
+}
+
+static inline uint64_t
+mask(int start, int end)
+{
+ uint64_t v;
+
+ v = ~0ULL >> (63 - end + start);
+
+ return v << start;
+}
+
+static inline uint64_t
+field(uint64_t value, int start, int end)
+{
+ get_start_end_pos(&start, &end);
+ return (value & mask(start, end)) >> (start);
+}
+
+static inline uint64_t
+field_address(uint64_t value, int start, int end)
+{
+ /* no need to right shift for address/offset */
+ get_start_end_pos(&start, &end);
+ return (value & mask(start, end));
+}
+
+static struct v3d_type
+string_to_type(struct parser_context *ctx, const char *s)
+{
+ int i, f;
+ struct v3d_group *g;
+ struct v3d_enum *e;
+
+ if (strcmp(s, "int") == 0)
+ return (struct v3d_type) { .kind = V3D_TYPE_INT };
+ else if (strcmp(s, "uint") == 0)
+ return (struct v3d_type) { .kind = V3D_TYPE_UINT };
+ else if (strcmp(s, "bool") == 0)
+ return (struct v3d_type) { .kind = V3D_TYPE_BOOL };
+ else if (strcmp(s, "float") == 0)
+ return (struct v3d_type) { .kind = V3D_TYPE_FLOAT };
+ else if (strcmp(s, "address") == 0)
+ return (struct v3d_type) { .kind = V3D_TYPE_ADDRESS };
+ else if (strcmp(s, "offset") == 0)
+ return (struct v3d_type) { .kind = V3D_TYPE_OFFSET };
+ else if (sscanf(s, "u%d.%d", &i, &f) == 2)
+ return (struct v3d_type) { .kind = V3D_TYPE_UFIXED, .i = i, .f = f };
+ else if (sscanf(s, "s%d.%d", &i, &f) == 2)
+ return (struct v3d_type) { .kind = V3D_TYPE_SFIXED, .i = i, .f = f };
+ else if (g = v3d_spec_find_struct(ctx->spec, s), g != NULL)
+ return (struct v3d_type) { .kind = V3D_TYPE_STRUCT, .v3d_struct = g };
+ else if (e = v3d_spec_find_enum(ctx->spec, s), e != NULL)
+ return (struct v3d_type) { .kind = V3D_TYPE_ENUM, .v3d_enum = e };
+ else if (strcmp(s, "mbo") == 0)
+ return (struct v3d_type) { .kind = V3D_TYPE_MBO };
+ else
+ fail(&ctx->loc, "invalid type: %s", s);
+}
+
+static struct v3d_field *
+create_field(struct parser_context *ctx, const char **atts)
+{
+ struct v3d_field *field;
+ char *p;
+ int i;
+ uint32_t size = 0;
+
+ field = xzalloc(sizeof(*field));
+
+ for (i = 0; atts[i]; i += 2) {
+ if (strcmp(atts[i], "name") == 0)
+ field->name = xstrdup(atts[i + 1]);
+ else if (strcmp(atts[i], "start") == 0) {
+ field->start = strtoul(atts[i + 1], &p, 0);
+ if (is_byte_offset(atts[i + 1]))
+ field->start *= 8;
+ } else if (strcmp(atts[i], "end") == 0) {
+ field->end = strtoul(atts[i + 1], &p, 0) - 1;
+ if (is_byte_offset(atts[i + 1]))
+ field->end *= 8;
+ } else if (strcmp(atts[i], "size") == 0) {
+ size = strtoul(atts[i + 1], &p, 0);
+ if (is_byte_offset(atts[i + 1]))
+ size *= 8;
+ } else if (strcmp(atts[i], "type") == 0)
+ field->type = string_to_type(ctx, atts[i + 1]);
+ else if (strcmp(atts[i], "default") == 0) {
+ field->has_default = true;
+ field->default_value = strtoul(atts[i + 1], &p, 0);
+ }
+ }
+
+ if (size)
+ field->end = field->start + size - 1;
+
+ return field;
+}
+
+static struct v3d_value *
+create_value(struct parser_context *ctx, const char **atts)
+{
+ struct v3d_value *value = xzalloc(sizeof(*value));
+
+ for (int i = 0; atts[i]; i += 2) {
+ if (strcmp(atts[i], "name") == 0)
+ value->name = xstrdup(atts[i + 1]);
+ else if (strcmp(atts[i], "value") == 0)
+ value->value = strtoul(atts[i + 1], NULL, 0);
+ }
+
+ return value;
+}
+
+/* Parse a <field> and append it to the current group, doubling the field
+ * array (minimum 2 entries) when full; exits on allocation failure. */
+static void
+create_and_append_field(struct parser_context *ctx, const char **atts)
+{
+        struct v3d_group *group = ctx->group;
+        if (group->nfields == group->fields_size) {
+                group->fields_size = MAX2(group->fields_size * 2, 2);
+                group->fields = fail_on_null(realloc(group->fields,
+                                                     sizeof(group->fields[0]) *
+                                                     group->fields_size));
+        }
+        group->fields[group->nfields++] = create_field(ctx, atts);
+}
+
+static void
+set_group_opcode(struct v3d_group *group, const char **atts)
+{
+ char *p;
+ int i;
+
+ for (i = 0; atts[i]; i += 2) {
+ if (strcmp(atts[i], "code") == 0)
+ group->opcode = strtoul(atts[i + 1], &p, 0);
+ }
+ return;
+}
+
+/* expat start-tag handler: creates the object for the opened element and
+ * makes it current in the parser context; bad input ends in fail(). */
+static void
+start_element(void *data, const char *element_name, const char **atts)
+{
+        struct parser_context *ctx = data;
+        int i;
+        const char *name = NULL;
+        const char *ver = NULL;
+
+        ctx->loc.line_number = XML_GetCurrentLineNumber(ctx->parser);
+        for (i = 0; atts[i]; i += 2) {
+                if (strcmp(atts[i], "name") == 0)
+                        name = atts[i + 1];
+                else if (strcmp(atts[i], "gen") == 0)
+                        ver = atts[i + 1];
+        }
+
+        if (strcmp(element_name, "vcxml") == 0) {
+                if (ver == NULL)
+                        fail(&ctx->loc, "no ver given");
+
+                int major, minor;
+                int n = sscanf(ver, "%d.%d", &major, &minor);
+                /* sscanf() returns EOF (< 0) on empty input, not 0. */
+                if (n < 1)
+                        fail(&ctx->loc, "invalid ver given: %s", ver);
+                if (n == 1)
+                        minor = 0;
+
+                ctx->spec->ver = major * 10 + minor;
+        } else if (strcmp(element_name, "packet") == 0 ||
+                   strcmp(element_name, "struct") == 0) {
+                ctx->group = create_group(ctx, name, atts, NULL);
+                if (strcmp(element_name, "packet") == 0)
+                        set_group_opcode(ctx->group, atts);
+        } else if (strcmp(element_name, "register") == 0) {
+                ctx->group = create_group(ctx, name, atts, NULL);
+                get_register_offset(atts, &ctx->group->register_offset);
+        } else if (strcmp(element_name, "group") == 0) {
+                struct v3d_group *previous_group = ctx->group;
+                while (previous_group->next)
+                        previous_group = previous_group->next;
+                struct v3d_group *group = create_group(ctx, "", atts,
+                                                       ctx->group);
+                previous_group->next = group;
+                ctx->group = group;
+        } else if (strcmp(element_name, "field") == 0) {
+                create_and_append_field(ctx, atts);
+        } else if (strcmp(element_name, "enum") == 0) {
+                ctx->enoom = create_enum(ctx, name, atts);
+        } else if (strcmp(element_name, "value") == 0) {
+                if (ctx->nvalues >= ARRAY_SIZE(ctx->values))
+                        fail(&ctx->loc, "too many values");
+                ctx->values[ctx->nvalues++] = create_value(ctx, atts);
+        }
+}
+
+/* expat end-tag handler: files the finished group or enum into the spec. */
+static void
+end_element(void *data, const char *name)
+{
+        struct parser_context *ctx = data;
+        struct v3d_spec *spec = ctx->spec;
+
+        if (strcmp(name, "packet") == 0 ||
+            strcmp(name, "struct") == 0 ||
+            strcmp(name, "register") == 0) {
+                struct v3d_group *group = ctx->group;
+                ctx->group = ctx->group->parent;
+                if (spec->ncommands >= ARRAY_SIZE(spec->commands) ||
+                    spec->nstructs >= ARRAY_SIZE(spec->structs) ||
+                    spec->nregisters >= ARRAY_SIZE(spec->registers))
+                        fail(&ctx->loc, "too many packets, structs or registers");
+                if (strcmp(name, "packet") == 0) {
+                        spec->commands[spec->ncommands++] = group;
+                        /* V3D packet XML has the packet contents with offsets
+                         * starting from the first bit after the opcode, to
+                         * match the spec.  Shift the fields up now.
+                         */
+                        for (int i = 0; i < group->nfields; i++) {
+                                group->fields[i]->start += 8;
+                                group->fields[i]->end += 8;
+                        }
+                }
+                else if (strcmp(name, "struct") == 0)
+                        spec->structs[spec->nstructs++] = group;
+                else if (strcmp(name, "register") == 0)
+                        spec->registers[spec->nregisters++] = group;
+        } else if (strcmp(name, "group") == 0) {
+                ctx->group = ctx->group->parent;
+        } else if (strcmp(name, "field") == 0) {
+                assert(ctx->group->nfields > 0);
+                struct v3d_field *field = ctx->group->fields[ctx->group->nfields - 1];
+                size_t size = ctx->nvalues * sizeof(ctx->values[0]);
+                field->inline_enum.values = xzalloc(size);
+                field->inline_enum.nvalues = ctx->nvalues;
+                memcpy(field->inline_enum.values, ctx->values, size);
+                ctx->nvalues = 0;
+        } else if (strcmp(name, "enum") == 0) {
+                if (spec->nenums >= ARRAY_SIZE(spec->enums))
+                        fail(&ctx->loc, "too many enums");
+                struct v3d_enum *e = ctx->enoom;
+                size_t size = ctx->nvalues * sizeof(ctx->values[0]);
+                e->values = xzalloc(size);
+                e->nvalues = ctx->nvalues;
+                memcpy(e->values, ctx->values, size);
+                ctx->nvalues = 0;
+                ctx->enoom = NULL;
+                spec->enums[spec->nenums++] = e;
+        }
+}
+
+static void
+character_data(void *data, const XML_Char *s, int len)
+{
+}
+
+/* Inflate a zlib stream into a malloc()ed buffer that the caller frees.
+ * Returns the inflated length and sets *out_ptr, or returns 0 on failure
+ * (in which case nothing stays allocated and *out_ptr is left untouched). */
+static uint32_t zlib_inflate(const void *compressed_data,
+                             uint32_t compressed_len,
+                             void **out_ptr)
+{
+        struct z_stream_s zstream;
+        void *out, *grown;
+
+        memset(&zstream, 0, sizeof(zstream));
+        zstream.next_in = (unsigned char *)compressed_data;
+        zstream.avail_in = compressed_len;
+        if (inflateInit(&zstream) != Z_OK)
+                return 0;
+
+        out = malloc(4096);
+        if (out == NULL)
+                goto fail;
+        zstream.next_out = out;
+        zstream.avail_out = 4096;
+
+        do {
+                int ret = inflate(&zstream, Z_SYNC_FLUSH);
+                if (ret == Z_STREAM_END)
+                        break;
+                if (ret != Z_OK)
+                        goto fail;
+                /* Output space to spare means inflate() ran out of input. */
+                if (zstream.avail_out)
+                        break;
+                /* Double the buffer; keep the old pointer so it can be freed. */
+                grown = realloc(out, 2*zstream.total_out);
+                if (grown == NULL)
+                        goto fail;
+                out = grown;
+                zstream.next_out = (unsigned char *)out + zstream.total_out;
+                zstream.avail_out = zstream.total_out;
+        } while (1);
+        inflateEnd(&zstream);
+        *out_ptr = out;
+        return zstream.total_out;
+fail:
+        inflateEnd(&zstream);
+        free(out);
+        return 0;
+}
+
+/* Loads the spec for devinfo->ver from the embedded XML; NULL on failure. */
+struct v3d_spec *
+v3d_spec_load(const struct v3d_device_info *devinfo)
+{
+        struct parser_context ctx;
+        void *buf;
+        uint8_t *text_data = NULL;
+        uint32_t text_offset = 0, text_length = 0, total_length;
+
+        for (int i = 0; i < ARRAY_SIZE(genxml_files_table); i++) {
+                if (genxml_files_table[i].gen_10 == devinfo->ver) {
+                        text_offset = genxml_files_table[i].offset;
+                        text_length = genxml_files_table[i].length;
+                        break;
+                }
+        }
+        if (text_length == 0) {
+                fprintf(stderr, "unable to find gen (%u) data\n", devinfo->ver);
+                return NULL;
+        }
+
+        memset(&ctx, 0, sizeof ctx);
+        ctx.parser = XML_ParserCreate(NULL);
+        if (ctx.parser == NULL) {
+                fprintf(stderr, "failed to create parser\n");
+                return NULL;
+        }
+        /* Only touch the parser once it is known to be valid. */
+        XML_SetUserData(ctx.parser, &ctx);
+        XML_SetElementHandler(ctx.parser, start_element, end_element);
+        XML_SetCharacterDataHandler(ctx.parser, character_data);
+        ctx.spec = xzalloc(sizeof(*ctx.spec));
+        total_length = zlib_inflate(compress_genxmls,
+                                    sizeof(compress_genxmls),
+                                    (void **) &text_data);
+        buf = XML_GetBuffer(ctx.parser, text_length);
+        /* zlib_inflate() returns 0 on failure, which fails the range check. */
+        if (buf == NULL || text_offset + text_length > total_length) {
+                fprintf(stderr, "failed to set up XML data\n");
+                goto fail;
+        }
+        memcpy(buf, &text_data[text_offset], text_length);
+        if (XML_ParseBuffer(ctx.parser, text_length, true) == 0) {
+                fprintf(stderr,
+                        "Error parsing XML at line %ld col %ld byte %ld/%u: %s\n",
+                        XML_GetCurrentLineNumber(ctx.parser),
+                        XML_GetCurrentColumnNumber(ctx.parser),
+                        XML_GetCurrentByteIndex(ctx.parser), text_length,
+                        XML_ErrorString(XML_GetErrorCode(ctx.parser)));
+                goto fail;
+        }
+        XML_ParserFree(ctx.parser);
+        free(text_data);
+        return ctx.spec;
+fail:
+        XML_ParserFree(ctx.parser);
+        free(text_data);
+        return NULL;
+}
+
+/**
+ * Looks up the command description matching the packet at p.
+ *
+ * A command is a candidate when its opcode equals the first byte of the
+ * packet.  If the candidate has a field named "sub-id", the bits of that
+ * field in the packet must also equal the field's default_value.
+ *
+ * Returns the first matching command, or NULL if none matches.
+ */
+struct v3d_group *
+v3d_spec_find_instruction(struct v3d_spec *spec, const uint8_t *p)
+{
+        const uint8_t wanted = p[0];
+
+        for (int c = 0; c < spec->ncommands; c++) {
+                struct v3d_group *candidate = spec->commands[c];
+
+                if (candidate->opcode != wanted)
+                        continue;
+
+                /* Commands without a "sub-id" field match on opcode alone;
+                 * otherwise only the first such field is consulted.
+                 */
+                bool matches = true;
+                for (int f = 0; f < candidate->nfields; f++) {
+                        struct v3d_field *field = candidate->fields[f];
+
+                        if (strcmp(field->name, "sub-id") != 0)
+                                continue;
+
+                        matches = __gen_unpack_uint(p, field->start,
+                                                    field->end) ==
+                                  field->default_value;
+                        break;
+                }
+
+                if (matches)
+                        return candidate;
+        }
+
+        return NULL;
+}
+
+/**
+ * Returns the size of a V3D packet in bytes: the byte holding the highest
+ * field end bit, plus one.  A group without fields yields 1.
+ */
+int
+v3d_group_get_length(struct v3d_group *group)
+{
+        int top_bit = 0;
+
+        for (int f = 0; f < group->nfields; f++) {
+                const int end = group->fields[f]->end;
+
+                if (end > top_bit)
+                        top_bit = end;
+        }
+
+        return 1 + top_bit / 8;
+}
+
+/**
+ * Resets iter so that the next v3d_field_iterator_next() call yields the
+ * first field of group, decoded from the packet bytes at p.  Every member
+ * other than group, p and print_colors starts out zeroed.
+ */
+void
+v3d_field_iterator_init(struct v3d_field_iterator *iter,
+                        struct v3d_group *group,
+                        const uint8_t *p,
+                        bool print_colors)
+{
+        /* Members not named in the initializer are zero-initialized. */
+        *iter = (struct v3d_field_iterator) {
+                .group = group,
+                .p = p,
+                .print_colors = print_colors,
+        };
+}
+
+/**
+ * Returns the name of the first entry of e whose value equals value, or
+ * NULL when the enum has no such entry.
+ */
+static const char *
+v3d_get_enum_name(struct v3d_enum *e, uint64_t value)
+{
+        int idx = 0;
+
+        while (idx < e->nvalues) {
+                const struct v3d_value *entry = e->values[idx++];
+
+                if (entry->value == value)
+                        return entry->name;
+        }
+
+        return NULL;
+}
+
+/** True while the current group still has fields the iterator has not visited. */
+static bool
+iter_more_fields(const struct v3d_field_iterator *iter)
+{
+        const uint32_t field_count = iter->group->nfields;
+
+        return iter->field_iter < field_count;
+}
+
+/**
+ * Returns where repetition number group_iter of the current group starts:
+ * group_offset plus group_iter times group_size.
+ */
+static uint32_t
+iter_group_offset_bits(const struct v3d_field_iterator *iter,
+                       uint32_t group_iter)
+{
+        const struct v3d_group *g = iter->group;
+        const uint32_t stride = g->group_size;
+
+        return g->group_offset + stride * group_iter;
+}
+
+/**
+ * Tells whether iter_advance_group() has somewhere to go.
+ *
+ * For a variable group that is the case while the next repetition would
+ * start before the end of the packet (v3d_group_get_length() * 8 bits).
+ * Otherwise it is the case while repetitions remain in group_count or a
+ * chained group follows.
+ */
+static bool
+iter_more_groups(const struct v3d_field_iterator *iter)
+{
+        const int next_index = iter->group_iter + 1;
+
+        if (!iter->group->variable) {
+                return next_index < iter->group->group_count ||
+                       iter->group->next != NULL;
+        }
+
+        return iter_group_offset_bits(iter, next_index) <
+               (v3d_group_get_length(iter->group) * 8);
+}
+
+/**
+ * Steps to the next repetition of the current group, or to the chained
+ * group once a fixed-count group has used up its repetitions, and restarts
+ * field iteration from the first field.
+ */
+static void
+iter_advance_group(struct v3d_field_iterator *iter)
+{
+        struct v3d_group *cur = iter->group;
+        const bool same_group =
+                cur->variable ||
+                (iter->group_iter + 1) < cur->group_count;
+
+        if (same_group) {
+                iter->group_iter++;
+        } else {
+                iter->group = cur->next;
+                iter->group_iter = 0;
+        }
+
+        iter->field_iter = 0;
+}
+
+/**
+ * Moves the iterator to the next field, stepping to the next group
+ * repetition (or chained group) whenever the current one is exhausted.
+ *
+ * On success iter->field, iter->name, iter->offset and iter->struct_desc
+ * are refreshed and true is returned; false means nothing is left.
+ */
+static bool
+iter_advance_field(struct v3d_field_iterator *iter)
+{
+        while (!iter_more_fields(iter)) {
+                if (!iter_more_groups(iter))
+                        return false;
+
+                iter_advance_group(iter);
+        }
+
+        iter->field = iter->group->fields[iter->field_iter++];
+        if (iter->field->name) {
+                /* strncpy() does not terminate the destination when the
+                 * source fills it, and v3d_field_iterator_next() runs
+                 * strlen() on iter->name, so copy one byte less and
+                 * terminate explicitly.
+                 */
+                strncpy(iter->name, iter->field->name,
+                        sizeof(iter->name) - 1);
+                iter->name[sizeof(iter->name) - 1] = '\0';
+        } else {
+                memset(iter->name, 0, sizeof(iter->name));
+        }
+
+        /* Byte offset of the field: start of the current group repetition
+         * plus the field's own start, both converted from bits.
+         */
+        iter->offset = iter_group_offset_bits(iter, iter->group_iter) / 8 +
+                iter->field->start / 8;
+        iter->struct_desc = NULL;
+
+        return true;
+}
+
+/**
+ * Advances to the next field of the packet and renders it as text.
+ *
+ * iter->name receives the field name (with "[group_iter]" appended when the
+ * group's name is empty) and iter->value a textual form of the field chosen
+ * by its type kind.  When the value matches an entry of the applicable enum,
+ * " (name)" is appended.  For struct fields iter->struct_desc is set to the
+ * description found by v3d_spec_find_struct().
+ *
+ * Returns false once every field has been visited.
+ */
+bool
+v3d_field_iterator_next(struct v3d_field_iterator *iter)
+{
+        if (!iter_advance_field(iter))
+                return false;
+
+        const char *enum_name = NULL;
+
+        int s = iter->field->start;
+        int e = iter->field->end;
+
+        switch (iter->field->type.kind) {
+        case V3D_TYPE_UNKNOWN:
+        case V3D_TYPE_INT: {
+                uint32_t value = __gen_unpack_sint(iter->p, s, e);
+                /* Cast so the argument type really matches %d. */
+                snprintf(iter->value, sizeof(iter->value), "%d", (int)value);
+                enum_name = v3d_get_enum_name(&iter->field->inline_enum, value);
+                break;
+        }
+        case V3D_TYPE_UINT: {
+                uint32_t value = __gen_unpack_uint(iter->p, s, e);
+                snprintf(iter->value, sizeof(iter->value), "%u", value);
+                enum_name = v3d_get_enum_name(&iter->field->inline_enum, value);
+                break;
+        }
+        case V3D_TYPE_BOOL: {
+                const char *true_string =
+                        iter->print_colors ? "\e[0;35mtrue\e[0m" : "true";
+                snprintf(iter->value, sizeof(iter->value), "%s",
+                         __gen_unpack_uint(iter->p, s, e) ?
+                         true_string : "false");
+                break;
+        }
+        case V3D_TYPE_FLOAT:
+                snprintf(iter->value, sizeof(iter->value), "%f",
+                         __gen_unpack_float(iter->p, s, e));
+                break;
+        case V3D_TYPE_ADDRESS:
+        case V3D_TYPE_OFFSET:
+                /* Fields narrower than 32 bits are shifted up so that the
+                 * field's top bit lands on bit 31.
+                 */
+                snprintf(iter->value, sizeof(iter->value), "0x%08"PRIx64,
+                         __gen_unpack_uint(iter->p, s, e) << (31 - (e - s)));
+                break;
+        case V3D_TYPE_STRUCT:
+                /* The format must consume the struct name passed below; an
+                 * empty format left the argument dangling and the value
+                 * blank.
+                 */
+                snprintf(iter->value, sizeof(iter->value), "<struct %s>",
+                         iter->field->type.v3d_struct->name);
+                iter->struct_desc =
+                        v3d_spec_find_struct(iter->group->spec,
+                                             iter->field->type.v3d_struct->name);
+                break;
+        case V3D_TYPE_SFIXED:
+                snprintf(iter->value, sizeof(iter->value), "%f",
+                         __gen_unpack_sfixed(iter->p, s, e,
+                                             iter->field->type.f));
+                break;
+        case V3D_TYPE_UFIXED:
+                snprintf(iter->value, sizeof(iter->value), "%f",
+                         __gen_unpack_ufixed(iter->p, s, e,
+                                             iter->field->type.f));
+                break;
+        case V3D_TYPE_MBO:
+                /* No text is produced here; iter->value keeps whatever the
+                 * previous call left in it.
+                 */
+                break;
+        case V3D_TYPE_ENUM: {
+                uint32_t value = __gen_unpack_uint(iter->p, s, e);
+                /* Cast so the argument type really matches %d. */
+                snprintf(iter->value, sizeof(iter->value), "%d", (int)value);
+                enum_name = v3d_get_enum_name(iter->field->type.v3d_enum, value);
+                break;
+        }
+        }
+
+        /* Fields of unnamed groups get the repetition index appended. */
+        if (strlen(iter->group->name) == 0) {
+                int length = strlen(iter->name);
+                snprintf(iter->name + length, sizeof(iter->name) - length,
+                         "[%i]", iter->group_iter);
+        }
+
+        if (enum_name) {
+                int length = strlen(iter->value);
+                snprintf(iter->value + length, sizeof(iter->value) - length,
+                         " (%s)", enum_name);
+        }
+
+        return true;
+}
+
+/**
+ * Writes one "name: value" line per field of group, decoded from the bytes
+ * at p, to outfile.  Fields that are themselves structs are expanded by
+ * recursing on their description, with offset and p advanced by the field's
+ * byte offset.
+ */
+void
+v3d_print_group(FILE *outfile, struct v3d_group *group,
+                uint64_t offset, const uint8_t *p, bool color)
+{
+        struct v3d_field_iterator iter;
+
+        for (v3d_field_iterator_init(&iter, group, p, color);
+             v3d_field_iterator_next(&iter); ) {
+                fprintf(outfile, " %s: %s\n", iter.name, iter.value);
+
+                if (iter.struct_desc == NULL)
+                        continue;
+
+                v3d_print_group(outfile, iter.struct_desc,
+                                offset + iter.offset,
+                                &p[iter.offset], color);
+        }
+}
diff -Nru mesa-17.2.4/src/broadcom/cle/v3d_decoder.h mesa-17.3.3/src/broadcom/cle/v3d_decoder.h
--- mesa-17.2.4/src/broadcom/cle/v3d_decoder.h 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/broadcom/cle/v3d_decoder.h 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,146 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ * Copyright © 2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef V3D_DECODER_H
+#define V3D_DECODER_H
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+
+#include "broadcom/common/v3d_device_info.h"
+
+/* Forward declarations.  v3d_group and v3d_field are defined further down
+ * in this header; v3d_spec is not defined here.
+ */
+struct v3d_spec;
+struct v3d_group;
+struct v3d_field;
+
+/* Lookup of a struct description by name; used by the field iterator to
+ * expand struct-typed fields.
+ */
+struct v3d_group *v3d_spec_find_struct(struct v3d_spec *spec, const char *name);
+/* Loads the description for devinfo->ver; returns NULL on failure. */
+struct v3d_spec *v3d_spec_load(const struct v3d_device_info *devinfo);
+/* Finds the command whose opcode (and "sub-id" field, if it has one) matches
+ * the packet at p; returns NULL when there is none.
+ */
+struct v3d_group *v3d_spec_find_instruction(struct v3d_spec *spec, const uint8_t *p);
+struct v3d_group *v3d_spec_find_register(struct v3d_spec *spec, uint32_t offset);
+struct v3d_group *v3d_spec_find_register_by_name(struct v3d_spec *spec, const char *name);
+/* Packet size in bytes, derived from the highest field end bit. */
+int v3d_group_get_length(struct v3d_group *group);
+const char *v3d_group_get_name(struct v3d_group *group);
+uint8_t v3d_group_get_opcode(struct v3d_group *group);
+struct v3d_enum *v3d_spec_find_enum(struct v3d_spec *spec, const char *name);
+
+/* Cursor over the fields of a packet.  Set up with v3d_field_iterator_init()
+ * and advanced with v3d_field_iterator_next(), which refreshes name, value,
+ * struct_desc, offset and field for each field visited.
+ */
+struct v3d_field_iterator {
+ /* Group currently being walked; replaced by group->next when a
+  * fixed-count group runs out of repetitions.
+  */
+ struct v3d_group *group;
+ /* Text of the current field's name and value, NUL-terminated. */
+ char name[128];
+ char value[128];
+ /* Description of the current field when it is a struct, else NULL. */
+ struct v3d_group *struct_desc;
+ /* Packet bytes being decoded. */
+ const uint8_t *p;
+ int offset; /**< current field starts at &p[offset] */
+
+ /* Index of the next field to visit, and the current repetition of
+  * the group.
+  */
+ int field_iter;
+ int group_iter;
+
+ struct v3d_field *field;
+ /* When set, a true boolean is rendered with ANSI color escapes. */
+ bool print_colors;
+};
+
+/* Description of a set of fields: a command packet, a struct, a register or
+ * a repeated group of fields inside one of those.
+ */
+struct v3d_group {
+ /* Spec this group belongs to; used to look up struct descriptions. */
+ struct v3d_spec *spec;
+ /* The field iterator appends "[index]" to field names when this is
+  * the empty string.
+  */
+ char *name;
+
+ /* nfields entries are valid.  fields_size is presumably the allocated
+  * capacity of the array — TODO confirm against the parser.
+  */
+ struct v3d_field **fields;
+ uint32_t nfields;
+ uint32_t fields_size;
+
+ /* Repetition number i of the group starts at
+  * group_offset + i * group_size; the iterator divides that by 8 to get
+  * a byte offset.  group_count bounds i unless `variable` is set, in
+  * which case repetitions continue up to the end of the packet.
+  */
+ uint32_t group_offset, group_count;
+ uint32_t group_size;
+ bool variable;
+
+ /* `next` is the group the iterator moves on to after the last
+  * repetition of this one.  `parent` is not used by the iterator code.
+  */
+ struct v3d_group *parent;
+ struct v3d_group *next;
+
+ /* Compared against the first byte of a packet when searching for the
+  * matching command.
+  */
+ uint8_t opcode;
+
+ /* Register specific */
+ uint32_t register_offset;
+};
+
+/* One named value of an enum. */
+struct v3d_value {
+ char *name;
+ uint64_t value;
+};
+
+/* A named set of values; `values` holds `nvalues` entries. */
+struct v3d_enum {
+ char *name;
+ int nvalues;
+ struct v3d_value **values;
+};
+
+/* Type of a field: `kind` selects how the field's bits are rendered and
+ * which member of the union below, if any, is meaningful.
+ */
+struct v3d_type {
+ enum {
+ V3D_TYPE_UNKNOWN,
+ V3D_TYPE_INT,
+ V3D_TYPE_UINT,
+ V3D_TYPE_BOOL,
+ V3D_TYPE_FLOAT,
+ V3D_TYPE_ADDRESS,
+ V3D_TYPE_OFFSET,
+ V3D_TYPE_STRUCT,
+ V3D_TYPE_UFIXED,
+ V3D_TYPE_SFIXED,
+ V3D_TYPE_MBO,
+ V3D_TYPE_ENUM
+ } kind;
+
+ /* Kind-specific data: v3d_struct for V3D_TYPE_STRUCT, v3d_enum for
+  * V3D_TYPE_ENUM, i/f for the fixed-point kinds.
+  */
+ union {
+ struct v3d_group *v3d_struct;
+ struct v3d_enum *v3d_enum;
+ struct {
+ /* Integer and fractional sizes for V3D_TYPE_UFIXED and
+ * V3D_TYPE_SFIXED
+ */
+ int i, f;
+ };
+ };
+};
+
+/* One field of a group. */
+struct v3d_field {
+ /* May be NULL; the iterator then reports an empty name. */
+ char *name;
+ /* First and last bit of the field, as passed to the __gen_unpack_*
+  * helpers.
+  */
+ int start, end;
+ struct v3d_type type;
+ bool has_default;
+ /* For a field named "sub-id", the value a packet must carry to match
+  * the enclosing command.
+  */
+ uint32_t default_value;
+
+ /* Consulted for value names of int/uint fields. */
+ struct v3d_enum inline_enum;
+};
+
+/* Prepares iter to walk the fields of group as encoded at p. */
+void v3d_field_iterator_init(struct v3d_field_iterator *iter,
+ struct v3d_group *group,
+ const uint8_t *p,
+ bool print_colors);
+
+/* Advances to the next field and fills iter->name / iter->value; returns
+ * false when no field is left.
+ */
+bool v3d_field_iterator_next(struct v3d_field_iterator *iter);
+
+/* Prints every field of group, recursing into struct-typed fields. */
+void v3d_print_group(FILE *out,
+ struct v3d_group *group,
+ uint64_t offset, const uint8_t *p,
+ bool color);
+
+#endif /* V3D_DECODER_H */
diff -Nru mesa-17.2.4/src/broadcom/cle/v3d_packet_v21_pack.h mesa-17.3.3/src/broadcom/cle/v3d_packet_v21_pack.h
--- mesa-17.2.4/src/broadcom/cle/v3d_packet_v21_pack.h 2017-10-30 14:50:52.000000000 +0000
+++ mesa-17.3.3/src/broadcom/cle/v3d_packet_v21_pack.h 2018-01-18 21:31:08.000000000 +0000
@@ -399,7 +399,7 @@
struct V3D21_STORE_FULL_RESOLUTION_TILE_BUFFER * restrict values)
{
values->opcode = __gen_unpack_uint(cl, 0, 7);
- values->address = __gen_unpack_address(cl, 8, 39);
+ values->address = __gen_unpack_address(cl, 12, 39);
values->last_tile = __gen_unpack_uint(cl, 11, 11);
values->disable_clear_on_write = __gen_unpack_uint(cl, 10, 10);
values->disable_z_stencil_buffer_write = __gen_unpack_uint(cl, 9, 9);
@@ -445,7 +445,7 @@
struct V3D21_RE_LOAD_FULL_RESOLUTION_TILE_BUFFER * restrict values)
{
values->opcode = __gen_unpack_uint(cl, 0, 7);
- values->address = __gen_unpack_address(cl, 8, 39);
+ values->address = __gen_unpack_address(cl, 12, 39);
values->disable_z_stencil_buffer_read = __gen_unpack_uint(cl, 9, 9);
values->disable_color_buffer_read = __gen_unpack_uint(cl, 8, 8);
}
@@ -523,7 +523,7 @@
struct V3D21_STORE_TILE_BUFFER_GENERAL * restrict values)
{
values->opcode = __gen_unpack_uint(cl, 0, 7);
- values->memory_base_address_of_frame_tile_dump_buffer = __gen_unpack_address(cl, 24, 55);
+ values->memory_base_address_of_frame_tile_dump_buffer = __gen_unpack_address(cl, 28, 55);
values->last_tile_of_frame = __gen_unpack_uint(cl, 27, 27);
values->disable_vg_mask_buffer_dump = __gen_unpack_uint(cl, 26, 26);
values->disable_z_stencil_buffer_dump = __gen_unpack_uint(cl, 25, 25);
@@ -602,7 +602,7 @@
struct V3D21_LOAD_TILE_BUFFER_GENERAL * restrict values)
{
values->opcode = __gen_unpack_uint(cl, 0, 7);
- values->memory_base_address_of_frame_tile_dump_buffer = __gen_unpack_address(cl, 24, 55);
+ values->memory_base_address_of_frame_tile_dump_buffer = __gen_unpack_address(cl, 28, 55);
values->disable_vg_mask_buffer_load = __gen_unpack_uint(cl, 26, 26);
values->disable_z_stencil_buffer_load = __gen_unpack_uint(cl, 25, 25);
values->disable_color_buffer_load = __gen_unpack_uint(cl, 24, 24);
@@ -1150,8 +1150,8 @@
struct V3D21_VIEWPORT_OFFSET {
uint32_t opcode;
- int32_t viewport_centre_y_coordinate;
- int32_t viewport_centre_x_coordinate;
+ float viewport_centre_y_coordinate;
+ float viewport_centre_x_coordinate;
};
static inline void
@@ -1160,13 +1160,13 @@
{
cl[ 0] = __gen_uint(values->opcode, 0, 7);
- cl[ 1] = __gen_sint(values->viewport_centre_x_coordinate, 0, 15);
+ cl[ 1] = __gen_sfixed(values->viewport_centre_x_coordinate, 0, 15, 4);
- cl[ 2] = __gen_sint(values->viewport_centre_x_coordinate, 0, 15) >> 8;
+ cl[ 2] = __gen_sfixed(values->viewport_centre_x_coordinate, 0, 15, 4) >> 8;
- cl[ 3] = __gen_sint(values->viewport_centre_y_coordinate, 0, 15);
+ cl[ 3] = __gen_sfixed(values->viewport_centre_y_coordinate, 0, 15, 4);
- cl[ 4] = __gen_sint(values->viewport_centre_y_coordinate, 0, 15) >> 8;
+ cl[ 4] = __gen_sfixed(values->viewport_centre_y_coordinate, 0, 15, 4) >> 8;
}
@@ -1177,8 +1177,8 @@
struct V3D21_VIEWPORT_OFFSET * restrict values)
{
values->opcode = __gen_unpack_uint(cl, 0, 7);
- values->viewport_centre_y_coordinate = __gen_unpack_sint(cl, 24, 39);
- values->viewport_centre_x_coordinate = __gen_unpack_sint(cl, 8, 23);
+ values->viewport_centre_y_coordinate = __gen_unpack_sfixed(cl, 24, 39, 4);
+ values->viewport_centre_x_coordinate = __gen_unpack_sfixed(cl, 8, 23, 4);
}
#endif
@@ -1520,7 +1520,9 @@
#endif
-#define V3D21_SHADER_RECORD_header
+#define V3D21_SHADER_RECORD_header \
+
+
struct V3D21_SHADER_RECORD {
bool fragment_shader_is_single_threaded;
bool point_size_included_in_shaded_vertex_data;
@@ -1639,7 +1641,9 @@
#endif
-#define V3D21_ATTRIBUTE_RECORD_header
+#define V3D21_ATTRIBUTE_RECORD_header \
+
+
struct V3D21_ATTRIBUTE_RECORD {
__gen_address_type address;
uint32_t number_of_bytes_minus_1;
diff -Nru mesa-17.2.4/src/broadcom/cle/v3d_packet_v21.xml mesa-17.3.3/src/broadcom/cle/v3d_packet_v21.xml
--- mesa-17.2.4/src/broadcom/cle/v3d_packet_v21.xml 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/broadcom/cle/v3d_packet_v21.xml 2018-01-18 21:30:28.000000000 +0000
@@ -18,7 +18,7 @@
-
+
@@ -26,13 +26,13 @@
-
+
-
+
@@ -69,7 +69,7 @@
-
+
@@ -212,8 +212,8 @@
-
-
+
+
diff -Nru mesa-17.2.4/src/broadcom/cle/v3d_packet_v33_pack.h mesa-17.3.3/src/broadcom/cle/v3d_packet_v33_pack.h
--- mesa-17.2.4/src/broadcom/cle/v3d_packet_v33_pack.h 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/broadcom/cle/v3d_packet_v33_pack.h 2018-01-18 21:31:08.000000000 +0000
@@ -0,0 +1,3733 @@
+/* Generated code, see packets.xml and gen_packet_header.py */
+
+
+/* Packets, enums and structures for V3D 3.3.
+ *
+ * This file has been generated, do not hand edit.
+ */
+
+#ifndef V3D33_PACK_H
+#define V3D33_PACK_H
+
+#include "v3d_packet_helpers.h"
+
+
+/* Generated encodings of the Compare_Function enum (values 0-7). */
+enum V3D33_Compare_Function {
+ V3D_COMPARE_FUNC_NEVER = 0,
+ V3D_COMPARE_FUNC_LESS = 1,
+ V3D_COMPARE_FUNC_EQUAL = 2,
+ V3D_COMPARE_FUNC_LEQUAL = 3,
+ V3D_COMPARE_FUNC_GREATER = 4,
+ V3D_COMPARE_FUNC_NOTEQUAL = 5,
+ V3D_COMPARE_FUNC_GEQUAL = 6,
+ V3D_COMPARE_FUNC_ALWAYS = 7,
+};
+
+/* Generated encodings of the Blend_Factor enum (values 0-14). */
+enum V3D33_Blend_Factor {
+ V3D_BLEND_FACTOR_ZERO = 0,
+ V3D_BLEND_FACTOR_ONE = 1,
+ V3D_BLEND_FACTOR_SRC_COLOR = 2,
+ V3D_BLEND_FACTOR_INV_SRC_COLOR = 3,
+ V3D_BLEND_FACTOR_DST_COLOR = 4,
+ V3D_BLEND_FACTOR_INV_DST_COLOR = 5,
+ V3D_BLEND_FACTOR_SRC_ALPHA = 6,
+ V3D_BLEND_FACTOR_INV_SRC_ALPHA = 7,
+ V3D_BLEND_FACTOR_DST_ALPHA = 8,
+ V3D_BLEND_FACTOR_INV_DST_ALPHA = 9,
+ V3D_BLEND_FACTOR_CONST_COLOR = 10,
+ V3D_BLEND_FACTOR_INV_CONST_COLOR = 11,
+ V3D_BLEND_FACTOR_CONST_ALPHA = 12,
+ V3D_BLEND_FACTOR_INV_CONST_ALPHA = 13,
+ V3D_BLEND_FACTOR_SRC_ALPHA_SATURATE = 14,
+};
+
+/* Generated encodings of the Blend_Mode enum (values 0-8). */
+enum V3D33_Blend_Mode {
+ V3D_BLEND_MODE_ADD = 0,
+ V3D_BLEND_MODE_SUB = 1,
+ V3D_BLEND_MODE_RSUB = 2,
+ V3D_BLEND_MODE_MIN = 3,
+ V3D_BLEND_MODE_MAX = 4,
+ V3D_BLEND_MODE_MUL = 5,
+ V3D_BLEND_MODE_SCREEN = 6,
+ V3D_BLEND_MODE_DARKEN = 7,
+ V3D_BLEND_MODE_LIGHTEN = 8,
+};
+
+/* Generated encodings of the Stencil_Op enum (values 0-7). */
+enum V3D33_Stencil_Op {
+ V3D_STENCIL_OP_ZERO = 0,
+ V3D_STENCIL_OP_KEEP = 1,
+ V3D_STENCIL_OP_REPLACE = 2,
+ V3D_STENCIL_OP_INCR = 3,
+ V3D_STENCIL_OP_DECR = 4,
+ V3D_STENCIL_OP_INVERT = 5,
+ V3D_STENCIL_OP_INCWRAP = 6,
+ V3D_STENCIL_OP_DECWRAP = 7,
+};
+
+/* Generated encodings of the Primitive enum.  Each *_TF entry is the
+ * same-named base entry plus 16.
+ */
+enum V3D33_Primitive {
+ V3D_PRIM_POINTS = 0,
+ V3D_PRIM_LINES = 1,
+ V3D_PRIM_LINE_LOOP = 2,
+ V3D_PRIM_LINE_STRIP = 3,
+ V3D_PRIM_TRIANGLES = 4,
+ V3D_PRIM_TRIANGLE_STRIP = 5,
+ V3D_PRIM_TRIANGLE_FAN = 6,
+ V3D_PRIM_POINTS_TF = 16,
+ V3D_PRIM_LINES_TF = 17,
+ V3D_PRIM_LINE_LOOP_TF = 18,
+ V3D_PRIM_LINE_STRIP_TF = 19,
+ V3D_PRIM_TRIANGLES_TF = 20,
+ V3D_PRIM_TRIANGLE_STRIP_TF = 21,
+ V3D_PRIM_TRIANGLE_FAN_TF = 22,
+};
+
+/* HALT (opcode 0): one-byte packet carrying only the opcode.
+ * pack() writes the opcode field to cl[0] and does not use `data`;
+ * unpack() reads it back from bits 0-7 and is only compiled when
+ * __gen_unpack_address is defined.
+ */
+#define V3D33_HALT_opcode 0
+#define V3D33_HALT_header \
+ .opcode = 0
+
+struct V3D33_HALT {
+ uint32_t opcode;
+};
+
+static inline void
+V3D33_HALT_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_HALT * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+}
+
+#define V3D33_HALT_length 1
+#ifdef __gen_unpack_address
+static inline void
+V3D33_HALT_unpack(const uint8_t * restrict cl,
+ struct V3D33_HALT * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+}
+#endif
+
+
+/* NOP (opcode 1): one-byte packet carrying only the opcode.
+ * pack() writes the opcode field to cl[0] and does not use `data`;
+ * unpack() reads it back from bits 0-7 and is only compiled when
+ * __gen_unpack_address is defined.
+ */
+#define V3D33_NOP_opcode 1
+#define V3D33_NOP_header \
+ .opcode = 1
+
+struct V3D33_NOP {
+ uint32_t opcode;
+};
+
+static inline void
+V3D33_NOP_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_NOP * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+}
+
+#define V3D33_NOP_length 1
+#ifdef __gen_unpack_address
+static inline void
+V3D33_NOP_unpack(const uint8_t * restrict cl,
+ struct V3D33_NOP * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+}
+#endif
+
+
+/* FLUSH (opcode 4): one-byte packet carrying only the opcode.
+ * pack() writes the opcode field to cl[0] and does not use `data`;
+ * unpack() reads it back from bits 0-7 and is only compiled when
+ * __gen_unpack_address is defined.
+ */
+#define V3D33_FLUSH_opcode 4
+#define V3D33_FLUSH_header \
+ .opcode = 4
+
+struct V3D33_FLUSH {
+ uint32_t opcode;
+};
+
+static inline void
+V3D33_FLUSH_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_FLUSH * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+}
+
+#define V3D33_FLUSH_length 1
+#ifdef __gen_unpack_address
+static inline void
+V3D33_FLUSH_unpack(const uint8_t * restrict cl,
+ struct V3D33_FLUSH * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+}
+#endif
+
+
+/* FLUSH_ALL_STATE (opcode 5): one-byte packet carrying only the opcode.
+ * pack() writes the opcode field to cl[0] and does not use `data`;
+ * unpack() reads it back from bits 0-7 and is only compiled when
+ * __gen_unpack_address is defined.
+ */
+#define V3D33_FLUSH_ALL_STATE_opcode 5
+#define V3D33_FLUSH_ALL_STATE_header \
+ .opcode = 5
+
+struct V3D33_FLUSH_ALL_STATE {
+ uint32_t opcode;
+};
+
+static inline void
+V3D33_FLUSH_ALL_STATE_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_FLUSH_ALL_STATE * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+}
+
+#define V3D33_FLUSH_ALL_STATE_length 1
+#ifdef __gen_unpack_address
+static inline void
+V3D33_FLUSH_ALL_STATE_unpack(const uint8_t * restrict cl,
+ struct V3D33_FLUSH_ALL_STATE * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+}
+#endif
+
+
+/* START_TILE_BINNING (opcode 6): one-byte packet carrying only the opcode.
+ * pack() writes the opcode field to cl[0] and does not use `data`;
+ * unpack() reads it back from bits 0-7 and is only compiled when
+ * __gen_unpack_address is defined.
+ */
+#define V3D33_START_TILE_BINNING_opcode 6
+#define V3D33_START_TILE_BINNING_header \
+ .opcode = 6
+
+struct V3D33_START_TILE_BINNING {
+ uint32_t opcode;
+};
+
+static inline void
+V3D33_START_TILE_BINNING_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_START_TILE_BINNING * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+}
+
+#define V3D33_START_TILE_BINNING_length 1
+#ifdef __gen_unpack_address
+static inline void
+V3D33_START_TILE_BINNING_unpack(const uint8_t * restrict cl,
+ struct V3D33_START_TILE_BINNING * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+}
+#endif
+
+
+/* INCREMENT_SEMAPHORE (opcode 7): one-byte packet carrying only the opcode.
+ * pack() writes the opcode field to cl[0] and does not use `data`;
+ * unpack() reads it back from bits 0-7 and is only compiled when
+ * __gen_unpack_address is defined.
+ */
+#define V3D33_INCREMENT_SEMAPHORE_opcode 7
+#define V3D33_INCREMENT_SEMAPHORE_header \
+ .opcode = 7
+
+struct V3D33_INCREMENT_SEMAPHORE {
+ uint32_t opcode;
+};
+
+static inline void
+V3D33_INCREMENT_SEMAPHORE_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_INCREMENT_SEMAPHORE * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+}
+
+#define V3D33_INCREMENT_SEMAPHORE_length 1
+#ifdef __gen_unpack_address
+static inline void
+V3D33_INCREMENT_SEMAPHORE_unpack(const uint8_t * restrict cl,
+ struct V3D33_INCREMENT_SEMAPHORE * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+}
+#endif
+
+
+/* WAIT_ON_SEMAPHORE (opcode 8): one-byte packet carrying only the opcode.
+ * pack() writes the opcode field to cl[0] and does not use `data`;
+ * unpack() reads it back from bits 0-7 and is only compiled when
+ * __gen_unpack_address is defined.
+ */
+#define V3D33_WAIT_ON_SEMAPHORE_opcode 8
+#define V3D33_WAIT_ON_SEMAPHORE_header \
+ .opcode = 8
+
+struct V3D33_WAIT_ON_SEMAPHORE {
+ uint32_t opcode;
+};
+
+static inline void
+V3D33_WAIT_ON_SEMAPHORE_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_WAIT_ON_SEMAPHORE * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+}
+
+#define V3D33_WAIT_ON_SEMAPHORE_length 1
+#ifdef __gen_unpack_address
+static inline void
+V3D33_WAIT_ON_SEMAPHORE_unpack(const uint8_t * restrict cl,
+ struct V3D33_WAIT_ON_SEMAPHORE * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+}
+#endif
+
+
+/* WAIT_FOR_PREVIOUS_FRAME (opcode 9): one-byte packet carrying only the
+ * opcode.  pack() writes the opcode field to cl[0] and does not use `data`;
+ * unpack() reads it back from bits 0-7 and is only compiled when
+ * __gen_unpack_address is defined.
+ */
+#define V3D33_WAIT_FOR_PREVIOUS_FRAME_opcode 9
+#define V3D33_WAIT_FOR_PREVIOUS_FRAME_header \
+ .opcode = 9
+
+struct V3D33_WAIT_FOR_PREVIOUS_FRAME {
+ uint32_t opcode;
+};
+
+static inline void
+V3D33_WAIT_FOR_PREVIOUS_FRAME_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_WAIT_FOR_PREVIOUS_FRAME * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+}
+
+#define V3D33_WAIT_FOR_PREVIOUS_FRAME_length 1
+#ifdef __gen_unpack_address
+static inline void
+V3D33_WAIT_FOR_PREVIOUS_FRAME_unpack(const uint8_t * restrict cl,
+ struct V3D33_WAIT_FOR_PREVIOUS_FRAME * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+}
+#endif
+
+
+/* ENABLE_Z_ONLY_RENDERING (opcode 10): one-byte packet carrying only the
+ * opcode.  pack() writes the opcode field to cl[0] and does not use `data`;
+ * unpack() reads it back from bits 0-7 and is only compiled when
+ * __gen_unpack_address is defined.
+ */
+#define V3D33_ENABLE_Z_ONLY_RENDERING_opcode 10
+#define V3D33_ENABLE_Z_ONLY_RENDERING_header \
+ .opcode = 10
+
+struct V3D33_ENABLE_Z_ONLY_RENDERING {
+ uint32_t opcode;
+};
+
+static inline void
+V3D33_ENABLE_Z_ONLY_RENDERING_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_ENABLE_Z_ONLY_RENDERING * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+}
+
+#define V3D33_ENABLE_Z_ONLY_RENDERING_length 1
+#ifdef __gen_unpack_address
+static inline void
+V3D33_ENABLE_Z_ONLY_RENDERING_unpack(const uint8_t * restrict cl,
+ struct V3D33_ENABLE_Z_ONLY_RENDERING * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+}
+#endif
+
+
+/* DISABLE_Z_ONLY_RENDERING (opcode 11): one-byte packet carrying only the
+ * opcode.  pack() writes the opcode field to cl[0] and does not use `data`;
+ * unpack() reads it back from bits 0-7 and is only compiled when
+ * __gen_unpack_address is defined.
+ */
+#define V3D33_DISABLE_Z_ONLY_RENDERING_opcode 11
+#define V3D33_DISABLE_Z_ONLY_RENDERING_header \
+ .opcode = 11
+
+struct V3D33_DISABLE_Z_ONLY_RENDERING {
+ uint32_t opcode;
+};
+
+static inline void
+V3D33_DISABLE_Z_ONLY_RENDERING_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_DISABLE_Z_ONLY_RENDERING * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+}
+
+#define V3D33_DISABLE_Z_ONLY_RENDERING_length 1
+#ifdef __gen_unpack_address
+static inline void
+V3D33_DISABLE_Z_ONLY_RENDERING_unpack(const uint8_t * restrict cl,
+ struct V3D33_DISABLE_Z_ONLY_RENDERING * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+}
+#endif
+
+
+/* END_OF_Z_ONLY_RENDERING_IN_FRAME (opcode 12): one-byte packet carrying
+ * only the opcode.  pack() writes the opcode field to cl[0] and does not use
+ * `data`; unpack() reads it back from bits 0-7 and is only compiled when
+ * __gen_unpack_address is defined.
+ */
+#define V3D33_END_OF_Z_ONLY_RENDERING_IN_FRAME_opcode 12
+#define V3D33_END_OF_Z_ONLY_RENDERING_IN_FRAME_header\
+ .opcode = 12
+
+struct V3D33_END_OF_Z_ONLY_RENDERING_IN_FRAME {
+ uint32_t opcode;
+};
+
+static inline void
+V3D33_END_OF_Z_ONLY_RENDERING_IN_FRAME_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_END_OF_Z_ONLY_RENDERING_IN_FRAME * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+}
+
+#define V3D33_END_OF_Z_ONLY_RENDERING_IN_FRAME_length 1
+#ifdef __gen_unpack_address
+static inline void
+V3D33_END_OF_Z_ONLY_RENDERING_IN_FRAME_unpack(const uint8_t * restrict cl,
+ struct V3D33_END_OF_Z_ONLY_RENDERING_IN_FRAME * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+}
+#endif
+
+
+/* END_OF_RENDERING (opcode 13): one-byte packet carrying only the opcode.
+ * pack() writes the opcode field to cl[0] and does not use `data`;
+ * unpack() reads it back from bits 0-7 and is only compiled when
+ * __gen_unpack_address is defined.
+ */
+#define V3D33_END_OF_RENDERING_opcode 13
+#define V3D33_END_OF_RENDERING_header \
+ .opcode = 13
+
+struct V3D33_END_OF_RENDERING {
+ uint32_t opcode;
+};
+
+static inline void
+V3D33_END_OF_RENDERING_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_END_OF_RENDERING * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+}
+
+#define V3D33_END_OF_RENDERING_length 1
+#ifdef __gen_unpack_address
+static inline void
+V3D33_END_OF_RENDERING_unpack(const uint8_t * restrict cl,
+ struct V3D33_END_OF_RENDERING * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+}
+#endif
+
+
+/* WAIT_FOR_TRANSFORM_FEEDBACK (opcode 14): two-byte packet.
+ * Byte 0 holds the opcode and byte 1 (bits 8-15) the block_count field.
+ * pack() does not use `data`; unpack() is only compiled when
+ * __gen_unpack_address is defined.
+ */
+#define V3D33_WAIT_FOR_TRANSFORM_FEEDBACK_opcode 14
+#define V3D33_WAIT_FOR_TRANSFORM_FEEDBACK_header\
+ .opcode = 14
+
+struct V3D33_WAIT_FOR_TRANSFORM_FEEDBACK {
+ uint32_t opcode;
+ uint32_t block_count;
+};
+
+static inline void
+V3D33_WAIT_FOR_TRANSFORM_FEEDBACK_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_WAIT_FOR_TRANSFORM_FEEDBACK * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+ cl[ 1] = __gen_uint(values->block_count, 0, 7);
+
+}
+
+#define V3D33_WAIT_FOR_TRANSFORM_FEEDBACK_length 2
+#ifdef __gen_unpack_address
+static inline void
+V3D33_WAIT_FOR_TRANSFORM_FEEDBACK_unpack(const uint8_t * restrict cl,
+ struct V3D33_WAIT_FOR_TRANSFORM_FEEDBACK * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+ values->block_count = __gen_unpack_uint(cl, 8, 15);
+}
+#endif
+
+
+/* BRANCH_TO_AUTO_CHAINED_SUB_LIST (opcode 15): five-byte packet, an opcode
+ * byte followed by a 32-bit address in bits 8-39.
+ * pack() emits a relocation for the address through __gen_emit_reloc() and
+ * stores __gen_address_offset() of it in cl[1..4], least-significant byte
+ * first.  unpack() is only compiled when __gen_unpack_address is defined.
+ */
+#define V3D33_BRANCH_TO_AUTO_CHAINED_SUB_LIST_opcode 15
+#define V3D33_BRANCH_TO_AUTO_CHAINED_SUB_LIST_header\
+ .opcode = 15
+
+struct V3D33_BRANCH_TO_AUTO_CHAINED_SUB_LIST {
+ uint32_t opcode;
+ __gen_address_type address;
+};
+
+static inline void
+V3D33_BRANCH_TO_AUTO_CHAINED_SUB_LIST_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_BRANCH_TO_AUTO_CHAINED_SUB_LIST * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+ __gen_emit_reloc(data, &values->address);
+ cl[ 1] = __gen_address_offset(&values->address);
+
+ cl[ 2] = __gen_address_offset(&values->address) >> 8;
+
+ cl[ 3] = __gen_address_offset(&values->address) >> 16;
+
+ cl[ 4] = __gen_address_offset(&values->address) >> 24;
+
+}
+
+#define V3D33_BRANCH_TO_AUTO_CHAINED_SUB_LIST_length 5
+#ifdef __gen_unpack_address
+static inline void
+V3D33_BRANCH_TO_AUTO_CHAINED_SUB_LIST_unpack(const uint8_t * restrict cl,
+ struct V3D33_BRANCH_TO_AUTO_CHAINED_SUB_LIST * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+ values->address = __gen_unpack_address(cl, 8, 39);
+}
+#endif
+
+
+/* BRANCH (opcode 16): five-byte packet, an opcode byte followed by a 32-bit
+ * address in bits 8-39.
+ * pack() emits a relocation for the address through __gen_emit_reloc() and
+ * stores __gen_address_offset() of it in cl[1..4], least-significant byte
+ * first.  unpack() is only compiled when __gen_unpack_address is defined.
+ */
+#define V3D33_BRANCH_opcode 16
+#define V3D33_BRANCH_header \
+ .opcode = 16
+
+struct V3D33_BRANCH {
+ uint32_t opcode;
+ __gen_address_type address;
+};
+
+static inline void
+V3D33_BRANCH_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_BRANCH * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+ __gen_emit_reloc(data, &values->address);
+ cl[ 1] = __gen_address_offset(&values->address);
+
+ cl[ 2] = __gen_address_offset(&values->address) >> 8;
+
+ cl[ 3] = __gen_address_offset(&values->address) >> 16;
+
+ cl[ 4] = __gen_address_offset(&values->address) >> 24;
+
+}
+
+#define V3D33_BRANCH_length 5
+#ifdef __gen_unpack_address
+static inline void
+V3D33_BRANCH_unpack(const uint8_t * restrict cl,
+ struct V3D33_BRANCH * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+ values->address = __gen_unpack_address(cl, 8, 39);
+}
+#endif
+
+
+/* BRANCH_TO_SUB_LIST (opcode 17): five-byte packet, an opcode byte followed
+ * by a 32-bit address in bits 8-39.
+ * pack() emits a relocation for the address through __gen_emit_reloc() and
+ * stores __gen_address_offset() of it in cl[1..4], least-significant byte
+ * first.  unpack() is only compiled when __gen_unpack_address is defined.
+ */
+#define V3D33_BRANCH_TO_SUB_LIST_opcode 17
+#define V3D33_BRANCH_TO_SUB_LIST_header \
+ .opcode = 17
+
+struct V3D33_BRANCH_TO_SUB_LIST {
+ uint32_t opcode;
+ __gen_address_type address;
+};
+
+static inline void
+V3D33_BRANCH_TO_SUB_LIST_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_BRANCH_TO_SUB_LIST * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+ __gen_emit_reloc(data, &values->address);
+ cl[ 1] = __gen_address_offset(&values->address);
+
+ cl[ 2] = __gen_address_offset(&values->address) >> 8;
+
+ cl[ 3] = __gen_address_offset(&values->address) >> 16;
+
+ cl[ 4] = __gen_address_offset(&values->address) >> 24;
+
+}
+
+#define V3D33_BRANCH_TO_SUB_LIST_length 5
+#ifdef __gen_unpack_address
+static inline void
+V3D33_BRANCH_TO_SUB_LIST_unpack(const uint8_t * restrict cl,
+ struct V3D33_BRANCH_TO_SUB_LIST * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+ values->address = __gen_unpack_address(cl, 8, 39);
+}
+#endif
+
+
+/* RETURN_FROM_SUB_LIST (opcode 18): one-byte packet carrying only the
+ * opcode.  pack() writes the opcode field to cl[0] and does not use `data`;
+ * unpack() reads it back from bits 0-7 and is only compiled when
+ * __gen_unpack_address is defined.
+ */
+#define V3D33_RETURN_FROM_SUB_LIST_opcode 18
+#define V3D33_RETURN_FROM_SUB_LIST_header \
+ .opcode = 18
+
+struct V3D33_RETURN_FROM_SUB_LIST {
+ uint32_t opcode;
+};
+
+static inline void
+V3D33_RETURN_FROM_SUB_LIST_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_RETURN_FROM_SUB_LIST * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+}
+
+#define V3D33_RETURN_FROM_SUB_LIST_length 1
+#ifdef __gen_unpack_address
+static inline void
+V3D33_RETURN_FROM_SUB_LIST_unpack(const uint8_t * restrict cl,
+ struct V3D33_RETURN_FROM_SUB_LIST * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+}
+#endif
+
+
+/* FLUSH_VCD_CACHE (opcode 19): one-byte packet carrying only the opcode.
+ * pack() writes the opcode field to cl[0] and does not use `data`;
+ * unpack() reads it back from bits 0-7 and is only compiled when
+ * __gen_unpack_address is defined.
+ */
+#define V3D33_FLUSH_VCD_CACHE_opcode 19
+#define V3D33_FLUSH_VCD_CACHE_header \
+ .opcode = 19
+
+struct V3D33_FLUSH_VCD_CACHE {
+ uint32_t opcode;
+};
+
+static inline void
+V3D33_FLUSH_VCD_CACHE_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_FLUSH_VCD_CACHE * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+}
+
+#define V3D33_FLUSH_VCD_CACHE_length 1
+#ifdef __gen_unpack_address
+static inline void
+V3D33_FLUSH_VCD_CACHE_unpack(const uint8_t * restrict cl,
+ struct V3D33_FLUSH_VCD_CACHE * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+}
+#endif
+
+
+/* START_ADDRESS_OF_GENERIC_TILE_LIST (opcode 20): nine-byte packet, an
+ * opcode byte followed by two 32-bit addresses: `start` in bits 8-39 and
+ * `end` in bits 40-71.
+ * pack() emits one relocation per address through __gen_emit_reloc() and
+ * stores each __gen_address_offset() least-significant byte first (cl[1..4]
+ * and cl[5..8]).  unpack() is only compiled when __gen_unpack_address is
+ * defined.
+ */
+#define V3D33_START_ADDRESS_OF_GENERIC_TILE_LIST_opcode 20
+#define V3D33_START_ADDRESS_OF_GENERIC_TILE_LIST_header\
+ .opcode = 20
+
+struct V3D33_START_ADDRESS_OF_GENERIC_TILE_LIST {
+ uint32_t opcode;
+ __gen_address_type start;
+ __gen_address_type end;
+};
+
+static inline void
+V3D33_START_ADDRESS_OF_GENERIC_TILE_LIST_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_START_ADDRESS_OF_GENERIC_TILE_LIST * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+ __gen_emit_reloc(data, &values->start);
+ cl[ 1] = __gen_address_offset(&values->start);
+
+ cl[ 2] = __gen_address_offset(&values->start) >> 8;
+
+ cl[ 3] = __gen_address_offset(&values->start) >> 16;
+
+ cl[ 4] = __gen_address_offset(&values->start) >> 24;
+
+ __gen_emit_reloc(data, &values->end);
+ cl[ 5] = __gen_address_offset(&values->end);
+
+ cl[ 6] = __gen_address_offset(&values->end) >> 8;
+
+ cl[ 7] = __gen_address_offset(&values->end) >> 16;
+
+ cl[ 8] = __gen_address_offset(&values->end) >> 24;
+
+}
+
+#define V3D33_START_ADDRESS_OF_GENERIC_TILE_LIST_length 9
+#ifdef __gen_unpack_address
+static inline void
+V3D33_START_ADDRESS_OF_GENERIC_TILE_LIST_unpack(const uint8_t * restrict cl,
+ struct V3D33_START_ADDRESS_OF_GENERIC_TILE_LIST * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+ values->start = __gen_unpack_address(cl, 8, 39);
+ values->end = __gen_unpack_address(cl, 40, 71);
+}
+#endif
+
+
+/* BRANCH_TO_IMPLICIT_TILE_LIST (opcode 21): two-byte packet.
+ * Byte 0 holds the opcode and byte 1 (bits 8-15) the tile_list_set_number
+ * field.  pack() does not use `data`; unpack() is only compiled when
+ * __gen_unpack_address is defined.
+ */
+#define V3D33_BRANCH_TO_IMPLICIT_TILE_LIST_opcode 21
+#define V3D33_BRANCH_TO_IMPLICIT_TILE_LIST_header\
+ .opcode = 21
+
+struct V3D33_BRANCH_TO_IMPLICIT_TILE_LIST {
+ uint32_t opcode;
+ uint32_t tile_list_set_number;
+};
+
+static inline void
+V3D33_BRANCH_TO_IMPLICIT_TILE_LIST_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_BRANCH_TO_IMPLICIT_TILE_LIST * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+ cl[ 1] = __gen_uint(values->tile_list_set_number, 0, 7);
+
+}
+
+#define V3D33_BRANCH_TO_IMPLICIT_TILE_LIST_length 2
+#ifdef __gen_unpack_address
+static inline void
+V3D33_BRANCH_TO_IMPLICIT_TILE_LIST_unpack(const uint8_t * restrict cl,
+ struct V3D33_BRANCH_TO_IMPLICIT_TILE_LIST * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+ values->tile_list_set_number = __gen_unpack_uint(cl, 8, 15);
+}
+#endif
+
+
+/* BRANCH_TO_EXPLICIT_SUPERTILE (opcode 22): eight-byte packet.
+ * Byte layout: 0 = opcode, 1 = column_number, 2 = row_number,
+ * 3 = explicit_supertile_number, 4-7 = 32-bit render list address
+ * (bits 32-63).  Note that the struct lists the members in the reverse of
+ * their byte order.
+ * pack() emits a relocation for the address through __gen_emit_reloc() and
+ * stores __gen_address_offset() of it least-significant byte first.
+ * unpack() is only compiled when __gen_unpack_address is defined.
+ */
+#define V3D33_BRANCH_TO_EXPLICIT_SUPERTILE_opcode 22
+#define V3D33_BRANCH_TO_EXPLICIT_SUPERTILE_header\
+ .opcode = 22
+
+struct V3D33_BRANCH_TO_EXPLICIT_SUPERTILE {
+ uint32_t opcode;
+ __gen_address_type absolute_address_of_explicit_supertile_render_list;
+ uint32_t explicit_supertile_number;
+ uint32_t row_number;
+ uint32_t column_number;
+};
+
+static inline void
+V3D33_BRANCH_TO_EXPLICIT_SUPERTILE_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_BRANCH_TO_EXPLICIT_SUPERTILE * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+ cl[ 1] = __gen_uint(values->column_number, 0, 7);
+
+ cl[ 2] = __gen_uint(values->row_number, 0, 7);
+
+ cl[ 3] = __gen_uint(values->explicit_supertile_number, 0, 7);
+
+ __gen_emit_reloc(data, &values->absolute_address_of_explicit_supertile_render_list);
+ cl[ 4] = __gen_address_offset(&values->absolute_address_of_explicit_supertile_render_list);
+
+ cl[ 5] = __gen_address_offset(&values->absolute_address_of_explicit_supertile_render_list) >> 8;
+
+ cl[ 6] = __gen_address_offset(&values->absolute_address_of_explicit_supertile_render_list) >> 16;
+
+ cl[ 7] = __gen_address_offset(&values->absolute_address_of_explicit_supertile_render_list) >> 24;
+
+}
+
+#define V3D33_BRANCH_TO_EXPLICIT_SUPERTILE_length 8
+#ifdef __gen_unpack_address
+static inline void
+V3D33_BRANCH_TO_EXPLICIT_SUPERTILE_unpack(const uint8_t * restrict cl,
+ struct V3D33_BRANCH_TO_EXPLICIT_SUPERTILE * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+ values->absolute_address_of_explicit_supertile_render_list = __gen_unpack_address(cl, 32, 63);
+ values->explicit_supertile_number = __gen_unpack_uint(cl, 24, 31);
+ values->row_number = __gen_unpack_uint(cl, 16, 23);
+ values->column_number = __gen_unpack_uint(cl, 8, 15);
+}
+#endif
+
+
+#define V3D33_SUPERTILE_COORDINATES_opcode 23
+#define V3D33_SUPERTILE_COORDINATES_header \
+ .opcode = 23
+
+struct V3D33_SUPERTILE_COORDINATES {
+ uint32_t opcode;
+ uint32_t row_number_in_supertiles;
+ uint32_t column_number_in_supertiles;
+};
+
+static inline void
+V3D33_SUPERTILE_COORDINATES_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_SUPERTILE_COORDINATES * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+ cl[ 1] = __gen_uint(values->column_number_in_supertiles, 0, 7);
+
+ cl[ 2] = __gen_uint(values->row_number_in_supertiles, 0, 7);
+
+}
+
+#define V3D33_SUPERTILE_COORDINATES_length 3
+#ifdef __gen_unpack_address
+static inline void
+V3D33_SUPERTILE_COORDINATES_unpack(const uint8_t * restrict cl,
+ struct V3D33_SUPERTILE_COORDINATES * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+ values->row_number_in_supertiles = __gen_unpack_uint(cl, 16, 23);
+ values->column_number_in_supertiles = __gen_unpack_uint(cl, 8, 15);
+}
+#endif
+
+
+#define V3D33_STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_opcode 24
+#define V3D33_STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_header\
+ .opcode = 24
+
+struct V3D33_STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER {
+ uint32_t opcode;
+};
+
+static inline void
+V3D33_STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+}
+
+#define V3D33_STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_length 1
+#ifdef __gen_unpack_address
+static inline void
+V3D33_STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_unpack(const uint8_t * restrict cl,
+ struct V3D33_STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+}
+#endif
+
+
+#define V3D33_STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_EXTENDED_opcode 25
+#define V3D33_STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_EXTENDED_header\
+ .opcode = 25
+
+struct V3D33_STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_EXTENDED {
+ uint32_t opcode;
+ uint32_t disable_color_buffer_write;
+ bool enable_z_write;
+ bool enable_stencil_write;
+ bool disable_colour_buffers_clear_on_write;
+ bool disable_stencil_buffer_clear_on_write;
+ bool disable_z_buffer_clear_on_write;
+ bool disable_fast_opportunistic_write_out_in_multisample_mode;
+ bool last_tile_of_frame;
+};
+
+static inline void
+V3D33_STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_EXTENDED_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_EXTENDED * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+ cl[ 1] = __gen_uint(values->enable_z_write, 7, 7) |
+ __gen_uint(values->enable_stencil_write, 6, 6) |
+ __gen_uint(values->disable_colour_buffers_clear_on_write, 4, 4) |
+ __gen_uint(values->disable_stencil_buffer_clear_on_write, 3, 3) |
+ __gen_uint(values->disable_z_buffer_clear_on_write, 2, 2) |
+ __gen_uint(values->disable_fast_opportunistic_write_out_in_multisample_mode, 1, 1) |
+ __gen_uint(values->last_tile_of_frame, 0, 0);
+
+ cl[ 2] = __gen_uint(values->disable_color_buffer_write, 0, 7);
+
+}
+
+#define V3D33_STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_EXTENDED_length 3
+#ifdef __gen_unpack_address
+static inline void
+V3D33_STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_EXTENDED_unpack(const uint8_t * restrict cl,
+ struct V3D33_STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_EXTENDED * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+ values->disable_color_buffer_write = __gen_unpack_uint(cl, 16, 23);
+ values->enable_z_write = __gen_unpack_uint(cl, 15, 15);
+ values->enable_stencil_write = __gen_unpack_uint(cl, 14, 14);
+ values->disable_colour_buffers_clear_on_write = __gen_unpack_uint(cl, 12, 12);
+ values->disable_stencil_buffer_clear_on_write = __gen_unpack_uint(cl, 11, 11);
+ values->disable_z_buffer_clear_on_write = __gen_unpack_uint(cl, 10, 10);
+ values->disable_fast_opportunistic_write_out_in_multisample_mode = __gen_unpack_uint(cl, 9, 9);
+ values->last_tile_of_frame = __gen_unpack_uint(cl, 8, 8);
+}
+#endif
+
+
+#define V3D33_RELOAD_TILE_COLOUR_BUFFER_opcode 26
+#define V3D33_RELOAD_TILE_COLOUR_BUFFER_header \
+ .opcode = 26
+
+struct V3D33_RELOAD_TILE_COLOUR_BUFFER {
+ uint32_t opcode;
+ uint32_t disable_colour_buffer_load;
+ bool enable_z_load;
+ bool enable_stencil_load;
+};
+
+static inline void
+V3D33_RELOAD_TILE_COLOUR_BUFFER_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_RELOAD_TILE_COLOUR_BUFFER * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+ cl[ 1] = __gen_uint(values->enable_z_load, 7, 7) |
+ __gen_uint(values->enable_stencil_load, 6, 6);
+
+ cl[ 2] = __gen_uint(values->disable_colour_buffer_load, 0, 7);
+
+}
+
+#define V3D33_RELOAD_TILE_COLOUR_BUFFER_length 3
+#ifdef __gen_unpack_address
+static inline void
+V3D33_RELOAD_TILE_COLOUR_BUFFER_unpack(const uint8_t * restrict cl,
+ struct V3D33_RELOAD_TILE_COLOUR_BUFFER * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+ values->disable_colour_buffer_load = __gen_unpack_uint(cl, 16, 23);
+ values->enable_z_load = __gen_unpack_uint(cl, 15, 15);
+ values->enable_stencil_load = __gen_unpack_uint(cl, 14, 14);
+}
+#endif
+
+
+#define V3D33_END_OF_TILE_MARKER_opcode 27
+#define V3D33_END_OF_TILE_MARKER_header \
+ .opcode = 27
+
+struct V3D33_END_OF_TILE_MARKER {
+ uint32_t opcode;
+};
+
+static inline void
+V3D33_END_OF_TILE_MARKER_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_END_OF_TILE_MARKER * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+}
+
+#define V3D33_END_OF_TILE_MARKER_length 1
+#ifdef __gen_unpack_address
+static inline void
+V3D33_END_OF_TILE_MARKER_unpack(const uint8_t * restrict cl,
+ struct V3D33_END_OF_TILE_MARKER * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+}
+#endif
+
+
+#define V3D33_STORE_TILE_BUFFER_GENERAL_opcode 29
+#define V3D33_STORE_TILE_BUFFER_GENERAL_header \
+ .opcode = 29
+
+struct V3D33_STORE_TILE_BUFFER_GENERAL {
+ uint32_t opcode;
+ __gen_address_type address;
+ uint32_t padded_height_of_output_image_in_uif_blocks;
+ bool xor_uif;
+ bool last_tile_of_frame;
+ bool disable_colour_buffers_clear_on_write;
+ bool disable_stencil_buffer_clear_on_write;
+ bool disable_z_buffer_clear_on_write;
+ bool raw_mode;
+ uint32_t buffer_to_store;
+#define RENDER_TARGET_0 0
+#define RENDER_TARGET_1 1
+#define RENDER_TARGET_2 2
+#define RENDER_TARGET_3 3
+#define NONE 8
+#define Z 9
+#define STENCIL 10
+#define ZSTENCIL 11
+};
+
+static inline void
+V3D33_STORE_TILE_BUFFER_GENERAL_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_STORE_TILE_BUFFER_GENERAL * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+ cl[ 1] = __gen_uint(values->disable_colour_buffers_clear_on_write, 7, 7) |
+ __gen_uint(values->disable_stencil_buffer_clear_on_write, 6, 6) |
+ __gen_uint(values->disable_z_buffer_clear_on_write, 5, 5) |
+ __gen_uint(values->raw_mode, 4, 4) |
+ __gen_uint(values->buffer_to_store, 0, 3);
+
+ cl[ 2] = __gen_uint(values->padded_height_of_output_image_in_uif_blocks, 3, 15) |
+ __gen_uint(values->xor_uif, 2, 2) |
+ __gen_uint(values->last_tile_of_frame, 0, 0);
+
+ __gen_emit_reloc(data, &values->address);
+ cl[ 3] = __gen_address_offset(&values->address) |
+ __gen_uint(values->padded_height_of_output_image_in_uif_blocks, 3, 15) >> 8;
+
+ cl[ 4] = __gen_address_offset(&values->address) >> 8;
+
+ cl[ 5] = __gen_address_offset(&values->address) >> 16;
+
+ cl[ 6] = __gen_address_offset(&values->address) >> 24;
+
+}
+
+#define V3D33_STORE_TILE_BUFFER_GENERAL_length 7
+#ifdef __gen_unpack_address
+static inline void
+V3D33_STORE_TILE_BUFFER_GENERAL_unpack(const uint8_t * restrict cl,
+ struct V3D33_STORE_TILE_BUFFER_GENERAL * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+ values->address = __gen_unpack_address(cl, 24, 55);
+ values->padded_height_of_output_image_in_uif_blocks = __gen_unpack_uint(cl, 19, 31);
+ values->xor_uif = __gen_unpack_uint(cl, 18, 18);
+ values->last_tile_of_frame = __gen_unpack_uint(cl, 16, 16);
+ values->disable_colour_buffers_clear_on_write = __gen_unpack_uint(cl, 15, 15);
+ values->disable_stencil_buffer_clear_on_write = __gen_unpack_uint(cl, 14, 14);
+ values->disable_z_buffer_clear_on_write = __gen_unpack_uint(cl, 13, 13);
+ values->raw_mode = __gen_unpack_uint(cl, 12, 12);
+ values->buffer_to_store = __gen_unpack_uint(cl, 8, 11);
+}
+#endif
+
+
+#define V3D33_LOAD_TILE_BUFFER_GENERAL_opcode 30
+#define V3D33_LOAD_TILE_BUFFER_GENERAL_header \
+ .opcode = 30
+
+struct V3D33_LOAD_TILE_BUFFER_GENERAL {
+ uint32_t opcode;
+ __gen_address_type address;
+ uint32_t padded_height_of_output_image_in_uif_blocks;
+ bool xor_uif;
+ bool raw_mode;
+ uint32_t buffer_to_load;
+#define RENDER_TARGET_0 0
+#define RENDER_TARGET_1 1
+#define RENDER_TARGET_2 2
+#define RENDER_TARGET_3 3
+#define NONE 8
+#define Z 9
+#define STENCIL 10
+#define ZSTENCIL 11
+};
+
+static inline void
+V3D33_LOAD_TILE_BUFFER_GENERAL_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_LOAD_TILE_BUFFER_GENERAL * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+ cl[ 1] = __gen_uint(values->raw_mode, 4, 4) |
+ __gen_uint(values->buffer_to_load, 0, 3);
+
+ cl[ 2] = __gen_uint(values->padded_height_of_output_image_in_uif_blocks, 3, 15) |
+ __gen_uint(values->xor_uif, 2, 2);
+
+ cl[ 3] = __gen_uint(values->padded_height_of_output_image_in_uif_blocks, 3, 15) >> 8;
+
+ __gen_emit_reloc(data, &values->address);
+ cl[ 4] = __gen_address_offset(&values->address);
+
+ cl[ 5] = __gen_address_offset(&values->address) >> 8;
+
+ cl[ 6] = __gen_address_offset(&values->address) >> 16;
+
+}
+
+#define V3D33_LOAD_TILE_BUFFER_GENERAL_length 7
+#ifdef __gen_unpack_address
+static inline void
+V3D33_LOAD_TILE_BUFFER_GENERAL_unpack(const uint8_t * restrict cl,
+ struct V3D33_LOAD_TILE_BUFFER_GENERAL * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+ values->address = __gen_unpack_address(cl, 32, 55);
+ values->padded_height_of_output_image_in_uif_blocks = __gen_unpack_uint(cl, 19, 31);
+ values->xor_uif = __gen_unpack_uint(cl, 18, 18);
+ values->raw_mode = __gen_unpack_uint(cl, 12, 12);
+ values->buffer_to_load = __gen_unpack_uint(cl, 8, 11);
+}
+#endif
+
+
+#define V3D33_INDEXED_PRIMITIVE_LIST_opcode 32
+#define V3D33_INDEXED_PRIMITIVE_LIST_header \
+ .opcode = 32
+
+struct V3D33_INDEXED_PRIMITIVE_LIST {
+ uint32_t opcode;
+ uint32_t minimum_index;
+ bool enable_primitive_restarts;
+ uint32_t maximum_index;
+ __gen_address_type address_of_indices_list;
+ uint32_t length;
+ uint32_t index_type;
+#define INDEX_TYPE_8_BIT 0
+#define INDEX_TYPE_16_BIT 1
+#define INDEX_TYPE_32_BIT 2
+ uint32_t mode;
+};
+
+static inline void
+V3D33_INDEXED_PRIMITIVE_LIST_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_INDEXED_PRIMITIVE_LIST * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+ cl[ 1] = __gen_uint(values->index_type, 6, 7) |
+ __gen_uint(values->mode, 0, 4);
+
+
+ memcpy(&cl[2], &values->length, sizeof(values->length));
+ __gen_emit_reloc(data, &values->address_of_indices_list);
+ cl[ 6] = __gen_address_offset(&values->address_of_indices_list);
+
+ cl[ 7] = __gen_address_offset(&values->address_of_indices_list) >> 8;
+
+ cl[ 8] = __gen_address_offset(&values->address_of_indices_list) >> 16;
+
+ cl[ 9] = __gen_address_offset(&values->address_of_indices_list) >> 24;
+
+ cl[10] = __gen_uint(values->maximum_index, 0, 30);
+
+ cl[11] = __gen_uint(values->maximum_index, 0, 30) >> 8;
+
+ cl[12] = __gen_uint(values->maximum_index, 0, 30) >> 16;
+
+ cl[13] = __gen_uint(values->enable_primitive_restarts, 7, 7) |
+ __gen_uint(values->maximum_index, 0, 30) >> 24;
+
+
+ memcpy(&cl[14], &values->minimum_index, sizeof(values->minimum_index));
+}
+
+#define V3D33_INDEXED_PRIMITIVE_LIST_length 18
+#ifdef __gen_unpack_address
+static inline void
+V3D33_INDEXED_PRIMITIVE_LIST_unpack(const uint8_t * restrict cl,
+ struct V3D33_INDEXED_PRIMITIVE_LIST * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+ values->minimum_index = __gen_unpack_uint(cl, 112, 143);
+ values->enable_primitive_restarts = __gen_unpack_uint(cl, 111, 111);
+ values->maximum_index = __gen_unpack_uint(cl, 80, 110);
+ values->address_of_indices_list = __gen_unpack_address(cl, 48, 79);
+ values->length = __gen_unpack_uint(cl, 16, 47);
+ values->index_type = __gen_unpack_uint(cl, 14, 15);
+ values->mode = __gen_unpack_uint(cl, 8, 12);
+}
+#endif
+
+
+#define V3D33_INDEXED_INSTANCED_PRIMITIVE_LIST_opcode 34
+#define V3D33_INDEXED_INSTANCED_PRIMITIVE_LIST_header\
+ .opcode = 34
+
+struct V3D33_INDEXED_INSTANCED_PRIMITIVE_LIST {
+ uint32_t opcode;
+ bool enable_primitive_restarts;
+ uint32_t maximum_index;
+ __gen_address_type address_of_indices_list;
+ uint32_t number_of_instances;
+ uint32_t instance_length;
+ uint32_t index_type;
+#define INDEX_TYPE_8_BIT 0
+#define INDEX_TYPE_16_BIT 1
+#define INDEX_TYPE_32_BIT 2
+ enum V3D33_Primitive mode;
+};
+
+static inline void
+V3D33_INDEXED_INSTANCED_PRIMITIVE_LIST_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_INDEXED_INSTANCED_PRIMITIVE_LIST * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+ cl[ 1] = __gen_uint(values->index_type, 6, 7) |
+ __gen_uint(values->mode, 0, 4);
+
+
+ memcpy(&cl[2], &values->instance_length, sizeof(values->instance_length));
+
+ memcpy(&cl[6], &values->number_of_instances, sizeof(values->number_of_instances));
+ __gen_emit_reloc(data, &values->address_of_indices_list);
+ cl[10] = __gen_address_offset(&values->address_of_indices_list);
+
+ cl[11] = __gen_address_offset(&values->address_of_indices_list) >> 8;
+
+ cl[12] = __gen_address_offset(&values->address_of_indices_list) >> 16;
+
+ cl[13] = __gen_address_offset(&values->address_of_indices_list) >> 24;
+
+ cl[14] = __gen_uint(values->maximum_index, 0, 30);
+
+ cl[15] = __gen_uint(values->maximum_index, 0, 30) >> 8;
+
+ cl[16] = __gen_uint(values->maximum_index, 0, 30) >> 16;
+
+ cl[17] = __gen_uint(values->enable_primitive_restarts, 7, 7) |
+ __gen_uint(values->maximum_index, 0, 30) >> 24;
+
+}
+
+#define V3D33_INDEXED_INSTANCED_PRIMITIVE_LIST_length 18
+#ifdef __gen_unpack_address
+static inline void
+V3D33_INDEXED_INSTANCED_PRIMITIVE_LIST_unpack(const uint8_t * restrict cl,
+ struct V3D33_INDEXED_INSTANCED_PRIMITIVE_LIST * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+ values->enable_primitive_restarts = __gen_unpack_uint(cl, 143, 143);
+ values->maximum_index = __gen_unpack_uint(cl, 112, 142);
+ values->address_of_indices_list = __gen_unpack_address(cl, 80, 111);
+ values->number_of_instances = __gen_unpack_uint(cl, 48, 79);
+ values->instance_length = __gen_unpack_uint(cl, 16, 47);
+ values->index_type = __gen_unpack_uint(cl, 14, 15);
+ values->mode = __gen_unpack_uint(cl, 8, 12);
+}
+#endif
+
+
+#define V3D33_VERTEX_ARRAY_PRIMITIVES_opcode 36
+#define V3D33_VERTEX_ARRAY_PRIMITIVES_header \
+ .opcode = 36
+
+struct V3D33_VERTEX_ARRAY_PRIMITIVES {
+ uint32_t opcode;
+ uint32_t index_of_first_vertex;
+ uint32_t length;
+ enum V3D33_Primitive mode;
+};
+
+static inline void
+V3D33_VERTEX_ARRAY_PRIMITIVES_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_VERTEX_ARRAY_PRIMITIVES * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+ cl[ 1] = __gen_uint(values->mode, 0, 7);
+
+
+ memcpy(&cl[2], &values->length, sizeof(values->length));
+
+ memcpy(&cl[6], &values->index_of_first_vertex, sizeof(values->index_of_first_vertex));
+}
+
+#define V3D33_VERTEX_ARRAY_PRIMITIVES_length 10
+#ifdef __gen_unpack_address
+static inline void
+V3D33_VERTEX_ARRAY_PRIMITIVES_unpack(const uint8_t * restrict cl,
+ struct V3D33_VERTEX_ARRAY_PRIMITIVES * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+ values->index_of_first_vertex = __gen_unpack_uint(cl, 48, 79);
+ values->length = __gen_unpack_uint(cl, 16, 47);
+ values->mode = __gen_unpack_uint(cl, 8, 15);
+}
+#endif
+
+
+#define V3D33_VERTEX_ARRAY_INSTANCED_PRIMITIVES_opcode 38
+#define V3D33_VERTEX_ARRAY_INSTANCED_PRIMITIVES_header\
+ .opcode = 38
+
+struct V3D33_VERTEX_ARRAY_INSTANCED_PRIMITIVES {
+ uint32_t opcode;
+ uint32_t index_of_first_vertex;
+ uint32_t number_of_instances;
+ uint32_t instance_length;
+ enum V3D33_Primitive mode;
+};
+
+static inline void
+V3D33_VERTEX_ARRAY_INSTANCED_PRIMITIVES_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_VERTEX_ARRAY_INSTANCED_PRIMITIVES * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+ cl[ 1] = __gen_uint(values->mode, 0, 7);
+
+
+ memcpy(&cl[2], &values->instance_length, sizeof(values->instance_length));
+
+ memcpy(&cl[6], &values->number_of_instances, sizeof(values->number_of_instances));
+
+ memcpy(&cl[10], &values->index_of_first_vertex, sizeof(values->index_of_first_vertex));
+}
+
+#define V3D33_VERTEX_ARRAY_INSTANCED_PRIMITIVES_length 14
+#ifdef __gen_unpack_address
+static inline void
+V3D33_VERTEX_ARRAY_INSTANCED_PRIMITIVES_unpack(const uint8_t * restrict cl,
+ struct V3D33_VERTEX_ARRAY_INSTANCED_PRIMITIVES * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+ values->index_of_first_vertex = __gen_unpack_uint(cl, 80, 111);
+ values->number_of_instances = __gen_unpack_uint(cl, 48, 79);
+ values->instance_length = __gen_unpack_uint(cl, 16, 47);
+ values->mode = __gen_unpack_uint(cl, 8, 15);
+}
+#endif
+
+
+#define V3D33_BASE_VERTEX_BASE_INSTANCE_opcode 43
+#define V3D33_BASE_VERTEX_BASE_INSTANCE_header \
+ .opcode = 43
+
+struct V3D33_BASE_VERTEX_BASE_INSTANCE {
+ uint32_t opcode;
+ uint32_t base_instance;
+ uint32_t base_vertex;
+};
+
+static inline void
+V3D33_BASE_VERTEX_BASE_INSTANCE_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_BASE_VERTEX_BASE_INSTANCE * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+
+ memcpy(&cl[1], &values->base_vertex, sizeof(values->base_vertex));
+
+ memcpy(&cl[5], &values->base_instance, sizeof(values->base_instance));
+}
+
+#define V3D33_BASE_VERTEX_BASE_INSTANCE_length 9
+#ifdef __gen_unpack_address
+static inline void
+V3D33_BASE_VERTEX_BASE_INSTANCE_unpack(const uint8_t * restrict cl,
+ struct V3D33_BASE_VERTEX_BASE_INSTANCE * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+ values->base_instance = __gen_unpack_uint(cl, 40, 71);
+ values->base_vertex = __gen_unpack_uint(cl, 8, 39);
+}
+#endif
+
+
+#define V3D33_PRIMITIVE_LIST_FORMAT_opcode 56
+#define V3D33_PRIMITIVE_LIST_FORMAT_header \
+ .opcode = 56
+
+struct V3D33_PRIMITIVE_LIST_FORMAT {
+ uint32_t opcode;
+ uint32_t data_type;
+#define LIST_INDEXED 0
+#define LIST_32_BIT_X_Y 1
+ uint32_t primitive_type;
+#define LIST_POINTS 0
+#define LIST_LINES 1
+#define LIST_TRIANGLES 2
+};
+
+static inline void
+V3D33_PRIMITIVE_LIST_FORMAT_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_PRIMITIVE_LIST_FORMAT * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+ cl[ 1] = __gen_uint(values->data_type, 6, 6) |
+ __gen_uint(values->primitive_type, 0, 5);
+
+}
+
+#define V3D33_PRIMITIVE_LIST_FORMAT_length 2
+#ifdef __gen_unpack_address
+static inline void
+V3D33_PRIMITIVE_LIST_FORMAT_unpack(const uint8_t * restrict cl,
+ struct V3D33_PRIMITIVE_LIST_FORMAT * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+ values->data_type = __gen_unpack_uint(cl, 14, 14);
+ values->primitive_type = __gen_unpack_uint(cl, 8, 13);
+}
+#endif
+
+
+#define V3D33_GL_SHADER_STATE_opcode 64
+#define V3D33_GL_SHADER_STATE_header \
+ .opcode = 64
+
+struct V3D33_GL_SHADER_STATE {
+ uint32_t opcode;
+ __gen_address_type address;
+ uint32_t number_of_attribute_arrays;
+};
+
+static inline void
+V3D33_GL_SHADER_STATE_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_GL_SHADER_STATE * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+ __gen_emit_reloc(data, &values->address);
+ cl[ 1] = __gen_address_offset(&values->address) |
+ __gen_uint(values->number_of_attribute_arrays, 0, 4);
+
+ cl[ 2] = __gen_address_offset(&values->address) >> 8;
+
+ cl[ 3] = __gen_address_offset(&values->address) >> 16;
+
+ cl[ 4] = __gen_address_offset(&values->address) >> 24;
+
+}
+
+#define V3D33_GL_SHADER_STATE_length 5
+#ifdef __gen_unpack_address
+static inline void
+V3D33_GL_SHADER_STATE_unpack(const uint8_t * restrict cl,
+ struct V3D33_GL_SHADER_STATE * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+ values->address = __gen_unpack_address(cl, 13, 39);
+ values->number_of_attribute_arrays = __gen_unpack_uint(cl, 8, 12);
+}
+#endif
+
+
+#define V3D33_TRANSFORM_FEEDBACK_ENABLE_opcode 74
+#define V3D33_TRANSFORM_FEEDBACK_ENABLE_header \
+ .opcode = 74
+
+struct V3D33_TRANSFORM_FEEDBACK_ENABLE {
+ uint32_t opcode;
+ uint32_t number_of_32_bit_output_buffer_address_following;
+ uint32_t number_of_16_bit_output_data_specs_following;
+};
+
+static inline void
+V3D33_TRANSFORM_FEEDBACK_ENABLE_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_TRANSFORM_FEEDBACK_ENABLE * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+ cl[ 1] = 0;
+ cl[ 2] = __gen_uint(values->number_of_32_bit_output_buffer_address_following, 0, 2) |
+ __gen_uint(values->number_of_16_bit_output_data_specs_following, 3, 7);
+
+}
+
+#define V3D33_TRANSFORM_FEEDBACK_ENABLE_length 3
+#ifdef __gen_unpack_address
+static inline void
+V3D33_TRANSFORM_FEEDBACK_ENABLE_unpack(const uint8_t * restrict cl,
+ struct V3D33_TRANSFORM_FEEDBACK_ENABLE * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+ values->number_of_32_bit_output_buffer_address_following = __gen_unpack_uint(cl, 16, 18);
+ values->number_of_16_bit_output_data_specs_following = __gen_unpack_uint(cl, 19, 23);
+}
+#endif
+
+
+#define V3D33_FLUSH_TRANSFORM_FEEDBACK_DATA_opcode 75
+#define V3D33_FLUSH_TRANSFORM_FEEDBACK_DATA_header\
+ .opcode = 75
+
+struct V3D33_FLUSH_TRANSFORM_FEEDBACK_DATA {
+ uint32_t opcode;
+};
+
+static inline void
+V3D33_FLUSH_TRANSFORM_FEEDBACK_DATA_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_FLUSH_TRANSFORM_FEEDBACK_DATA * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+}
+
+#define V3D33_FLUSH_TRANSFORM_FEEDBACK_DATA_length 1
+#ifdef __gen_unpack_address
+static inline void
+V3D33_FLUSH_TRANSFORM_FEEDBACK_DATA_unpack(const uint8_t * restrict cl,
+ struct V3D33_FLUSH_TRANSFORM_FEEDBACK_DATA * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+}
+#endif
+
+
+#define V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_header\
+
+
+struct V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC {
+ uint32_t first_shaded_vertex_value_to_output;
+ uint32_t number_of_consecutive_vertex_values_to_output_as_32_bit_values_minus_1;
+ uint32_t output_buffer_to_write_to;
+};
+
+static inline void
+V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC * restrict values)
+{
+ cl[ 0] = __gen_uint(values->first_shaded_vertex_value_to_output, 0, 7);
+
+ cl[ 1] = __gen_uint(values->number_of_consecutive_vertex_values_to_output_as_32_bit_values_minus_1, 0, 3) |
+ __gen_uint(values->output_buffer_to_write_to, 4, 5);
+
+}
+
+#define V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_length 2
+#ifdef __gen_unpack_address
+static inline void
+V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_unpack(const uint8_t * restrict cl,
+ struct V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC * restrict values)
+{
+ values->first_shaded_vertex_value_to_output = __gen_unpack_uint(cl, 0, 7);
+ values->number_of_consecutive_vertex_values_to_output_as_32_bit_values_minus_1 = __gen_unpack_uint(cl, 8, 11);
+ values->output_buffer_to_write_to = __gen_unpack_uint(cl, 12, 13);
+}
+#endif
+
+
+#define V3D33_TRANSFORM_FEEDBACK_OUTPUT_ADDRESS_header\
+
+
+struct V3D33_TRANSFORM_FEEDBACK_OUTPUT_ADDRESS {
+ __gen_address_type address;
+};
+
+static inline void
+V3D33_TRANSFORM_FEEDBACK_OUTPUT_ADDRESS_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_TRANSFORM_FEEDBACK_OUTPUT_ADDRESS * restrict values)
+{
+ __gen_emit_reloc(data, &values->address);
+ cl[ 0] = __gen_address_offset(&values->address);
+
+ cl[ 1] = __gen_address_offset(&values->address) >> 8;
+
+ cl[ 2] = __gen_address_offset(&values->address) >> 16;
+
+ cl[ 3] = __gen_address_offset(&values->address) >> 24;
+
+}
+
+#define V3D33_TRANSFORM_FEEDBACK_OUTPUT_ADDRESS_length 4
+#ifdef __gen_unpack_address
+static inline void
+V3D33_TRANSFORM_FEEDBACK_OUTPUT_ADDRESS_unpack(const uint8_t * restrict cl,
+ struct V3D33_TRANSFORM_FEEDBACK_OUTPUT_ADDRESS * restrict values)
+{
+ values->address = __gen_unpack_address(cl, 0, 31);
+}
+#endif
+
+
+#define V3D33_STENCIL_CONFIG_opcode 80
+#define V3D33_STENCIL_CONFIG_header \
+ .opcode = 80
+
+struct V3D33_STENCIL_CONFIG {
+ uint32_t opcode;
+ uint32_t stencil_write_mask;
+ bool back_config;
+ bool front_config;
+ enum V3D33_Stencil_Op stencil_pass_op;
+ enum V3D33_Stencil_Op depth_test_fail_op;
+ enum V3D33_Stencil_Op stencil_test_fail_op;
+ enum V3D33_Compare_Function stencil_test_function;
+ uint32_t stencil_test_mask;
+ uint32_t stencil_ref_value;
+};
+
+static inline void
+V3D33_STENCIL_CONFIG_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_STENCIL_CONFIG * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+ cl[ 1] = __gen_uint(values->stencil_ref_value, 0, 7);
+
+ cl[ 2] = __gen_uint(values->stencil_test_mask, 0, 7);
+
+ cl[ 3] = __gen_uint(values->depth_test_fail_op, 6, 8) |
+ __gen_uint(values->stencil_test_fail_op, 3, 5) |
+ __gen_uint(values->stencil_test_function, 0, 2);
+
+ cl[ 4] = __gen_uint(values->back_config, 5, 5) |
+ __gen_uint(values->front_config, 4, 4) |
+ __gen_uint(values->stencil_pass_op, 1, 3) |
+ __gen_uint(values->depth_test_fail_op, 6, 8) >> 8;
+
+ cl[ 5] = __gen_uint(values->stencil_write_mask, 0, 7);
+
+}
+
+#define V3D33_STENCIL_CONFIG_length 6
+#ifdef __gen_unpack_address
+static inline void
+V3D33_STENCIL_CONFIG_unpack(const uint8_t * restrict cl,
+ struct V3D33_STENCIL_CONFIG * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+ values->stencil_write_mask = __gen_unpack_uint(cl, 40, 47);
+ values->back_config = __gen_unpack_uint(cl, 37, 37);
+ values->front_config = __gen_unpack_uint(cl, 36, 36);
+ values->stencil_pass_op = __gen_unpack_uint(cl, 33, 35);
+ values->depth_test_fail_op = __gen_unpack_uint(cl, 30, 32);
+ values->stencil_test_fail_op = __gen_unpack_uint(cl, 27, 29);
+ values->stencil_test_function = __gen_unpack_uint(cl, 24, 26);
+ values->stencil_test_mask = __gen_unpack_uint(cl, 16, 23);
+ values->stencil_ref_value = __gen_unpack_uint(cl, 8, 15);
+}
+#endif
+
+
+#define V3D33_BLEND_CONFIG_opcode 84
+#define V3D33_BLEND_CONFIG_header \
+ .opcode = 84
+
+struct V3D33_BLEND_CONFIG {
+ uint32_t opcode;
+ uint32_t vg_coverage_modes;
+ enum V3D33_Blend_Factor colour_blend_dst_factor;
+ enum V3D33_Blend_Factor colour_blend_src_factor;
+ enum V3D33_Blend_Mode colour_blend_mode;
+ enum V3D33_Blend_Factor alpha_blend_dst_factor;
+ enum V3D33_Blend_Factor alpha_blend_src_factor;
+ enum V3D33_Blend_Mode alpha_blend_mode;
+};
+
+static inline void
+V3D33_BLEND_CONFIG_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_BLEND_CONFIG * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+ cl[ 1] = __gen_uint(values->alpha_blend_src_factor, 4, 7) |
+ __gen_uint(values->alpha_blend_mode, 0, 3);
+
+ cl[ 2] = __gen_uint(values->colour_blend_mode, 4, 7) |
+ __gen_uint(values->alpha_blend_dst_factor, 0, 3);
+
+ cl[ 3] = __gen_uint(values->colour_blend_dst_factor, 4, 7) |
+ __gen_uint(values->colour_blend_src_factor, 0, 3);
+
+ cl[ 4] = __gen_uint(values->vg_coverage_modes, 4, 5);
+
+}
+
+#define V3D33_BLEND_CONFIG_length 5
+#ifdef __gen_unpack_address
+static inline void
+V3D33_BLEND_CONFIG_unpack(const uint8_t * restrict cl,
+ struct V3D33_BLEND_CONFIG * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+ values->vg_coverage_modes = __gen_unpack_uint(cl, 36, 37);
+ values->colour_blend_dst_factor = __gen_unpack_uint(cl, 28, 31);
+ values->colour_blend_src_factor = __gen_unpack_uint(cl, 24, 27);
+ values->colour_blend_mode = __gen_unpack_uint(cl, 20, 23);
+ values->alpha_blend_dst_factor = __gen_unpack_uint(cl, 16, 19);
+ values->alpha_blend_src_factor = __gen_unpack_uint(cl, 12, 15);
+ values->alpha_blend_mode = __gen_unpack_uint(cl, 8, 11);
+}
+#endif
+
+
+#define V3D33_BLEND_CONSTANT_COLOUR_opcode 86
+#define V3D33_BLEND_CONSTANT_COLOUR_header \
+ .opcode = 86
+
+struct V3D33_BLEND_CONSTANT_COLOUR {
+ uint32_t opcode;
+ uint32_t alpha_f16;
+ uint32_t blue_f16;
+ uint32_t green_f16;
+ uint32_t red_f16;
+};
+
+static inline void
+V3D33_BLEND_CONSTANT_COLOUR_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_BLEND_CONSTANT_COLOUR * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+ cl[ 1] = __gen_uint(values->red_f16, 0, 15);
+
+ cl[ 2] = __gen_uint(values->red_f16, 0, 15) >> 8;
+
+ cl[ 3] = __gen_uint(values->green_f16, 0, 15);
+
+ cl[ 4] = __gen_uint(values->green_f16, 0, 15) >> 8;
+
+ cl[ 5] = __gen_uint(values->blue_f16, 0, 15);
+
+ cl[ 6] = __gen_uint(values->blue_f16, 0, 15) >> 8;
+
+ cl[ 7] = __gen_uint(values->alpha_f16, 0, 15);
+
+ cl[ 8] = __gen_uint(values->alpha_f16, 0, 15) >> 8;
+
+}
+
+#define V3D33_BLEND_CONSTANT_COLOUR_length 9
+#ifdef __gen_unpack_address
+static inline void
+V3D33_BLEND_CONSTANT_COLOUR_unpack(const uint8_t * restrict cl,
+ struct V3D33_BLEND_CONSTANT_COLOUR * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+ values->alpha_f16 = __gen_unpack_uint(cl, 56, 71);
+ values->blue_f16 = __gen_unpack_uint(cl, 40, 55);
+ values->green_f16 = __gen_unpack_uint(cl, 24, 39);
+ values->red_f16 = __gen_unpack_uint(cl, 8, 23);
+}
+#endif
+
+
+#define V3D33_COLOUR_WRITE_MASKS_opcode 87
+#define V3D33_COLOUR_WRITE_MASKS_header \
+ .opcode = 87
+
+struct V3D33_COLOUR_WRITE_MASKS {
+ uint32_t opcode;
+ uint32_t reserved;
+ uint32_t render_target_3_per_colour_component_write_masks;
+ uint32_t render_target_2_per_colour_component_write_masks;
+ uint32_t render_target_1_per_colour_component_write_masks;
+ uint32_t render_target_0_per_colour_component_write_masks;
+};
+
+static inline void
+V3D33_COLOUR_WRITE_MASKS_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_COLOUR_WRITE_MASKS * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+ cl[ 1] = __gen_uint(values->render_target_1_per_colour_component_write_masks, 4, 7) |
+ __gen_uint(values->render_target_0_per_colour_component_write_masks, 0, 3);
+
+ cl[ 2] = __gen_uint(values->render_target_3_per_colour_component_write_masks, 4, 7) |
+ __gen_uint(values->render_target_2_per_colour_component_write_masks, 0, 3);
+
+ cl[ 3] = __gen_uint(values->reserved, 0, 15);
+
+ cl[ 4] = __gen_uint(values->reserved, 0, 15) >> 8;
+
+}
+
+#define V3D33_COLOUR_WRITE_MASKS_length 5
+#ifdef __gen_unpack_address
+static inline void
+V3D33_COLOUR_WRITE_MASKS_unpack(const uint8_t * restrict cl,
+ struct V3D33_COLOUR_WRITE_MASKS * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+ values->reserved = __gen_unpack_uint(cl, 24, 39);
+ values->render_target_3_per_colour_component_write_masks = __gen_unpack_uint(cl, 20, 23);
+ values->render_target_2_per_colour_component_write_masks = __gen_unpack_uint(cl, 16, 19);
+ values->render_target_1_per_colour_component_write_masks = __gen_unpack_uint(cl, 12, 15);
+ values->render_target_0_per_colour_component_write_masks = __gen_unpack_uint(cl, 8, 11);
+}
+#endif
+
+
+#define V3D33_CONFIGURATION_BITS_opcode 96
+#define V3D33_CONFIGURATION_BITS_header \
+ .opcode = 96
+
+struct V3D33_CONFIGURATION_BITS {
+ uint32_t opcode;
+ bool direct3d_provoking_vertex;
+ bool direct3d_point_fill_mode;
+ bool blend_enable;
+ bool stencil_enable;
+ bool early_z_updates_enable;
+ bool early_z_enable;
+ bool z_updates_enable;
+ enum V3D33_Compare_Function depth_test_function;
+ bool direct3d_wireframe_triangles_mode;
+ uint32_t coverage_update_mode;
+ bool coverage_pipe_select;
+ uint32_t rasterizer_oversample_mode;
+ uint32_t line_rasterization;
+ bool enable_depth_offset;
+ bool clockwise_primitives;
+ bool enable_reverse_facing_primitive;
+ bool enable_forward_facing_primitive;
+};
+
+static inline void
+V3D33_CONFIGURATION_BITS_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_CONFIGURATION_BITS * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+ cl[ 1] = __gen_uint(values->rasterizer_oversample_mode, 6, 7) |
+ __gen_uint(values->line_rasterization, 4, 5) |
+ __gen_uint(values->enable_depth_offset, 3, 3) |
+ __gen_uint(values->clockwise_primitives, 2, 2) |
+ __gen_uint(values->enable_reverse_facing_primitive, 1, 1) |
+ __gen_uint(values->enable_forward_facing_primitive, 0, 0);
+
+ cl[ 2] = __gen_uint(values->z_updates_enable, 7, 7) |
+ __gen_uint(values->depth_test_function, 4, 6) |
+ __gen_uint(values->direct3d_wireframe_triangles_mode, 3, 3) |
+ __gen_uint(values->coverage_update_mode, 1, 2) |
+ __gen_uint(values->coverage_pipe_select, 0, 0);
+
+ cl[ 3] = __gen_uint(values->direct3d_provoking_vertex, 5, 5) |
+ __gen_uint(values->direct3d_point_fill_mode, 4, 4) |
+ __gen_uint(values->blend_enable, 3, 3) |
+ __gen_uint(values->stencil_enable, 2, 2) |
+ __gen_uint(values->early_z_updates_enable, 1, 1) |
+ __gen_uint(values->early_z_enable, 0, 0);
+
+}
+
+#define V3D33_CONFIGURATION_BITS_length 4
+#ifdef __gen_unpack_address
+static inline void
+V3D33_CONFIGURATION_BITS_unpack(const uint8_t * restrict cl,
+ struct V3D33_CONFIGURATION_BITS * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+ values->direct3d_provoking_vertex = __gen_unpack_uint(cl, 29, 29);
+ values->direct3d_point_fill_mode = __gen_unpack_uint(cl, 28, 28);
+ values->blend_enable = __gen_unpack_uint(cl, 27, 27);
+ values->stencil_enable = __gen_unpack_uint(cl, 26, 26);
+ values->early_z_updates_enable = __gen_unpack_uint(cl, 25, 25);
+ values->early_z_enable = __gen_unpack_uint(cl, 24, 24);
+ values->z_updates_enable = __gen_unpack_uint(cl, 23, 23);
+ values->depth_test_function = __gen_unpack_uint(cl, 20, 22);
+ values->direct3d_wireframe_triangles_mode = __gen_unpack_uint(cl, 19, 19);
+ values->coverage_update_mode = __gen_unpack_uint(cl, 17, 18);
+ values->coverage_pipe_select = __gen_unpack_uint(cl, 16, 16);
+ values->rasterizer_oversample_mode = __gen_unpack_uint(cl, 14, 15);
+ values->line_rasterization = __gen_unpack_uint(cl, 12, 13);
+ values->enable_depth_offset = __gen_unpack_uint(cl, 11, 11);
+ values->clockwise_primitives = __gen_unpack_uint(cl, 10, 10);
+ values->enable_reverse_facing_primitive = __gen_unpack_uint(cl, 9, 9);
+ values->enable_forward_facing_primitive = __gen_unpack_uint(cl, 8, 8);
+}
+#endif
+
+
+#define V3D33_ZERO_ALL_FLAT_SHADE_FLAGS_opcode 97
+#define V3D33_ZERO_ALL_FLAT_SHADE_FLAGS_header \
+ .opcode = 97
+
+struct V3D33_ZERO_ALL_FLAT_SHADE_FLAGS {
+ uint32_t opcode;
+};
+
+static inline void
+V3D33_ZERO_ALL_FLAT_SHADE_FLAGS_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_ZERO_ALL_FLAT_SHADE_FLAGS * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+}
+
+#define V3D33_ZERO_ALL_FLAT_SHADE_FLAGS_length 1
+#ifdef __gen_unpack_address
+static inline void
+V3D33_ZERO_ALL_FLAT_SHADE_FLAGS_unpack(const uint8_t * restrict cl,
+ struct V3D33_ZERO_ALL_FLAT_SHADE_FLAGS * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+}
+#endif
+
+
+#define V3D33_FLAT_SHADE_FLAGS_opcode 98
+#define V3D33_FLAT_SHADE_FLAGS_header \
+ .opcode = 98
+
+struct V3D33_FLAT_SHADE_FLAGS {
+ uint32_t opcode;
+ uint32_t flat_shade_flags_for_varyings_v024;
+ uint32_t action_for_flat_shade_flags_of_higher_numbered_varyings;
+ uint32_t action_for_flat_shade_flags_of_lower_numbered_varyings;
+ uint32_t varying_offset_v0;
+};
+
+static inline void
+V3D33_FLAT_SHADE_FLAGS_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_FLAT_SHADE_FLAGS * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+ cl[ 1] = __gen_uint(values->action_for_flat_shade_flags_of_higher_numbered_varyings, 6, 7) |
+ __gen_uint(values->action_for_flat_shade_flags_of_lower_numbered_varyings, 4, 5) |
+ __gen_uint(values->varying_offset_v0, 0, 3);
+
+ cl[ 2] = __gen_uint(values->flat_shade_flags_for_varyings_v024, 0, 23);
+
+ cl[ 3] = __gen_uint(values->flat_shade_flags_for_varyings_v024, 0, 23) >> 8;
+
+ cl[ 4] = __gen_uint(values->flat_shade_flags_for_varyings_v024, 0, 23) >> 16;
+
+}
+
+#define V3D33_FLAT_SHADE_FLAGS_length 5
+#ifdef __gen_unpack_address
+static inline void
+V3D33_FLAT_SHADE_FLAGS_unpack(const uint8_t * restrict cl,
+ struct V3D33_FLAT_SHADE_FLAGS * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+ values->flat_shade_flags_for_varyings_v024 = __gen_unpack_uint(cl, 16, 39);
+ values->action_for_flat_shade_flags_of_higher_numbered_varyings = __gen_unpack_uint(cl, 14, 15);
+ values->action_for_flat_shade_flags_of_lower_numbered_varyings = __gen_unpack_uint(cl, 12, 13);
+ values->varying_offset_v0 = __gen_unpack_uint(cl, 8, 11);
+}
+#endif
+
+
+#define V3D33_POINT_SIZE_opcode 104
+#define V3D33_POINT_SIZE_header \
+ .opcode = 104
+
+struct V3D33_POINT_SIZE {
+ uint32_t opcode;
+ float point_size;
+};
+
+static inline void
+V3D33_POINT_SIZE_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_POINT_SIZE * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+
+ memcpy(&cl[1], &values->point_size, sizeof(values->point_size));
+}
+
+#define V3D33_POINT_SIZE_length 5
+#ifdef __gen_unpack_address
+static inline void
+V3D33_POINT_SIZE_unpack(const uint8_t * restrict cl,
+ struct V3D33_POINT_SIZE * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+ values->point_size = __gen_unpack_float(cl, 8, 39);
+}
+#endif
+
+
+#define V3D33_LINE_WIDTH_opcode 105
+#define V3D33_LINE_WIDTH_header \
+ .opcode = 105
+
+struct V3D33_LINE_WIDTH {
+ uint32_t opcode;
+ float line_width;
+};
+
+static inline void
+V3D33_LINE_WIDTH_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_LINE_WIDTH * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+
+ memcpy(&cl[1], &values->line_width, sizeof(values->line_width));
+}
+
+#define V3D33_LINE_WIDTH_length 5
+#ifdef __gen_unpack_address
+static inline void
+V3D33_LINE_WIDTH_unpack(const uint8_t * restrict cl,
+ struct V3D33_LINE_WIDTH * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+ values->line_width = __gen_unpack_float(cl, 8, 39);
+}
+#endif
+
+
+#define V3D33_DEPTH_OFFSET_opcode 106
+#define V3D33_DEPTH_OFFSET_header \
+ .opcode = 106
+
+struct V3D33_DEPTH_OFFSET {
+ uint32_t opcode;
+ uint32_t depth_offset_units;
+ uint32_t depth_offset_factor;
+};
+
+static inline void
+V3D33_DEPTH_OFFSET_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_DEPTH_OFFSET * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+ cl[ 1] = __gen_uint(values->depth_offset_factor, 0, 15);
+
+ cl[ 2] = __gen_uint(values->depth_offset_factor, 0, 15) >> 8;
+
+ cl[ 3] = __gen_uint(values->depth_offset_units, 0, 15);
+
+ cl[ 4] = __gen_uint(values->depth_offset_units, 0, 15) >> 8;
+
+}
+
+#define V3D33_DEPTH_OFFSET_length 5
+#ifdef __gen_unpack_address
+static inline void
+V3D33_DEPTH_OFFSET_unpack(const uint8_t * restrict cl,
+ struct V3D33_DEPTH_OFFSET * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+ values->depth_offset_units = __gen_unpack_uint(cl, 24, 39);
+ values->depth_offset_factor = __gen_unpack_uint(cl, 8, 23);
+}
+#endif
+
+
+#define V3D33_CLIP_WINDOW_opcode 107
+#define V3D33_CLIP_WINDOW_header \
+ .opcode = 107
+
+struct V3D33_CLIP_WINDOW {
+ uint32_t opcode;
+ uint32_t clip_window_height_in_pixels;
+ uint32_t clip_window_width_in_pixels;
+ uint32_t clip_window_bottom_pixel_coordinate;
+ uint32_t clip_window_left_pixel_coordinate;
+};
+
+static inline void
+V3D33_CLIP_WINDOW_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_CLIP_WINDOW * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+ cl[ 1] = __gen_uint(values->clip_window_left_pixel_coordinate, 0, 15);
+
+ cl[ 2] = __gen_uint(values->clip_window_left_pixel_coordinate, 0, 15) >> 8;
+
+ cl[ 3] = __gen_uint(values->clip_window_bottom_pixel_coordinate, 0, 15);
+
+ cl[ 4] = __gen_uint(values->clip_window_bottom_pixel_coordinate, 0, 15) >> 8;
+
+ cl[ 5] = __gen_uint(values->clip_window_width_in_pixels, 0, 15);
+
+ cl[ 6] = __gen_uint(values->clip_window_width_in_pixels, 0, 15) >> 8;
+
+ cl[ 7] = __gen_uint(values->clip_window_height_in_pixels, 0, 15);
+
+ cl[ 8] = __gen_uint(values->clip_window_height_in_pixels, 0, 15) >> 8;
+
+}
+
+#define V3D33_CLIP_WINDOW_length 9
+#ifdef __gen_unpack_address
+static inline void
+V3D33_CLIP_WINDOW_unpack(const uint8_t * restrict cl,
+ struct V3D33_CLIP_WINDOW * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+ values->clip_window_height_in_pixels = __gen_unpack_uint(cl, 56, 71);
+ values->clip_window_width_in_pixels = __gen_unpack_uint(cl, 40, 55);
+ values->clip_window_bottom_pixel_coordinate = __gen_unpack_uint(cl, 24, 39);
+ values->clip_window_left_pixel_coordinate = __gen_unpack_uint(cl, 8, 23);
+}
+#endif
+
+
+#define V3D33_VIEWPORT_OFFSET_opcode 108
+#define V3D33_VIEWPORT_OFFSET_header \
+ .opcode = 108
+
+struct V3D33_VIEWPORT_OFFSET {
+ uint32_t opcode;
+ float viewport_centre_y_coordinate;
+ float viewport_centre_x_coordinate;
+};
+
+static inline void
+V3D33_VIEWPORT_OFFSET_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_VIEWPORT_OFFSET * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+ cl[ 1] = __gen_sfixed(values->viewport_centre_x_coordinate, 0, 31, 8);
+
+ cl[ 2] = __gen_sfixed(values->viewport_centre_x_coordinate, 0, 31, 8) >> 8;
+
+ cl[ 3] = __gen_sfixed(values->viewport_centre_x_coordinate, 0, 31, 8) >> 16;
+
+ cl[ 4] = __gen_sfixed(values->viewport_centre_x_coordinate, 0, 31, 8) >> 24;
+
+ cl[ 5] = __gen_sfixed(values->viewport_centre_y_coordinate, 0, 31, 8);
+
+ cl[ 6] = __gen_sfixed(values->viewport_centre_y_coordinate, 0, 31, 8) >> 8;
+
+ cl[ 7] = __gen_sfixed(values->viewport_centre_y_coordinate, 0, 31, 8) >> 16;
+
+ cl[ 8] = __gen_sfixed(values->viewport_centre_y_coordinate, 0, 31, 8) >> 24;
+
+}
+
+#define V3D33_VIEWPORT_OFFSET_length 9
+#ifdef __gen_unpack_address
+static inline void
+V3D33_VIEWPORT_OFFSET_unpack(const uint8_t * restrict cl,
+ struct V3D33_VIEWPORT_OFFSET * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+ values->viewport_centre_y_coordinate = __gen_unpack_sfixed(cl, 40, 71, 8);
+ values->viewport_centre_x_coordinate = __gen_unpack_sfixed(cl, 8, 39, 8);
+}
+#endif
+
+
+#define V3D33_CLIPPER_Z_MIN_MAX_CLIPPING_PLANES_opcode 109
+#define V3D33_CLIPPER_Z_MIN_MAX_CLIPPING_PLANES_header\
+ .opcode = 109
+
+struct V3D33_CLIPPER_Z_MIN_MAX_CLIPPING_PLANES {
+ uint32_t opcode;
+ float maximum_zw;
+ float minimum_zw;
+};
+
+static inline void
+V3D33_CLIPPER_Z_MIN_MAX_CLIPPING_PLANES_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_CLIPPER_Z_MIN_MAX_CLIPPING_PLANES * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+
+ memcpy(&cl[1], &values->minimum_zw, sizeof(values->minimum_zw));
+
+ memcpy(&cl[5], &values->maximum_zw, sizeof(values->maximum_zw));
+}
+
+#define V3D33_CLIPPER_Z_MIN_MAX_CLIPPING_PLANES_length 9
+#ifdef __gen_unpack_address
+static inline void
+V3D33_CLIPPER_Z_MIN_MAX_CLIPPING_PLANES_unpack(const uint8_t * restrict cl,
+ struct V3D33_CLIPPER_Z_MIN_MAX_CLIPPING_PLANES * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+ values->maximum_zw = __gen_unpack_float(cl, 40, 71);
+ values->minimum_zw = __gen_unpack_float(cl, 8, 39);
+}
+#endif
+
+
+#define V3D33_CLIPPER_XY_SCALING_opcode 110
+#define V3D33_CLIPPER_XY_SCALING_header \
+ .opcode = 110
+
+struct V3D33_CLIPPER_XY_SCALING {
+ uint32_t opcode;
+ float viewport_half_height_in_1_256th_of_pixel;
+ float viewport_half_width_in_1_256th_of_pixel;
+};
+
+static inline void
+V3D33_CLIPPER_XY_SCALING_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_CLIPPER_XY_SCALING * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+
+ memcpy(&cl[1], &values->viewport_half_width_in_1_256th_of_pixel, sizeof(values->viewport_half_width_in_1_256th_of_pixel));
+
+ memcpy(&cl[5], &values->viewport_half_height_in_1_256th_of_pixel, sizeof(values->viewport_half_height_in_1_256th_of_pixel));
+}
+
+#define V3D33_CLIPPER_XY_SCALING_length 9
+#ifdef __gen_unpack_address
+static inline void
+V3D33_CLIPPER_XY_SCALING_unpack(const uint8_t * restrict cl,
+ struct V3D33_CLIPPER_XY_SCALING * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+ values->viewport_half_height_in_1_256th_of_pixel = __gen_unpack_float(cl, 40, 71);
+ values->viewport_half_width_in_1_256th_of_pixel = __gen_unpack_float(cl, 8, 39);
+}
+#endif
+
+
+#define V3D33_CLIPPER_Z_SCALE_AND_OFFSET_opcode 111
+#define V3D33_CLIPPER_Z_SCALE_AND_OFFSET_header \
+ .opcode = 111
+
+struct V3D33_CLIPPER_Z_SCALE_AND_OFFSET {
+ uint32_t opcode;
+ float viewport_z_offset_zc_to_zs;
+ float viewport_z_scale_zc_to_zs;
+};
+
+static inline void
+V3D33_CLIPPER_Z_SCALE_AND_OFFSET_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_CLIPPER_Z_SCALE_AND_OFFSET * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+
+ memcpy(&cl[1], &values->viewport_z_scale_zc_to_zs, sizeof(values->viewport_z_scale_zc_to_zs));
+
+ memcpy(&cl[5], &values->viewport_z_offset_zc_to_zs, sizeof(values->viewport_z_offset_zc_to_zs));
+}
+
+#define V3D33_CLIPPER_Z_SCALE_AND_OFFSET_length 9
+#ifdef __gen_unpack_address
+static inline void
+V3D33_CLIPPER_Z_SCALE_AND_OFFSET_unpack(const uint8_t * restrict cl,
+ struct V3D33_CLIPPER_Z_SCALE_AND_OFFSET * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+ values->viewport_z_offset_zc_to_zs = __gen_unpack_float(cl, 40, 71);
+ values->viewport_z_scale_zc_to_zs = __gen_unpack_float(cl, 8, 39);
+}
+#endif
+
+
+#define V3D33_TILE_BINNING_MODE_CONFIGURATION_PART1_opcode 120
+#define V3D33_TILE_BINNING_MODE_CONFIGURATION_PART1_header\
+ .opcode = 120, \
+ .auto_initialize_tile_state_data_array = 1, \
+ .sub_id = 0
+
+struct V3D33_TILE_BINNING_MODE_CONFIGURATION_PART1 {
+ uint32_t opcode;
+ bool double_buffer_in_non_ms_mode;
+ bool multisample_mode_4x;
+ uint32_t maximum_bpp_of_all_render_targets;
+#define RENDER_TARGET_MAXIMUM_32BPP 0
+#define RENDER_TARGET_MAXIMUM_64BPP 1
+#define RENDER_TARGET_MAXIMUM_128BPP 2
+ uint32_t number_of_render_targets;
+ uint32_t height_in_tiles;
+ uint32_t width_in_tiles;
+ __gen_address_type tile_state_data_array_base_address;
+ uint32_t tile_allocation_block_size;
+#define TILE_ALLOCATION_BLOCK_SIZE_64B 0
+#define TILE_ALLOCATION_BLOCK_SIZE_128B 1
+#define TILE_ALLOCATION_BLOCK_SIZE_256B 2
+ uint32_t tile_allocation_initial_block_size;
+#define TILE_ALLOCATION_INITIAL_BLOCK_SIZE_64B 0
+#define TILE_ALLOCATION_INITIAL_BLOCK_SIZE_128B 1
+#define TILE_ALLOCATION_INITIAL_BLOCK_SIZE_256B 2
+ bool auto_initialize_tile_state_data_array;
+ uint32_t sub_id;
+};
+
+static inline void
+V3D33_TILE_BINNING_MODE_CONFIGURATION_PART1_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_TILE_BINNING_MODE_CONFIGURATION_PART1 * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+ __gen_emit_reloc(data, &values->tile_state_data_array_base_address);
+ cl[ 1] = __gen_address_offset(&values->tile_state_data_array_base_address) |
+ __gen_uint(values->tile_allocation_block_size, 4, 5) |
+ __gen_uint(values->tile_allocation_initial_block_size, 2, 3) |
+ __gen_uint(values->auto_initialize_tile_state_data_array, 1, 1) |
+ __gen_uint(values->sub_id, 0, 0);
+
+ cl[ 2] = __gen_address_offset(&values->tile_state_data_array_base_address) >> 8;
+
+ cl[ 3] = __gen_address_offset(&values->tile_state_data_array_base_address) >> 16;
+
+ cl[ 4] = __gen_address_offset(&values->tile_state_data_array_base_address) >> 24;
+
+ cl[ 5] = __gen_uint(values->width_in_tiles, 0, 11);
+
+ cl[ 6] = __gen_uint(values->height_in_tiles, 4, 15) |
+ __gen_uint(values->width_in_tiles, 0, 11) >> 8;
+
+ cl[ 7] = __gen_uint(values->height_in_tiles, 4, 15) >> 8;
+
+ cl[ 8] = __gen_uint(values->double_buffer_in_non_ms_mode, 7, 7) |
+ __gen_uint(values->multisample_mode_4x, 6, 6) |
+ __gen_uint(values->maximum_bpp_of_all_render_targets, 4, 5) |
+ __gen_uint(values->number_of_render_targets, 0, 3);
+
+}
+
+#define V3D33_TILE_BINNING_MODE_CONFIGURATION_PART1_length 9
+#ifdef __gen_unpack_address
+static inline void
+V3D33_TILE_BINNING_MODE_CONFIGURATION_PART1_unpack(const uint8_t * restrict cl,
+ struct V3D33_TILE_BINNING_MODE_CONFIGURATION_PART1 * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+ values->double_buffer_in_non_ms_mode = __gen_unpack_uint(cl, 71, 71);
+ values->multisample_mode_4x = __gen_unpack_uint(cl, 70, 70);
+ values->maximum_bpp_of_all_render_targets = __gen_unpack_uint(cl, 68, 69);
+ values->number_of_render_targets = __gen_unpack_uint(cl, 64, 67);
+ values->height_in_tiles = __gen_unpack_uint(cl, 52, 63);
+ values->width_in_tiles = __gen_unpack_uint(cl, 40, 51);
+ values->tile_state_data_array_base_address = __gen_unpack_address(cl, 8, 39);
+ values->tile_allocation_block_size = __gen_unpack_uint(cl, 12, 13);
+ values->tile_allocation_initial_block_size = __gen_unpack_uint(cl, 10, 11);
+ values->auto_initialize_tile_state_data_array = __gen_unpack_uint(cl, 9, 9);
+ values->sub_id = __gen_unpack_uint(cl, 8, 8);
+}
+#endif
+
+
+#define V3D33_TILE_BINNING_MODE_CONFIGURATION_PART2_opcode 120
+#define V3D33_TILE_BINNING_MODE_CONFIGURATION_PART2_header\
+ .opcode = 120, \
+ .sub_id = 1
+
+struct V3D33_TILE_BINNING_MODE_CONFIGURATION_PART2 {
+ uint32_t opcode;
+ __gen_address_type tile_allocation_memory_address;
+ uint32_t tile_allocation_memory_size;
+ uint32_t sub_id;
+};
+
+static inline void
+V3D33_TILE_BINNING_MODE_CONFIGURATION_PART2_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_TILE_BINNING_MODE_CONFIGURATION_PART2 * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+ cl[ 1] = __gen_uint(values->tile_allocation_memory_size, 0, 31) |
+ __gen_uint(values->sub_id, 0, 0);
+
+ cl[ 2] = __gen_uint(values->tile_allocation_memory_size, 0, 31) >> 8;
+
+ cl[ 3] = __gen_uint(values->tile_allocation_memory_size, 0, 31) >> 16;
+
+ cl[ 4] = __gen_uint(values->tile_allocation_memory_size, 0, 31) >> 24;
+
+ __gen_emit_reloc(data, &values->tile_allocation_memory_address);
+ cl[ 5] = __gen_address_offset(&values->tile_allocation_memory_address);
+
+ cl[ 6] = __gen_address_offset(&values->tile_allocation_memory_address) >> 8;
+
+ cl[ 7] = __gen_address_offset(&values->tile_allocation_memory_address) >> 16;
+
+ cl[ 8] = __gen_address_offset(&values->tile_allocation_memory_address) >> 24;
+
+}
+
+#define V3D33_TILE_BINNING_MODE_CONFIGURATION_PART2_length 9
+#ifdef __gen_unpack_address
+static inline void
+V3D33_TILE_BINNING_MODE_CONFIGURATION_PART2_unpack(const uint8_t * restrict cl,
+ struct V3D33_TILE_BINNING_MODE_CONFIGURATION_PART2 * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+ values->tile_allocation_memory_address = __gen_unpack_address(cl, 40, 71);
+ values->tile_allocation_memory_size = __gen_unpack_uint(cl, 8, 39);
+ values->sub_id = __gen_unpack_uint(cl, 8, 8);
+}
+#endif
+
+
+#define V3D33_TILE_RENDERING_MODE_CONFIGURATION_COMMON_CONFIGURATION_opcode 121
+#define V3D33_TILE_RENDERING_MODE_CONFIGURATION_COMMON_CONFIGURATION_header\
+ .opcode = 121, \
+ .sub_id = 0
+
+struct V3D33_TILE_RENDERING_MODE_CONFIGURATION_COMMON_CONFIGURATION {
+ uint32_t opcode;
+ uint32_t disable_render_target_stores;
+ bool enable_z_store;
+ bool enable_stencil_store;
+ bool early_z_disable;
+ uint32_t early_z_test_and_update_direction;
+#define EARLY_Z_DIRECTION_LT_LE 0
+#define EARLY_Z_DIRECTION_GT_GE 1
+ bool select_coverage_mode;
+ bool double_buffer_in_non_ms_mode;
+ bool multisample_mode_4x;
+ uint32_t maximum_bpp_of_all_render_targets;
+#define RENDER_TARGET_MAXIMUM_32BPP 0
+#define RENDER_TARGET_MAXIMUM_64BPP 1
+#define RENDER_TARGET_MAXIMUM_128BPP 2
+ uint32_t image_height_pixels;
+ uint32_t image_width_pixels;
+ uint32_t number_of_render_targets_minus_1;
+ uint32_t sub_id;
+};
+
+static inline void
+V3D33_TILE_RENDERING_MODE_CONFIGURATION_COMMON_CONFIGURATION_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_TILE_RENDERING_MODE_CONFIGURATION_COMMON_CONFIGURATION * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+ cl[ 1] = __gen_uint(values->number_of_render_targets_minus_1, 4, 7) |
+ __gen_uint(values->sub_id, 0, 3);
+
+ cl[ 2] = __gen_uint(values->image_width_pixels, 0, 15);
+
+ cl[ 3] = __gen_uint(values->image_width_pixels, 0, 15) >> 8;
+
+ cl[ 4] = __gen_uint(values->image_height_pixels, 0, 15);
+
+ cl[ 5] = __gen_uint(values->image_height_pixels, 0, 15) >> 8;
+
+ cl[ 6] = __gen_uint(values->early_z_disable, 6, 6) |
+ __gen_uint(values->early_z_test_and_update_direction, 5, 5) |
+ __gen_uint(values->select_coverage_mode, 4, 4) |
+ __gen_uint(values->double_buffer_in_non_ms_mode, 3, 3) |
+ __gen_uint(values->multisample_mode_4x, 2, 2) |
+ __gen_uint(values->maximum_bpp_of_all_render_targets, 0, 1);
+
+ cl[ 7] = __gen_uint(values->enable_z_store, 7, 7) |
+ __gen_uint(values->enable_stencil_store, 6, 6);
+
+ cl[ 8] = __gen_uint(values->disable_render_target_stores, 0, 7);
+
+}
+
+#define V3D33_TILE_RENDERING_MODE_CONFIGURATION_COMMON_CONFIGURATION_length 9
+#ifdef __gen_unpack_address
+static inline void
+V3D33_TILE_RENDERING_MODE_CONFIGURATION_COMMON_CONFIGURATION_unpack(const uint8_t * restrict cl,
+ struct V3D33_TILE_RENDERING_MODE_CONFIGURATION_COMMON_CONFIGURATION * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+ values->disable_render_target_stores = __gen_unpack_uint(cl, 64, 71);
+ values->enable_z_store = __gen_unpack_uint(cl, 63, 63);
+ values->enable_stencil_store = __gen_unpack_uint(cl, 62, 62);
+ values->early_z_disable = __gen_unpack_uint(cl, 54, 54);
+ values->early_z_test_and_update_direction = __gen_unpack_uint(cl, 53, 53);
+ values->select_coverage_mode = __gen_unpack_uint(cl, 52, 52);
+ values->double_buffer_in_non_ms_mode = __gen_unpack_uint(cl, 51, 51);
+ values->multisample_mode_4x = __gen_unpack_uint(cl, 50, 50);
+ values->maximum_bpp_of_all_render_targets = __gen_unpack_uint(cl, 48, 49);
+ values->image_height_pixels = __gen_unpack_uint(cl, 32, 47);
+ values->image_width_pixels = __gen_unpack_uint(cl, 16, 31);
+ values->number_of_render_targets_minus_1 = __gen_unpack_uint(cl, 12, 15);
+ values->sub_id = __gen_unpack_uint(cl, 8, 11);
+}
+#endif
+
+
+#define V3D33_TILE_RENDERING_MODE_CONFIGURATION_RENDER_TARGET_CONFIG_opcode 121
+#define V3D33_TILE_RENDERING_MODE_CONFIGURATION_RENDER_TARGET_CONFIG_header\
+ .opcode = 121, \
+ .sub_id = 2
+
+struct V3D33_TILE_RENDERING_MODE_CONFIGURATION_RENDER_TARGET_CONFIG {
+ uint32_t opcode;
+ __gen_address_type address;
+ uint32_t pad;
+ bool flip_y;
+ uint32_t memory_format;
+#define MEMORY_FORMAT_RASTER 0
+#define MEMORY_FORMAT_LINEARTILE 1
+#define MEMORY_FORMAT_UB_LINEAR_1_UIF_BLOCK_WIDE 2
+#define MEMORY_FORMAT_UB_LINEAR_2_UIF_BLOCKS_WIDE 3
+#define MEMORY_FORMAT_UIF_NO_XOR 4
+#define MEMORY_FORMAT_UIF_XOR 5
+ bool a_dithered;
+ bool bgr_dithered;
+ uint32_t output_image_format;
+#define OUTPUT_IMAGE_FORMAT_SRGB8_ALPHA8 0
+#define OUTPUT_IMAGE_FORMAT_SRGB 1
+#define OUTPUT_IMAGE_FORMAT_RGB10_A2UI 2
+#define OUTPUT_IMAGE_FORMAT_RGB10_A2 3
+#define OUTPUT_IMAGE_FORMAT_ABGR1555 4
+#define OUTPUT_IMAGE_FORMAT_ALPHA_MASKED_ABGR1555 5
+#define OUTPUT_IMAGE_FORMAT_ABGR4444 6
+#define OUTPUT_IMAGE_FORMAT_BGR565 7
+#define OUTPUT_IMAGE_FORMAT_R11F_G11F_B10F 8
+#define OUTPUT_IMAGE_FORMAT_RGBA32F 9
+#define OUTPUT_IMAGE_FORMAT_RG32F 10
+#define OUTPUT_IMAGE_FORMAT_R32F 11
+#define OUTPUT_IMAGE_FORMAT_RGBA32I 12
+#define OUTPUT_IMAGE_FORMAT_RG32I 13
+#define OUTPUT_IMAGE_FORMAT_R32I 14
+#define OUTPUT_IMAGE_FORMAT_RGBA32UI 15
+#define OUTPUT_IMAGE_FORMAT_RG32UI 16
+#define OUTPUT_IMAGE_FORMAT_R32UI 17
+#define OUTPUT_IMAGE_FORMAT_RGBA16F 18
+#define OUTPUT_IMAGE_FORMAT_RG16F 19
+#define OUTPUT_IMAGE_FORMAT_R16F 20
+#define OUTPUT_IMAGE_FORMAT_RGBA16I 21
+#define OUTPUT_IMAGE_FORMAT_RG16I 22
+#define OUTPUT_IMAGE_FORMAT_R16I 23
+#define OUTPUT_IMAGE_FORMAT_RGBA16UI 24
+#define OUTPUT_IMAGE_FORMAT_RG16UI 25
+#define OUTPUT_IMAGE_FORMAT_R16UI 26
+#define OUTPUT_IMAGE_FORMAT_RGBA8 27
+#define OUTPUT_IMAGE_FORMAT_RGB8 28
+#define OUTPUT_IMAGE_FORMAT_RG8 29
+#define OUTPUT_IMAGE_FORMAT_R8 30
+#define OUTPUT_IMAGE_FORMAT_RGBA8I 31
+#define OUTPUT_IMAGE_FORMAT_RG8I 32
+#define OUTPUT_IMAGE_FORMAT_R8I 33
+#define OUTPUT_IMAGE_FORMAT_RGBA8UI 34
+#define OUTPUT_IMAGE_FORMAT_RG8UI 35
+#define OUTPUT_IMAGE_FORMAT_R8UI 36
+#define OUTPUT_IMAGE_FORMAT_SRGBX8 37
+#define OUTPUT_IMAGE_FORMAT_RGBX8 38
+ uint32_t decimate_mode;
+ uint32_t internal_type;
+#define INTERNAL_TYPE_8I 0
+#define INTERNAL_TYPE_8UI 1
+#define INTERNAL_TYPE_8 2
+#define INTERNAL_TYPE_16I 4
+#define INTERNAL_TYPE_16UI 5
+#define INTERNAL_TYPE_16F 6
+#define INTERNAL_TYPE_32I 8
+#define INTERNAL_TYPE_32UI 9
+#define INTERNAL_TYPE_32F 10
+ uint32_t internal_bpp;
+#define INTERNAL_BPP_32 0
+#define INTERNAL_BPP_64 1
+#define INTERNAL_BPP_128 2
+ uint32_t render_target_number;
+ uint32_t sub_id;
+};
+
+static inline void
+V3D33_TILE_RENDERING_MODE_CONFIGURATION_RENDER_TARGET_CONFIG_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_TILE_RENDERING_MODE_CONFIGURATION_RENDER_TARGET_CONFIG * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+ cl[ 1] = __gen_uint(values->render_target_number, 4, 7) |
+ __gen_uint(values->sub_id, 0, 3);
+
+ cl[ 2] = __gen_uint(values->decimate_mode, 6, 7) |
+ __gen_uint(values->internal_type, 2, 5) |
+ __gen_uint(values->internal_bpp, 0, 1);
+
+ cl[ 3] = __gen_uint(values->a_dithered, 7, 7) |
+ __gen_uint(values->bgr_dithered, 6, 6) |
+ __gen_uint(values->output_image_format, 0, 5);
+
+ cl[ 4] = __gen_uint(values->pad, 4, 7) |
+ __gen_uint(values->flip_y, 3, 3) |
+ __gen_uint(values->memory_format, 0, 2);
+
+ __gen_emit_reloc(data, &values->address);
+ cl[ 5] = __gen_address_offset(&values->address);
+
+ cl[ 6] = __gen_address_offset(&values->address) >> 8;
+
+ cl[ 7] = __gen_address_offset(&values->address) >> 16;
+
+ cl[ 8] = __gen_address_offset(&values->address) >> 24;
+
+}
+
+#define V3D33_TILE_RENDERING_MODE_CONFIGURATION_RENDER_TARGET_CONFIG_length 9
+#ifdef __gen_unpack_address
+static inline void
+V3D33_TILE_RENDERING_MODE_CONFIGURATION_RENDER_TARGET_CONFIG_unpack(const uint8_t * restrict cl,
+ struct V3D33_TILE_RENDERING_MODE_CONFIGURATION_RENDER_TARGET_CONFIG * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+ values->address = __gen_unpack_address(cl, 40, 71);
+ values->pad = __gen_unpack_uint(cl, 36, 39);
+ values->flip_y = __gen_unpack_uint(cl, 35, 35);
+ values->memory_format = __gen_unpack_uint(cl, 32, 34);
+ values->a_dithered = __gen_unpack_uint(cl, 31, 31);
+ values->bgr_dithered = __gen_unpack_uint(cl, 30, 30);
+ values->output_image_format = __gen_unpack_uint(cl, 24, 29);
+ values->decimate_mode = __gen_unpack_uint(cl, 22, 23);
+ values->internal_type = __gen_unpack_uint(cl, 18, 21);
+ values->internal_bpp = __gen_unpack_uint(cl, 16, 17);
+ values->render_target_number = __gen_unpack_uint(cl, 12, 15);
+ values->sub_id = __gen_unpack_uint(cl, 8, 11);
+}
+#endif
+
+
+#define V3D33_TILE_RENDERING_MODE_CONFIGURATION_Z_STENCIL_CONFIG_opcode 121
+#define V3D33_TILE_RENDERING_MODE_CONFIGURATION_Z_STENCIL_CONFIG_header\
+ .opcode = 121, \
+ .z_stencil_id = 0, \
+ .sub_id = 1
+
+struct V3D33_TILE_RENDERING_MODE_CONFIGURATION_Z_STENCIL_CONFIG {
+ uint32_t opcode;
+ __gen_address_type address;
+ uint32_t padded_height_of_output_image_in_uif_blocks;
+ uint32_t memory_format;
+#define MEMORY_FORMAT_RASTER 0
+#define MEMORY_FORMAT_LINEARTILE 1
+#define MEMORY_FORMAT_UB_LINEAR_1_UIF_BLOCK_WIDE 2
+#define MEMORY_FORMAT_UB_LINEAR_2_UIF_BLOCKS_WIDE 3
+#define MEMORY_FORMAT_UIF_NO_XOR 4
+#define MEMORY_FORMAT_UIF_XOR 5
+ uint32_t output_image_format;
+#define OUTPUT_IMAGE_FORMAT_DEPTH_COMPONENT32F 0
+#define OUTPUT_IMAGE_FORMAT_DEPTH_COMPONENT24 1
+#define OUTPUT_IMAGE_FORMAT_DEPTH_COMPONENT16 2
+#define OUTPUT_IMAGE_FORMAT_DEPTH24_STENCIL8 3
+ uint32_t decimate_mode;
+ uint32_t internal_type;
+#define INTERNAL_TYPE_DEPTH_32F 0
+#define INTERNAL_TYPE_DEPTH_24 1
+#define INTERNAL_TYPE_DEPTH_16 2
+ uint32_t internal_bpp_ignored;
+ uint32_t z_stencil_id;
+ uint32_t sub_id;
+};
+
+static inline void
+V3D33_TILE_RENDERING_MODE_CONFIGURATION_Z_STENCIL_CONFIG_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_TILE_RENDERING_MODE_CONFIGURATION_Z_STENCIL_CONFIG * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+ cl[ 1] = __gen_uint(values->z_stencil_id, 4, 7) |
+ __gen_uint(values->sub_id, 0, 3);
+
+ cl[ 2] = __gen_uint(values->decimate_mode, 6, 7) |
+ __gen_uint(values->internal_type, 2, 5) |
+ __gen_uint(values->internal_bpp_ignored, 0, 1);
+
+ cl[ 3] = __gen_uint(values->memory_format, 6, 8) |
+ __gen_uint(values->output_image_format, 0, 5);
+
+ cl[ 4] = __gen_uint(values->padded_height_of_output_image_in_uif_blocks, 1, 13) |
+ __gen_uint(values->memory_format, 6, 8) >> 8;
+
+ __gen_emit_reloc(data, &values->address);
+ cl[ 5] = __gen_address_offset(&values->address) |
+ __gen_uint(values->padded_height_of_output_image_in_uif_blocks, 1, 13) >> 8;
+
+ cl[ 6] = __gen_address_offset(&values->address) >> 8;
+
+ cl[ 7] = __gen_address_offset(&values->address) >> 16;
+
+ cl[ 8] = __gen_address_offset(&values->address) >> 24;
+
+}
+/* _length equals the nine bytes written by _pack; the decoder below is compiled only when __gen_unpack_address is defined. */
+#define V3D33_TILE_RENDERING_MODE_CONFIGURATION_Z_STENCIL_CONFIG_length 9
+#ifdef __gen_unpack_address
+static inline void
+V3D33_TILE_RENDERING_MODE_CONFIGURATION_Z_STENCIL_CONFIG_unpack(const uint8_t * restrict cl,
+ struct V3D33_TILE_RENDERING_MODE_CONFIGURATION_Z_STENCIL_CONFIG * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7); /* ranges appear to be packet-wide bit indices (byte * 8 + bit), mirroring _pack */
+ values->address = __gen_unpack_address(cl, 40, 71);
+ values->padded_height_of_output_image_in_uif_blocks = __gen_unpack_uint(cl, 33, 45);
+ values->memory_format = __gen_unpack_uint(cl, 30, 32);
+ values->output_image_format = __gen_unpack_uint(cl, 24, 29);
+ values->decimate_mode = __gen_unpack_uint(cl, 22, 23);
+ values->internal_type = __gen_unpack_uint(cl, 18, 21);
+ values->internal_bpp_ignored = __gen_unpack_uint(cl, 16, 17);
+ values->z_stencil_id = __gen_unpack_uint(cl, 12, 15);
+ values->sub_id = __gen_unpack_uint(cl, 8, 11);
+}
+#endif
+
+
+#define V3D33_TILE_RENDERING_MODE_CONFIGURATION_Z_STENCIL_CLEAR_VALUES_opcode 121
+#define V3D33_TILE_RENDERING_MODE_CONFIGURATION_Z_STENCIL_CLEAR_VALUES_header\
+ .opcode = 121, \
+ .sub_id = 3
+/* Unpacked form of the Z/stencil clear-values packet; the _header macro above supplies the fixed opcode (121) and sub_id (3) initialisers. */
+struct V3D33_TILE_RENDERING_MODE_CONFIGURATION_Z_STENCIL_CLEAR_VALUES {
+ uint32_t opcode;
+ uint32_t unused; /* packed as a 16-bit field into the last two packet bytes */
+ float z_clear_value; /* copied with memcpy into the packet starting at byte 3 */
+ uint32_t stencil_vg_mask_clear_value; /* packed as an 8-bit field in packet byte 2 */
+ uint32_t sub_id;
+};
+/* Serialise *values into cl[0]..cl[8]; data is unused here because no relocation is emitted. */
+static inline void
+V3D33_TILE_RENDERING_MODE_CONFIGURATION_Z_STENCIL_CLEAR_VALUES_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_TILE_RENDERING_MODE_CONFIGURATION_Z_STENCIL_CLEAR_VALUES * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+ cl[ 1] = __gen_uint(values->sub_id, 0, 3);
+
+ cl[ 2] = __gen_uint(values->stencil_vg_mask_clear_value, 0, 7);
+
+ /* The float starts on a byte boundary, so its object representation is copied directly (sizeof(float) bytes from cl[3]). */
+ memcpy(&cl[3], &values->z_clear_value, sizeof(values->z_clear_value));
+ cl[ 7] = __gen_uint(values->unused, 0, 15);
+
+ cl[ 8] = __gen_uint(values->unused, 0, 15) >> 8;
+
+}
+/* _length equals the nine bytes written by _pack; the decoder below is compiled only when __gen_unpack_address is defined. */
+#define V3D33_TILE_RENDERING_MODE_CONFIGURATION_Z_STENCIL_CLEAR_VALUES_length 9
+#ifdef __gen_unpack_address
+static inline void
+V3D33_TILE_RENDERING_MODE_CONFIGURATION_Z_STENCIL_CLEAR_VALUES_unpack(const uint8_t * restrict cl,
+ struct V3D33_TILE_RENDERING_MODE_CONFIGURATION_Z_STENCIL_CLEAR_VALUES * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+ values->unused = __gen_unpack_uint(cl, 56, 71);
+ values->z_clear_value = __gen_unpack_float(cl, 24, 55); /* bits 24..55 correspond to the bytes _pack fills via memcpy at cl[3] */
+ values->stencil_vg_mask_clear_value = __gen_unpack_uint(cl, 16, 23);
+ values->sub_id = __gen_unpack_uint(cl, 8, 11);
+}
+#endif
+
+
+#define V3D33_TILE_RENDERING_MODE_CONFIGURATION_CLEAR_COLORS_PART1_opcode 121
+#define V3D33_TILE_RENDERING_MODE_CONFIGURATION_CLEAR_COLORS_PART1_header\
+ .opcode = 121, \
+ .sub_id = 4
+/* Unpacked form of the clear-colours part 1 packet; the _header macro above supplies the fixed opcode (121) and sub_id (4) initialisers. */
+struct V3D33_TILE_RENDERING_MODE_CONFIGURATION_CLEAR_COLORS_PART1 {
+ uint32_t opcode;
+ uint32_t clear_color_next_24_bits; /* packed as a 24-bit field into packet bytes 6..8 */
+ uint32_t clear_color_low_32_bits; /* copied with memcpy into the packet starting at byte 2 */
+ uint32_t render_target_number; /* packed at bits 4..7 of packet byte 1 */
+ uint32_t sub_id;
+};
+/* Serialise *values into cl[0]..cl[8]; data is unused here because no relocation is emitted. */
+static inline void
+V3D33_TILE_RENDERING_MODE_CONFIGURATION_CLEAR_COLORS_PART1_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_TILE_RENDERING_MODE_CONFIGURATION_CLEAR_COLORS_PART1 * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+ cl[ 1] = __gen_uint(values->render_target_number, 4, 7) |
+ __gen_uint(values->sub_id, 0, 3);
+
+ /* The 32-bit word starts on a byte boundary, so it is copied directly into the four bytes from cl[2]. */
+ memcpy(&cl[2], &values->clear_color_low_32_bits, sizeof(values->clear_color_low_32_bits));
+ cl[ 6] = __gen_uint(values->clear_color_next_24_bits, 0, 23);
+
+ cl[ 7] = __gen_uint(values->clear_color_next_24_bits, 0, 23) >> 8;
+
+ cl[ 8] = __gen_uint(values->clear_color_next_24_bits, 0, 23) >> 16;
+
+}
+/* _length equals the nine bytes written by _pack; the decoder below is compiled only when __gen_unpack_address is defined. */
+#define V3D33_TILE_RENDERING_MODE_CONFIGURATION_CLEAR_COLORS_PART1_length 9
+#ifdef __gen_unpack_address
+static inline void
+V3D33_TILE_RENDERING_MODE_CONFIGURATION_CLEAR_COLORS_PART1_unpack(const uint8_t * restrict cl,
+ struct V3D33_TILE_RENDERING_MODE_CONFIGURATION_CLEAR_COLORS_PART1 * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+ values->clear_color_next_24_bits = __gen_unpack_uint(cl, 48, 71);
+ values->clear_color_low_32_bits = __gen_unpack_uint(cl, 16, 47); /* the bytes _pack fills via memcpy at cl[2] */
+ values->render_target_number = __gen_unpack_uint(cl, 12, 15);
+ values->sub_id = __gen_unpack_uint(cl, 8, 11);
+}
+#endif
+
+
+#define V3D33_TILE_RENDERING_MODE_CONFIGURATION_CLEAR_COLORS_PART2_opcode 121
+#define V3D33_TILE_RENDERING_MODE_CONFIGURATION_CLEAR_COLORS_PART2_header\
+ .opcode = 121, \
+ .sub_id = 5
+/* Unpacked form of the clear-colours part 2 packet; the _header macro above supplies the fixed opcode (121) and sub_id (5) initialisers. */
+struct V3D33_TILE_RENDERING_MODE_CONFIGURATION_CLEAR_COLORS_PART2 {
+ uint32_t opcode;
+ uint32_t clear_color_mid_high_24_bits; /* packed as a 24-bit field into packet bytes 6..8 */
+ uint32_t clear_color_mid_low_32_bits; /* copied with memcpy into the packet starting at byte 2 */
+ uint32_t render_target_number; /* packed at bits 4..7 of packet byte 1 */
+ uint32_t sub_id;
+};
+/* Serialise *values into cl[0]..cl[8]; data is unused here because no relocation is emitted. */
+static inline void
+V3D33_TILE_RENDERING_MODE_CONFIGURATION_CLEAR_COLORS_PART2_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_TILE_RENDERING_MODE_CONFIGURATION_CLEAR_COLORS_PART2 * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+ cl[ 1] = __gen_uint(values->render_target_number, 4, 7) |
+ __gen_uint(values->sub_id, 0, 3);
+
+ /* The 32-bit word starts on a byte boundary, so it is copied directly into the four bytes from cl[2]. */
+ memcpy(&cl[2], &values->clear_color_mid_low_32_bits, sizeof(values->clear_color_mid_low_32_bits));
+ cl[ 6] = __gen_uint(values->clear_color_mid_high_24_bits, 0, 23);
+
+ cl[ 7] = __gen_uint(values->clear_color_mid_high_24_bits, 0, 23) >> 8;
+
+ cl[ 8] = __gen_uint(values->clear_color_mid_high_24_bits, 0, 23) >> 16;
+
+}
+/* _length equals the nine bytes written by _pack; the decoder below is compiled only when __gen_unpack_address is defined. */
+#define V3D33_TILE_RENDERING_MODE_CONFIGURATION_CLEAR_COLORS_PART2_length 9
+#ifdef __gen_unpack_address
+static inline void
+V3D33_TILE_RENDERING_MODE_CONFIGURATION_CLEAR_COLORS_PART2_unpack(const uint8_t * restrict cl,
+ struct V3D33_TILE_RENDERING_MODE_CONFIGURATION_CLEAR_COLORS_PART2 * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+ values->clear_color_mid_high_24_bits = __gen_unpack_uint(cl, 48, 71);
+ values->clear_color_mid_low_32_bits = __gen_unpack_uint(cl, 16, 47); /* the bytes _pack fills via memcpy at cl[2] */
+ values->render_target_number = __gen_unpack_uint(cl, 12, 15);
+ values->sub_id = __gen_unpack_uint(cl, 8, 11);
+}
+#endif
+
+
+#define V3D33_TILE_RENDERING_MODE_CONFIGURATION_CLEAR_COLORS_PART3_opcode 121
+#define V3D33_TILE_RENDERING_MODE_CONFIGURATION_CLEAR_COLORS_PART3_header\
+ .opcode = 121, \
+ .sub_id = 6
+/* Unpacked form of the clear-colours part 3 packet; the _header macro above supplies the fixed opcode (121) and sub_id (6) initialisers. */
+struct V3D33_TILE_RENDERING_MODE_CONFIGURATION_CLEAR_COLORS_PART3 {
+ uint32_t opcode;
+ uint32_t pad; /* unpacked from packet bits 61..71 */
+ uint32_t uif_padded_height_in_uif_blocks; /* 13-bit field, packet bits 48..60 */
+ uint32_t raster_row_stride_or_image_height_in_pixels; /* 16-bit field, packet bytes 4..5 */
+ uint32_t clear_color_high_16_bits; /* 16-bit field, packet bytes 2..3 */
+ uint32_t render_target_number;
+ uint32_t sub_id;
+};
+/* Serialise *values into cl[0]..cl[8]; data is unused here because no relocation is emitted. */
+static inline void
+V3D33_TILE_RENDERING_MODE_CONFIGURATION_CLEAR_COLORS_PART3_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_TILE_RENDERING_MODE_CONFIGURATION_CLEAR_COLORS_PART3 * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+ cl[ 1] = __gen_uint(values->render_target_number, 4, 7) |
+ __gen_uint(values->sub_id, 0, 3);
+
+ cl[ 2] = __gen_uint(values->clear_color_high_16_bits, 0, 15);
+
+ cl[ 3] = __gen_uint(values->clear_color_high_16_bits, 0, 15) >> 8;
+
+ cl[ 4] = __gen_uint(values->raster_row_stride_or_image_height_in_pixels, 0, 15);
+
+ cl[ 5] = __gen_uint(values->raster_row_stride_or_image_height_in_pixels, 0, 15) >> 8;
+ /* The 13-bit padded height spans cl[6] and the low bits of cl[7]; pad (range 5..15) fills the rest of cl[7] and cl[8]. */
+ cl[ 6] = __gen_uint(values->uif_padded_height_in_uif_blocks, 0, 12);
+
+ cl[ 7] = __gen_uint(values->pad, 5, 15) |
+ __gen_uint(values->uif_padded_height_in_uif_blocks, 0, 12) >> 8;
+
+ cl[ 8] = __gen_uint(values->pad, 5, 15) >> 8;
+
+}
+/* _length equals the nine bytes written by _pack; the decoder below is compiled only when __gen_unpack_address is defined. */
+#define V3D33_TILE_RENDERING_MODE_CONFIGURATION_CLEAR_COLORS_PART3_length 9
+#ifdef __gen_unpack_address
+static inline void
+V3D33_TILE_RENDERING_MODE_CONFIGURATION_CLEAR_COLORS_PART3_unpack(const uint8_t * restrict cl,
+ struct V3D33_TILE_RENDERING_MODE_CONFIGURATION_CLEAR_COLORS_PART3 * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+ values->pad = __gen_unpack_uint(cl, 61, 71);
+ values->uif_padded_height_in_uif_blocks = __gen_unpack_uint(cl, 48, 60);
+ values->raster_row_stride_or_image_height_in_pixels = __gen_unpack_uint(cl, 32, 47);
+ values->clear_color_high_16_bits = __gen_unpack_uint(cl, 16, 31);
+ values->render_target_number = __gen_unpack_uint(cl, 12, 15);
+ values->sub_id = __gen_unpack_uint(cl, 8, 11);
+}
+#endif
+
+
+#define V3D33_TILE_COORDINATES_opcode 124
+#define V3D33_TILE_COORDINATES_header \
+ .opcode = 124
+/* Unpacked form of the tile-coordinates packet; the _header macro above supplies the fixed opcode (124) initialiser. */
+struct V3D33_TILE_COORDINATES {
+ uint32_t opcode;
+ uint32_t tile_row_number; /* 12-bit field, packet bits 20..31 */
+ uint32_t tile_column_number; /* 12-bit field, packet bits 8..19 */
+};
+/* Serialise *values into cl[0]..cl[3]; data is unused here because no relocation is emitted. */
+static inline void
+V3D33_TILE_COORDINATES_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_TILE_COORDINATES * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+ cl[ 1] = __gen_uint(values->tile_column_number, 0, 11);
+ /* cl[2] is shared: the high bits of the column (value >> 8) plus the row, whose range starts at bit 4. */
+ cl[ 2] = __gen_uint(values->tile_row_number, 4, 15) |
+ __gen_uint(values->tile_column_number, 0, 11) >> 8;
+
+ cl[ 3] = __gen_uint(values->tile_row_number, 4, 15) >> 8;
+
+}
+/* _length equals the four bytes written by _pack; the decoder below is compiled only when __gen_unpack_address is defined. */
+#define V3D33_TILE_COORDINATES_length 4
+#ifdef __gen_unpack_address
+static inline void
+V3D33_TILE_COORDINATES_unpack(const uint8_t * restrict cl,
+ struct V3D33_TILE_COORDINATES * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+ values->tile_row_number = __gen_unpack_uint(cl, 20, 31);
+ values->tile_column_number = __gen_unpack_uint(cl, 8, 19);
+}
+#endif
+
+
+#define V3D33_MULTICORE_RENDERING_SUPERTILE_CONFIGURATION_opcode 122
+#define V3D33_MULTICORE_RENDERING_SUPERTILE_CONFIGURATION_header\
+ .opcode = 122
+/* Unpacked form of the multicore supertile-configuration packet; the _header macro above supplies the fixed opcode (122) initialiser. */
+struct V3D33_MULTICORE_RENDERING_SUPERTILE_CONFIGURATION {
+ uint32_t opcode;
+ bool supertile_raster_order; /* single bit, bit 4 of packet byte 8 */
+ bool multicore_enable; /* single bit, bit 0 of packet byte 8 */
+ uint32_t total_frame_height_in_tiles; /* 12-bit field, packet bits 52..63 */
+ uint32_t total_frame_width_in_tiles; /* 12-bit field, packet bits 40..51 */
+ uint32_t total_frame_height_in_supertiles; /* 8-bit field, packet byte 4 */
+ uint32_t total_frame_width_in_supertiles; /* 8-bit field, packet byte 3 */
+ uint32_t supertile_height_in_tiles_minus_1; /* 8-bit field, packet byte 2 */
+ uint32_t supertile_width_in_tiles_minus_1; /* 8-bit field, packet byte 1 */
+};
+/* Serialise *values into cl[0]..cl[8]; data is unused here because no relocation is emitted. */
+static inline void
+V3D33_MULTICORE_RENDERING_SUPERTILE_CONFIGURATION_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_MULTICORE_RENDERING_SUPERTILE_CONFIGURATION * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+ cl[ 1] = __gen_uint(values->supertile_width_in_tiles_minus_1, 0, 7);
+
+ cl[ 2] = __gen_uint(values->supertile_height_in_tiles_minus_1, 0, 7);
+
+ cl[ 3] = __gen_uint(values->total_frame_width_in_supertiles, 0, 7);
+
+ cl[ 4] = __gen_uint(values->total_frame_height_in_supertiles, 0, 7);
+ /* Two 12-bit fields share cl[5]..cl[7]: the width spills into the low bits of cl[6], the height starts at bit 4 of cl[6]. */
+ cl[ 5] = __gen_uint(values->total_frame_width_in_tiles, 0, 11);
+
+ cl[ 6] = __gen_uint(values->total_frame_height_in_tiles, 4, 15) |
+ __gen_uint(values->total_frame_width_in_tiles, 0, 11) >> 8;
+
+ cl[ 7] = __gen_uint(values->total_frame_height_in_tiles, 4, 15) >> 8;
+
+ cl[ 8] = __gen_uint(values->supertile_raster_order, 4, 4) |
+ __gen_uint(values->multicore_enable, 0, 0);
+
+}
+/* _length equals the nine bytes written by _pack; the decoder below is compiled only when __gen_unpack_address is defined. */
+#define V3D33_MULTICORE_RENDERING_SUPERTILE_CONFIGURATION_length 9
+#ifdef __gen_unpack_address
+static inline void
+V3D33_MULTICORE_RENDERING_SUPERTILE_CONFIGURATION_unpack(const uint8_t * restrict cl,
+ struct V3D33_MULTICORE_RENDERING_SUPERTILE_CONFIGURATION * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+ values->supertile_raster_order = __gen_unpack_uint(cl, 68, 68);
+ values->multicore_enable = __gen_unpack_uint(cl, 64, 64);
+ values->total_frame_height_in_tiles = __gen_unpack_uint(cl, 52, 63);
+ values->total_frame_width_in_tiles = __gen_unpack_uint(cl, 40, 51);
+ values->total_frame_height_in_supertiles = __gen_unpack_uint(cl, 32, 39);
+ values->total_frame_width_in_supertiles = __gen_unpack_uint(cl, 24, 31);
+ values->supertile_height_in_tiles_minus_1 = __gen_unpack_uint(cl, 16, 23);
+ values->supertile_width_in_tiles_minus_1 = __gen_unpack_uint(cl, 8, 15);
+}
+#endif
+
+
+#define V3D33_MULTICORE_RENDERING_TILE_LIST_SET_BASE_opcode 123
+#define V3D33_MULTICORE_RENDERING_TILE_LIST_SET_BASE_header\
+ .opcode = 123
+/* Unpacked form of the tile-list-set-base packet; the _header macro above supplies the fixed opcode (123) initialiser. */
+struct V3D33_MULTICORE_RENDERING_TILE_LIST_SET_BASE {
+ uint32_t opcode;
+ __gen_address_type address; /* relocated by _pack; its offset is written into packet bytes 1..4 */
+ uint32_t tile_list_set_number; /* 4-bit field sharing packet byte 1 with the address offset */
+};
+/* Serialise *values into cl[0]..cl[4], emitting one relocation for values->address through data. */
+static inline void
+V3D33_MULTICORE_RENDERING_TILE_LIST_SET_BASE_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_MULTICORE_RENDERING_TILE_LIST_SET_BASE * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+ /* NOTE(review): the offset's low byte is ORed with tile_list_set_number (bits 0..3); presumably the offset's low bits are zero by alignment - confirm. */
+ __gen_emit_reloc(data, &values->address);
+ cl[ 1] = __gen_address_offset(&values->address) |
+ __gen_uint(values->tile_list_set_number, 0, 3);
+
+ cl[ 2] = __gen_address_offset(&values->address) >> 8;
+
+ cl[ 3] = __gen_address_offset(&values->address) >> 16;
+
+ cl[ 4] = __gen_address_offset(&values->address) >> 24;
+
+}
+/* _length equals the five bytes written by _pack; the decoder below is compiled only when __gen_unpack_address is defined. */
+#define V3D33_MULTICORE_RENDERING_TILE_LIST_SET_BASE_length 5
+#ifdef __gen_unpack_address
+static inline void
+V3D33_MULTICORE_RENDERING_TILE_LIST_SET_BASE_unpack(const uint8_t * restrict cl,
+ struct V3D33_MULTICORE_RENDERING_TILE_LIST_SET_BASE * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+ values->address = __gen_unpack_address(cl, 14, 39); /* starts at bit 14, i.e. skips the low 6 bits of the word _pack writes at cl[1] */
+ values->tile_list_set_number = __gen_unpack_uint(cl, 8, 11);
+}
+#endif
+
+
+#define V3D33_TILE_COORDINATES_IMPLICIT_opcode 125
+#define V3D33_TILE_COORDINATES_IMPLICIT_header \
+ .opcode = 125
+/* Unpacked form of the implicit tile-coordinates packet: it carries only the opcode (125, supplied by the _header macro above). */
+struct V3D33_TILE_COORDINATES_IMPLICIT {
+ uint32_t opcode;
+};
+/* Serialise *values into the single byte cl[0]; data is unused here because no relocation is emitted. */
+static inline void
+V3D33_TILE_COORDINATES_IMPLICIT_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_TILE_COORDINATES_IMPLICIT * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+}
+/* _length equals the single byte written by _pack; the decoder below is compiled only when __gen_unpack_address is defined. */
+#define V3D33_TILE_COORDINATES_IMPLICIT_length 1
+#ifdef __gen_unpack_address
+static inline void
+V3D33_TILE_COORDINATES_IMPLICIT_unpack(const uint8_t * restrict cl,
+ struct V3D33_TILE_COORDINATES_IMPLICIT * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+}
+#endif
+
+
+#define V3D33_TILE_LIST_INITIAL_BLOCK_SIZE_opcode 126
+#define V3D33_TILE_LIST_INITIAL_BLOCK_SIZE_header\
+ .opcode = 126
+/* Unpacked form of the tile-list initial-block-size packet; the _header macro above supplies the fixed opcode (126) initialiser. */
+struct V3D33_TILE_LIST_INITIAL_BLOCK_SIZE {
+ uint32_t opcode;
+ bool use_auto_chained_tile_lists; /* single bit, bit 2 of packet byte 1 */
+ uint32_t size_of_first_block_in_chained_tile_lists; /* 2-bit field; the TILE_ALLOCATION_BLOCK_SIZE_* values are listed directly below */
+#define TILE_ALLOCATION_BLOCK_SIZE_64B 0
+#define TILE_ALLOCATION_BLOCK_SIZE_128B 1
+#define TILE_ALLOCATION_BLOCK_SIZE_256B 2
+};
+/* Serialise *values into cl[0]..cl[1]; data is unused here because no relocation is emitted. */
+static inline void
+V3D33_TILE_LIST_INITIAL_BLOCK_SIZE_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_TILE_LIST_INITIAL_BLOCK_SIZE * restrict values)
+{
+ cl[ 0] = __gen_uint(values->opcode, 0, 7);
+
+ cl[ 1] = __gen_uint(values->use_auto_chained_tile_lists, 2, 2) |
+ __gen_uint(values->size_of_first_block_in_chained_tile_lists, 0, 1);
+
+}
+/* _length equals the two bytes written by _pack; the decoder below is compiled only when __gen_unpack_address is defined. */
+#define V3D33_TILE_LIST_INITIAL_BLOCK_SIZE_length 2
+#ifdef __gen_unpack_address
+static inline void
+V3D33_TILE_LIST_INITIAL_BLOCK_SIZE_unpack(const uint8_t * restrict cl,
+ struct V3D33_TILE_LIST_INITIAL_BLOCK_SIZE * restrict values)
+{
+ values->opcode = __gen_unpack_uint(cl, 0, 7);
+ values->use_auto_chained_tile_lists = __gen_unpack_uint(cl, 10, 10);
+ values->size_of_first_block_in_chained_tile_lists = __gen_unpack_uint(cl, 8, 9);
+}
+#endif
+
+
+#define V3D33_GL_SHADER_STATE_RECORD_header \
+
+/* Unpacked form of the GL shader state record (36 bytes packed); it has no opcode field and its _header macro above expands to nothing. */
+struct V3D33_GL_SHADER_STATE_RECORD {
+ bool point_size_in_shaded_vertex_data;
+ bool enable_clipping;
+ bool vertex_id_read_by_coordinate_shader;
+ bool instance_id_read_by_coordinate_shader;
+ bool vertex_id_read_by_vertex_shader;
+ bool instance_id_read_by_vertex_shader;
+ bool fragment_shader_does_z_writes;
+ bool turn_off_early_z_test;
+ bool coordinate_shader_has_separate_input_and_output_vpm_blocks;
+ bool vertex_shader_has_separate_input_and_output_vpm_blocks;
+ bool fragment_shader_uses_real_pixel_centre_w_in_addition_to_centroid_w2;
+ uint32_t number_of_varyings_in_fragment_shader; /* 8-bit field, packet byte 2 */
+ uint32_t coordinate_shader_output_vpm_segment_size; /* 8-bit field, packet byte 4 */
+ uint32_t coordinate_shader_input_vpm_segment_size; /* 8-bit field, packet byte 5 */
+ uint32_t vertex_shader_output_vpm_segment_size; /* 8-bit field, packet byte 6 */
+ uint32_t vertex_shader_input_vpm_segment_size; /* 8-bit field, packet byte 7 */
+ __gen_address_type address_of_default_attribute_values;
+ __gen_address_type fragment_shader_code_address; /* shares its low byte with the three flags that follow */
+ bool _2_way_threadable;
+ bool _4_way_threadable;
+ bool propagate_nans;
+ __gen_address_type fragment_shader_uniforms_address;
+ __gen_address_type vertex_shader_code_address;
+ __gen_address_type vertex_shader_uniforms_address;
+ __gen_address_type coordinate_shader_code_address;
+ __gen_address_type coordinate_shader_uniforms_address;
+};
+/* Serialise *values into cl[0]..cl[35]; one relocation is emitted through data for each of the seven address fields. */
+static inline void
+V3D33_GL_SHADER_STATE_RECORD_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_GL_SHADER_STATE_RECORD * restrict values)
+{
+ cl[ 0] = __gen_uint(values->point_size_in_shaded_vertex_data, 0, 0) |
+ __gen_uint(values->enable_clipping, 1, 1) |
+ __gen_uint(values->vertex_id_read_by_coordinate_shader, 2, 2) |
+ __gen_uint(values->instance_id_read_by_coordinate_shader, 3, 3) |
+ __gen_uint(values->vertex_id_read_by_vertex_shader, 4, 4) |
+ __gen_uint(values->instance_id_read_by_vertex_shader, 5, 5) |
+ __gen_uint(values->fragment_shader_does_z_writes, 6, 6) |
+ __gen_uint(values->turn_off_early_z_test, 7, 7);
+
+ cl[ 1] = __gen_uint(values->coordinate_shader_has_separate_input_and_output_vpm_blocks, 0, 0) |
+ __gen_uint(values->vertex_shader_has_separate_input_and_output_vpm_blocks, 1, 1) |
+ __gen_uint(values->fragment_shader_uses_real_pixel_centre_w_in_addition_to_centroid_w2, 2, 2);
+
+ cl[ 2] = __gen_uint(values->number_of_varyings_in_fragment_shader, 0, 7);
+ /* No struct field maps to byte 3; it is cleared explicitly so the output is fully defined. */
+ cl[ 3] = 0;
+ cl[ 4] = __gen_uint(values->coordinate_shader_output_vpm_segment_size, 0, 7);
+
+ cl[ 5] = __gen_uint(values->coordinate_shader_input_vpm_segment_size, 0, 7);
+
+ cl[ 6] = __gen_uint(values->vertex_shader_output_vpm_segment_size, 0, 7);
+
+ cl[ 7] = __gen_uint(values->vertex_shader_input_vpm_segment_size, 0, 7);
+ /* From here on each address follows the same pattern: emit its reloc, then write its offset as four bytes, low byte first. */
+ __gen_emit_reloc(data, &values->address_of_default_attribute_values);
+ cl[ 8] = __gen_address_offset(&values->address_of_default_attribute_values);
+
+ cl[ 9] = __gen_address_offset(&values->address_of_default_attribute_values) >> 8;
+
+ cl[10] = __gen_address_offset(&values->address_of_default_attribute_values) >> 16;
+
+ cl[11] = __gen_address_offset(&values->address_of_default_attribute_values) >> 24;
+ /* NOTE(review): three flag bits (0..2) are ORed into the low byte of the code address offset; presumably the offset is 8-byte aligned - confirm. */
+ __gen_emit_reloc(data, &values->fragment_shader_code_address);
+ cl[12] = __gen_address_offset(&values->fragment_shader_code_address) |
+ __gen_uint(values->_2_way_threadable, 0, 0) |
+ __gen_uint(values->_4_way_threadable, 1, 1) |
+ __gen_uint(values->propagate_nans, 2, 2);
+
+ cl[13] = __gen_address_offset(&values->fragment_shader_code_address) >> 8;
+
+ cl[14] = __gen_address_offset(&values->fragment_shader_code_address) >> 16;
+
+ cl[15] = __gen_address_offset(&values->fragment_shader_code_address) >> 24;
+
+ __gen_emit_reloc(data, &values->fragment_shader_uniforms_address);
+ cl[16] = __gen_address_offset(&values->fragment_shader_uniforms_address);
+
+ cl[17] = __gen_address_offset(&values->fragment_shader_uniforms_address) >> 8;
+
+ cl[18] = __gen_address_offset(&values->fragment_shader_uniforms_address) >> 16;
+
+ cl[19] = __gen_address_offset(&values->fragment_shader_uniforms_address) >> 24;
+
+ __gen_emit_reloc(data, &values->vertex_shader_code_address);
+ cl[20] = __gen_address_offset(&values->vertex_shader_code_address);
+
+ cl[21] = __gen_address_offset(&values->vertex_shader_code_address) >> 8;
+
+ cl[22] = __gen_address_offset(&values->vertex_shader_code_address) >> 16;
+
+ cl[23] = __gen_address_offset(&values->vertex_shader_code_address) >> 24;
+
+ __gen_emit_reloc(data, &values->vertex_shader_uniforms_address);
+ cl[24] = __gen_address_offset(&values->vertex_shader_uniforms_address);
+
+ cl[25] = __gen_address_offset(&values->vertex_shader_uniforms_address) >> 8;
+
+ cl[26] = __gen_address_offset(&values->vertex_shader_uniforms_address) >> 16;
+
+ cl[27] = __gen_address_offset(&values->vertex_shader_uniforms_address) >> 24;
+
+ __gen_emit_reloc(data, &values->coordinate_shader_code_address);
+ cl[28] = __gen_address_offset(&values->coordinate_shader_code_address);
+
+ cl[29] = __gen_address_offset(&values->coordinate_shader_code_address) >> 8;
+
+ cl[30] = __gen_address_offset(&values->coordinate_shader_code_address) >> 16;
+
+ cl[31] = __gen_address_offset(&values->coordinate_shader_code_address) >> 24;
+
+ __gen_emit_reloc(data, &values->coordinate_shader_uniforms_address);
+ cl[32] = __gen_address_offset(&values->coordinate_shader_uniforms_address);
+
+ cl[33] = __gen_address_offset(&values->coordinate_shader_uniforms_address) >> 8;
+
+ cl[34] = __gen_address_offset(&values->coordinate_shader_uniforms_address) >> 16;
+
+ cl[35] = __gen_address_offset(&values->coordinate_shader_uniforms_address) >> 24;
+
+}
+/* _length equals the 36 bytes written by _pack; the decoder below is compiled only when __gen_unpack_address is defined. */
+#define V3D33_GL_SHADER_STATE_RECORD_length 36
+#ifdef __gen_unpack_address
+static inline void
+V3D33_GL_SHADER_STATE_RECORD_unpack(const uint8_t * restrict cl,
+ struct V3D33_GL_SHADER_STATE_RECORD * restrict values)
+{
+ values->point_size_in_shaded_vertex_data = __gen_unpack_uint(cl, 0, 0);
+ values->enable_clipping = __gen_unpack_uint(cl, 1, 1);
+ values->vertex_id_read_by_coordinate_shader = __gen_unpack_uint(cl, 2, 2);
+ values->instance_id_read_by_coordinate_shader = __gen_unpack_uint(cl, 3, 3);
+ values->vertex_id_read_by_vertex_shader = __gen_unpack_uint(cl, 4, 4);
+ values->instance_id_read_by_vertex_shader = __gen_unpack_uint(cl, 5, 5);
+ values->fragment_shader_does_z_writes = __gen_unpack_uint(cl, 6, 6);
+ values->turn_off_early_z_test = __gen_unpack_uint(cl, 7, 7);
+ values->coordinate_shader_has_separate_input_and_output_vpm_blocks = __gen_unpack_uint(cl, 8, 8);
+ values->vertex_shader_has_separate_input_and_output_vpm_blocks = __gen_unpack_uint(cl, 9, 9);
+ values->fragment_shader_uses_real_pixel_centre_w_in_addition_to_centroid_w2 = __gen_unpack_uint(cl, 10, 10);
+ values->number_of_varyings_in_fragment_shader = __gen_unpack_uint(cl, 16, 23); /* bits 24..31 (byte 3) are not decoded into any field */
+ values->coordinate_shader_output_vpm_segment_size = __gen_unpack_uint(cl, 32, 39);
+ values->coordinate_shader_input_vpm_segment_size = __gen_unpack_uint(cl, 40, 47);
+ values->vertex_shader_output_vpm_segment_size = __gen_unpack_uint(cl, 48, 55);
+ values->vertex_shader_input_vpm_segment_size = __gen_unpack_uint(cl, 56, 63);
+ values->address_of_default_attribute_values = __gen_unpack_address(cl, 64, 95);
+ values->fragment_shader_code_address = __gen_unpack_address(cl, 99, 127); /* starts at bit 99: bits 96..98 hold the three flags read next */
+ values->_2_way_threadable = __gen_unpack_uint(cl, 96, 96);
+ values->_4_way_threadable = __gen_unpack_uint(cl, 97, 97);
+ values->propagate_nans = __gen_unpack_uint(cl, 98, 98);
+ values->fragment_shader_uniforms_address = __gen_unpack_address(cl, 128, 159);
+ values->vertex_shader_code_address = __gen_unpack_address(cl, 160, 191);
+ values->vertex_shader_uniforms_address = __gen_unpack_address(cl, 192, 223);
+ values->coordinate_shader_code_address = __gen_unpack_address(cl, 224, 255);
+ values->coordinate_shader_uniforms_address = __gen_unpack_address(cl, 256, 287);
+}
+#endif
+
+
+#define V3D33_GL_SHADER_STATE_ATTRIBUTE_RECORD_header\
+
+/* Unpacked form of one shader-state attribute record (12 bytes packed); it has no opcode field and its _header macro above expands to nothing. */
+struct V3D33_GL_SHADER_STATE_ATTRIBUTE_RECORD {
+ __gen_address_type address; /* relocated by _pack; its offset fills packet bytes 0..3 */
+ uint32_t vec_size; /* 2-bit field, bits 0..1 of packet byte 4 */
+ uint32_t type; /* 3-bit field; the ATTRIBUTE_* values are listed directly below */
+#define ATTRIBUTE_HALF_FLOAT 1
+#define ATTRIBUTE_FLOAT 2
+#define ATTRIBUTE_FIXED 3
+#define ATTRIBUTE_BYTE 4
+#define ATTRIBUTE_SHORT 5
+#define ATTRIBUTE_INT 6
+#define ATTRIBUTE_INT2_10_10_10 7
+ bool signed_int_type;
+ bool normalized_int_type;
+ bool read_as_int_uint;
+ uint32_t number_of_values_read_by_coordinate_shader; /* 4-bit field, low nibble of packet byte 5 */
+ uint32_t number_of_values_read_by_vertex_shader; /* 4-bit field, high nibble of packet byte 5 */
+ uint32_t instance_divisor; /* 16-bit field, packet bytes 6..7 */
+ uint32_t stride; /* copied with memcpy into the packet starting at byte 8 */
+};
+/* Serialise *values into cl[0]..cl[11], emitting one relocation for values->address through data. */
+static inline void
+V3D33_GL_SHADER_STATE_ATTRIBUTE_RECORD_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_GL_SHADER_STATE_ATTRIBUTE_RECORD * restrict values)
+{
+ __gen_emit_reloc(data, &values->address);
+ cl[ 0] = __gen_address_offset(&values->address);
+
+ cl[ 1] = __gen_address_offset(&values->address) >> 8;
+
+ cl[ 2] = __gen_address_offset(&values->address) >> 16;
+
+ cl[ 3] = __gen_address_offset(&values->address) >> 24;
+
+ cl[ 4] = __gen_uint(values->vec_size, 0, 1) |
+ __gen_uint(values->type, 2, 4) |
+ __gen_uint(values->signed_int_type, 5, 5) |
+ __gen_uint(values->normalized_int_type, 6, 6) |
+ __gen_uint(values->read_as_int_uint, 7, 7);
+
+ cl[ 5] = __gen_uint(values->number_of_values_read_by_coordinate_shader, 0, 3) |
+ __gen_uint(values->number_of_values_read_by_vertex_shader, 4, 7);
+
+ cl[ 6] = __gen_uint(values->instance_divisor, 0, 15);
+
+ cl[ 7] = __gen_uint(values->instance_divisor, 0, 15) >> 8;
+
+ /* stride is a full 32-bit word on a byte boundary, so it is copied directly into the four bytes from cl[8]. */
+ memcpy(&cl[8], &values->stride, sizeof(values->stride));
+}
+/* _length equals the 12 bytes written by _pack; the decoder below is compiled only when __gen_unpack_address is defined. */
+#define V3D33_GL_SHADER_STATE_ATTRIBUTE_RECORD_length 12
+#ifdef __gen_unpack_address
+static inline void
+V3D33_GL_SHADER_STATE_ATTRIBUTE_RECORD_unpack(const uint8_t * restrict cl,
+ struct V3D33_GL_SHADER_STATE_ATTRIBUTE_RECORD * restrict values)
+{
+ values->address = __gen_unpack_address(cl, 0, 31);
+ values->vec_size = __gen_unpack_uint(cl, 32, 33);
+ values->type = __gen_unpack_uint(cl, 34, 36);
+ values->signed_int_type = __gen_unpack_uint(cl, 37, 37);
+ values->normalized_int_type = __gen_unpack_uint(cl, 38, 38);
+ values->read_as_int_uint = __gen_unpack_uint(cl, 39, 39);
+ values->number_of_values_read_by_coordinate_shader = __gen_unpack_uint(cl, 40, 43);
+ values->number_of_values_read_by_vertex_shader = __gen_unpack_uint(cl, 44, 47);
+ values->instance_divisor = __gen_unpack_uint(cl, 48, 63);
+ values->stride = __gen_unpack_uint(cl, 64, 95); /* the bytes _pack fills via memcpy at cl[8] */
+}
+#endif
+
+
+#define V3D33_VPM_GENERIC_BLOCK_WRITE_SETUP_header\
+ .id = 0, \
+ .id0 = 0
+/* Unpacked form of the 4-byte VPM generic block write setup word; the _header macro above initialises id and id0 to 0. */
+struct V3D33_VPM_GENERIC_BLOCK_WRITE_SETUP {
+ uint32_t id; /* 2-bit field, word bits 30..31 */
+ uint32_t id0; /* 3-bit field, word bits 27..29 */
+ bool horiz;
+ bool laned;
+ bool segs;
+ int32_t stride; /* signed 7-bit field (packed with __gen_sint), word bits 15..21 */
+ uint32_t size; /* 2-bit field; the VPM_SETUP_SIZE_* values are listed directly below */
+#define VPM_SETUP_SIZE_8_BIT 0
+#define VPM_SETUP_SIZE_16_BIT 1
+#define VPM_SETUP_SIZE_32_BIT 2
+ uint32_t addr; /* 13-bit field, word bits 0..12 */
+};
+/* Serialise *values into cl[0]..cl[3]; data is unused here because no relocation is emitted. */
+static inline void
+V3D33_VPM_GENERIC_BLOCK_WRITE_SETUP_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_VPM_GENERIC_BLOCK_WRITE_SETUP * restrict values)
+{
+ cl[ 0] = __gen_uint(values->addr, 0, 12);
+ /* cl[1] combines the high bits of addr, the size field and the low bit of stride (range 7..13 starts at bit 7 of this byte). */
+ cl[ 1] = __gen_sint(values->stride, 7, 13) |
+ __gen_uint(values->size, 5, 6) |
+ __gen_uint(values->addr, 0, 12) >> 8;
+
+ cl[ 2] = __gen_uint(values->laned, 7, 7) |
+ __gen_uint(values->segs, 6, 6) |
+ __gen_sint(values->stride, 7, 13) >> 8;
+
+ cl[ 3] = __gen_uint(values->id, 6, 7) |
+ __gen_uint(values->id0, 3, 5) |
+ __gen_uint(values->horiz, 0, 0);
+
+}
+/* _length equals the four bytes written by _pack; the decoder below is compiled only when __gen_unpack_address is defined. */
+#define V3D33_VPM_GENERIC_BLOCK_WRITE_SETUP_length 4
+#ifdef __gen_unpack_address
+static inline void
+V3D33_VPM_GENERIC_BLOCK_WRITE_SETUP_unpack(const uint8_t * restrict cl,
+ struct V3D33_VPM_GENERIC_BLOCK_WRITE_SETUP * restrict values)
+{
+ values->id = __gen_unpack_uint(cl, 30, 31);
+ values->id0 = __gen_unpack_uint(cl, 27, 29);
+ values->horiz = __gen_unpack_uint(cl, 24, 24);
+ values->laned = __gen_unpack_uint(cl, 23, 23);
+ values->segs = __gen_unpack_uint(cl, 22, 22);
+ values->stride = __gen_unpack_sint(cl, 15, 21); /* the only field read with the signed helper */
+ values->size = __gen_unpack_uint(cl, 13, 14);
+ values->addr = __gen_unpack_uint(cl, 0, 12);
+}
+#endif
+
+
+#define V3D33_VPM_GENERIC_BLOCK_READ_SETUP_header\
+ .id = 1
+/* Unpacked form of the 4-byte VPM generic block read setup word; the _header macro above initialises id to 1. */
+struct V3D33_VPM_GENERIC_BLOCK_READ_SETUP {
+ uint32_t id; /* 2-bit field, word bits 30..31 */
+ bool horiz;
+ bool laned;
+ bool segs;
+ uint32_t num; /* 5-bit field, word bits 22..26 */
+ int32_t stride; /* signed 7-bit field (packed with __gen_sint), word bits 15..21 */
+ uint32_t size; /* 2-bit field; the VPM_SETUP_SIZE_* values are listed directly below */
+#define VPM_SETUP_SIZE_8_BIT 0
+#define VPM_SETUP_SIZE_16_BIT 1
+#define VPM_SETUP_SIZE_32_BIT 2
+ uint32_t addr; /* 13-bit field, word bits 0..12 */
+};
+/* Serialise *values into cl[0]..cl[3]; data is unused here because no relocation is emitted. */
+static inline void
+V3D33_VPM_GENERIC_BLOCK_READ_SETUP_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_VPM_GENERIC_BLOCK_READ_SETUP * restrict values)
+{
+ cl[ 0] = __gen_uint(values->addr, 0, 12);
+ /* cl[1] combines the high bits of addr, the size field and the low bit of stride (range 7..13 starts at bit 7 of this byte). */
+ cl[ 1] = __gen_sint(values->stride, 7, 13) |
+ __gen_uint(values->size, 5, 6) |
+ __gen_uint(values->addr, 0, 12) >> 8;
+
+ cl[ 2] = __gen_uint(values->num, 6, 10) |
+ __gen_sint(values->stride, 7, 13) >> 8;
+
+ cl[ 3] = __gen_uint(values->id, 6, 7) |
+ __gen_uint(values->horiz, 5, 5) |
+ __gen_uint(values->laned, 4, 4) |
+ __gen_uint(values->segs, 3, 3) |
+ __gen_uint(values->num, 6, 10) >> 8;
+
+}
+/* _length equals the four bytes written by _pack; the decoder below is compiled only when __gen_unpack_address is defined. */
+#define V3D33_VPM_GENERIC_BLOCK_READ_SETUP_length 4
+#ifdef __gen_unpack_address
+static inline void
+V3D33_VPM_GENERIC_BLOCK_READ_SETUP_unpack(const uint8_t * restrict cl,
+ struct V3D33_VPM_GENERIC_BLOCK_READ_SETUP * restrict values)
+{
+ values->id = __gen_unpack_uint(cl, 30, 31);
+ values->horiz = __gen_unpack_uint(cl, 29, 29);
+ values->laned = __gen_unpack_uint(cl, 28, 28);
+ values->segs = __gen_unpack_uint(cl, 27, 27);
+ values->num = __gen_unpack_uint(cl, 22, 26);
+ values->stride = __gen_unpack_sint(cl, 15, 21); /* the only field read with the signed helper */
+ values->size = __gen_unpack_uint(cl, 13, 14);
+ values->addr = __gen_unpack_uint(cl, 0, 12);
+}
+#endif
+
+
+#define V3D33_TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1_header\
+ .new_configuration_mode = 1
+/* Unpacked form of texture uniform parameter 0 in CFG_MODE=1 (4 bytes packed); the _header macro above sets new_configuration_mode to 1. */
+struct V3D33_TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1 {
+ bool per_pixel_mask_enable;
+ int32_t texel_offset_for_r_coordinate; /* signed 4-bit field, word bits 27..30 */
+ int32_t texel_offset_for_t_coordinate; /* signed 4-bit field, word bits 23..26 */
+ int32_t texel_offset_for_s_coordinate; /* signed 4-bit field, word bits 19..22 */
+ uint32_t r_wrap_mode;
+#define WRAP_MODE_REPEAT 0
+#define WRAP_MODE_CLAMP 1
+#define WRAP_MODE_MIRROR 2
+#define WRAP_MODE_BORDER 3
+#define WRAP_MODE_MIRROR_ONCE 4
+ uint32_t t_wrap_mode;
+#define WRAP_MODE_REPEAT 0
+#define WRAP_MODE_CLAMP 1
+#define WRAP_MODE_MIRROR 2
+#define WRAP_MODE_BORDER 3
+#define WRAP_MODE_MIRROR_ONCE 4
+ uint32_t s_wrap_mode;
+#define WRAP_MODE_REPEAT 0
+#define WRAP_MODE_CLAMP 1
+#define WRAP_MODE_MIRROR 2
+#define WRAP_MODE_BORDER 3
+#define WRAP_MODE_MIRROR_ONCE 4
+ bool new_configuration_mode;
+ bool shadow;
+ bool coefficient_lookup_mode;
+ bool disable_autolod_use_bias_only;
+ bool bias_supplied;
+ bool gather_sample_mode;
+ bool fetch_sample_mode;
+ uint32_t lookup_type; /* 3-bit field (bits 0..2) holding TEXTURE_* 0..6; must not be bool, which would collapse every non-zero value to 1 */
+#define TEXTURE_2D 0
+#define TEXTURE_2D_ARRAY 1
+#define TEXTURE_3D 2
+#define TEXTURE_CUBE_MAP 3
+#define TEXTURE_1D 4
+#define TEXTURE_1D_ARRAY 5
+#define TEXTURE_CHILD_IMAGE 6
+};
+/* Serialise *values into cl[0]..cl[3]; data is unused here because no relocation is emitted. */
+static inline void
+V3D33_TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1 * restrict values)
+{
+ cl[ 0] = __gen_uint(values->coefficient_lookup_mode, 7, 7) |
+ __gen_uint(values->disable_autolod_use_bias_only, 6, 6) |
+ __gen_uint(values->bias_supplied, 5, 5) |
+ __gen_uint(values->gather_sample_mode, 4, 4) |
+ __gen_uint(values->fetch_sample_mode, 3, 3) |
+ __gen_uint(values->lookup_type, 0, 2);
+
+ cl[ 1] = __gen_uint(values->t_wrap_mode, 5, 7) |
+ __gen_uint(values->s_wrap_mode, 2, 4) |
+ __gen_uint(values->new_configuration_mode, 1, 1) |
+ __gen_uint(values->shadow, 0, 0);
+ /* The T texel offset (range 7..10) straddles cl[2] and cl[3]; the three offsets go through the signed helper __gen_sint. */
+ cl[ 2] = __gen_sint(values->texel_offset_for_t_coordinate, 7, 10) |
+ __gen_sint(values->texel_offset_for_s_coordinate, 3, 6) |
+ __gen_uint(values->r_wrap_mode, 0, 2);
+
+ cl[ 3] = __gen_uint(values->per_pixel_mask_enable, 7, 7) |
+ __gen_sint(values->texel_offset_for_r_coordinate, 3, 6) |
+ __gen_sint(values->texel_offset_for_t_coordinate, 7, 10) >> 8;
+
+}
+/* _length equals the four bytes written by _pack; the decoder below is compiled only when __gen_unpack_address is defined. */
+#define V3D33_TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1_length 4
+#ifdef __gen_unpack_address
+static inline void
+V3D33_TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1_unpack(const uint8_t * restrict cl,
+ struct V3D33_TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1 * restrict values)
+{
+ values->per_pixel_mask_enable = __gen_unpack_uint(cl, 31, 31);
+ values->texel_offset_for_r_coordinate = __gen_unpack_sint(cl, 27, 30);
+ values->texel_offset_for_t_coordinate = __gen_unpack_sint(cl, 23, 26);
+ values->texel_offset_for_s_coordinate = __gen_unpack_sint(cl, 19, 22);
+ values->r_wrap_mode = __gen_unpack_uint(cl, 16, 18);
+ values->t_wrap_mode = __gen_unpack_uint(cl, 13, 15);
+ values->s_wrap_mode = __gen_unpack_uint(cl, 10, 12);
+ values->new_configuration_mode = __gen_unpack_uint(cl, 9, 9);
+ values->shadow = __gen_unpack_uint(cl, 8, 8);
+ values->coefficient_lookup_mode = __gen_unpack_uint(cl, 7, 7);
+ values->disable_autolod_use_bias_only = __gen_unpack_uint(cl, 6, 6);
+ values->bias_supplied = __gen_unpack_uint(cl, 5, 5);
+ values->gather_sample_mode = __gen_unpack_uint(cl, 4, 4);
+ values->fetch_sample_mode = __gen_unpack_uint(cl, 3, 3);
+ values->lookup_type = __gen_unpack_uint(cl, 0, 2);
+}
+#endif
+
+
+#define V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1_header\
+
+/* Unpacked form of texture uniform parameter 1 in CFG_MODE=1 (4 bytes packed); its _header macro above expands to nothing. */
+struct V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1 {
+ __gen_address_type texture_state_record_base_address; /* relocated by _pack; its offset fills all four bytes */
+ bool return_word_3_of_texture_data; /* bit 3 of byte 0, ORed over the address offset */
+ bool return_word_2_of_texture_data; /* bit 2 of byte 0 */
+ bool return_word_1_of_texture_data; /* bit 1 of byte 0 */
+ bool return_word_0_of_texture_data; /* bit 0 of byte 0 */
+};
+/* Serialise *values into cl[0]..cl[3], emitting one relocation for the texture state record address through data. */
+static inline void
+V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1 * restrict values)
+{
+ __gen_emit_reloc(data, &values->texture_state_record_base_address);
+ cl[ 0] = __gen_address_offset(&values->texture_state_record_base_address) | /* NOTE(review): flags occupy bits 0..3; presumably the offset is 16-byte aligned - confirm */
+ __gen_uint(values->return_word_3_of_texture_data, 3, 3) |
+ __gen_uint(values->return_word_2_of_texture_data, 2, 2) |
+ __gen_uint(values->return_word_1_of_texture_data, 1, 1) |
+ __gen_uint(values->return_word_0_of_texture_data, 0, 0);
+
+ cl[ 1] = __gen_address_offset(&values->texture_state_record_base_address) >> 8;
+
+ cl[ 2] = __gen_address_offset(&values->texture_state_record_base_address) >> 16;
+
+ cl[ 3] = __gen_address_offset(&values->texture_state_record_base_address) >> 24;
+
+}
+/* Decoder for the TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1 word: fills *values from cl using the __gen_unpack_* helpers (defined outside this chunk). */
+#define V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1_length 4 /* the pack function writes cl[0]..cl[3] */
+#ifdef __gen_unpack_address /* compiled only when the includer defines __gen_unpack_address */
+static inline void
+V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1_unpack(const uint8_t * restrict cl,
+ struct V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1 * restrict values)
+{
+ values->texture_state_record_base_address = __gen_unpack_address(cl, 0, 31); /* NOTE(review): range includes flag bits 0..3 read below; whether the helper masks them is not visible - confirm */
+ values->return_word_3_of_texture_data = __gen_unpack_uint(cl, 3, 3);
+ values->return_word_2_of_texture_data = __gen_unpack_uint(cl, 2, 2);
+ values->return_word_1_of_texture_data = __gen_unpack_uint(cl, 1, 1);
+ values->return_word_0_of_texture_data = __gen_unpack_uint(cl, 0, 0);
+}
+#endif
+
+/* Designated-initializer fragment for struct V3D33_TEXTURE_SHADER_STATE: the only member it presets is flip_etc_y, to 1. */
+#define V3D33_TEXTURE_SHADER_STATE_header \
+ .flip_etc_y = 1
+/* Unpacked view of the 32-byte (256-bit) TEXTURE_SHADER_STATE record. Bit positions below are those used by V3D33_TEXTURE_SHADER_STATE_unpack; bits 108, 244..247, 253 and 255 belong to no field. */
+struct V3D33_TEXTURE_SHADER_STATE {
+ bool level_0_is_strictly_uif; /* bit 254 */
+ bool level_0_xor_enable; /* bit 252 */
+ uint32_t level_0_ub_pad; /* bits 248..251 */
+ uint32_t base_level; /* bits 240..243 */
+ float fixed_bias; /* bits 224..239; stored through __gen_sfixed(..., 8) - last argument is presumably the fractional bit count, confirm */
+ float max_level_of_detail; /* bits 208..223, same __gen_sfixed encoding */
+ float min_level_of_detail; /* bits 192..207, same __gen_sfixed encoding */
+ uint32_t border_color_alpha; /* bits 176..191 */
+ uint32_t border_color_blue; /* bits 160..175 */
+ uint32_t border_color_green; /* bits 144..159 */
+ uint32_t border_color_red; /* bits 128..143 */
+ bool flip_s_and_t_on_incoming_request; /* bit 127 */
+ bool flip_etc_y; /* bit 126; preset to 1 by V3D33_TEXTURE_SHADER_STATE_header */
+ bool flip_texture_y_axis; /* bit 125 */
+ bool flip_texture_x_axis; /* bit 124 */
+ uint32_t swizzle_a; /* bits 121..123 */
+#define SWIZZLE_ZERO 0 /* SWIZZLE_* are plain macros, so they are not scoped to this struct; presumably the values for all four swizzle_* fields - confirm */
+#define SWIZZLE_ONE 1
+#define SWIZZLE_RED 2
+#define SWIZZLE_GREEN 3
+#define SWIZZLE_BLUE 4
+#define SWIZZLE_ALPHA 5
+ uint32_t swizzle_b; /* bits 118..120; straddles cl[14] and cl[15] */
+ uint32_t swizzle_g; /* bits 115..117 */
+ uint32_t swizzle_r; /* bits 112..114 */
+ enum V3D33_Compare_Function depth_compare_function; /* bits 109..111; enum is declared outside this chunk */
+ bool srgb; /* bit 107 */
+ uint32_t texture_type; /* bits 100..106 */
+ uint32_t image_depth; /* bits 86..99 */
+ uint32_t image_height; /* bits 72..85 */
+ uint32_t image_width; /* bits 58..71 */
+ uint32_t array_stride_64_byte_aligned; /* bits 32..57 */
+ __gen_address_type texture_base_pointer; /* bits 0..31; low byte shares cl[0] with the two filter fields */
+ uint32_t minification_filter; /* bits 1..3 */
+ uint32_t magnification_filter; /* bit 0 */
+};
+/* Encodes *values into the 32 bytes cl[0]..cl[31]. A field that crosses a byte boundary is emitted by repeating the same helper call with >> 8, >> 16, ...; storing into uint8_t keeps only the low 8 bits each time. */
+static inline void
+V3D33_TEXTURE_SHADER_STATE_pack(__gen_user_data *data, uint8_t * restrict cl,
+ const struct V3D33_TEXTURE_SHADER_STATE * restrict values)
+{
+ __gen_emit_reloc(data, &values->texture_base_pointer); /* the only use of data; the hook is supplied by the includer and not visible here */
+ cl[ 0] = __gen_address_offset(&values->texture_base_pointer) | /* NOTE(review): offset bits 0..3 overlap the filter fields; presumably the pointer is aligned so they are zero - confirm */
+ __gen_uint(values->minification_filter, 1, 3) |
+ __gen_uint(values->magnification_filter, 0, 0);
+
+ cl[ 1] = __gen_address_offset(&values->texture_base_pointer) >> 8;
+
+ cl[ 2] = __gen_address_offset(&values->texture_base_pointer) >> 16;
+
+ cl[ 3] = __gen_address_offset(&values->texture_base_pointer) >> 24;
+
+ cl[ 4] = __gen_uint(values->array_stride_64_byte_aligned, 0, 25); /* bit arguments here are relative to the byte where the field starts, not to the whole record */
+
+ cl[ 5] = __gen_uint(values->array_stride_64_byte_aligned, 0, 25) >> 8;
+
+ cl[ 6] = __gen_uint(values->array_stride_64_byte_aligned, 0, 25) >> 16;
+
+ cl[ 7] = __gen_uint(values->image_width, 2, 15) | /* >> binds tighter than |, so only the stride term below is shifted */
+ __gen_uint(values->array_stride_64_byte_aligned, 0, 25) >> 24;
+
+ cl[ 8] = __gen_uint(values->image_width, 2, 15) >> 8;
+
+ cl[ 9] = __gen_uint(values->image_height, 0, 13);
+
+ cl[10] = __gen_uint(values->image_depth, 6, 19) |
+ __gen_uint(values->image_height, 0, 13) >> 8;
+
+ cl[11] = __gen_uint(values->image_depth, 6, 19) >> 8;
+
+ cl[12] = __gen_uint(values->texture_type, 4, 10) |
+ __gen_uint(values->image_depth, 6, 19) >> 16;
+
+ cl[13] = __gen_uint(values->depth_compare_function, 5, 7) | /* bit 4 of this byte (record bit 108) is left clear */
+ __gen_uint(values->srgb, 3, 3) |
+ __gen_uint(values->texture_type, 4, 10) >> 8;
+
+ cl[14] = __gen_uint(values->swizzle_b, 6, 8) | /* swizzle_b's top bit spills into cl[15] */
+ __gen_uint(values->swizzle_g, 3, 5) |
+ __gen_uint(values->swizzle_r, 0, 2);
+
+ cl[15] = __gen_uint(values->flip_s_and_t_on_incoming_request, 7, 7) |
+ __gen_uint(values->flip_etc_y, 6, 6) |
+ __gen_uint(values->flip_texture_y_axis, 5, 5) |
+ __gen_uint(values->flip_texture_x_axis, 4, 4) |
+ __gen_uint(values->swizzle_a, 1, 3) |
+ __gen_uint(values->swizzle_b, 6, 8) >> 8;
+
+ cl[16] = __gen_uint(values->border_color_red, 0, 15); /* cl[16]..cl[23]: four 16-bit border colour components, low byte first */
+
+ cl[17] = __gen_uint(values->border_color_red, 0, 15) >> 8;
+
+ cl[18] = __gen_uint(values->border_color_green, 0, 15);
+
+ cl[19] = __gen_uint(values->border_color_green, 0, 15) >> 8;
+
+ cl[20] = __gen_uint(values->border_color_blue, 0, 15);
+
+ cl[21] = __gen_uint(values->border_color_blue, 0, 15) >> 8;
+
+ cl[22] = __gen_uint(values->border_color_alpha, 0, 15);
+
+ cl[23] = __gen_uint(values->border_color_alpha, 0, 15) >> 8;
+
+ cl[24] = __gen_sfixed(values->min_level_of_detail, 0, 15, 8); /* cl[24]..cl[29]: the three float members go through __gen_sfixed; the final 8 is presumably the fractional bit count - confirm */
+
+ cl[25] = __gen_sfixed(values->min_level_of_detail, 0, 15, 8) >> 8;
+
+ cl[26] = __gen_sfixed(values->max_level_of_detail, 0, 15, 8);
+
+ cl[27] = __gen_sfixed(values->max_level_of_detail, 0, 15, 8) >> 8;
+
+ cl[28] = __gen_sfixed(values->fixed_bias, 0, 15, 8);
+
+ cl[29] = __gen_sfixed(values->fixed_bias, 0, 15, 8) >> 8;
+
+ cl[30] = __gen_uint(values->base_level, 0, 3); /* upper four bits of this byte stay clear */
+
+ cl[31] = __gen_uint(values->level_0_is_strictly_uif, 6, 6) | /* bits 5 and 7 of this byte stay clear */
+ __gen_uint(values->level_0_xor_enable, 4, 4) |
+ __gen_uint(values->level_0_ub_pad, 0, 3);
+
+}
+/* Decoder for the TEXTURE_SHADER_STATE record: fills every member of *values from cl using the __gen_unpack_* helpers (defined outside this chunk). */
+#define V3D33_TEXTURE_SHADER_STATE_length 32 /* the pack function writes cl[0]..cl[31] */
+#ifdef __gen_unpack_address /* compiled only when the includer defines __gen_unpack_address */
+static inline void
+V3D33_TEXTURE_SHADER_STATE_unpack(const uint8_t * restrict cl,
+ struct V3D33_TEXTURE_SHADER_STATE * restrict values)
+{ /* number pairs are positions within the whole 256-bit record, presumably inclusive first/last bit - confirm in the helper */
+ values->level_0_is_strictly_uif = __gen_unpack_uint(cl, 254, 254);
+ values->level_0_xor_enable = __gen_unpack_uint(cl, 252, 252);
+ values->level_0_ub_pad = __gen_unpack_uint(cl, 248, 251);
+ values->base_level = __gen_unpack_uint(cl, 240, 243);
+ values->fixed_bias = __gen_unpack_sfixed(cl, 224, 239, 8); /* the trailing 8 matches the last argument of __gen_sfixed in the pack function */
+ values->max_level_of_detail = __gen_unpack_sfixed(cl, 208, 223, 8);
+ values->min_level_of_detail = __gen_unpack_sfixed(cl, 192, 207, 8);
+ values->border_color_alpha = __gen_unpack_uint(cl, 176, 191);
+ values->border_color_blue = __gen_unpack_uint(cl, 160, 175);
+ values->border_color_green = __gen_unpack_uint(cl, 144, 159);
+ values->border_color_red = __gen_unpack_uint(cl, 128, 143);
+ values->flip_s_and_t_on_incoming_request = __gen_unpack_uint(cl, 127, 127);
+ values->flip_etc_y = __gen_unpack_uint(cl, 126, 126);
+ values->flip_texture_y_axis = __gen_unpack_uint(cl, 125, 125);
+ values->flip_texture_x_axis = __gen_unpack_uint(cl, 124, 124);
+ values->swizzle_a = __gen_unpack_uint(cl, 121, 123);
+ values->swizzle_b = __gen_unpack_uint(cl, 118, 120);
+ values->swizzle_g = __gen_unpack_uint(cl, 115, 117);
+ values->swizzle_r = __gen_unpack_uint(cl, 112, 114);
+ values->depth_compare_function = __gen_unpack_uint(cl, 109, 111); /* bit 108 is skipped: no field reads it */
+ values->srgb = __gen_unpack_uint(cl, 107, 107);
+ values->texture_type = __gen_unpack_uint(cl, 100, 106);
+ values->image_depth = __gen_unpack_uint(cl, 86, 99);
+ values->image_height = __gen_unpack_uint(cl, 72, 85);
+ values->image_width = __gen_unpack_uint(cl, 58, 71);
+ values->array_stride_64_byte_aligned = __gen_unpack_uint(cl, 32, 57);
+ values->texture_base_pointer = __gen_unpack_address(cl, 0, 31); /* NOTE(review): range includes the filter bits 0..3 read below; whether the helper masks them is not visible - confirm */
+ values->minification_filter = __gen_unpack_uint(cl, 1, 3);
+ values->magnification_filter = __gen_unpack_uint(cl, 0, 0);
+}
+#endif
+
+/* Numeric codes for texture data formats. The numbering is not contiguous (41..47 and 51..63 are unassigned). Presumably the values for V3D33_TEXTURE_SHADER_STATE.texture_type - no use is visible in this chunk, confirm. */
+enum V3D33_Texture_Data_Formats {
+ TEXTURE_DATA_FORMAT_R8 = 0,
+ TEXTURE_DATA_FORMAT_R8_SNORM = 1,
+ TEXTURE_DATA_FORMAT_RG8 = 2,
+ TEXTURE_DATA_FORMAT_RG8_SNORM = 3,
+ TEXTURE_DATA_FORMAT_RGBA8 = 4,
+ TEXTURE_DATA_FORMAT_RGBA8_SNORM = 5,
+ TEXTURE_DATA_FORMAT_RGB565 = 6,
+ TEXTURE_DATA_FORMAT_RGBA4 = 7,
+ TEXTURE_DATA_FORMAT_RGB5_A1 = 8,
+ TEXTURE_DATA_FORMAT_RGB10_A2 = 9,
+ TEXTURE_DATA_FORMAT_R16 = 10,
+ TEXTURE_DATA_FORMAT_R16_SNORM = 11,
+ TEXTURE_DATA_FORMAT_RG16 = 12,
+ TEXTURE_DATA_FORMAT_RG16_SNORM = 13,
+ TEXTURE_DATA_FORMAT_RGBA16 = 14,
+ TEXTURE_DATA_FORMAT_RGBA16_SNORM = 15,
+ TEXTURE_DATA_FORMAT_R16F = 16,
+ TEXTURE_DATA_FORMAT_RG16F = 17,
+ TEXTURE_DATA_FORMAT_RGBA16F = 18,
+ TEXTURE_DATA_FORMAT_R11F_G11F_B10F = 19,
+ TEXTURE_DATA_FORMAT_RGB9_E5 = 20,
+ TEXTURE_DATA_FORMAT_DEPTH_COMP16 = 21,
+ TEXTURE_DATA_FORMAT_DEPTH_COMP24 = 22,
+ TEXTURE_DATA_FORMAT_DEPTH_COMP32F = 23,
+ TEXTURE_DATA_FORMAT_DEPTH24_X8 = 24,
+ TEXTURE_DATA_FORMAT_R4 = 25,
+ TEXTURE_DATA_FORMAT_R1 = 26,
+ TEXTURE_DATA_FORMAT_S8 = 27,
+ TEXTURE_DATA_FORMAT_S16 = 28,
+ TEXTURE_DATA_FORMAT_R32F = 29,
+ TEXTURE_DATA_FORMAT_RG32F = 30,
+ TEXTURE_DATA_FORMAT_RGBA32F = 31,
+ TEXTURE_DATA_FORMAT_RGB8_ETC2 = 32,
+ TEXTURE_DATA_FORMAT_RGB8_PUNCHTHROUGH_ALPHA1 = 33,
+ TEXTURE_DATA_FORMAT_R11_EAC = 34,
+ TEXTURE_DATA_FORMAT_SIGNED_R11_EAC = 35,
+ TEXTURE_DATA_FORMAT_RG11_EAC = 36,
+ TEXTURE_DATA_FORMAT_SIGNED_RG11_EAC = 37,
+ TEXTURE_DATA_FORMAT_RGBA8_ETC2_EAC = 38,
+ TEXTURE_DATA_FORMAT_YCBCR_LUMA = 39,
+ TEXTURE_DATA_FORMAT_YCBCR_420_CHROMA = 40,
+ TEXTURE_DATA_FORMAT_BC1 = 48, /* numbering jumps from 40 to 48 */
+ TEXTURE_DATA_FORMAT_BC2 = 49,
+ TEXTURE_DATA_FORMAT_BC3 = 50,
+ TEXTURE_DATA_FORMAT_ASTC_4X4 = 64, /* numbering jumps from 50 to 64 */
+ TEXTURE_DATA_FORMAT_ASTC_5X4 = 65,
+ TEXTURE_DATA_FORMAT_ASTC_5X5 = 66,
+ TEXTURE_DATA_FORMAT_ASTC_6X5 = 67,
+ TEXTURE_DATA_FORMAT_ASTC_6X6 = 68,
+ TEXTURE_DATA_FORMAT_ASTC_8X5 = 69,
+ TEXTURE_DATA_FORMAT_ASTC_8X6 = 70,
+ TEXTURE_DATA_FORMAT_ASTC_8X8 = 71,
+ TEXTURE_DATA_FORMAT_ASTC_10X5 = 72,
+ TEXTURE_DATA_FORMAT_ASTC_10X6 = 73,
+ TEXTURE_DATA_FORMAT_ASTC_10X8 = 74,
+ TEXTURE_DATA_FORMAT_ASTC_10X10 = 75,
+ TEXTURE_DATA_FORMAT_ASTC_12X10 = 76,
+ TEXTURE_DATA_FORMAT_ASTC_12X12 = 77,
+};
+
+#endif /* V3D33_PACK_H */
diff -Nru mesa-17.2.4/src/broadcom/cle/v3d_packet_v33.xml mesa-17.3.3/src/broadcom/cle/v3d_packet_v33.xml
--- mesa-17.2.4/src/broadcom/cle/v3d_packet_v33.xml 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/broadcom/cle/v3d_packet_v33.xml 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,910 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -Nru mesa-17.2.4/src/broadcom/cle/v3d_xml.h mesa-17.3.3/src/broadcom/cle/v3d_xml.h
--- mesa-17.2.4/src/broadcom/cle/v3d_xml.h 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/broadcom/cle/v3d_xml.h 2018-01-18 21:31:08.000000000 +0000
@@ -0,0 +1,719 @@
+static const struct { /* index of the packet-description XML files embedded in this header, one row per file */
+ uint32_t gen_10; /* presumably the V3D version times ten (33 would match v3d_packet_v33.xml) - confirm */
+ uint32_t offset; /* presumably a byte offset into the data carried by compress_genxmls, likely after decompression since that array starts with the zlib header bytes 0x78 0x9c - confirm against the reader */
+ uint32_t length; /* presumably the byte length of that file's data - confirm */
+} genxml_files_table[] = {
+ { 21, 0, 15431 },
+ { 33, 15431, 42910 }, /* offset equals the previous row's length, so the two ranges are back to back */
+};
+
+static const uint8_t compress_genxmls[] = {
+ 0x78, 0x9c, 0xed, 0x5d, 0x5b, 0x73, 0xe3, 0xb6, 0x92, 0x7e, 0xcf, 0xaf,
+ 0xc0, 0xce, 0xcb, 0xf1, 0xec, 0xae, 0x62, 0x91, 0xba, 0x58, 0xae, 0x4a,
+ 0xb2, 0xa5, 0x9b, 0x3d, 0xae, 0x23, 0x5b, 0x5a, 0x49, 0xf6, 0xcc, 0xf8,
+ 0x45, 0x45, 0x49, 0x94, 0xcd, 0x8a, 0x44, 0xea, 0x90, 0x94, 0x2f, 0xf9,
+ 0xf5, 0x8b, 0x0b, 0x49, 0x00, 0x64, 0x83, 0x00, 0x35, 0x9e, 0x49, 0xb2,
+ 0xc7, 0xa7, 0x4e, 0x25, 0x8e, 0xdd, 0x68, 0x00, 0xdd, 0x8d, 0x46, 0xa3,
+ 0xd1, 0xf8, 0xf8, 0xcb, 0xd3, 0xea, 0x65, 0xb7, 0x45, 0x0f, 0xae, 0xff,
+ 0xeb, 0x07, 0xfb, 0x67, 0xeb, 0xc3, 0x6f, 0x3f, 0x21, 0xf4, 0xcb, 0xde,
+ 0x59, 0xfd, 0xee, 0xc6, 0xc8, 0x77, 0x76, 0xee, 0xaf, 0x1f, 0x3e, 0x39,
+ 0xdb, 0xf8, 0x03, 0x5a, 0x05, 0x6b, 0xfc, 0x73, 0xfd, 0xc3, 0x69, 0x91,
+ 0xe0, 0x66, 0x3c, 0x49, 0xff, 0x6e, 0x41, 0x7f, 0xbf, 0xd8, 0x1e, 0xa2,
+ 0xc7, 0x94, 0xa2, 0x89, 0x7f, 0xd8, 0xfe, 0xfa, 0xa1, 0xa7, 0x24, 0x44,
+ 0xdd, 0xed, 0x16, 0xcd, 0x62, 0x27, 0x76, 0xd3, 0x26, 0xad, 0x92, 0x26,
+ 0x98, 0x30, 0x8c, 0xd1, 0xdc, 0xdb, 0xba, 0xa8, 0xe7, 0xf9, 0xbe, 0xe7,
+ 0x3f, 0xa4, 0xad, 0xda, 0x25, 0xad, 0xae, 0xfc, 0x55, 0xe8, 0xee, 0x5c,
+ 0x3f, 0x46, 0x33, 0x77, 0xe7, 0xec, 0x1f, 0x83, 0x30, 0xeb, 0xec, 0x0c,
+ 0xa2, 0xff, 0xec, 0x78, 0x31, 0x0a, 0xfc, 0x22, 0x75, 0x07, 0xa2, 0xee,
+ 0x85, 0x8e, 0xbf, 0xca, 0x26, 0x6c, 0xb5, 0xa9, 0x4c, 0x31, 0xd1, 0xc6,
+ 0x73, 0xb7, 0xeb, 0x84, 0xa6, 0xbb, 0x5e, 0x87, 0x6e, 0x14, 0x7d, 0x40,
+ 0x91, 0xf7, 0x07, 0xfe, 0xcf, 0x86, 0x8d, 0x7f, 0x22, 0x53, 0x21, 0x22,
+ 0x46, 0xf1, 0xeb, 0x1e, 0xff, 0xce, 0x49, 0x48, 0x58, 0x0f, 0xa7, 0xac,
+ 0x0b, 0x55, 0x6f, 0x28, 0x0e, 0x50, 0x74, 0x58, 0xd6, 0xb6, 0x5e, 0x94,
+ 0x29, 0xcb, 0x3a, 0xfb, 0xbe, 0x3d, 0x4f, 0xdd, 0xf8, 0x10, 0xfa, 0x68,
+ 0x13, 0x06, 0xbb, 0x62, 0xdf, 0x54, 0x32, 0x80, 0xba, 0xb0, 0xf0, 0xd0,
+ 0xf5, 0x61, 0x1b, 0x7b, 0xb5, 0xc8, 0xd9, 0xed, 0xb1, 0xda, 0xa6, 0x6e,
+ 0x14, 0x6c, 0x9f, 0xdc, 0x35, 0x53, 0x62, 0x3f, 0xd8, 0x06, 0x21, 0xea,
+ 0x1d, 0x36, 0x1b, 0x37, 0x4c, 0x79, 0xd9, 0x89, 0xcd, 0x4c, 0x61, 0x03,
+ 0xa8, 0xc0, 0x11, 0x39, 0xfe, 0x1a, 0x0d, 0xc7, 0x17, 0x19, 0xe7, 0x96,
+ 0xc0, 0x59, 0xc1, 0xfa, 0xe2, 0x80, 0xed, 0x91, 0xb2, 0x3c, 0xc4, 0x1e,
+ 0xb6, 0x01, 0x66, 0x6b, 0xe9, 0x00, 0x69, 0xe3, 0x94, 0x9b, 0x89, 0xa6,
+ 0xed, 0x4e, 0x26, 0xef, 0x26, 0x28, 0xef, 0x5c, 0xf3, 0x91, 0x13, 0x31,
+ 0xfb, 0x4e, 0x19, 0x58, 0x59, 0xfb, 0x46, 0xda, 0x7e, 0x19, 0x04, 0x5b,
+ 0xb0, 0xf1, 0xc0, 0x8b, 0x9c, 0x25, 0x91, 0xc1, 0xd6, 0x75, 0x42, 0x62,
+ 0xc0, 0x9f, 0x43, 0x2f, 0x06, 0x38, 0xd9, 0xc6, 0x9c, 0xee, 0x4f, 0x67,
+ 0xb1, 0xeb, 0xaf, 0xbc, 0x6d, 0x2a, 0xd1, 0x67, 0x98, 0xa3, 0x65, 0x3e,
+ 0x36, 0x51, 0x3f, 0x0a, 0x6e, 0xf5, 0x22, 0x37, 0x6e, 0x96, 0x80, 0x5d,
+ 0xd6, 0xb6, 0x81, 0xb3, 0xae, 0xa2, 0x39, 0x93, 0x95, 0x52, 0x55, 0x73,
+ 0x4a, 0x91, 0x85, 0xae, 0xb3, 0x7e, 0x33, 0x89, 0xc1, 0xcc, 0xaa, 0x09,
+ 0x8c, 0x19, 0xba, 0x20, 0x1e, 0x74, 0xe9, 0xfa, 0x6e, 0xe8, 0x6c, 0x33,
+ 0xf9, 0x74, 0x52, 0x81, 0x01, 0xc3, 0xba, 0x76, 0x77, 0x41, 0xf8, 0x8a,
+ 0x96, 0x4e, 0xe4, 0xa2, 0x44, 0x1e, 0x28, 0xd8, 0x60, 0xb7, 0x80, 0xff,
+ 0x7a, 0x1a, 0x13, 0xa6, 0xeb, 0xc3, 0x6e, 0x8f, 0x96, 0x89, 0xe0, 0x0b,
+ 0xc2, 0xb4, 0x61, 0xbf, 0xa3, 0x5a, 0x07, 0x84, 0xf7, 0x05, 0xe1, 0x0d,
+ 0x88, 0xf0, 0xdc, 0x58, 0x86, 0x77, 0x97, 0xb5, 0x6b, 0x27, 0xfa, 0x3d,
+ 0x19, 0x15, 0x1d, 0x21, 0xc0, 0xaf, 0x73, 0xc4, 0xba, 0x28, 0xe7, 0x78,
+ 0x56, 0x51, 0xcb, 0xe5, 0xdc, 0xda, 0xc7, 0xce, 0x77, 0x95, 0x3a, 0x84,
+ 0x88, 0xa8, 0xfe, 0x54, 0xc1, 0xbe, 0x75, 0xfc, 0xf4, 0xcd, 0x3a, 0x68,
+ 0x1e, 0x27, 0x0d, 0x33, 0xe6, 0x05, 0xf7, 0x58, 0xe4, 0x3e, 0xf1, 0x5e,
+ 0xdc, 0x6d, 0xc2, 0xfb, 0x22, 0x08, 0x77, 0x4e, 0x9c, 0xd9, 0x67, 0xc6,
+ 0x27, 0xb3, 0x81, 0x83, 0xe7, 0xc7, 0x89, 0xfd, 0x63, 0x36, 0x4f, 0xce,
+ 0xf6, 0xe0, 0x26, 0x6c, 0xc2, 0x87, 0xa5, 0xd3, 0xc1, 0xff, 0xfb, 0x80,
+ 0xe8, 0x6f, 0xb3, 0x30, 0xa9, 0x40, 0xb8, 0x7c, 0x08, 0x5b, 0xed, 0x16,
+ 0x5a, 0x7b, 0xf1, 0xa3, 0x1b, 0xba, 0xeb, 0x8c, 0xde, 0x2a, 0xa7, 0xf7,
+ 0x83, 0xa4, 0x49, 0xd6, 0xc0, 0xce, 0x84, 0x75, 0x4a, 0xe7, 0x03, 0x4d,
+ 0xee, 0x1a, 0x2f, 0xdd, 0xe2, 0x74, 0xda, 0xfa, 0xe9, 0xcc, 0xd8, 0x36,
+ 0x5a, 0xd7, 0x4e, 0x67, 0xe0, 0xae, 0x3c, 0x2c, 0x33, 0x17, 0xbd, 0x34,
+ 0xb5, 0x53, 0xe1, 0xb4, 0xc4, 0x6a, 0x2b, 0x4c, 0x43, 0xa5, 0x97, 0xa6,
+ 0x7e, 0x22, 0x53, 0xec, 0x33, 0x04, 0xa1, 0xa9, 0xa6, 0x31, 0xd7, 0x0e,
+ 0x7e, 0x34, 0xaf, 0x34, 0xe4, 0xc4, 0x89, 0xe2, 0x90, 0x8c, 0x7a, 0xd6,
+ 0x2c, 0xd4, 0x2a, 0xba, 0x67, 0xf5, 0xd8, 0x6f, 0x02, 0xdf, 0xd5, 0x8e,
+ 0x9c, 0x5a, 0xae, 0x76, 0xf4, 0xf7, 0xa7, 0x11, 0x5b, 0x9c, 0xc5, 0x49,
+ 0xe4, 0x29, 0x33, 0x8a, 0x86, 0x82, 0x22, 0xf1, 0x23, 0x19, 0x5d, 0xb3,
+ 0x20, 0x8e, 0xd2, 0x9d, 0x66, 0x44, 0xf6, 0xe5, 0xb2, 0x8d, 0xe6, 0xfc,
+ 0x4f, 0xdf, 0x68, 0x14, 0x1e, 0x93, 0x44, 0x14, 0x6f, 0xbb, 0x43, 0x28,
+ 0x38, 0x1e, 0xb9, 0x43, 0x28, 0xb8, 0x15, 0x76, 0x88, 0x77, 0x27, 0xf8,
+ 0xee, 0x04, 0xdf, 0x9d, 0xe0, 0x9f, 0xea, 0x04, 0xaf, 0xfc, 0xb5, 0xfb,
+ 0x82, 0x8f, 0xa7, 0x93, 0xd0, 0xdb, 0x79, 0xb1, 0xf7, 0xe4, 0xa2, 0x91,
+ 0x70, 0x72, 0xc6, 0x07, 0x72, 0xc8, 0xf7, 0x39, 0x2f, 0xde, 0xee, 0xb0,
+ 0x43, 0xb4, 0x2d, 0x70, 0x76, 0x3f, 0xb3, 0x25, 0x69, 0x42, 0x6e, 0xa3,
+ 0xcb, 0x5d, 0x26, 0xe6, 0xe2, 0xad, 0xdc, 0x28, 0xe9, 0xb7, 0xc0, 0xac,
+ 0x59, 0xd7, 0x32, 0x1b, 0xb9, 0xfe, 0x43, 0xfc, 0x08, 0xb4, 0xed, 0x68,
+ 0x9b, 0xd2, 0x29, 0x50, 0xa2, 0xb4, 0x79, 0xb3, 0x8a, 0x3d, 0x77, 0x6a,
+ 0x4b, 0x2f, 0xd6, 0x1a, 0x85, 0xd5, 0x96, 0xc8, 0x2c, 0x40, 0x43, 0x79,
+ 0x37, 0x98, 0x69, 0x63, 0x27, 0xf8, 0x8c, 0x66, 0x15, 0x73, 0xdd, 0x07,
+ 0xf8, 0x4f, 0x91, 0x76, 0x6c, 0x5b, 0xcf, 0x77, 0x23, 0xad, 0xc1, 0x12,
+ 0x2a, 0xec, 0xd5, 0x83, 0xbd, 0xd6, 0x60, 0x29, 0x65, 0x14, 0x87, 0xde,
+ 0x5e, 0x6b, 0xb9, 0x98, 0xc8, 0xf1, 0x1f, 0xb6, 0x42, 0xf7, 0x4d, 0x1d,
+ 0x65, 0x8e, 0x73, 0x4b, 0x4b, 0xbf, 0x71, 0xfc, 0x8c, 0xba, 0x5d, 0x6d,
+ 0x65, 0xdc, 0xb9, 0x61, 0x8c, 0x6d, 0xa3, 0x1b, 0x86, 0xce, 0x2b, 0x5f,
+ 0x1e, 0x51, 0xb6, 0x34, 0x1a, 0xd0, 0xd2, 0x60, 0xf6, 0x44, 0x4e, 0x85,
+ 0x5e, 0x88, 0xcf, 0x88, 0x8c, 0xc7, 0x0f, 0xb7, 0xea, 0x77, 0xf3, 0xf9,
+ 0xf3, 0xcd, 0x47, 0x76, 0xa8, 0x59, 0x30, 0x23, 0xa5, 0x7a, 0x5a, 0x60,
+ 0x92, 0x6e, 0xe0, 0xc4, 0x0e, 0x9a, 0x1f, 0xeb, 0x92, 0x98, 0xb3, 0x41,
+ 0x1e, 0xf3, 0xcd, 0x1a, 0xc5, 0x34, 0x6c, 0x4a, 0xfc, 0x72, 0xfa, 0x5a,
+ 0x14, 0x77, 0xc9, 0x76, 0xca, 0xe7, 0x06, 0x0f, 0xd3, 0xc0, 0xbe, 0x26,
+ 0xd4, 0xbe, 0x12, 0xa7, 0xaf, 0x31, 0xb2, 0x11, 0x31, 0x32, 0x99, 0x54,
+ 0x35, 0xa1, 0x79, 0xa6, 0x3a, 0x89, 0x5c, 0x65, 0x6e, 0xd3, 0x4f, 0x5f,
+ 0x65, 0xc2, 0xe2, 0xec, 0x4b, 0x95, 0x7c, 0x39, 0x42, 0xb3, 0x47, 0x67,
+ 0x8d, 0x43, 0x0b, 0xe9, 0x92, 0xa0, 0xdd, 0x4c, 0xf5, 0xfa, 0x1f, 0xb5,
+ 0x1a, 0x9a, 0x3f, 0xf2, 0x23, 0x02, 0x93, 0xe2, 0xb3, 0xb7, 0xc5, 0x91,
+ 0xb7, 0x8b, 0xff, 0x6b, 0xbb, 0xc5, 0x7b, 0xaf, 0xe7, 0xa3, 0xe5, 0x2b,
+ 0xfa, 0xdd, 0x0d, 0x7d, 0x1c, 0xfb, 0xe2, 0x81, 0x78, 0x6b, 0x87, 0xe6,
+ 0x08, 0x09, 0xb3, 0x9f, 0x51, 0xad, 0x56, 0x29, 0x1d, 0xa8, 0x77, 0x2c,
+ 0xc3, 0x17, 0x1c, 0x7c, 0xac, 0x71, 0xbf, 0x11, 0x1b, 0x7a, 0xe8, 0xae,
+ 0x82, 0x10, 0x88, 0xd7, 0xf5, 0x29, 0xdd, 0x9b, 0xc3, 0x6e, 0x89, 0x19,
+ 0x60, 0x77, 0xe7, 0xc4, 0x78, 0xd1, 0x2c, 0x0f, 0x38, 0x9a, 0x74, 0x88,
+ 0xbf, 0x8c, 0x74, 0x11, 0x96, 0x2e, 0x0b, 0xc8, 0x92, 0xc4, 0x34, 0x9a,
+ 0x8a, 0x72, 0x8b, 0xc6, 0xb2, 0x9a, 0xd0, 0xaa, 0x61, 0x2d, 0x66, 0x69,
+ 0x5c, 0xc5, 0xba, 0xe7, 0x62, 0x39, 0x6f, 0x6b, 0xe5, 0xc2, 0x38, 0xdc,
+ 0x5d, 0x22, 0x16, 0x4f, 0xe5, 0x39, 0x74, 0xf4, 0x7e, 0x97, 0x71, 0xb8,
+ 0x9f, 0x65, 0x6a, 0xe1, 0x4b, 0xa2, 0xdd, 0x34, 0x6c, 0x9d, 0x44, 0x90,
+ 0x8c, 0x41, 0x5b, 0xb5, 0xa6, 0xb4, 0xf2, 0x0b, 0xfc, 0x8d, 0xf7, 0x70,
+ 0x08, 0x99, 0x21, 0xf5, 0xbc, 0x38, 0xdb, 0xb7, 0xce, 0x41, 0x9f, 0x33,
+ 0x74, 0xc2, 0xed, 0x2b, 0xba, 0x47, 0x87, 0x3d, 0xb6, 0x3d, 0xbc, 0x7a,
+ 0x5c, 0x9f, 0x9c, 0xea, 0x8e, 0x3a, 0x11, 0xa6, 0xac, 0x94, 0x2c, 0xf4,
+ 0x89, 0x42, 0x83, 0x71, 0x18, 0xa4, 0x03, 0xdd, 0x7d, 0xfc, 0x58, 0x9b,
+ 0xbb, 0xc4, 0xf9, 0x1e, 0xfc, 0x15, 0x11, 0x44, 0xd1, 0x2a, 0x2d, 0x38,
+ 0x54, 0x25, 0xcb, 0x16, 0x2f, 0x59, 0xe6, 0x15, 0x22, 0x78, 0x09, 0xf6,
+ 0x83, 0x27, 0x37, 0x74, 0x1e, 0xc8, 0x15, 0x8f, 0xb3, 0x46, 0xe2, 0xd9,
+ 0x4e, 0x18, 0xa6, 0xf5, 0xed, 0xec, 0x27, 0xde, 0xde, 0x45, 0x33, 0x77,
+ 0xeb, 0xae, 0xe2, 0x62, 0x07, 0xfa, 0x23, 0x3f, 0x3b, 0x72, 0x79, 0x11,
+ 0x5e, 0xa7, 0x63, 0xcc, 0x31, 0xb9, 0x96, 0xd2, 0x1c, 0x45, 0x25, 0x6e,
+ 0xd5, 0x85, 0x21, 0xee, 0x0a, 0x7c, 0xac, 0xad, 0xa3, 0x65, 0xd1, 0xf5,
+ 0x63, 0x0f, 0xbb, 0x45, 0x27, 0x22, 0x07, 0x15, 0xb6, 0x71, 0x90, 0xcb,
+ 0xb3, 0x11, 0x0b, 0x3e, 0xf2, 0xfd, 0xe8, 0x13, 0xb9, 0x43, 0x6a, 0x55,
+ 0x88, 0x5a, 0x08, 0x1a, 0x6f, 0x36, 0x91, 0x0b, 0x88, 0x56, 0xef, 0xfe,
+ 0xfa, 0xdb, 0x60, 0xf5, 0xfb, 0x33, 0x16, 0xad, 0x14, 0x1e, 0x56, 0xbf,
+ 0xcf, 0x4a, 0x86, 0x33, 0x75, 0x89, 0x7e, 0x5c, 0x74, 0xe1, 0xac, 0x3c,
+ 0xff, 0x81, 0xf3, 0x3c, 0xe6, 0x7a, 0x26, 0x61, 0x89, 0xa3, 0x8e, 0x67,
+ 0x27, 0x5c, 0x1b, 0xb0, 0xac, 0x76, 0x49, 0x73, 0xb1, 0x75, 0x62, 0xb6,
+ 0xf3, 0x21, 0xfc, 0xe3, 0x03, 0x77, 0x2e, 0xe0, 0xdd, 0x15, 0xa1, 0xae,
+ 0x91, 0xcd, 0x86, 0x8c, 0x21, 0xa1, 0x57, 0x5f, 0xf8, 0x1a, 0xf9, 0x37,
+ 0x6a, 0x05, 0x94, 0x49, 0xd6, 0x75, 0x07, 0xea, 0x9a, 0xd1, 0xcd, 0x28,
+ 0x9d, 0xba, 0xcb, 0xcd, 0x36, 0x70, 0xf4, 0x7d, 0x12, 0x73, 0xc3, 0x5b,
+ 0xf7, 0x3a, 0xce, 0xae, 0xd3, 0xcf, 0xcf, 0xa1, 0x3e, 0x45, 0xba, 0x6f,
+ 0xed, 0x73, 0xfa, 0x69, 0x8e, 0xbe, 0xa0, 0x65, 0x70, 0xf0, 0xd7, 0x4e,
+ 0xf8, 0x9a, 0x6d, 0x81, 0xf5, 0x3a, 0xd4, 0x31, 0x21, 0xde, 0x67, 0xa1,
+ 0x99, 0xd8, 0x2c, 0xd1, 0x76, 0xbb, 0x38, 0x0c, 0x13, 0x61, 0xcb, 0xeb,
+ 0x24, 0x1d, 0x82, 0x25, 0xc6, 0x38, 0xf1, 0xa3, 0x1b, 0xb9, 0x2c, 0xba,
+ 0xc1, 0x6b, 0x33, 0x74, 0x71, 0xdf, 0xb8, 0x09, 0x9d, 0x63, 0xcd, 0xaa,
+ 0x75, 0x6a, 0x67, 0xd8, 0x93, 0x93, 0x86, 0x6b, 0x74, 0x12, 0x07, 0x7b,
+ 0x64, 0xb5, 0x11, 0x8e, 0x3b, 0xe9, 0xf1, 0xdf, 0x61, 0x54, 0x0d, 0xfb,
+ 0x23, 0xbc, 0xf6, 0xc5, 0xce, 0xd1, 0xad, 0x4f, 0x77, 0xb2, 0xc2, 0x74,
+ 0x2c, 0xfd, 0xee, 0x2e, 0xf1, 0xc1, 0xeb, 0x21, 0xe6, 0xbb, 0x2c, 0x24,
+ 0x17, 0xc3, 0x28, 0xc5, 0xdb, 0xa3, 0xcf, 0x38, 0xd4, 0x0e, 0x9e, 0xb9,
+ 0x5c, 0xc0, 0x94, 0x89, 0x40, 0x89, 0x3e, 0xb9, 0xde, 0xc3, 0x23, 0x89,
+ 0xd0, 0xd1, 0x9e, 0x64, 0x3b, 0xa1, 0xf9, 0x34, 0x4d, 0x62, 0x0d, 0xce,
+ 0xf1, 0x33, 0x31, 0xb6, 0x52, 0x86, 0x0d, 0x7d, 0x4a, 0x46, 0x64, 0xd8,
+ 0x0b, 0xe2, 0x38, 0xd8, 0xa1, 0x34, 0x1b, 0x8b, 0xc3, 0x43, 0xcf, 0x77,
+ 0x84, 0xcb, 0xf0, 0x4a, 0x92, 0x17, 0x19, 0x8f, 0xdc, 0x4d, 0x6c, 0xc2,
+ 0xb6, 0x9a, 0x1e, 0xee, 0x3c, 0xf7, 0x79, 0x1f, 0x84, 0x71, 0xc1, 0x46,
+ 0xc1, 0x33, 0x7a, 0x46, 0xdd, 0x77, 0xfd, 0x18, 0x9b, 0xea, 0xd7, 0xda,
+ 0xca, 0x6c, 0x86, 0x91, 0x65, 0xff, 0xdc, 0x04, 0xa7, 0x98, 0x67, 0xf9,
+ 0xa5, 0x9c, 0x65, 0x1d, 0xe0, 0x58, 0x32, 0xbd, 0x7b, 0xb4, 0xc3, 0xba,
+ 0x25, 0x3b, 0xde, 0xce, 0x79, 0xc1, 0xc1, 0xb0, 0xb7, 0xdf, 0x13, 0x4f,
+ 0xba, 0xdf, 0x3a, 0x3e, 0xcf, 0x47, 0x58, 0x75, 0x30, 0x2c, 0x4e, 0x73,
+ 0x75, 0xf7, 0xcf, 0x80, 0x33, 0xe2, 0x56, 0x21, 0x78, 0xa3, 0x3c, 0x03,
+ 0xcf, 0x57, 0x33, 0xa8, 0xe8, 0xcd, 0x88, 0x25, 0xec, 0x71, 0x20, 0xf2,
+ 0xe5, 0x2b, 0x9a, 0xad, 0xf0, 0x86, 0xce, 0x0b, 0xa4, 0xac, 0x7a, 0x56,
+ 0x58, 0x55, 0x26, 0xde, 0x4f, 0xce, 0x76, 0x53, 0xe3, 0xeb, 0xc7, 0x3a,
+ 0xb5, 0xda, 0xd8, 0xf0, 0xb1, 0x17, 0xa1, 0x86, 0x7f, 0xe4, 0x0c, 0x65,
+ 0xee, 0xd9, 0x5a, 0xd2, 0x33, 0x3f, 0x72, 0xf6, 0xf7, 0x74, 0xf2, 0x2e,
+ 0x55, 0x68, 0xde, 0x60, 0xdb, 0x26, 0x52, 0xb8, 0x4f, 0xdd, 0xd8, 0xc9,
+ 0xfd, 0x8a, 0xe4, 0xb4, 0xef, 0xa3, 0x8f, 0xdf, 0x3a, 0xf5, 0x74, 0x4c,
+ 0xa5, 0x1c, 0x2b, 0xce, 0x57, 0x2c, 0x84, 0xa3, 0x01, 0x27, 0x92, 0x4e,
+ 0x25, 0xfc, 0x3c, 0x67, 0x97, 0x4c, 0x7a, 0x10, 0x1c, 0x70, 0x20, 0x53,
+ 0x4b, 0x2e, 0x96, 0xb0, 0x5a, 0xfc, 0xc0, 0xaf, 0xed, 0x22, 0x29, 0xb1,
+ 0x25, 0xc6, 0xdb, 0x85, 0xaa, 0x8b, 0x22, 0x4b, 0x3a, 0xae, 0xee, 0x16,
+ 0x07, 0x6f, 0xc9, 0xe9, 0x88, 0x84, 0x71, 0x52, 0x8c, 0x60, 0x0b, 0x0c,
+ 0xcf, 0x24, 0x4f, 0x04, 0x5f, 0x0a, 0x51, 0x06, 0xa4, 0x29, 0x22, 0xd2,
+ 0xd2, 0x5d, 0x39, 0x71, 0xea, 0xb6, 0xfe, 0x96, 0x46, 0xa0, 0xb6, 0xec,
+ 0x8e, 0x36, 0xa1, 0x21, 0x90, 0xdb, 0xad, 0x76, 0xa5, 0xa4, 0x4e, 0x5e,
+ 0x2c, 0x57, 0x78, 0xaf, 0xc5, 0x51, 0xb7, 0x46, 0x3c, 0xad, 0x7f, 0x17,
+ 0xf1, 0x74, 0x0f, 0x71, 0x50, 0xf3, 0x98, 0x4c, 0x48, 0xc8, 0x4f, 0xc5,
+ 0x45, 0xb3, 0x3e, 0x88, 0x26, 0xec, 0x68, 0x9e, 0x18, 0xb2, 0x48, 0xfd,
+ 0x71, 0x44, 0xbc, 0x83, 0x6e, 0x37, 0x69, 0x46, 0x8e, 0xdd, 0x80, 0xd2,
+ 0x98, 0x05, 0xe2, 0xa9, 0x3f, 0x9b, 0xd0, 0x22, 0x44, 0xe1, 0xb0, 0x87,
+ 0x4e, 0x9a, 0x2f, 0x1f, 0x21, 0x56, 0x85, 0xe3, 0x49, 0x91, 0x57, 0xe2,
+ 0x70, 0x4f, 0xf0, 0xea, 0x23, 0x37, 0xdb, 0xdc, 0x37, 0xf0, 0xe4, 0x08,
+ 0xd9, 0x7d, 0x74, 0x71, 0x00, 0xf3, 0xac, 0x65, 0x5c, 0x8a, 0x49, 0x1a,
+ 0x85, 0xb0, 0xf2, 0x72, 0x47, 0xea, 0x42, 0x52, 0x83, 0xc4, 0x4b, 0xde,
+ 0xf2, 0x77, 0xec, 0x6e, 0x3f, 0x82, 0x4f, 0x0d, 0x06, 0xa1, 0x94, 0x82,
+ 0xa1, 0xa3, 0xaf, 0x76, 0x15, 0xe6, 0xad, 0xf3, 0xac, 0x53, 0x92, 0xc4,
+ 0x0b, 0x75, 0xbe, 0xb5, 0x51, 0x52, 0xc3, 0x50, 0xcd, 0xb7, 0x9e, 0xe9,
+ 0xf3, 0x36, 0x34, 0xf5, 0x53, 0xbb, 0x3f, 0x65, 0xff, 0xee, 0x07, 0x4f,
+ 0x68, 0xcd, 0xca, 0x03, 0x00, 0x6e, 0xfa, 0x04, 0x4e, 0xc2, 0x0d, 0xdd,
+ 0xd2, 0x5c, 0x10, 0x1a, 0x78, 0xa1, 0x4b, 0xb3, 0x38, 0xe8, 0x72, 0x7e,
+ 0x7a, 0x39, 0x04, 0x58, 0xea, 0x57, 0x1a, 0x4b, 0xa3, 0xa0, 0x2c, 0x5b,
+ 0x01, 0x67, 0x6d, 0xce, 0xf4, 0xcb, 0x2b, 0x39, 0x5f, 0x27, 0xc9, 0xc2,
+ 0xac, 0x90, 0xb3, 0xc0, 0x49, 0x7f, 0xf8, 0x4f, 0x0a, 0x49, 0x54, 0x97,
+ 0xea, 0x67, 0x06, 0xb9, 0xf4, 0x1f, 0x73, 0xab, 0x0e, 0x9c, 0xa8, 0x92,
+ 0x02, 0x02, 0x45, 0x32, 0xa9, 0x70, 0x86, 0x01, 0x12, 0xc8, 0xd8, 0xe0,
+ 0x3e, 0x0d, 0xa6, 0xac, 0x86, 0x32, 0x75, 0x81, 0xe5, 0xd5, 0x1f, 0x6d,
+ 0x83, 0x7a, 0x89, 0x30, 0x5b, 0x1a, 0x2b, 0xba, 0x2a, 0x90, 0xaa, 0xcc,
+ 0x43, 0x25, 0xaa, 0x02, 0x83, 0x42, 0x41, 0x89, 0x4a, 0x84, 0xaa, 0xae,
+ 0x8f, 0xab, 0x18, 0xa9, 0xba, 0x33, 0xb4, 0xf5, 0xab, 0xca, 0x70, 0x63,
+ 0x68, 0xeb, 0x17, 0x53, 0xba, 0x2d, 0xb0, 0x43, 0x27, 0xe7, 0x51, 0xe9,
+ 0x18, 0x9b, 0x6c, 0x0a, 0x6a, 0x1e, 0x06, 0xee, 0x36, 0x59, 0x42, 0xc7,
+ 0x3f, 0x26, 0x80, 0x1d, 0x2c, 0x3f, 0xa0, 0xf2, 0x43, 0x96, 0xd5, 0x2a,
+ 0xf1, 0xa7, 0xcc, 0x2d, 0xe3, 0x53, 0x2e, 0xbb, 0x17, 0x01, 0xae, 0x10,
+ 0xcc, 0x76, 0x0e, 0xac, 0xe2, 0xc3, 0xce, 0x57, 0x72, 0xa9, 0x76, 0x36,
+ 0xbe, 0x74, 0x77, 0x78, 0xab, 0x48, 0xb7, 0xa2, 0x6c, 0x26, 0x76, 0xab,
+ 0x59, 0x12, 0x75, 0x27, 0x5b, 0x82, 0x75, 0xdc, 0xf6, 0x97, 0xb4, 0xae,
+ 0x1f, 0x93, 0xe5, 0x8b, 0xe2, 0xf0, 0xb0, 0xca, 0x6a, 0xc1, 0xd9, 0x05,
+ 0xd5, 0x94, 0x5d, 0x50, 0x41, 0x99, 0x45, 0xec, 0xc5, 0xd9, 0xd3, 0x19,
+ 0x46, 0xe9, 0x45, 0xb8, 0x4b, 0x72, 0xff, 0x87, 0xe2, 0x47, 0x52, 0x8c,
+ 0xee, 0x9a, 0x95, 0xa3, 0x2b, 0x93, 0x86, 0x78, 0x53, 0x5c, 0x6d, 0x0f,
+ 0x6b, 0x76, 0x47, 0x47, 0xaf, 0xcb, 0xd6, 0xe8, 0x89, 0xd5, 0x05, 0xe0,
+ 0x3d, 0xc9, 0xf9, 0x86, 0xcc, 0x6c, 0x3f, 0x39, 0xc5, 0x1b, 0xe4, 0x8b,
+ 0xf5, 0xd3, 0xe6, 0xf7, 0x70, 0xb7, 0xbe, 0xb7, 0xc1, 0x8e, 0x33, 0x42,
+ 0x27, 0x7e, 0x10, 0xa3, 0x03, 0x49, 0x98, 0xaf, 0x0e, 0x21, 0x76, 0x4a,
+ 0xf1, 0xf6, 0x15, 0x5a, 0x5d, 0xf6, 0x52, 0xab, 0x4d, 0x75, 0x67, 0x77,
+ 0x4e, 0xf8, 0x8a, 0xa7, 0x10, 0x15, 0x4d, 0xb4, 0x51, 0x9d, 0x6d, 0x9f,
+ 0xf8, 0x22, 0xf5, 0x12, 0x6e, 0x2e, 0xc1, 0x35, 0x9c, 0xa4, 0x1f, 0xc9,
+ 0x61, 0xf8, 0xb0, 0x27, 0xb7, 0xa8, 0xd8, 0xc5, 0xa6, 0x37, 0xa9, 0x60,
+ 0x3a, 0x31, 0xdf, 0x6d, 0x26, 0x30, 0x75, 0xd7, 0x9d, 0xc2, 0x64, 0x80,
+ 0x83, 0x34, 0x33, 0x8a, 0x6f, 0xd6, 0x87, 0x65, 0xa0, 0x10, 0xb9, 0xaf,
+ 0x6e, 0x76, 0xf3, 0xca, 0x22, 0xe1, 0x88, 0x85, 0x38, 0x4b, 0x21, 0x51,
+ 0x2a, 0x84, 0xe9, 0xcd, 0xaa, 0xec, 0xe7, 0x41, 0x8c, 0xcf, 0x80, 0x59,
+ 0x27, 0x91, 0x74, 0x10, 0x14, 0x18, 0xb7, 0xaa, 0x32, 0xd6, 0xe8, 0xdb,
+ 0x6a, 0xbf, 0x91, 0xc2, 0xe5, 0x5e, 0x0d, 0xd4, 0x2d, 0xf4, 0xac, 0xd6,
+ 0x37, 0xdf, 0x1b, 0xde, 0x60, 0x0d, 0x1a, 0x28, 0xa5, 0xd8, 0x5f, 0x25,
+ 0xbd, 0xdb, 0xc5, 0x39, 0x19, 0x74, 0x61, 0xa8, 0x7b, 0xfb, 0xec, 0x18,
+ 0xe6, 0x1a, 0xfd, 0xdb, 0x9d, 0x37, 0xd2, 0x7f, 0xb1, 0x67, 0x03, 0x1b,
+ 0x68, 0x40, 0xcb, 0xf0, 0x97, 0x53, 0xb6, 0x39, 0x01, 0x1b, 0x15, 0x57,
+ 0x86, 0x7a, 0xaf, 0x2a, 0x09, 0x4f, 0x94, 0x53, 0x85, 0x8b, 0x2d, 0x7a,
+ 0xaf, 0x44, 0x19, 0x3b, 0xcf, 0x3f, 0x44, 0x7c, 0x8b, 0xee, 0x00, 0xae,
+ 0x52, 0xa9, 0x8e, 0x19, 0x1e, 0xee, 0x1a, 0x50, 0x65, 0xe5, 0x55, 0x7c,
+ 0x37, 0xb9, 0xc6, 0x23, 0x12, 0x2f, 0x50, 0x39, 0xb3, 0xa3, 0x6c, 0xae,
+ 0x8c, 0x21, 0x64, 0x67, 0x82, 0x52, 0x7e, 0x39, 0xa5, 0xcf, 0x87, 0x7f,
+ 0xfb, 0xe9, 0x17, 0xe1, 0x19, 0x71, 0xe3, 0x67, 0x72, 0x07, 0x40, 0x08,
+ 0x5d, 0xff, 0xb0, 0xcb, 0xba, 0xdd, 0xed, 0x1d, 0xfa, 0x9c, 0x32, 0xad,
+ 0x0b, 0xd8, 0x87, 0xee, 0xc6, 0x7b, 0xc1, 0xb3, 0x6b, 0x0c, 0x16, 0xfd,
+ 0xf1, 0xf5, 0xa4, 0x3b, 0x1d, 0x2e, 0x2e, 0x6e, 0x6f, 0xfa, 0xa9, 0x22,
+ 0xa5, 0x42, 0xe0, 0xe1, 0xdd, 0x70, 0x5a, 0x3c, 0x42, 0xc8, 0x27, 0xa9,
+ 0xe1, 0x6c, 0x06, 0x14, 0x7c, 0x8a, 0x24, 0xc3, 0xff, 0xbd, 0xed, 0x8e,
+ 0x80, 0xd3, 0x80, 0xcc, 0x46, 0x22, 0x6a, 0x80, 0x44, 0x97, 0xd3, 0x61,
+ 0x77, 0x2e, 0x8c, 0xa8, 0x09, 0x52, 0xdd, 0x8c, 0xe7, 0x32, 0xb3, 0x16,
+ 0xcc, 0x4c, 0x26, 0x6a, 0x83, 0x44, 0xdd, 0xd1, 0xe7, 0xee, 0x57, 0x3e,
+ 0xbf, 0xf4, 0x99, 0xf2, 0x29, 0x11, 0x71, 0x41, 0xd8, 0xbd, 0x2d, 0x3e,
+ 0x12, 0x65, 0x37, 0x6e, 0xa2, 0xa0, 0x7b, 0xa3, 0xe1, 0xcd, 0x60, 0x71,
+ 0xd1, 0xed, 0xcf, 0xc7, 0x53, 0x48, 0xd0, 0xf7, 0xc3, 0xe9, 0x58, 0x23,
+ 0xe7, 0xf1, 0xcd, 0x50, 0x23, 0xe6, 0xd9, 0xb4, 0x8f, 0x35, 0x3a, 0x1a,
+ 0x4f, 0x35, 0xa2, 0xbe, 0xba, 0xb9, 0x5b, 0x14, 0x69, 0x61, 0x89, 0x0f,
+ 0x66, 0xf3, 0x1c, 0x1d, 0x2c, 0x73, 0xc2, 0xb3, 0x48, 0x0b, 0x0b, 0x9e,
+ 0xf4, 0xdd, 0x1d, 0x4d, 0x3e, 0x75, 0x35, 0xb2, 0x4f, 0xc7, 0x29, 0xd3,
+ 0x9e, 0x29, 0xc7, 0x29, 0xd3, 0x75, 0x4a, 0xc7, 0x29, 0xd3, 0x9e, 0x83,
+ 0xb4, 0xfd, 0xf1, 0x4d, 0x61, 0x46, 0x16, 0xac, 0x1c, 0xc2, 0x16, 0x24,
+ 0x87, 0x35, 0xc5, 0x48, 0xe5, 0x31, 0x58, 0x6a, 0x65, 0x81, 0xe4, 0xb0,
+ 0xbe, 0x32, 0x79, 0x2d, 0x66, 0xdd, 0xf9, 0xed, 0x14, 0xaf, 0x16, 0xde,
+ 0xa2, 0x69, 0x60, 0xbc, 0x2c, 0xc1, 0x51, 0x34, 0xdd, 0xeb, 0xf1, 0x60,
+ 0x08, 0x19, 0x6e, 0x77, 0x30, 0xd0, 0xd8, 0xed, 0xec, 0xb6, 0xa7, 0xb1,
+ 0xdb, 0xa9, 0x48, 0x02, 0x4b, 0xe1, 0xfa, 0xea, 0x46, 0x63, 0xa8, 0xd7,
+ 0xdd, 0x2f, 0x1a, 0x13, 0xbd, 0xbe, 0xd5, 0x79, 0x84, 0x59, 0x7f, 0x3a,
+ 0x1c, 0xde, 0x68, 0xac, 0x72, 0xd0, 0x9d, 0xfe, 0x53, 0x20, 0x82, 0xcd,
+ 0x71, 0x74, 0x75, 0xf9, 0x69, 0x2e, 0x50, 0x75, 0x4a, 0x45, 0x9f, 0xbe,
+ 0x50, 0x1a, 0xef, 0x65, 0xd1, 0xcf, 0x30, 0x8b, 0xfe, 0xd5, 0x68, 0x31,
+ 0x9e, 0x1c, 0xe9, 0x33, 0xfe, 0x39, 0x1c, 0x4e, 0x74, 0xc2, 0x1f, 0x4e,
+ 0x46, 0xdd, 0xfe, 0x50, 0xeb, 0x32, 0xfa, 0x5a, 0x4f, 0x31, 0xec, 0x1b,
+ 0x38, 0x89, 0xe1, 0x74, 0xae, 0x51, 0x02, 0xee, 0xea, 0xf3, 0xb4, 0x3b,
+ 0xd1, 0x69, 0x61, 0x28, 0x53, 0x95, 0x3b, 0x66, 0xa1, 0x1a, 0x48, 0x94,
+ 0xef, 0x64, 0x7a, 0x75, 0x0d, 0x49, 0x76, 0x32, 0xbe, 0xba, 0x99, 0xcf,
+ 0x34, 0xb2, 0x1d, 0x5d, 0xdd, 0x0c, 0x75, 0x1b, 0x1f, 0xa1, 0x59, 0x8c,
+ 0xc6, 0xe3, 0x89, 0x46, 0xbc, 0x94, 0x6e, 0x36, 0x9f, 0x5e, 0x4d, 0x34,
+ 0x42, 0xc6, 0x24, 0xdd, 0x9b, 0xcb, 0x91, 0xd0, 0x31, 0x2c, 0xe9, 0x94,
+ 0x2e, 0xc7, 0x14, 0x96, 0x78, 0x46, 0x7c, 0xd1, 0xd5, 0x19, 0x3f, 0x93,
+ 0xcc, 0x62, 0x7e, 0xc1, 0x27, 0x0e, 0x13, 0x52, 0xe9, 0x48, 0x74, 0xaa,
+ 0x85, 0x92, 0x48, 0x48, 0xa2, 0x85, 0x7d, 0x37, 0x97, 0x92, 0x44, 0x0c,
+ 0x3b, 0xef, 0x4c, 0x52, 0x22, 0xad, 0x0d, 0xab, 0x52, 0x96, 0x96, 0xd4,
+ 0x00, 0xd6, 0xab, 0x28, 0x31, 0x89, 0xdc, 0x2e, 0x9a, 0x61, 0x92, 0xa2,
+ 0x4a, 0x81, 0x5c, 0xd2, 0x8c, 0x22, 0x41, 0x77, 0x61, 0xb4, 0x12, 0x81,
+ 0x95, 0x12, 0x10, 0x74, 0x17, 0xe0, 0xef, 0xcd, 0xf4, 0xef, 0x0c, 0xdd,
+ 0x05, 0xa0, 0x68, 0x49, 0x14, 0x02, 0xac, 0x0b, 0x40, 0xdb, 0xfe, 0x90,
+ 0x79, 0x20, 0x27, 0x8f, 0xe7, 0x02, 0x90, 0x9f, 0xa5, 0xe4, 0x10, 0x90,
+ 0x0b, 0x40, 0xdf, 0x49, 0xe9, 0x8b, 0x40, 0x2e, 0x00, 0xf5, 0xb9, 0x44,
+ 0x8d, 0xcf, 0x2d, 0x64, 0xad, 0x3e, 0x79, 0x01, 0x0e, 0xfd, 0xe9, 0x5b,
+ 0x53, 0x50, 0x5c, 0x99, 0x40, 0x93, 0x64, 0xd3, 0x7d, 0x2d, 0xf0, 0xb7,
+ 0xaf, 0x28, 0xcb, 0x4d, 0x83, 0x10, 0x26, 0x69, 0x86, 0x33, 0x6d, 0x9b,
+ 0xbd, 0x15, 0xad, 0xd0, 0xd8, 0xe6, 0x1d, 0xaf, 0xc9, 0x49, 0x25, 0xdf,
+ 0x96, 0xa4, 0xd1, 0xd4, 0xc3, 0x6e, 0xe4, 0x5a, 0xf3, 0x2e, 0x4f, 0x01,
+ 0xab, 0xb1, 0x9a, 0x05, 0xd1, 0xc4, 0xa1, 0xe3, 0x47, 0xe4, 0x6c, 0x87,
+ 0x36, 0xae, 0xbb, 0x5e, 0x62, 0xea, 0x92, 0x1c, 0x27, 0xbb, 0xd7, 0x5e,
+ 0x05, 0x07, 0x1f, 0x38, 0x6a, 0x98, 0xe6, 0x58, 0x93, 0xa1, 0x64, 0xe6,
+ 0xc5, 0x01, 0x70, 0x1c, 0x72, 0x5f, 0xbc, 0x7a, 0x74, 0x3c, 0x9f, 0x54,
+ 0xda, 0xa7, 0x88, 0x34, 0xc0, 0x38, 0x0c, 0x6e, 0x05, 0xf5, 0x69, 0xeb,
+ 0x0c, 0xde, 0x47, 0x1a, 0xc7, 0xf7, 0xee, 0xee, 0xac, 0x38, 0xed, 0xd9,
+ 0x0f, 0x9a, 0x69, 0xb6, 0x8a, 0x40, 0xe0, 0x1f, 0xc8, 0xba, 0xce, 0x65,
+ 0x17, 0x70, 0xd7, 0x1f, 0xa0, 0x95, 0xb3, 0x7a, 0x74, 0x41, 0xe3, 0xb2,
+ 0xeb, 0xb2, 0x13, 0x10, 0xde, 0x2b, 0xd2, 0x57, 0xe2, 0xde, 0x8a, 0xf9,
+ 0x85, 0x91, 0x62, 0xaa, 0x74, 0x5e, 0xc6, 0x13, 0xcd, 0x35, 0xc6, 0x66,
+ 0x5f, 0x9a, 0x78, 0x37, 0x15, 0x92, 0x6d, 0x15, 0xf5, 0x73, 0xb5, 0xdb,
+ 0x6f, 0xbd, 0x95, 0x17, 0x97, 0x0f, 0x9f, 0x3e, 0x60, 0x27, 0x92, 0xa4,
+ 0x89, 0x16, 0xff, 0x1b, 0x2f, 0x22, 0x92, 0xd1, 0xd8, 0xc5, 0xd1, 0x0c,
+ 0x5f, 0x92, 0xd1, 0xcc, 0x0e, 0x7b, 0x37, 0x24, 0xbd, 0x82, 0x99, 0x93,
+ 0x25, 0x85, 0xcd, 0x91, 0x1e, 0xda, 0xbb, 0x69, 0xcb, 0x28, 0x6d, 0x99,
+ 0xb8, 0x0b, 0xb4, 0x85, 0xdf, 0x91, 0xda, 0x46, 0x38, 0x39, 0x00, 0x5b,
+ 0xd5, 0xec, 0x0d, 0x2a, 0x1f, 0xc3, 0xe0, 0x59, 0xd9, 0x5c, 0x7f, 0x17,
+ 0xb4, 0x62, 0xd7, 0x40, 0x6f, 0x24, 0xfd, 0xcc, 0xb9, 0x66, 0xa2, 0x96,
+ 0x2e, 0xb8, 0x4a, 0x47, 0x4f, 0x6f, 0x3f, 0xd2, 0x66, 0x40, 0x7a, 0xb1,
+ 0xe2, 0x5c, 0x74, 0xec, 0x2a, 0xce, 0xac, 0xc9, 0x97, 0x2a, 0x87, 0xdf,
+ 0x9a, 0x19, 0x00, 0x7a, 0x01, 0x60, 0x5b, 0x1c, 0x8a, 0xeb, 0x08, 0x96,
+ 0xe8, 0xc4, 0x4d, 0x9e, 0x56, 0x7d, 0x2c, 0xab, 0xba, 0xd0, 0x62, 0x4d,
+ 0x55, 0x11, 0x6c, 0xba, 0xc3, 0xab, 0xd0, 0xaa, 0x0c, 0xde, 0xeb, 0x30,
+ 0x0e, 0xe9, 0x71, 0x4b, 0xc1, 0x47, 0xfd, 0x34, 0x84, 0x5c, 0x4a, 0xb7,
+ 0xd0, 0xc1, 0xa7, 0x89, 0x6e, 0xb8, 0x84, 0x5b, 0x98, 0xf1, 0x21, 0x05,
+ 0x89, 0x38, 0x89, 0x3e, 0x72, 0xec, 0x1c, 0x45, 0xa7, 0xe6, 0x90, 0x3c,
+ 0x2a, 0xc4, 0x1f, 0x05, 0x63, 0x73, 0xb4, 0xb2, 0x7b, 0x53, 0x96, 0xe6,
+ 0xb0, 0x65, 0x1b, 0x82, 0x1e, 0x15, 0xec, 0x49, 0xdd, 0xe5, 0xc1, 0xc7,
+ 0xce, 0x0a, 0xef, 0x25, 0x94, 0x27, 0x0a, 0x0e, 0xb4, 0xaa, 0x75, 0x27,
+ 0xdc, 0xd0, 0x2b, 0xca, 0x1c, 0xb5, 0x7d, 0x99, 0x20, 0x54, 0x99, 0x3e,
+ 0xf9, 0xc8, 0x00, 0xe5, 0xb2, 0x1d, 0x77, 0x9b, 0x21, 0xa7, 0x24, 0x2a,
+ 0x95, 0x97, 0x94, 0xde, 0x04, 0x7a, 0x45, 0x9c, 0x90, 0xa3, 0x8c, 0x1e,
+ 0x06, 0x1a, 0xa9, 0x6c, 0xf3, 0x30, 0x1b, 0xc0, 0xe4, 0xcb, 0xe4, 0x73,
+ 0x96, 0x8b, 0x5e, 0xa9, 0x7c, 0xae, 0x9d, 0xf0, 0x77, 0x8d, 0xaf, 0x39,
+ 0x97, 0x7d, 0x0d, 0x0c, 0x48, 0xa3, 0x92, 0x6b, 0xd9, 0x35, 0x96, 0xc9,
+ 0x7e, 0x37, 0xc1, 0x7f, 0xc4, 0xcb, 0xf6, 0x91, 0x95, 0x72, 0xe0, 0x51,
+ 0x63, 0x1b, 0xdc, 0x13, 0x33, 0xdc, 0x91, 0xf2, 0x28, 0x6c, 0x8c, 0xb7,
+ 0x57, 0x17, 0x88, 0x56, 0x4d, 0xf2, 0x57, 0x05, 0xc2, 0xeb, 0x39, 0xf8,
+ 0x79, 0x9b, 0xd8, 0xc1, 0x97, 0xf1, 0x94, 0xf0, 0x00, 0x6c, 0x58, 0x7f,
+ 0x0f, 0x6f, 0x62, 0xc4, 0xe6, 0x98, 0x37, 0x95, 0xbd, 0x8f, 0x39, 0xf8,
+ 0x4d, 0x45, 0xef, 0x63, 0x8e, 0x94, 0x66, 0xec, 0x7d, 0xf4, 0x75, 0x3f,
+ 0x53, 0xe7, 0x59, 0x51, 0xee, 0xa6, 0xf7, 0xb2, 0x0a, 0xf8, 0x94, 0x4a,
+ 0x0f, 0xbe, 0x59, 0xb9, 0x22, 0xc2, 0xf4, 0x0f, 0xd8, 0xf6, 0xf5, 0x50,
+ 0x36, 0x32, 0xbd, 0xa5, 0x2d, 0xbc, 0x92, 0xe9, 0xed, 0x62, 0x6e, 0xa9,
+ 0x94, 0xbe, 0xa1, 0x85, 0x1b, 0x90, 0xe0, 0x5f, 0x3a, 0x0a, 0x22, 0x0e,
+ 0xd7, 0x72, 0xae, 0xa0, 0x98, 0xe5, 0x80, 0x5f, 0x2c, 0xd5, 0xfc, 0xef,
+ 0xff, 0xab, 0x40, 0x0a, 0x01, 0x87, 0x94, 0x78, 0xa4, 0x46, 0x76, 0x86,
+ 0x29, 0x41, 0xba, 0x32, 0x74, 0x2c, 0xc2, 0x4b, 0x66, 0xb3, 0x40, 0xfa,
+ 0x2f, 0xed, 0x58, 0xde, 0x66, 0x2d, 0x8c, 0x84, 0x5d, 0xe3, 0x7d, 0x29,
+ 0xfc, 0xb5, 0x97, 0x82, 0xcd, 0x93, 0x74, 0x0a, 0xbc, 0x23, 0x55, 0x9a,
+ 0x28, 0x7d, 0xff, 0xe4, 0x29, 0xc0, 0x8e, 0x4c, 0x2a, 0xdf, 0x93, 0x70,
+ 0x83, 0xf7, 0x89, 0xcf, 0x10, 0xa4, 0x35, 0xf0, 0x5a, 0x98, 0xbc, 0x59,
+ 0xd3, 0x96, 0x70, 0x26, 0x6f, 0xba, 0xe4, 0x21, 0x41, 0x55, 0xc7, 0x6f,
+ 0x8a, 0xbf, 0x54, 0x8a, 0x88, 0x6a, 0x0c, 0x56, 0x53, 0x6c, 0x5b, 0xc4,
+ 0x60, 0xaa, 0x04, 0x8e, 0xc6, 0x9b, 0x23, 0x33, 0x38, 0x26, 0xa1, 0x81,
+ 0x0a, 0x99, 0x49, 0xdd, 0x82, 0x21, 0xa6, 0x54, 0x2a, 0xe2, 0x15, 0x63,
+ 0xf8, 0x56, 0xa9, 0x9f, 0x30, 0x34, 0xe6, 0x66, 0xde, 0x98, 0xaf, 0x7c,
+ 0xcc, 0xd4, 0x5f, 0x55, 0x31, 0xeb, 0x2a, 0x26, 0xd9, 0x30, 0xa8, 0x2a,
+ 0xd6, 0x98, 0xa4, 0xc9, 0x2a, 0x31, 0xb7, 0xc9, 0x33, 0x38, 0x15, 0x96,
+ 0xe3, 0xc7, 0x0b, 0x66, 0x52, 0xf9, 0x80, 0x85, 0x85, 0x7a, 0xc0, 0x94,
+ 0xb4, 0x39, 0x7a, 0xb7, 0xf2, 0xa3, 0xad, 0x9c, 0x5f, 0x36, 0xea, 0x0e,
+ 0x52, 0x8d, 0xec, 0xa0, 0xa9, 0x46, 0xe0, 0x52, 0x59, 0xf5, 0x9f, 0x02,
+ 0xbf, 0x55, 0x2e, 0x0b, 0x20, 0xad, 0x55, 0x41, 0x16, 0x1d, 0x50, 0x16,
+ 0xc0, 0x7a, 0x7f, 0x0b, 0xa9, 0x18, 0xec, 0x1b, 0x7f, 0xad, 0x35, 0xf5,
+ 0x66, 0x72, 0x6e, 0x66, 0xd9, 0xd1, 0x1e, 0x01, 0x74, 0x4d, 0x84, 0x4d,
+ 0x7f, 0x4e, 0x07, 0x5a, 0x76, 0x93, 0x24, 0xd3, 0x19, 0x94, 0xcc, 0x2b,
+ 0x78, 0x28, 0x35, 0x53, 0x2d, 0x23, 0xda, 0xca, 0x56, 0x10, 0x0c, 0x42,
+ 0x06, 0x4c, 0x81, 0x54, 0xb3, 0x4b, 0xce, 0x09, 0x38, 0xad, 0xaa, 0x9d,
+ 0x13, 0x65, 0x9e, 0x6c, 0x46, 0x06, 0x68, 0x5e, 0x98, 0x38, 0x01, 0x1e,
+ 0xfb, 0x72, 0xfa, 0xb5, 0x0a, 0x2c, 0x22, 0x07, 0xb7, 0x10, 0x87, 0xaa,
+ 0x42, 0x0d, 0x50, 0xf7, 0x3e, 0x31, 0x03, 0xb7, 0xa3, 0xb4, 0x23, 0x23,
+ 0x84, 0x3b, 0x4a, 0x3a, 0x2f, 0x20, 0xd2, 0x41, 0xaf, 0xa8, 0x4a, 0x14,
+ 0xd7, 0xce, 0x76, 0xf6, 0x3c, 0xb0, 0x98, 0xfe, 0x36, 0x8d, 0xe4, 0x9f,
+ 0xf2, 0xc9, 0x80, 0xb2, 0xdd, 0xd1, 0xd7, 0x62, 0x77, 0xa9, 0x62, 0x15,
+ 0x9d, 0xf9, 0x9d, 0x65, 0xb3, 0x98, 0x67, 0xf7, 0xb1, 0x17, 0xc9, 0x7d,
+ 0x2c, 0x62, 0x71, 0x07, 0x34, 0x1f, 0x3e, 0xa0, 0xc4, 0x38, 0xc6, 0xec,
+ 0xc4, 0x98, 0x1c, 0xb8, 0xd2, 0xe8, 0x60, 0x13, 0x6c, 0xb7, 0xc1, 0xb3,
+ 0xf0, 0x72, 0xa2, 0xa1, 0xf6, 0x18, 0x25, 0x7d, 0x24, 0x30, 0x79, 0x49,
+ 0x1f, 0xf4, 0x25, 0xe9, 0x6c, 0xef, 0xae, 0x00, 0xfe, 0x5c, 0x0c, 0xd0,
+ 0xa9, 0xb4, 0x4c, 0x0e, 0xb9, 0xa2, 0x03, 0x40, 0x1a, 0xa4, 0xdf, 0x34,
+ 0x2b, 0x28, 0x15, 0x14, 0x03, 0xb4, 0xf9, 0xa1, 0x82, 0xef, 0x61, 0xa8,
+ 0x83, 0x9f, 0xb1, 0x87, 0x2a, 0x89, 0x17, 0xbb, 0xa3, 0x66, 0x8a, 0xcf,
+ 0xab, 0xec, 0x04, 0x6e, 0x74, 0xdd, 0xa2, 0xf4, 0xfa, 0xab, 0xc0, 0x8f,
+ 0xdc, 0xd5, 0x81, 0xae, 0x41, 0x91, 0x7f, 0xc4, 0x3b, 0x40, 0x4e, 0x94,
+ 0x6a, 0xf0, 0x89, 0xfd, 0x29, 0x57, 0xa9, 0xdc, 0xac, 0xa0, 0x30, 0xd9,
+ 0x06, 0x70, 0x1f, 0x2c, 0x53, 0x1e, 0x07, 0xc5, 0x20, 0x0a, 0x82, 0xf2,
+ 0x2a, 0x29, 0xd9, 0x56, 0x4b, 0x38, 0x4d, 0x7d, 0xbc, 0xc9, 0x0d, 0xb6,
+ 0xd8, 0xbf, 0x5c, 0x06, 0x22, 0xdc, 0x30, 0xb3, 0xec, 0x21, 0x7b, 0xc1,
+ 0x0b, 0x75, 0x9b, 0x52, 0xd0, 0x2f, 0xa6, 0x28, 0xe0, 0xe9, 0x0c, 0xde,
+ 0x65, 0xf5, 0xc8, 0x24, 0x93, 0x5e, 0x8a, 0xf7, 0x17, 0xfa, 0xef, 0x55,
+ 0x5c, 0x84, 0x81, 0x1f, 0xab, 0x19, 0xe8, 0x53, 0xb1, 0xe9, 0x3c, 0x26,
+ 0x0e, 0x5e, 0xc8, 0xe3, 0x7d, 0x71, 0x05, 0xdb, 0x99, 0xeb, 0x12, 0xaa,
+ 0xff, 0xc0, 0x04, 0x29, 0x45, 0xf3, 0x61, 0xe0, 0x6e, 0x4e, 0x52, 0x24,
+ 0x58, 0x60, 0x66, 0x1b, 0x32, 0x4b, 0xff, 0x5c, 0xce, 0x8e, 0x43, 0x4b,
+ 0x54, 0x62, 0xa7, 0x86, 0x9e, 0xcb, 0x36, 0xd6, 0x42, 0x39, 0xba, 0x96,
+ 0xab, 0x02, 0xa1, 0xd0, 0xa0, 0xee, 0x9f, 0x31, 0x99, 0xba, 0x1b, 0xb6,
+ 0x6e, 0x8d, 0xbc, 0x41, 0x89, 0x8f, 0xeb, 0x64, 0xbe, 0x9e, 0x55, 0xca,
+ 0xaa, 0x6d, 0xf8, 0xee, 0x52, 0x7e, 0x76, 0x1d, 0x15, 0x57, 0xb0, 0xad,
+ 0x9f, 0x40, 0x9a, 0xc4, 0xa7, 0x9d, 0xad, 0xb1, 0x24, 0x36, 0x12, 0x92,
+ 0x93, 0x90, 0xa5, 0xcc, 0xe6, 0x21, 0xd5, 0x9f, 0x6b, 0x79, 0x46, 0xe1,
+ 0x4a, 0xc9, 0x93, 0x6b, 0xac, 0x1a, 0x4f, 0x18, 0x84, 0x97, 0xfb, 0x2b,
+ 0xa1, 0xc8, 0x18, 0x3c, 0x17, 0x6f, 0xf7, 0x8f, 0x8e, 0xc1, 0x94, 0x3b,
+ 0xc6, 0xa3, 0x13, 0x39, 0x96, 0x4d, 0xb8, 0x79, 0x14, 0x47, 0x0d, 0xe6,
+ 0x70, 0x7e, 0xb6, 0x65, 0xd6, 0xd5, 0x2e, 0x58, 0x17, 0x09, 0xb2, 0xe3,
+ 0xe4, 0x2e, 0x07, 0xcc, 0x5b, 0xd3, 0x81, 0x9c, 0x5c, 0x58, 0xed, 0x23,
+ 0x1f, 0x24, 0xf7, 0xc8, 0x6e, 0xa9, 0x6a, 0x6f, 0xe0, 0x64, 0x2f, 0x43,
+ 0xd7, 0xf5, 0x95, 0x0c, 0x0c, 0x8a, 0x46, 0xa6, 0x04, 0x24, 0x4d, 0xd1,
+ 0xbc, 0xe2, 0xe2, 0xcc, 0xae, 0x24, 0x13, 0x73, 0xe4, 0xfb, 0x07, 0xb8,
+ 0xbb, 0x4d, 0xdd, 0xc8, 0x0d, 0x9f, 0x84, 0x37, 0xb3, 0x15, 0xc7, 0x4d,
+ 0xb3, 0xc8, 0xf3, 0x24, 0x8b, 0x8c, 0x08, 0xd4, 0xd1, 0x8a, 0xf5, 0xbb,
+ 0xc2, 0x0e, 0x2e, 0xf0, 0x49, 0x65, 0x24, 0xdb, 0xbe, 0x77, 0x74, 0x04,
+ 0x25, 0x2b, 0xc2, 0xb0, 0x13, 0xfb, 0x88, 0x4e, 0xf4, 0x16, 0x20, 0xf7,
+ 0x61, 0x1d, 0xd1, 0x87, 0x3e, 0xd7, 0x25, 0xf7, 0x51, 0x3f, 0xa2, 0x8f,
+ 0x6a, 0xa6, 0x70, 0xde, 0xe6, 0xa6, 0x50, 0x40, 0x86, 0x05, 0x2f, 0x20,
+ 0x09, 0xd8, 0x46, 0x63, 0x80, 0x26, 0x61, 0xf0, 0x14, 0xfc, 0x4e, 0x4a,
+ 0x37, 0xe5, 0xc3, 0xa9, 0xb0, 0xef, 0x9b, 0x7c, 0x2c, 0x2c, 0xe1, 0xf6,
+ 0x0f, 0x7a, 0xf2, 0xaa, 0x11, 0xdc, 0xe3, 0x7f, 0x28, 0x6a, 0x1b, 0x6c,
+ 0xfd, 0xf5, 0x0d, 0x73, 0x06, 0x4a, 0x68, 0x58, 0x7d, 0x20, 0x93, 0x6e,
+ 0x83, 0x4a, 0x16, 0xfa, 0x50, 0xe6, 0x1d, 0x30, 0xd7, 0x28, 0x6a, 0xc9,
+ 0x34, 0xff, 0x19, 0xff, 0x40, 0x2b, 0x7f, 0x11, 0x47, 0x57, 0x57, 0x61,
+ 0x78, 0x69, 0x47, 0x9a, 0x05, 0x12, 0x09, 0x34, 0x0c, 0x0c, 0x40, 0x72,
+ 0x6e, 0x10, 0x4d, 0xbc, 0x29, 0xc6, 0xee, 0x1f, 0x95, 0x30, 0x76, 0xd5,
+ 0x99, 0x47, 0x82, 0x1f, 0x9a, 0xb2, 0x74, 0x44, 0x35, 0xa8, 0xbe, 0xb5,
+ 0x52, 0x72, 0xe7, 0xf4, 0x8e, 0x71, 0x7b, 0x6c, 0xc1, 0xd3, 0x79, 0xb6,
+ 0x7b, 0xde, 0xbb, 0x61, 0x40, 0x9f, 0x0c, 0x14, 0x60, 0x6f, 0xa1, 0x82,
+ 0x9e, 0xf3, 0x0e, 0x3f, 0xf6, 0xe7, 0xc8, 0xa1, 0x33, 0x55, 0x8e, 0x86,
+ 0xd6, 0xb1, 0x3f, 0x25, 0xc0, 0x0a, 0xe8, 0xae, 0xfe, 0x9f, 0xf4, 0xe6,
+ 0x3d, 0x7f, 0x07, 0xaf, 0xdf, 0xc4, 0xba, 0x0c, 0x2a, 0x89, 0x70, 0x2b,
+ 0x74, 0x81, 0xcf, 0xf1, 0x8f, 0xde, 0xc3, 0x23, 0xb6, 0x56, 0x96, 0x0f,
+ 0x21, 0xa0, 0x16, 0x39, 0x28, 0x87, 0x2a, 0xf6, 0xaa, 0xe9, 0x6a, 0x1b,
+ 0x3c, 0x9b, 0xf5, 0xa4, 0xb7, 0xe9, 0x04, 0x70, 0x22, 0x79, 0x20, 0x8c,
+ 0xa5, 0xf3, 0xad, 0xdb, 0x22, 0xbd, 0x96, 0x4a, 0x52, 0xa5, 0x1c, 0x51,
+ 0x18, 0x2a, 0x67, 0x78, 0x13, 0x1c, 0x61, 0x01, 0xeb, 0x52, 0x58, 0xea,
+ 0x0c, 0x2a, 0x58, 0xe5, 0x08, 0xde, 0x08, 0x48, 0x18, 0xc6, 0xf0, 0xcd,
+ 0xb0, 0xe0, 0xdf, 0x31, 0x7c, 0x25, 0x0c, 0x5f, 0x10, 0xc6, 0xfa, 0x1d,
+ 0xc3, 0xf7, 0x4f, 0xc1, 0xf0, 0x05, 0x71, 0xbd, 0x4d, 0x30, 0x7c, 0xc1,
+ 0x1b, 0x98, 0xc8, 0x6e, 0xfe, 0xdc, 0x81, 0x9d, 0x8b, 0x01, 0x86, 0x2f,
+ 0xb4, 0xfe, 0x04, 0x8e, 0x46, 0xe0, 0xaf, 0x3b, 0xcf, 0x3f, 0x2d, 0xc5,
+ 0xf1, 0x05, 0x41, 0xc5, 0xff, 0x2e, 0x38, 0xbe, 0xa4, 0xfa, 0xea, 0x08,
+ 0x1c, 0x5f, 0xbb, 0xf5, 0x3d, 0x81, 0x7c, 0xb5, 0xdc, 0xdf, 0x1e, 0xc9,
+ 0x97, 0x44, 0xb1, 0xff, 0x0f, 0x90, 0x7c, 0xd3, 0x77, 0x7d, 0x59, 0xb2,
+ 0x5a, 0x03, 0xea, 0x8b, 0x4e, 0x26, 0xb8, 0x03, 0xeb, 0xe3, 0xb7, 0xc3,
+ 0x4d, 0xb6, 0xdf, 0x0c, 0xe3, 0xb4, 0x6d, 0x80, 0xa8, 0x95, 0x2e, 0xaf,
+ 0xde, 0x64, 0x42, 0x37, 0x34, 0x1c, 0xec, 0x85, 0x62, 0x55, 0x1c, 0x14,
+ 0x18, 0x55, 0xae, 0xf4, 0xdb, 0x25, 0x9d, 0x34, 0xec, 0xe5, 0x7e, 0x5f,
+ 0xb1, 0xea, 0x2f, 0x6d, 0xdb, 0x6e, 0x8a, 0x6d, 0xcd, 0x2a, 0x00, 0xd3,
+ 0xb6, 0x96, 0xdd, 0x11, 0x1b, 0x9b, 0x54, 0x75, 0xf0, 0x9b, 0x1f, 0x29,
+ 0x63, 0x01, 0xe4, 0x23, 0x5a, 0xfa, 0x4d, 0x44, 0x89, 0x23, 0x6b, 0x09,
+ 0x41, 0xe0, 0xf1, 0x38, 0xb2, 0x16, 0xb4, 0x62, 0xaa, 0x02, 0xc9, 0xd2,
+ 0x7b, 0xf8, 0x6a, 0x48, 0x82, 0x45, 0xd6, 0xf4, 0x75, 0x95, 0xc3, 0xf1,
+ 0x5f, 0x39, 0xec, 0xb0, 0x2e, 0xea, 0x05, 0xb5, 0xa9, 0x66, 0x47, 0xcc,
+ 0x41, 0x6b, 0x48, 0x25, 0xcd, 0x2d, 0x0a, 0xb3, 0xa4, 0x31, 0xa6, 0x92,
+ 0xf6, 0xd8, 0xb1, 0x2e, 0x4b, 0x2f, 0xbb, 0x35, 0x82, 0x49, 0xf0, 0x94,
+ 0x4b, 0x05, 0x64, 0x57, 0x17, 0x50, 0x91, 0xed, 0x51, 0x82, 0x02, 0xd8,
+ 0x1c, 0x25, 0x30, 0x80, 0x4f, 0x55, 0xc1, 0x39, 0x22, 0xfa, 0x34, 0x6e,
+ 0x4f, 0xbb, 0x88, 0xa8, 0xf1, 0xd2, 0xd2, 0x0d, 0x47, 0x81, 0x3e, 0x2d,
+ 0x79, 0x3e, 0xb4, 0x76, 0x37, 0x0e, 0xf6, 0x99, 0x62, 0xad, 0x85, 0xd8,
+ 0x09, 0x79, 0x3e, 0xeb, 0x95, 0x21, 0x26, 0x52, 0x0d, 0x70, 0x2e, 0xf5,
+ 0x37, 0xdf, 0x38, 0xec, 0x8f, 0x25, 0xbb, 0x66, 0x1e, 0x55, 0x59, 0x0b,
+ 0xfb, 0xa9, 0x78, 0x31, 0x6b, 0xc6, 0x56, 0x77, 0xd6, 0x53, 0x3b, 0x96,
+ 0xaa, 0x62, 0xb4, 0x0c, 0xc4, 0x68, 0x49, 0x62, 0x2c, 0x83, 0x7e, 0x46,
+ 0x27, 0xfd, 0x60, 0xb7, 0xc3, 0xff, 0x96, 0x7e, 0x6b, 0xf2, 0x20, 0x51,
+ 0xce, 0x49, 0xd3, 0x97, 0x1e, 0xc0, 0xe3, 0x4c, 0x03, 0x3f, 0x9f, 0x3d,
+ 0xcf, 0x92, 0x5e, 0x8b, 0x08, 0x8f, 0x55, 0x0c, 0xb0, 0x9f, 0xe5, 0x07,
+ 0x5a, 0x2a, 0x46, 0x85, 0x72, 0x7d, 0x45, 0x76, 0xb5, 0x76, 0xaf, 0x46,
+ 0xa2, 0x6e, 0x1a, 0x7c, 0x96, 0x3a, 0x65, 0x42, 0x93, 0xa3, 0x24, 0xda,
+ 0xcb, 0x63, 0x52, 0x03, 0x6c, 0x0d, 0x90, 0xf1, 0xf9, 0xd8, 0x52, 0x68,
+ 0xeb, 0xd1, 0xfc, 0x74, 0x34, 0xd4, 0x3a, 0xaa, 0x62, 0xbb, 0x04, 0x12,
+ 0x5b, 0x59, 0x4a, 0x05, 0xe4, 0xc0, 0x8d, 0xa0, 0xb0, 0x9b, 0x05, 0x09,
+ 0x7f, 0x63, 0x58, 0xd7, 0x7c, 0xb3, 0xb0, 0xae, 0xf9, 0x7d, 0xc2, 0xba,
+ 0xe6, 0xbf, 0x43, 0x58, 0x77, 0x45, 0x5f, 0xe1, 0xe8, 0x01, 0x9c, 0x6d,
+ 0x7d, 0x34, 0xc6, 0x58, 0x69, 0x61, 0x9c, 0xf5, 0xc9, 0x10, 0x55, 0xa8,
+ 0x89, 0xae, 0x55, 0xe5, 0x43, 0x85, 0xc1, 0xe9, 0x7c, 0xb1, 0x2a, 0x4d,
+ 0x58, 0x6d, 0x4b, 0xab, 0xe2, 0x8b, 0x65, 0x9f, 0xca, 0x20, 0xc1, 0xcb,
+ 0x7c, 0x71, 0xc5, 0xcd, 0x0c, 0x48, 0x54, 0x42, 0x6f, 0x92, 0x8a, 0x85,
+ 0x15, 0x50, 0x22, 0xda, 0xdb, 0xa3, 0xaf, 0x40, 0x6a, 0xbf, 0x70, 0x81,
+ 0x05, 0x2c, 0x33, 0x08, 0xc3, 0xbe, 0xa1, 0xb2, 0xa2, 0x0c, 0x6d, 0x4a,
+ 0x6e, 0x06, 0x5b, 0xbc, 0x19, 0xb4, 0x3d, 0xc9, 0x94, 0x3a, 0x0c, 0x3f,
+ 0x42, 0xb7, 0x86, 0x6e, 0x7b, 0xb5, 0x2d, 0xa5, 0x46, 0x27, 0x16, 0x7f,
+ 0x84, 0x46, 0xd2, 0xac, 0xee, 0x47, 0xed, 0x4b, 0x29, 0xa1, 0xb1, 0x2d,
+ 0xbc, 0x60, 0xcb, 0xb5, 0x56, 0xbd, 0x9b, 0x22, 0x0d, 0x4e, 0x6e, 0x02,
+ 0xf4, 0x65, 0x3c, 0xe5, 0xc4, 0xcd, 0x32, 0x62, 0x89, 0xb2, 0x65, 0xb0,
+ 0xb0, 0xbb, 0x02, 0xc6, 0x7d, 0x41, 0x93, 0x7a, 0xd7, 0xdb, 0xbb, 0x9c,
+ 0x96, 0x31, 0x30, 0xf0, 0xb8, 0x63, 0xf1, 0x99, 0xdf, 0x46, 0x32, 0x08,
+ 0x65, 0xc2, 0x31, 0x33, 0x88, 0xa4, 0x31, 0x73, 0x29, 0x65, 0x66, 0x11,
+ 0x85, 0x0f, 0xcb, 0xce, 0xc2, 0x21, 0xb5, 0x1e, 0x1d, 0xad, 0x71, 0x10,
+ 0x62, 0x3d, 0x6e, 0xff, 0xc3, 0xd2, 0xaa, 0x2f, 0x1c, 0xfb, 0xe0, 0x69,
+ 0x8d, 0x20, 0x25, 0xd5, 0xea, 0xdb, 0x59, 0x3e, 0x84, 0x56, 0x8b, 0x44,
+ 0x3b, 0x1a, 0x5d, 0xd3, 0x89, 0xd4, 0xc8, 0x45, 0xbf, 0xbb, 0x46, 0x85,
+ 0x56, 0xaa, 0xcf, 0x54, 0x13, 0xc2, 0x66, 0xb3, 0xc9, 0x3f, 0x92, 0xd3,
+ 0x56, 0x10, 0xb2, 0x4f, 0x10, 0x14, 0xc1, 0xf9, 0xf2, 0xf3, 0xb2, 0xac,
+ 0xcd, 0xe2, 0x81, 0xfc, 0x03, 0xcf, 0x6f, 0xa3, 0x7d, 0xe0, 0x47, 0x3e,
+ 0x89, 0xd0, 0xb0, 0x31, 0x9d, 0xee, 0x9d, 0x5f, 0xf8, 0xc0, 0xc8, 0x32,
+ 0x42, 0xe5, 0x43, 0xbf, 0x30, 0x25, 0x2c, 0xbe, 0xf3, 0x03, 0x3b, 0xf7,
+ 0x78, 0xe7, 0x96, 0x5a, 0x5b, 0x8c, 0x8e, 0x53, 0xaa, 0xd4, 0x15, 0xa6,
+ 0x84, 0x79, 0x6c, 0x48, 0x45, 0xef, 0x82, 0xa9, 0x58, 0x2a, 0x1d, 0x91,
+ 0xde, 0x0f, 0x94, 0x6b, 0x4a, 0xa9, 0x52, 0x52, 0x98, 0x12, 0x16, 0xb1,
+ 0xe1, 0x80, 0xde, 0xad, 0xb6, 0x20, 0x78, 0x4b, 0xad, 0x22, 0x46, 0xc7,
+ 0x29, 0x95, 0x3a, 0x4a, 0x09, 0x8b, 0xa0, 0x70, 0x60, 0xef, 0x82, 0xe4,
+ 0x6d, 0xb5, 0x8e, 0x18, 0x1d, 0xa7, 0x54, 0xea, 0x28, 0x25, 0xcc, 0x28,
+ 0x95, 0x3a, 0xa2, 0xbd, 0x8b, 0x8b, 0x54, 0xad, 0x23, 0x46, 0xc7, 0x79,
+ 0x2a, 0x75, 0x94, 0x12, 0x66, 0x94, 0x4a, 0x1d, 0x91, 0xaf, 0x80, 0x48,
+ 0x94, 0x25, 0x3a, 0xea, 0xc8, 0x33, 0x52, 0xeb, 0x28, 0x21, 0xcc, 0x28,
+ 0x95, 0x3a, 0x4a, 0x09, 0x33, 0xbf, 0x53, 0xa6, 0xa3, 0x8e, 0x38, 0xf7,
+ 0x86, 0x5a, 0x47, 0x1d, 0x59, 0xf2, 0x0d, 0xa5, 0x8e, 0x52, 0xc2, 0x8c,
+ 0xb2, 0x4c, 0x47, 0x9d, 0x83, 0x60, 0x21, 0x0d, 0xb5, 0x8e, 0x3a, 0xb2,
+ 0xe4, 0x1b, 0x4a, 0x1d, 0xa5, 0x84, 0x19, 0xa5, 0x4a, 0x47, 0xc4, 0xdb,
+ 0xbf, 0x74, 0xc4, 0xb9, 0x97, 0xe8, 0xe8, 0x45, 0xd2, 0x66, 0x83, 0x5f,
+ 0x0c, 0xa9, 0x77, 0xd8, 0xec, 0xab, 0x37, 0x3b, 0xb0, 0xbc, 0xc3, 0x32,
+ 0x08, 0x4a, 0xaf, 0x7c, 0x1c, 0xd3, 0xf8, 0xce, 0x56, 0xfa, 0x4e, 0xb2,
+ 0x50, 0x06, 0x57, 0x87, 0x77, 0x47, 0xb9, 0x19, 0x38, 0xa5, 0x8e, 0xa7,
+ 0xdd, 0x0d, 0x3b, 0xa2, 0xdb, 0x52, 0xd1, 0x68, 0xf7, 0x40, 0xba, 0x60,
+ 0x35, 0xbb, 0x9a, 0xb4, 0x4e, 0x55, 0x8a, 0xa5, 0x7e, 0x47, 0xb3, 0x7f,
+ 0x51, 0xbf, 0xac, 0xd9, 0x8d, 0x24, 0x77, 0xac, 0x5a, 0x41, 0x74, 0x7b,
+ 0x29, 0x6c, 0x42, 0x65, 0xe7, 0xa4, 0x54, 0xe4, 0xf8, 0x00, 0x59, 0xd4,
+ 0x75, 0x47, 0xa3, 0xa7, 0x1e, 0x3e, 0x95, 0x29, 0xc6, 0xa1, 0x55, 0x93,
+ 0xc1, 0xb7, 0xef, 0xc0, 0x0f, 0xde, 0xa9, 0x53, 0x88, 0xf2, 0x51, 0x44,
+ 0xfe, 0xb8, 0x4c, 0x95, 0xd2, 0xc5, 0xaa, 0x47, 0xaa, 0x0c, 0xfd, 0xf3,
+ 0x8d, 0x8e, 0x54, 0xf7, 0xa7, 0x69, 0x56, 0xe8, 0xc7, 0x1c, 0xa7, 0xbe,
+ 0x01, 0xc6, 0xc2, 0x2e, 0x7c, 0xf2, 0xbc, 0xfa, 0xd9, 0xc9, 0x7e, 0x3f,
+ 0x3b, 0xfd, 0xa0, 0xb3, 0xd3, 0x8f, 0x39, 0xba, 0xac, 0x49, 0x0d, 0xcb,
+ 0x22, 0x2b, 0xf2, 0x15, 0x5d, 0x92, 0x4a, 0x43, 0xb9, 0x26, 0xb6, 0xde,
+ 0x35, 0xe4, 0x5a, 0x58, 0x6d, 0xad, 0x76, 0x68, 0x0b, 0xbb, 0xb9, 0x88,
+ 0xd8, 0xda, 0xea, 0x14, 0x15, 0xf2, 0x77, 0xdd, 0x13, 0x99, 0x2c, 0xcc,
+ 0xe5, 0x6c, 0x2c, 0x5e, 0x48, 0xaa, 0x86, 0x7b, 0x09, 0x3a, 0xf1, 0x1e,
+ 0xfc, 0x20, 0xa4, 0x40, 0x81, 0xe5, 0xbb, 0x4a, 0xca, 0x97, 0x7d, 0xf4,
+ 0x84, 0xa4, 0x6c, 0x23, 0xb4, 0x74, 0xe3, 0x67, 0xf2, 0xc8, 0x20, 0xef,
+ 0x09, 0x51, 0xe2, 0x55, 0x49, 0x82, 0x7a, 0xe6, 0xee, 0x9d, 0x90, 0x7e,
+ 0x57, 0x23, 0x21, 0x61, 0x7f, 0xfb, 0x19, 0x2e, 0xd0, 0xe2, 0x9c, 0xae,
+ 0x06, 0xba, 0x3d, 0xa1, 0x90, 0x2c, 0xfb, 0xc6, 0xfd, 0xe1, 0xad, 0xaf,
+ 0x3f, 0xee, 0xb3, 0x29, 0xf7, 0x29, 0x68, 0xd6, 0x1d, 0x51, 0x51, 0x54,
+ 0xb6, 0x4b, 0x30, 0xec, 0x42, 0xa3, 0x12, 0x2e, 0x40, 0x74, 0x62, 0x37,
+ 0xc0, 0x46, 0xc3, 0x1d, 0x46, 0x56, 0x6b, 0x01, 0x24, 0xe3, 0xd9, 0x88,
+ 0x4f, 0xd3, 0xef, 0x49, 0x02, 0x2c, 0xab, 0x3c, 0xf3, 0xaa, 0xaa, 0x82,
+ 0xc6, 0x5b, 0xdf, 0x40, 0xd1, 0xe1, 0x53, 0x80, 0xcb, 0x08, 0x25, 0x05,
+ 0x21, 0xb2, 0xf8, 0x89, 0x39, 0x0f, 0x5f, 0xf6, 0xf4, 0x45, 0x6f, 0xfc,
+ 0xe8, 0x45, 0xe4, 0xd1, 0xa8, 0x83, 0x5a, 0xec, 0x51, 0x2e, 0x9d, 0xca,
+ 0xff, 0x28, 0x3e, 0xe7, 0xc3, 0x59, 0x23, 0xdf, 0x7d, 0x89, 0x91, 0xdd,
+ 0x94, 0xbe, 0x79, 0x24, 0x14, 0xc9, 0x1a, 0xa0, 0x10, 0x88, 0xdc, 0xb6,
+ 0xc1, 0x33, 0x6a, 0xd8, 0x12, 0xb3, 0x6a, 0x68, 0x04, 0x72, 0x80, 0xe5,
+ 0xff, 0xb0, 0x00, 0xab, 0xf9, 0xbd, 0xb5, 0x67, 0x7f, 0x17, 0xed, 0xed,
+ 0xbc, 0x75, 0x8d, 0x94, 0x21, 0xbf, 0x9d, 0x06, 0x09, 0xc7, 0xbf, 0xad,
+ 0x16, 0x5b, 0xdf, 0x5b, 0x8b, 0x8d, 0x32, 0x17, 0xb8, 0x17, 0x00, 0x30,
+ 0x85, 0xfb, 0xd5, 0x86, 0x76, 0x9a, 0x24, 0xd4, 0xda, 0x4b, 0x41, 0xb2,
+ 0x2e, 0x28, 0x56, 0x28, 0x95, 0xd8, 0x14, 0x0b, 0xbd, 0x52, 0x3e, 0xac,
+ 0x16, 0xfe, 0x2b, 0xda, 0xa4, 0x85, 0x92, 0xa0, 0x49, 0xb1, 0x28, 0x97,
+ 0x7e, 0x76, 0x93, 0x7d, 0xd5, 0x0a, 0xe1, 0x46, 0xd2, 0xfd, 0x16, 0x1e,
+ 0xd0, 0x44, 0x55, 0x53, 0x6b, 0x70, 0xc1, 0x25, 0x5a, 0x18, 0xb5, 0xd7,
+ 0xa4, 0xf0, 0xd9, 0xe4, 0x92, 0xeb, 0xaf, 0x63, 0x5e, 0x6d, 0x03, 0xf3,
+ 0x6a, 0x4a, 0xe6, 0xa5, 0x41, 0x87, 0x66, 0x90, 0xdb, 0x05, 0x80, 0x6b,
+ 0xa1, 0x08, 0xcb, 0xe0, 0x39, 0x1f, 0x65, 0x02, 0xe2, 0x5c, 0x5b, 0x25,
+ 0x25, 0x17, 0x9a, 0x69, 0x64, 0x58, 0x6f, 0xf4, 0x22, 0x79, 0x45, 0x40,
+ 0x55, 0xf9, 0x52, 0x11, 0xd1, 0xaf, 0xe5, 0x0f, 0x66, 0xab, 0x96, 0x06,
+ 0x6f, 0x91, 0x98, 0xda, 0x38, 0x5c, 0x43, 0xdf, 0x7a, 0xe6, 0xf5, 0x80,
+ 0xe5, 0x37, 0xdb, 0x74, 0x40, 0x43, 0xc5, 0x33, 0xad, 0x96, 0x41, 0x31,
+ 0x02, 0xfb, 0x2c, 0x1f, 0xfb, 0x64, 0x32, 0x37, 0xf1, 0xb9, 0x88, 0xa5,
+ 0x5d, 0xad, 0xa0, 0x4e, 0x64, 0x98, 0x55, 0xcd, 0xaa, 0xf8, 0x19, 0x95,
+ 0xd6, 0x81, 0x23, 0x9c, 0x95, 0x40, 0x7e, 0x1b, 0xac, 0x43, 0x70, 0x94,
+ 0x65, 0x3c, 0x8b, 0x65, 0xed, 0x65, 0xca, 0xcd, 0x49, 0x52, 0xfd, 0x79,
+ 0x3d, 0x83, 0x57, 0xf6, 0x19, 0x53, 0x59, 0x9a, 0x6a, 0x9e, 0x55, 0x4d,
+ 0xbc, 0x51, 0x66, 0xe2, 0x19, 0xb2, 0x3f, 0x3e, 0x04, 0xc4, 0xb4, 0xa0,
+ 0xb1, 0xc4, 0xba, 0xf3, 0x28, 0x32, 0xdc, 0x99, 0x19, 0xe1, 0x07, 0xcb,
+ 0x7d, 0xa9, 0x92, 0x4a, 0xda, 0xe9, 0x11, 0xe7, 0x8f, 0xbb, 0x49, 0x1f,
+ 0x9f, 0x30, 0x47, 0x9f, 0x9f, 0x74, 0x4b, 0xe5, 0x9e, 0xb2, 0x8f, 0x1a,
+ 0x24, 0x3a, 0xce, 0x37, 0x6c, 0x7f, 0x28, 0x8c, 0xf6, 0x2a, 0x29, 0xc9,
+ 0x63, 0x1f, 0x01, 0x99, 0x29, 0x5e, 0xfd, 0xdc, 0x46, 0xae, 0xfc, 0x1d,
+ 0x8f, 0xec, 0xbb, 0x08, 0x26, 0x6f, 0xe0, 0x00, 0xc3, 0x20, 0xa5, 0x7b,
+ 0xc1, 0x06, 0x4f, 0x93, 0xc0, 0xb4, 0xb0, 0xe4, 0x09, 0x36, 0x0e, 0x35,
+ 0x77, 0x95, 0x13, 0x04, 0x4f, 0xa5, 0x7f, 0xe3, 0x32, 0xd1, 0x92, 0x6f,
+ 0x40, 0xe7, 0x90, 0x90, 0x4a, 0x3e, 0xb0, 0xc9, 0x5f, 0x70, 0x99, 0x7e,
+ 0xad, 0x59, 0xef, 0xb1, 0x93, 0x3a, 0xb4, 0x95, 0xf2, 0x6b, 0xcd, 0xfa,
+ 0x67, 0x8a, 0x09, 0x4c, 0xce, 0xd5, 0x00, 0x91, 0x2f, 0x52, 0x93, 0x4f,
+ 0x96, 0xf2, 0x77, 0x25, 0x6c, 0x94, 0xc0, 0x66, 0xa2, 0x7f, 0x50, 0x99,
+ 0x01, 0xa8, 0x55, 0x63, 0xac, 0xaf, 0x01, 0x28, 0x8e, 0x37, 0x91, 0xa1,
+ 0x8a, 0xa5, 0xbe, 0x44, 0x0c, 0x1a, 0xab, 0x86, 0xa9, 0xbe, 0x44, 0x30,
+ 0xfb, 0x92, 0x33, 0x63, 0x81, 0xd6, 0x01, 0xf6, 0x04, 0xc9, 0x77, 0x10,
+ 0x80, 0xe5, 0xa9, 0x7f, 0x44, 0x3d, 0x27, 0xdf, 0x91, 0x09, 0x36, 0x1b,
+ 0xe4, 0xd2, 0xf2, 0xba, 0x3f, 0x10, 0xe6, 0x03, 0xbc, 0x99, 0xd5, 0x3f,
+ 0xe8, 0xee, 0xe7, 0xd5, 0x80, 0x1e, 0xf1, 0x01, 0x29, 0x4a, 0x33, 0x32,
+ 0x9e, 0x4f, 0x51, 0x92, 0x08, 0x4a, 0x3c, 0x4b, 0x1a, 0x92, 0x6f, 0x9e,
+ 0xe6, 0xc2, 0xe5, 0xe2, 0x6e, 0xa3, 0xd3, 0xd7, 0x37, 0xf7, 0x64, 0x82,
+ 0xff, 0x23, 0x4b, 0xfc, 0x10, 0x61, 0x89, 0x63, 0x7d, 0x6e, 0xd9, 0xbb,
+ 0x1a, 0xb4, 0x62, 0x0f, 0xa7, 0x3e, 0x93, 0xe5, 0x87, 0xbd, 0xb9, 0x47,
+ 0xdd, 0x40, 0x1c, 0xb0, 0xdf, 0x07, 0xde, 0x1a, 0x7d, 0xb6, 0x81, 0x05,
+ 0x54, 0x58, 0x84, 0xc5, 0x8e, 0x79, 0x69, 0x58, 0xf6, 0x32, 0x16, 0x77,
+ 0x91, 0xfb, 0x94, 0x37, 0x10, 0x54, 0x1c, 0xf3, 0x01, 0x5a, 0x41, 0x52,
+ 0x91, 0x9b, 0x4c, 0xd7, 0x83, 0x3e, 0x7b, 0x7c, 0xd4, 0x57, 0x9b, 0x99,
+ 0x46, 0xf4, 0xcc, 0x2b, 0x7f, 0x88, 0xd7, 0x78, 0xd8, 0x06, 0x5f, 0xe5,
+ 0x95, 0x39, 0x9b, 0x0e, 0xd9, 0xe0, 0x2b, 0xd0, 0x02, 0xc2, 0x6b, 0x72,
+ 0x30, 0x11, 0xc0, 0xe7, 0xe8, 0xf6, 0x50, 0xfe, 0xfd, 0xf5, 0xb2, 0x48,
+ 0xc4, 0xe4, 0x7b, 0xf2, 0x04, 0xe3, 0x2a, 0xb5, 0xf6, 0x73, 0x13, 0xa6,
+ 0x76, 0xed, 0xd9, 0x21, 0x9f, 0x98, 0x26, 0x4e, 0x0b, 0x0e, 0xd8, 0xcf,
+ 0xf5, 0x7e, 0xa5, 0xa9, 0x67, 0xa2, 0xf7, 0x29, 0x93, 0x30, 0xd8, 0x3b,
+ 0x0f, 0xc4, 0x92, 0x6e, 0x9c, 0x1b, 0x68, 0xf5, 0xea, 0x1d, 0x45, 0x5e,
+ 0x42, 0xd5, 0xbe, 0x85, 0x5e, 0x26, 0xa6, 0x4a, 0x5f, 0x76, 0xb7, 0xcd,
+ 0x3e, 0x77, 0x5d, 0xf9, 0xbb, 0xed, 0xc2, 0x67, 0xd4, 0xcb, 0xf8, 0xbe,
+ 0xe5, 0x97, 0xc8, 0xcb, 0xf9, 0x56, 0xfb, 0xce, 0x78, 0x09, 0x6e, 0x5c,
+ 0x69, 0x3c, 0xf4, 0x8d, 0x9f, 0x1e, 0x37, 0xba, 0x05, 0xbd, 0x73, 0x57,
+ 0x8a, 0xc7, 0x39, 0x06, 0x6f, 0x92, 0xc5, 0x0b, 0x1d, 0x9e, 0x05, 0x6a,
+ 0x18, 0xbc, 0x7b, 0xe2, 0x53, 0x7b, 0x24, 0x0f, 0x3a, 0x59, 0xfe, 0x5c,
+ 0x17, 0x86, 0xf2, 0x46, 0x32, 0xbd, 0xea, 0xe2, 0x4b, 0xa0, 0xf7, 0x44,
+ 0xf8, 0x52, 0xd5, 0x45, 0x24, 0xa7, 0x5f, 0xbe, 0xc6, 0xae, 0xf6, 0x2a,
+ 0x92, 0x93, 0x47, 0x8f, 0x41, 0x18, 0x6b, 0xeb, 0x21, 0x38, 0x3d, 0xcd,
+ 0xdf, 0x68, 0x2a, 0x23, 0x24, 0x6a, 0x7b, 0x61, 0xd5, 0xd9, 0xff, 0x81,
+ 0xef, 0xf0, 0x2a, 0xcb, 0x02, 0x66, 0xde, 0x03, 0x39, 0x7b, 0x90, 0xd0,
+ 0x39, 0x7e, 0x85, 0xf0, 0x5f, 0x1b, 0x7a, 0xe7, 0x74, 0x43, 0xee, 0x3b,
+ 0xc9, 0xbb, 0xa4, 0x52, 0x3e, 0x06, 0x38, 0x2a, 0x24, 0x36, 0x74, 0xc8,
+ 0xe6, 0x1e, 0x9f, 0xb2, 0xfc, 0x55, 0x81, 0x49, 0x21, 0x4a, 0x29, 0x8f,
+ 0x16, 0xc8, 0x96, 0x92, 0x85, 0x9c, 0x85, 0xb8, 0x0c, 0x48, 0xbc, 0x55,
+ 0x01, 0xc3, 0xcc, 0xb1, 0xbf, 0x83, 0x22, 0x5a, 0x81, 0xb5, 0xd1, 0x2d,
+ 0x68, 0x12, 0x28, 0x0f, 0xbc, 0x27, 0x2f, 0x02, 0xd1, 0x0d, 0x0c, 0xf6,
+ 0x6f, 0x96, 0x07, 0x2d, 0xdd, 0x49, 0x8d, 0x20, 0x32, 0xc9, 0x9e, 0xff,
+ 0x90, 0x7c, 0xed, 0x30, 0xb9, 0xe9, 0xa7, 0x88, 0x54, 0x91, 0x1b, 0x1f,
+ 0xf6, 0x90, 0x9f, 0xe1, 0x09, 0x49, 0xc1, 0x43, 0x68, 0x4a, 0xed, 0x0b,
+ 0x2c, 0xea, 0x45, 0x67, 0xc1, 0x6b, 0xd0, 0x21, 0x1e, 0x45, 0x26, 0x78,
+ 0xa5, 0x79, 0x7f, 0x00, 0x87, 0x2a, 0xfd, 0x49, 0x85, 0x3c, 0xc5, 0x3f,
+ 0xae, 0x74, 0x1a, 0x07, 0x47, 0x50, 0x52, 0x40, 0x7f, 0x90, 0x8b, 0x24,
+ 0x6d, 0x71, 0x14, 0x5f, 0x8e, 0xd8, 0xa4, 0x7e, 0xdd, 0x00, 0xfa, 0x64,
+ 0xab, 0xa1, 0x77, 0xad, 0x2c, 0x9c, 0xc3, 0x6a, 0x64, 0xe7, 0x65, 0x33,
+ 0x48, 0xf9, 0x5c, 0x23, 0x43, 0x58, 0xf9, 0x5c, 0xab, 0x23, 0xa0, 0xe5,
+ 0xc9, 0xde, 0x04, 0x5c, 0x24, 0x80, 0x59, 0xa5, 0x0a, 0xc6, 0x4c, 0xd7,
+ 0xed, 0xdb, 0xd8, 0xb2, 0x55, 0xcd, 0x0e, 0xf5, 0x47, 0x2d, 0x95, 0x1d,
+ 0xea, 0x7d, 0xa8, 0xc2, 0x0e, 0xf5, 0x4e, 0xdc, 0x3f, 0xec, 0xd2, 0x76,
+ 0x2d, 0xc0, 0x7e, 0xd5, 0x37, 0x11, 0xef, 0xf6, 0xfb, 0x23, 0xec, 0x77,
+ 0xee, 0xbe, 0xc4, 0x07, 0x7c, 0xba, 0x4e, 0xa2, 0x4a, 0x72, 0xa5, 0x87,
+ 0x7f, 0x4f, 0xee, 0x23, 0xea, 0xa8, 0x7f, 0x71, 0xb9, 0xb8, 0x1e, 0x0f,
+ 0x86, 0xbf, 0x5a, 0x60, 0x5e, 0xcc, 0x0d, 0x6b, 0xec, 0x80, 0x4e, 0x5e,
+ 0x18, 0x28, 0x11, 0xe2, 0x1a, 0x85, 0x54, 0x16, 0x10, 0xcb, 0xb9, 0x84,
+ 0x4d, 0x02, 0xbf, 0x44, 0x6e, 0xe7, 0x42, 0x54, 0x04, 0x49, 0x69, 0x02,
+ 0x56, 0xa7, 0x0c, 0x0e, 0xf3, 0x0c, 0xe3, 0x72, 0x86, 0x8d, 0xca, 0x0c,
+ 0xa3, 0x52, 0x86, 0x1c, 0x2c, 0x50, 0x7d, 0x63, 0x87, 0x3e, 0x87, 0xce,
+ 0x5e, 0x7a, 0x1f, 0x09, 0x81, 0xf8, 0xaa, 0x6d, 0x95, 0x36, 0x27, 0x95,
+ 0x51, 0x68, 0x3a, 0x9c, 0x0c, 0xbb, 0x73, 0xad, 0x9d, 0xf2, 0x06, 0xfd,
+ 0x51, 0xf7, 0x7a, 0xa2, 0xb5, 0x50, 0x4e, 0x7f, 0x7d, 0x35, 0x9d, 0x8e,
+ 0xa7, 0xda, 0x98, 0x97, 0x37, 0xe8, 0x8d, 0xa7, 0x83, 0xe1, 0x54, 0x1b,
+ 0xf4, 0xe6, 0x7b, 0x58, 0x8c, 0x6f, 0xfa, 0xc3, 0x62, 0xec, 0x5b, 0xb2,
+ 0x06, 0xe6, 0xa5, 0x52, 0x34, 0x58, 0xf1, 0xef, 0x52, 0x24, 0x91, 0x5d,
+ 0xa9, 0x14, 0x0d, 0xee, 0x08, 0xde, 0xa5, 0x48, 0x22, 0x78, 0xf7, 0x39,
+ 0xa9, 0x98, 0x4b, 0xab, 0x25, 0xe0, 0xb7, 0xca, 0xf2, 0x36, 0xad, 0xdd,
+ 0xed, 0xc9, 0xb1, 0x3c, 0x78, 0x2e, 0xb2, 0xd1, 0xef, 0xd9, 0xfd, 0xc0,
+ 0xdd, 0x6c, 0xbc, 0x95, 0x47, 0x92, 0x33, 0xdb, 0x20, 0xf8, 0xfd, 0xb0,
+ 0x57, 0x8c, 0xc8, 0xfc, 0x93, 0x94, 0xdd, 0x43, 0x1c, 0x8c, 0xc6, 0x83,
+ 0xff, 0x26, 0x49, 0x5a, 0xb4, 0xf4, 0xf0, 0xa1, 0x2a, 0xf0, 0xb7, 0x00,
+ 0x42, 0x84, 0x3e, 0x7d, 0xd5, 0x23, 0x8d, 0xa3, 0xc3, 0x7e, 0xbf, 0xf5,
+ 0xa0, 0x90, 0x44, 0x9f, 0xa8, 0xbf, 0x74, 0xc8, 0x83, 0x42, 0x54, 0xfa,
+ 0x45, 0x5b, 0x7d, 0x64, 0x7e, 0xe1, 0xc6, 0xab, 0xc7, 0x72, 0x26, 0x85,
+ 0x20, 0xbd, 0xc8, 0x65, 0xc4, 0xa4, 0x0b, 0xe7, 0x24, 0xe4, 0x4c, 0x34,
+ 0x68, 0x7c, 0xe9, 0x2e, 0x6c, 0x0f, 0xb4, 0x2b, 0x87, 0x93, 0xa6, 0xe8,
+ 0x1c, 0x9a, 0xa5, 0x93, 0x36, 0x68, 0x0c, 0xb4, 0x8b, 0x26, 0x25, 0xed,
+ 0x1f, 0x96, 0x04, 0xcf, 0x79, 0xaf, 0x5d, 0x34, 0x69, 0x03, 0x6b, 0xa0,
+ 0xcd, 0x58, 0x70, 0x52, 0x06, 0x89, 0xa3, 0x4d, 0x59, 0x64, 0x83, 0x79,
+ 0xf4, 0xb0, 0x9c, 0x69, 0xe9, 0x4d, 0x31, 0x71, 0x21, 0x5d, 0xf6, 0x1d,
+ 0x11, 0xe4, 0x58, 0x9a, 0x20, 0x27, 0x6d, 0x19, 0xd1, 0xa4, 0x58, 0x48,
+ 0x53, 0x61, 0x68, 0x49, 0xc0, 0x7c, 0xaa, 0x7e, 0xb3, 0xa1, 0x90, 0x93,
+ 0x88, 0xc9, 0xbd, 0xd0, 0x33, 0xe1, 0xd7, 0x20, 0xa7, 0xfe, 0x38, 0xe9,
+ 0x09, 0xbe, 0x52, 0xd4, 0x1f, 0x14, 0x45, 0x86, 0xb6, 0x9e, 0xa1, 0xfe,
+ 0x00, 0x29, 0x32, 0xb4, 0xf4, 0x0c, 0xf5, 0x37, 0x96, 0x22, 0xc3, 0xba,
+ 0x9e, 0x21, 0x08, 0xab, 0xaa, 0x55, 0xb1, 0xee, 0xf3, 0x36, 0x23, 0xf7,
+ 0x09, 0x07, 0x72, 0x75, 0x52, 0x06, 0x46, 0x4e, 0x18, 0xab, 0x78, 0xfb,
+ 0x0a, 0x7f, 0xef, 0xd3, 0x2e, 0xa2, 0x80, 0x28, 0x99, 0x91, 0x8f, 0x86,
+ 0xaa, 0xe2, 0x5f, 0xbb, 0xa5, 0x17, 0x75, 0xca, 0xe7, 0xb6, 0xb7, 0x98,
+ 0x74, 0x81, 0x4a, 0x69, 0xdb, 0xa4, 0x62, 0x98, 0x42, 0x4c, 0x51, 0x4e,
+ 0x10, 0x03, 0x7d, 0x02, 0xea, 0x82, 0xe4, 0x29, 0x11, 0x71, 0xcb, 0x40,
+ 0x7a, 0xc8, 0xe6, 0x49, 0x8e, 0xa8, 0xa3, 0x40, 0x3a, 0xbc, 0x76, 0x5e,
+ 0x58, 0xff, 0xb5, 0x60, 0x53, 0x1b, 0xb8, 0xb1, 0xe3, 0x6d, 0x21, 0x4e,
+ 0xf5, 0x8e, 0x9e, 0x93, 0xe7, 0x1b, 0x70, 0xb2, 0xce, 0xed, 0x3c, 0x27,
+ 0x40, 0x2a, 0x41, 0xc8, 0x92, 0xf1, 0xa4, 0xce, 0x8e, 0x3e, 0x88, 0x86,
+ 0x38, 0x9d, 0xe9, 0xd1, 0x67, 0x24, 0x46, 0x4b, 0xa1, 0x82, 0x5a, 0x42,
+ 0xbc, 0xd4, 0x8b, 0x59, 0xe2, 0xf3, 0x40, 0x50, 0xfd, 0x21, 0x46, 0x06,
+ 0xe5, 0x55, 0x12, 0xa3, 0x10, 0x86, 0xd7, 0x37, 0x46, 0x68, 0x98, 0xd1,
+ 0xeb, 0xdc, 0x39, 0xa2, 0xb0, 0x52, 0xab, 0x60, 0x47, 0x0a, 0x7e, 0x42,
+ 0xf7, 0x5f, 0x07, 0xf0, 0xaa, 0xda, 0x32, 0x38, 0xf6, 0x53, 0xae, 0xc3,
+ 0x79, 0x1f, 0xc2, 0x7e, 0xa0, 0xa5, 0x33, 0xea, 0x10, 0x08, 0xe6, 0x95,
+ 0x3a, 0x8b, 0xaf, 0xa8, 0xfb, 0xe2, 0x41, 0x9f, 0x93, 0xb4, 0x0d, 0xee,
+ 0xf6, 0x45, 0x46, 0x5f, 0x94, 0x8c, 0x0c, 0xe0, 0x7f, 0x66, 0xcf, 0xde,
+ 0x1f, 0x7f, 0x90, 0x78, 0x08, 0x88, 0x97, 0x39, 0x72, 0xbd, 0x3a, 0x60,
+ 0x4e, 0xdb, 0x13, 0x0c, 0x68, 0xed, 0x96, 0x9f, 0x12, 0x8f, 0x85, 0xef,
+ 0xe3, 0xaa, 0x76, 0xfb, 0x94, 0x76, 0x2a, 0xdc, 0x39, 0xa8, 0xb6, 0xfb,
+ 0x94, 0xf6, 0x92, 0x59, 0xa1, 0x66, 0xaf, 0x4f, 0xa9, 0x7b, 0xd4, 0xf6,
+ 0x35, 0xbb, 0x7d, 0x26, 0x1f, 0xb6, 0xe2, 0x2a, 0x3c, 0x97, 0xca, 0xba,
+ 0x01, 0x24, 0x6b, 0x19, 0x14, 0xc9, 0xa5, 0x73, 0x82, 0x9a, 0x17, 0xde,
+ 0xcd, 0x29, 0x9b, 0x4f, 0xa1, 0xe6, 0x06, 0x25, 0x8a, 0x0c, 0x4a, 0xb8,
+ 0x80, 0x61, 0x0f, 0x1c, 0xaa, 0xb2, 0x53, 0x00, 0x04, 0x78, 0x0f, 0xe4,
+ 0xb4, 0xa6, 0x97, 0x3d, 0xc0, 0x56, 0xeb, 0x06, 0x30, 0x2a, 0xe9, 0xd6,
+ 0x28, 0xde, 0x9f, 0x08, 0xf9, 0xb4, 0xba, 0x01, 0xf6, 0x18, 0x2b, 0x7c,
+ 0xa6, 0x93, 0xcb, 0x06, 0x21, 0xc0, 0x86, 0xeb, 0xbd, 0xa7, 0x58, 0x39,
+ 0x0d, 0x70, 0x30, 0xf8, 0x44, 0xa4, 0x00, 0x08, 0x04, 0x30, 0x68, 0x19,
+ 0x38, 0x3a, 0x86, 0xc4, 0x98, 0x54, 0x73, 0x9f, 0xb4, 0x9b, 0x35, 0x72,
+ 0xd3, 0x86, 0xf7, 0x04, 0x7a, 0x53, 0xc5, 0x5f, 0x4c, 0x95, 0x21, 0x23,
+ 0xab, 0x85, 0x4b, 0x63, 0xc2, 0x3d, 0x29, 0x15, 0xe3, 0x97, 0x34, 0x95,
+ 0x2f, 0x44, 0x09, 0x98, 0x2e, 0x3e, 0xb0, 0xb1, 0xc3, 0xe3, 0x85, 0xb7,
+ 0x15, 0x79, 0x15, 0x03, 0x2d, 0xa5, 0xa8, 0xae, 0x9d, 0x07, 0x25, 0x1f,
+ 0x15, 0x6e, 0x1c, 0x10, 0x5f, 0xb9, 0xfe, 0x61, 0x97, 0x9b, 0x25, 0x45,
+ 0xb4, 0x64, 0xcf, 0x10, 0xb3, 0xea, 0x6d, 0x28, 0x66, 0x17, 0xe8, 0xd0,
+ 0x14, 0x80, 0x52, 0x31, 0x68, 0x84, 0x66, 0x37, 0xe3, 0xe9, 0x35, 0x80,
+ 0x3d, 0xa6, 0x6d, 0x7a, 0x09, 0xbd, 0x5f, 0x36, 0x68, 0x95, 0xeb, 0xb1,
+ 0x51, 0xa1, 0x6d, 0xaf, 0xdb, 0x01, 0x52, 0x05, 0x46, 0xed, 0x72, 0xbd,
+ 0xb6, 0xaa, 0xb4, 0x16, 0xe1, 0x57, 0xda, 0x95, 0xba, 0xe5, 0x8f, 0x11,
+ 0xcf, 0x2a, 0x75, 0xb8, 0xe8, 0xf2, 0xef, 0xc4, 0x77, 0xaa, 0xb4, 0xb4,
+ 0xea, 0x8b, 0x2e, 0x7f, 0x9e, 0x7e, 0x6e, 0xde, 0x54, 0x78, 0x0e, 0x69,
+ 0x55, 0xb0, 0x1f, 0xab, 0x9d, 0x37, 0xa0, 0x2a, 0x16, 0x24, 0x76, 0x5a,
+ 0xc5, 0x86, 0x8a, 0xbd, 0x56, 0xb3, 0x22, 0xb1, 0xdf, 0x6a, 0x76, 0x54,
+ 0xec, 0xb9, 0x82, 0x25, 0x59, 0xed, 0x0b, 0xde, 0xae, 0x8a, 0x21, 0x49,
+ 0x0d, 0x2b, 0x59, 0x52, 0x57, 0x6a, 0x5a, 0xc1, 0x94, 0x2c, 0xeb, 0x62,
+ 0x71, 0x49, 0xfe, 0x81, 0x4d, 0x4a, 0xe0, 0x50, 0xc1, 0xa2, 0x2e, 0x7b,
+ 0xe7, 0x8b, 0x21, 0x5f, 0x38, 0xb6, 0xb9, 0x55, 0x0d, 0x86, 0x93, 0xf9,
+ 0x27, 0xd4, 0x1f, 0x5f, 0x4f, 0xc4, 0x47, 0xba, 0xe6, 0x86, 0xc5, 0xdb,
+ 0x0b, 0xaf, 0x81, 0x6d, 0x73, 0x03, 0xe3, 0xed, 0x1b, 0x36, 0x9f, 0xbb,
+ 0x6d, 0x6e, 0x63, 0x94, 0x81, 0xdd, 0x5c, 0x7c, 0x11, 0x5c, 0x64, 0x05,
+ 0x3b, 0x13, 0x06, 0x5d, 0xc5, 0xba, 0x78, 0x2b, 0x6e, 0x5b, 0xa4, 0x34,
+ 0x3f, 0xbd, 0x37, 0x3d, 0xf8, 0x1b, 0xba, 0x47, 0xe1, 0xe3, 0x27, 0xbd,
+ 0x71, 0x4b, 0xd3, 0x71, 0xd9, 0xc3, 0x2c, 0x5d, 0x17, 0x33, 0x61, 0x3a,
+ 0x67, 0xba, 0x2e, 0x92, 0x4f, 0xb4, 0x56, 0xee, 0x43, 0x54, 0x79, 0x47,
+ 0xd7, 0x49, 0xf2, 0xa5, 0xd2, 0xaa, 0x9d, 0x4c, 0x25, 0xbd, 0x9e, 0xeb,
+ 0x7a, 0x21, 0x31, 0xcc, 0x31, 0xbd, 0x5c, 0x8a, 0xdd, 0x70, 0xb8, 0x1f,
+ 0xa5, 0xc4, 0xec, 0xe3, 0xd4, 0x42, 0x96, 0xb9, 0xd4, 0x93, 0x90, 0xe1,
+ 0x36, 0x68, 0xdb, 0x59, 0xe0, 0xf3, 0xa3, 0x00, 0x87, 0x56, 0xc5, 0x11,
+ 0xe3, 0xc6, 0x93, 0xdb, 0x9b, 0xfe, 0xa7, 0xf9, 0xa7, 0xe9, 0xf8, 0xf6,
+ 0xf2, 0xd3, 0xa2, 0x3b, 0x9a, 0x7c, 0x12, 0xb6, 0x2e, 0x06, 0x32, 0x64,
+ 0xec, 0x71, 0x16, 0xc3, 0x6e, 0x9f, 0xb7, 0x35, 0x5f, 0x30, 0xb3, 0xab,
+ 0xcb, 0x9b, 0xe1, 0x60, 0x51, 0xe0, 0x50, 0x65, 0x93, 0xcf, 0xb7, 0x35,
+ 0x77, 0xcf, 0x69, 0xef, 0x05, 0x16, 0x67, 0x95, 0xd4, 0xd0, 0x65, 0x7a,
+ 0x90, 0x59, 0x98, 0x3b, 0xec, 0xaf, 0xfd, 0x5e, 0x7f, 0xba, 0x18, 0xdd,
+ 0x5e, 0x77, 0x79, 0x6b, 0x73, 0x67, 0xcd, 0x5a, 0x37, 0xed, 0xfa, 0xa2,
+ 0x8f, 0x35, 0x29, 0xf0, 0x68, 0xd6, 0xcd, 0x27, 0xd1, 0xeb, 0x73, 0xcd,
+ 0x37, 0xcd, 0x47, 0xde, 0x13, 0x8c, 0xaf, 0x69, 0x3e, 0xe4, 0x5e, 0xbf,
+ 0xc1, 0xa3, 0xb9, 0x0a, 0xa3, 0xec, 0xce, 0xe6, 0xfd, 0x45, 0xf3, 0x45,
+ 0xc0, 0xdd, 0x33, 0x37, 0x34, 0xda, 0xb6, 0x25, 0xb6, 0x35, 0x37, 0xb1,
+ 0xa4, 0xad, 0x10, 0x49, 0x9a, 0x9b, 0x18, 0x6d, 0xdb, 0x16, 0xdb, 0x9a,
+ 0x07, 0x01, 0x49, 0x5b, 0xee, 0x55, 0xdb, 0xe6, 0xaa, 0xa1, 0x6d, 0x3b,
+ 0x62, 0xbf, 0xe6, 0xfa, 0x49, 0xda, 0xf2, 0x7e, 0xcf, 0xcc, 0x03, 0x80,
+ 0xa4, 0x2d, 0xdf, 0x6e, 0xce, 0xcc, 0x37, 0x7f, 0xda, 0xd6, 0xaa, 0x0b,
+ 0x83, 0x3e, 0x33, 0xf7, 0x68, 0x69, 0x63, 0x61, 0xd4, 0xe6, 0xbb, 0x7e,
+ 0xda, 0x58, 0x18, 0x76, 0x45, 0xd3, 0xc2, 0x8d, 0xc5, 0xf2, 0xcf, 0x8a,
+ 0xc6, 0x65, 0xd9, 0x52, 0xeb, 0x8a, 0xe6, 0x45, 0x5a, 0xf3, 0x85, 0x78,
+ 0x76, 0x96, 0x1e, 0x55, 0xc9, 0xf1, 0xf4, 0xb7, 0x9f, 0x7e, 0x39, 0x7d,
+ 0x5a, 0xbd, 0xec, 0xb6, 0xbf, 0xfd, 0xf4, 0x7f, 0xf9, 0x0b, 0x6a, 0x39,
+
+};
diff -Nru mesa-17.2.4/src/broadcom/clif/clif_dump.c mesa-17.3.3/src/broadcom/clif/clif_dump.c
--- mesa-17.2.4/src/broadcom/clif/clif_dump.c 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/broadcom/clif/clif_dump.c 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,281 @@
+/*
+ * Copyright © 2016 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include
+#include
+#include
+#include "clif_dump.h"
+#include "util/list.h"
+#include "util/ralloc.h"
+
+#include "broadcom/cle/v3d_decoder.h"
+
+#define __gen_user_data void /* NOTE(review): these __gen_* hooks are presumably consumed by v3d_packet_v33_pack.h, included further down - confirm */
+#define __gen_address_type uint32_t /* addresses are handled as plain 32-bit values */
+#define __gen_address_offset(reloc) (*reloc) /* the offset is simply the value pointed to */
+#define __gen_emit_reloc(cl, reloc) /* expands to nothing */
+#define __gen_unpack_address(cl, s, e) (__gen_unpack_uint(cl, s, e) << (31 - (e - s))) /* shifts an (e - s + 1)-bit field up so its top bit sits at bit 31 */
+
+enum reloc_worklist_type { /* kinds of records queued for dumping after the command list */
+ reloc_gl_shader_state, /* handled by clif_dump_gl_shader_state_record() */
+};
+
+struct reloc_worklist_entry {
+ struct list_head link; /* node in clif_dump::worklist */
+ /* Which kind of record lives at addr. */
+ enum reloc_worklist_type type;
+ uint32_t addr; /* address later handed to the lookup_vaddr callback */
+ /* Extra per-type data; the member in use is selected by type. */
+ union {
+ struct {
+ uint32_t num_attrs; /* attribute records printed after the state record */
+ } shader_state;
+ };
+};
+
+struct clif_dump {
+ const struct v3d_device_info *devinfo; /* as passed to clif_dump_init() */
+ bool (*lookup_vaddr)(void *data, uint32_t addr, void **vaddr); /* a false return is reported as a failed lookup */
+ FILE *out; /* stream all dump text is written to */
+ /* Opaque data from the caller that is passed to the callbacks. */
+ void *data;
+ /* Packet and struct descriptions returned by v3d_spec_load(devinfo). */
+ struct v3d_spec *spec;
+ /* Filled while decoding packets, drained once the CL has been walked. */
+ /* List of struct reloc_worklist_entry */
+ struct list_head worklist;
+};
+
+static void
+out(struct clif_dump *clif, const char *fmt, ...)
+{
+ va_list ap;
+ /* printf-style helper: forwards the arguments to the dump stream. */
+ va_start(ap, fmt);
+ vfprintf(clif->out, fmt, ap);
+ va_end(ap);
+}
+
+#include "broadcom/cle/v3d_packet_v33_pack.h"
+
+static struct reloc_worklist_entry *
+clif_dump_add_address_to_worklist(struct clif_dump *clif,
+ enum reloc_worklist_type type,
+ uint32_t addr)
+{
+ /* Zero-initialised entry allocated with clif as its ralloc parent. */
+ struct reloc_worklist_entry *item =
+ rzalloc(clif, struct reloc_worklist_entry);
+ if (item != NULL) {
+ item->type = type;
+ item->addr = addr;
+ /* Appended at the tail; the worklist is drained from the head. */
+ list_addtail(&item->link, &clif->worklist);
+ }
+ /* NULL means the allocation failed and nothing was queued. */
+ return item;
+}
+
+struct clif_dump *
+clif_dump_init(const struct v3d_device_info *devinfo,
+ FILE *out,
+ bool (*lookup_vaddr)(void *data, uint32_t addr, void **vaddr),
+ void *data)
+{
+ /* Create a dumper writing to out; returns NULL if allocation fails. */
+ struct clif_dump *clif = rzalloc(NULL, struct clif_dump);
+ if (!clif)
+ return NULL; /* same check the worklist allocator performs */
+ clif->devinfo = devinfo;
+ clif->lookup_vaddr = lookup_vaddr;
+ clif->out = out;
+ clif->data = data; /* opaque cookie handed back to lookup_vaddr */
+ clif->spec = v3d_spec_load(devinfo);
+ list_inithead(&clif->worklist); /* starts with nothing queued */
+ return clif;
+}
+
+void
+clif_dump_destroy(struct clif_dump *clif)
+{
+ ralloc_free(clif); /* worklist entries are allocated with clif as their ralloc parent */
+}
+
+#define out_uint(_clif, field) out(_clif, " /* %s = */ %u\n", \
+ #field, values-> field); /* NOTE(review): expects a pointer named `values` in scope and already ends in ';'; no use is visible in this part of the file - confirm it is still needed */
+
+static bool
+clif_dump_packet(struct clif_dump *clif, uint32_t offset, const uint8_t *cl,
+ uint32_t *size)
+{
+ struct v3d_group *inst = v3d_spec_find_instruction(clif->spec, cl);
+ if (!inst) {
+ out(clif, "0x%08x: Unknown packet %d!\n", offset, *cl);
+ return false;
+ }
+ /* Known packet: report its length through *size, then print it. */
+ *size = v3d_group_get_length(inst);
+
+ out(clif, "%s\n", v3d_group_get_name(inst));
+ v3d_print_group(clif->out, inst, 0, cl, "");
+ /* A few opcodes need handling beyond the generic print above. */
+ switch (*cl) {
+ case V3D33_GL_SHADER_STATE_opcode: {
+ struct V3D33_GL_SHADER_STATE values;
+ V3D33_GL_SHADER_STATE_unpack(cl, &values);
+ /* Queue the referenced record for the worklist pass. */
+ struct reloc_worklist_entry *reloc =
+ clif_dump_add_address_to_worklist(clif,
+ reloc_gl_shader_state,
+ values.address);
+ if (reloc) {
+ reloc->shader_state.num_attrs =
+ values.number_of_attribute_arrays;
+ }
+ return true;
+ }
+
+ case V3D33_STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_EXTENDED_opcode: {
+ struct V3D33_STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_EXTENDED values;
+ V3D33_STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_EXTENDED_unpack(cl, &values);
+ /* A store flagged as the last tile of the frame stops the walk. */
+ if (values.last_tile_of_frame)
+ return false;
+ break;
+ }
+
+ case V3D33_TRANSFORM_FEEDBACK_ENABLE_opcode: {
+ struct V3D33_TRANSFORM_FEEDBACK_ENABLE values;
+ V3D33_TRANSFORM_FEEDBACK_ENABLE_unpack(cl, &values);
+ struct v3d_group *spec = v3d_spec_find_struct(clif->spec,
+ "Transform Feedback Output Data Spec");
+ struct v3d_group *addr = v3d_spec_find_struct(clif->spec,
+ "Transform Feedback Output Address");
+ assert(spec);
+ assert(addr);
+ /* Records follow the packet itself; *size grows to cover them. */
+ cl += *size;
+
+ for (int i = 0; i < values.number_of_16_bit_output_data_specs_following; i++) {
+ v3d_print_group(clif->out, spec, 0, cl, "");
+ cl += v3d_group_get_length(spec);
+ *size += v3d_group_get_length(spec);
+ }
+
+ for (int i = 0; i < values.number_of_32_bit_output_buffer_address_following; i++) {
+ v3d_print_group(clif->out, addr, 0, cl, "");
+ cl += v3d_group_get_length(addr);
+ *size += v3d_group_get_length(addr);
+ }
+ break;
+ }
+
+ case V3D33_HALT_opcode:
+ return false;
+ }
+ /* Anything that did not return above: tell the caller to keep going. */
+ return true;
+}
+
+static void
+clif_dump_gl_shader_state_record(struct clif_dump *clif,
+ struct reloc_worklist_entry *reloc, void *vaddr)
+{
+ struct v3d_group *state = v3d_spec_find_struct(clif->spec,
+ "GL Shader State Record");
+ struct v3d_group *attr = v3d_spec_find_struct(clif->spec,
+ "GL Shader State Attribute Record");
+ assert(state);
+ assert(attr);
+ const uint8_t *cursor = vaddr; /* byte cursor: arithmetic on void * is a GNU extension */
+ out(clif, "GL Shader State Record at 0x%08x\n", reloc->addr);
+ v3d_print_group(clif->out, state, 0, cursor, "");
+ cursor += v3d_group_get_length(state);
+ /* Attribute records are read from directly behind the state record. */
+ for (uint32_t i = 0; i < reloc->shader_state.num_attrs; i++) {
+ out(clif, " Attribute %u\n", i);
+ v3d_print_group(clif->out, attr, 0, cursor, "");
+ cursor += v3d_group_get_length(attr);
+ }
+}
+
+static void
+clif_process_worklist(struct clif_dump *clif)
+{
+ /* Drain the queue from the head; each entry is unlinked before use. */
+ while (!list_empty(&clif->worklist)) {
+ struct reloc_worklist_entry *item =
+ list_first_entry(&clif->worklist,
+ struct reloc_worklist_entry, link);
+ void *mapped;
+
+ list_del(&item->link);
+ if (clif->lookup_vaddr(clif->data, item->addr, &mapped)) {
+ switch (item->type) {
+ case reloc_gl_shader_state:
+ clif_dump_gl_shader_state_record(clif, item, mapped);
+ break;
+ }
+ out(clif, "\n");
+ } else {
+ out(clif, "Failed to look up address 0x%08x\n",
+ item->addr);
+ }
+ }
+}
+
+void
+clif_dump_add_cl(struct clif_dump *clif, uint32_t start, uint32_t end)
+{
+ void *cl_begin;
+ void *cl_end = NULL;
+ uint32_t packet_len;
+
+ if (!clif->lookup_vaddr(clif->data, start, &cl_begin)) {
+ out(clif, "Failed to look up address 0x%08x\n", start);
+ return;
+ }
+
+ /* A zero end address means "no explicit end" (a BRANCH target, for
+ * instance); a non-zero one is used for BCL/RCL termination below.
+ */
+ if (end != 0 && !clif->lookup_vaddr(clif->data, end, &cl_end)) {
+ out(clif, "Failed to look up address 0x%08x\n", end);
+ return;
+ }
+
+ /* Decode packet by packet until the decoder says stop or the cursor
+ * lands exactly on the end address.
+ */
+ uint8_t *cursor = cl_begin;
+ for (;;) {
+ if (!clif_dump_packet(clif, start, cursor, &packet_len))
+ break;
+ cursor += packet_len;
+ start += packet_len;
+ if (cursor == cl_end)
+ break;
+ }
+ out(clif, "\n");
+ clif_process_worklist(clif);
+}
diff -Nru mesa-17.2.4/src/broadcom/clif/clif_dump.h mesa-17.3.3/src/broadcom/clif/clif_dump.h
--- mesa-17.2.4/src/broadcom/clif/clif_dump.h 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/broadcom/clif/clif_dump.h 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,42 @@
+/*
+ * Copyright © 2016 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef CLIF_DUMP_H
+#define CLIF_DUMP_H
+
+#include
+#include
+
+struct v3d_device_info;
+struct clif_dump;
+
+struct clif_dump *clif_dump_init(const struct v3d_device_info *devinfo,
+ FILE *output, /* stream the dump text is written to */
+ bool (*lookup_vaddr)(void *data, uint32_t addr,
+ void **vaddr), /* returns false when addr cannot be resolved */
+ void *data); /* opaque cookie passed back to lookup_vaddr */
+void clif_dump_destroy(struct clif_dump *clif); /* frees the dumper with ralloc_free() */
+/* Dumps the command list at start; an end of 0 means no explicit end address. */
+void clif_dump_add_cl(struct clif_dump *clif, uint32_t start, uint32_t end);
+
+#endif
diff -Nru mesa-17.2.4/src/broadcom/common/v3d_debug.c mesa-17.3.3/src/broadcom/common/v3d_debug.c
--- mesa-17.2.4/src/broadcom/common/v3d_debug.c 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/broadcom/common/v3d_debug.c 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,89 @@
+/*
+ * Copyright 2003 VMware, Inc.
+ * Copyright © 2006 Intel Corporation
+ * Copyright © 2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * \file v3d_debug.c
+ *
+ * Support for the V3D_DEBUG environment variable, along with other
+ * miscellaneous debugging code.
+ */
+
+#include
+
+#include "common/v3d_debug.h"
+#include "util/macros.h"
+#include "util/debug.h"
+#include "c11/threads.h"
+
+uint32_t V3D_DEBUG = 0;
+
+static const struct debug_control debug_control[] = { /* V3D_DEBUG token -> flag bit, passed to parse_debug_string() */
+ { "cl", V3D_DEBUG_CL},
+ { "qpu", V3D_DEBUG_QPU},
+ { "vir", V3D_DEBUG_VIR},
+ { "nir", V3D_DEBUG_NIR},
+ { "tgsi", V3D_DEBUG_TGSI},
+ { "shaderdb", V3D_DEBUG_SHADERDB},
+ { "surface", V3D_DEBUG_SURFACE},
+ { "perf", V3D_DEBUG_PERF},
+ { "norast", V3D_DEBUG_NORAST},
+ { "fs", V3D_DEBUG_FS},
+ { "vs", V3D_DEBUG_VS},
+ { "cs", V3D_DEBUG_CS},
+ { NULL, 0 } /* sentinel entry; NOTE(review): V3D_DEBUG_ALWAYS_FLUSH from v3d_debug.h has no token in this table - confirm that is intentional */
+};
+
+uint32_t
+v3d_debug_flag_for_shader_stage(gl_shader_stage stage)
+{ /* Returns the V3D_DEBUG_* bit that selects the given shader stage. */
+ static const uint32_t flags[] = { /* read-only table: built once, not on every call */
+ [MESA_SHADER_VERTEX] = V3D_DEBUG_VS,
+ [MESA_SHADER_TESS_CTRL] = 0,
+ [MESA_SHADER_TESS_EVAL] = 0,
+ [MESA_SHADER_GEOMETRY] = 0,
+ [MESA_SHADER_FRAGMENT] = V3D_DEBUG_FS,
+ [MESA_SHADER_COMPUTE] = V3D_DEBUG_CS,
+ };
+ STATIC_ASSERT(MESA_SHADER_STAGES == 6); /* revisit the table if the stage count changes */
+ return flags[stage]; /* 0 for stages without a debug flag of their own */
+}
+
+static void
+v3d_process_debug_variable_once(void)
+{
+ V3D_DEBUG = parse_debug_string(getenv("V3D_DEBUG"), debug_control); /* getenv() yields NULL when the variable is unset */
+ /* Asking for shaderdb output also switches on the norast flag. */
+ if (V3D_DEBUG & V3D_DEBUG_SHADERDB)
+ V3D_DEBUG |= V3D_DEBUG_NORAST;
+}
+
+void
+v3d_process_debug_variable(void)
+{
+ static once_flag v3d_process_debug_variable_flag = ONCE_FLAG_INIT;
+ /* call_once() runs the parser a single time however often we are called. */
+ call_once(&v3d_process_debug_variable_flag,
+ v3d_process_debug_variable_once);
+}
diff -Nru mesa-17.2.4/src/broadcom/common/v3d_debug.h mesa-17.3.3/src/broadcom/common/v3d_debug.h
--- mesa-17.2.4/src/broadcom/common/v3d_debug.h 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/broadcom/common/v3d_debug.h 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,82 @@
+/*
+ * Copyright 2003 VMware, Inc.
+ * Copyright © 2007 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef MESA_V3D_DEBUG_H
+#define MESA_V3D_DEBUG_H
+
+#include
+#include "compiler/shader_enums.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/**
+ * \file v3d_debug.h
+ *
+ * Basic V3D_DEBUG environment variable handling. This file defines the
+ * list of debugging flags, as well as some macros for handling them.
+ */
+
+extern uint32_t V3D_DEBUG;
+
+#define V3D_DEBUG_SHADERDB (1 << 0)
+#define V3D_DEBUG_TGSI (1 << 1)
+#define V3D_DEBUG_NIR (1 << 2)
+#define V3D_DEBUG_VIR (1 << 3)
+#define V3D_DEBUG_QPU (1 << 4)
+#define V3D_DEBUG_FS (1 << 5)
+#define V3D_DEBUG_VS (1 << 6)
+#define V3D_DEBUG_CS (1 << 7)
+#define V3D_DEBUG_CL (1 << 8)
+#define V3D_DEBUG_SURFACE (1 << 9)
+#define V3D_DEBUG_PERF (1 << 10)
+#define V3D_DEBUG_NORAST (1 << 11)
+#define V3D_DEBUG_ALWAYS_FLUSH (1 << 12)
+
+#ifdef HAVE_ANDROID_PLATFORM
+#define LOG_TAG "BROADCOM-MESA"
+#include
+#ifndef ALOGW
+#define ALOGW LOGW
+#endif
+#define dbg_printf(...) ALOGW(__VA_ARGS__)
+#else
+#define dbg_printf(...) fprintf(stderr, __VA_ARGS__)
+#endif /* HAVE_ANDROID_PLATFORM */
+
+#define DBG(flag, ...) do { \
+ if (unlikely(V3D_DEBUG & (flag))) \
+ dbg_printf(__VA_ARGS__); \
+} while(0)
+
+extern uint32_t v3d_debug_flag_for_shader_stage(gl_shader_stage stage);
+
+extern void v3d_process_debug_variable(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* MESA_V3D_DEBUG_H */
diff -Nru mesa-17.2.4/src/broadcom/compiler/meson.build mesa-17.3.3/src/broadcom/compiler/meson.build
--- mesa-17.2.4/src/broadcom/compiler/meson.build 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/broadcom/compiler/meson.build 2018-02-01 16:17:32.000000000 +0000
@@ -0,0 +1,44 @@
+# Copyright © 2017 Broadcom
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+libbroadcom_compiler_files = files(
+ 'nir_to_vir.c',
+ 'vir.c',
+ 'vir_dump.c',
+ 'vir_live_variables.c',
+ 'vir_lower_uniforms.c',
+ 'vir_opt_copy_propagate.c',
+ 'vir_opt_dead_code.c',
+ 'vir_register_allocate.c',
+ 'vir_to_qpu.c',
+ 'qpu_schedule.c',
+ 'qpu_validate.c',
+ 'v3d_compiler.h',
+ 'v3d_nir_lower_io.c',
+)
+
+libbroadcom_compiler = static_library(
+ ['broadcom_compiler', v3d_xml_pack, nir_opcodes_h, nir_builder_opcodes_h],
+ libbroadcom_compiler_files,
+ include_directories : [inc_common, inc_broadcom],
+ c_args : [c_vis_args, no_override_init_args],
+ dependencies : [dep_libdrm, dep_valgrind],
+ build_by_default : false,
+)
diff -Nru mesa-17.2.4/src/broadcom/compiler/nir_to_vir.c mesa-17.3.3/src/broadcom/compiler/nir_to_vir.c
--- mesa-17.2.4/src/broadcom/compiler/nir_to_vir.c 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/broadcom/compiler/nir_to_vir.c 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,2054 @@
+/*
+ * Copyright © 2016 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include
+#include "util/u_format.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/ralloc.h"
+#include "util/hash_table.h"
+#include "compiler/nir/nir.h"
+#include "compiler/nir/nir_builder.h"
+#include "v3d_compiler.h"
+
+/* We don't do any address packing. */
+#define __gen_user_data void
+#define __gen_address_type uint32_t
+#define __gen_address_offset(reloc) (*reloc)
+#define __gen_emit_reloc(cl, reloc)
+#include "cle/v3d_packet_v33_pack.h"
+
+static struct qreg
+ntq_get_src(struct v3d_compile *c, nir_src src, int i);
+static void
+ntq_emit_cf_list(struct v3d_compile *c, struct exec_list *list);
+
+static void
+resize_qreg_array(struct v3d_compile *c,
+ struct qreg **regs,
+ uint32_t *size,
+ uint32_t decl_size)
+{
+ const uint32_t prev_count = *size;
+ if (decl_size <= prev_count)
+ return; /* already big enough: nothing is touched */
+ /* Grow geometrically, but always to at least the requested size. */
+ const uint32_t new_count = MAX2(prev_count * 2, decl_size);
+ *size = new_count;
+ *regs = reralloc(c, *regs, struct qreg, new_count);
+ if (*regs == NULL) {
+ fprintf(stderr, "Malloc failure\n");
+ abort();
+ }
+ for (uint32_t idx = prev_count; idx < new_count; idx++)
+ (*regs)[idx] = c->undef; /* new slots start as the undef register */
+}
+
+static struct qreg
+vir_SFU(struct v3d_compile *c, int waddr, struct qreg src)
+{
+ vir_FMOV_dest(c, vir_reg(QFILE_MAGIC, waddr), src); /* move src into the magic register selected by waddr */
+ return vir_FMOV(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R4)); /* result is read back from magic R4 - presumably where the SFU delivers it, confirm */
+}
+
+static struct qreg
+vir_LDTMU(struct v3d_compile *c)
+{
+ vir_NOP(c)->qpu.sig.ldtmu = true; /* emit a NOP whose only job is to carry the ldtmu signal */
+ return vir_MOV(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R4)); /* then copy magic R4 - presumably the loaded value, confirm */
+}
+
+static struct qreg
+indirect_uniform_load(struct v3d_compile *c, nir_intrinsic_instr *intr)
+{
+ struct qreg indirect_offset = ntq_get_src(c, intr->src[0], 0);
+ uint32_t offset = nir_intrinsic_base(intr);
+ struct v3d_ubo_range *range = NULL;
+ unsigned i;
+ /* Find the declared range that contains the instruction's base offset. */
+ for (i = 0; i < c->num_ubo_ranges; i++) {
+ range = &c->ubo_ranges[i];
+ if (offset >= range->src_offset &&
+ offset < range->src_offset + range->size) {
+ break;
+ }
+ }
+ /* The driver-location-based offset always has to be within a declared
+ * uniform range.
+ */
+ assert(i != c->num_ubo_ranges);
+ if (!c->ubo_range_used[i]) { /* first use: give the range the next free destination offset */
+ c->ubo_range_used[i] = true;
+ range->dst_offset = c->next_ubo_dst_offset;
+ c->next_ubo_dst_offset += range->size;
+ }
+ /* Make the constant offset relative to the start of its range. */
+ offset -= range->src_offset;
+ /* Fold the constant part into the dynamic offset unless it is zero. */
+ if (range->dst_offset + offset != 0) {
+ indirect_offset = vir_ADD(c, indirect_offset,
+ vir_uniform_ui(c, range->dst_offset +
+ offset));
+ }
+
+ /* Adjust for where we stored the TGSI register base. */
+ vir_ADD_dest(c,
+ vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_TMUA),
+ vir_uniform(c, QUNIFORM_UBO_ADDR, 0),
+ indirect_offset);
+ /* NOTE(review): the TMUA write above presumably starts the fetch read here - confirm. */
+ return vir_LDTMU(c);
+}
+
+static struct qreg *
+ntq_init_ssa_def(struct v3d_compile *c, nir_ssa_def *def)
+{
+ /* One qreg per component, allocated under the def table and keyed by def. */
+ struct qreg *per_chan = ralloc_array(c->def_ht, struct qreg, def->num_components);
+ _mesa_hash_table_insert(c->def_ht, def, per_chan);
+ return per_chan;
+}
+
+/**
+ * This function is responsible for getting VIR results into the associated
+ * storage for a NIR instruction.
+ *
+ * If it's a NIR SSA def, then we just set the associated hash table entry to
+ * the new result.
+ *
+ * If it's a NIR reg, then we need to update the existing qreg assigned to the
+ * NIR destination with the incoming value. To do that without introducing
+ * new MOVs, we require that the incoming qreg either be a uniform, or be
+ * SSA-defined by the previous VIR instruction in the block and rewritable by
+ * this function. That lets us sneak ahead and insert the SF flag beforehand
+ * (knowing that the previous instruction doesn't depend on flags) and rewrite
+ * its destination to be the NIR reg's destination.
+ */
+static void
+ntq_store_dest(struct v3d_compile *c, nir_dest *dest, int chan,
+ struct qreg result)
+{
+ struct qinst *last_inst = NULL;
+ if (!list_empty(&c->cur_block->instructions))
+ last_inst = (struct qinst *)c->cur_block->instructions.prev;
+
+ assert(result.file == QFILE_UNIF ||
+ (result.file == QFILE_TEMP &&
+ last_inst && last_inst == c->defs[result.index]));
+
+ if (dest->is_ssa) {
+ assert(chan < dest->ssa.num_components);
+
+ struct qreg *qregs;
+ struct hash_entry *entry =
+ _mesa_hash_table_search(c->def_ht, &dest->ssa);
+
+ if (entry)
+ qregs = entry->data;
+ else
+ qregs = ntq_init_ssa_def(c, &dest->ssa);
+
+ qregs[chan] = result;
+ } else {
+ nir_register *reg = dest->reg.reg;
+ assert(dest->reg.base_offset == 0);
+ assert(reg->num_array_elems == 0);
+ struct hash_entry *entry =
+ _mesa_hash_table_search(c->def_ht, reg);
+ assert(entry); /* unlike the SSA path, nothing is created lazily here */
+ struct qreg *qregs = entry->data;
+
+ /* Insert a MOV if the source wasn't an SSA def in the
+ * previous instruction.
+ */
+ if (result.file == QFILE_UNIF) {
+ result = vir_MOV(c, result);
+ last_inst = c->defs[result.index];
+ }
+
+ /* We know they're both temps, so just rewrite index. */
+ c->defs[last_inst->dst.index] = NULL;
+ last_inst->dst.index = qregs[chan].index;
+
+ /* If we're in control flow, then make this update of the reg
+ * conditional on the execution mask.
+ */
+ if (c->execute.file != QFILE_NULL) {
+
+ /* Set the flags to the current exec mask. To insert
+ * the flags push, we temporarily remove our SSA
+ * instruction.
+ */
+ list_del(&last_inst->link);
+ vir_PF(c, c->execute, V3D_QPU_PF_PUSHZ);
+ list_addtail(&last_inst->link,
+ &c->cur_block->instructions);
+
+ vir_set_cond(last_inst, V3D_QPU_COND_IFA);
+ last_inst->cond_is_exec_mask = true;
+ }
+ }
+}
+
+static struct qreg
+ntq_get_src(struct v3d_compile *c, nir_src src, int i)
+{
+ struct hash_entry *entry;
+ if (src.is_ssa) {
+ entry = _mesa_hash_table_search(c->def_ht, src.ssa);
+ assert(i < src.ssa->num_components);
+ } else {
+ nir_register *reg = src.reg.reg;
+ entry = _mesa_hash_table_search(c->def_ht, reg);
+ assert(reg->num_array_elems == 0);
+ assert(src.reg.base_offset == 0);
+ assert(i < reg->num_components);
+ }
+ assert(entry); /* a source must have been stored before it is read */
+ struct qreg *qregs = entry->data;
+ return qregs[i]; /* component i of the value recorded for this def/register */
+}
+
+static struct qreg
+ntq_get_alu_src(struct v3d_compile *c, nir_alu_instr *instr,
+ unsigned src)
+{
+ assert(util_is_power_of_two(instr->dest.write_mask)); /* a single write-mask bit is expected */
+ unsigned chan = ffs(instr->dest.write_mask) - 1;
+ struct qreg r = ntq_get_src(c, instr->src[src].src,
+ instr->src[src].swizzle[chan]);
+ /* No abs/negate handling happens here, so both must be clear. */
+ assert(!instr->src[src].abs);
+ assert(!instr->src[src].negate);
+
+ return r;
+}
+
+static inline struct qreg
+vir_SAT(struct v3d_compile *c, struct qreg val)
+{
+ return vir_FMAX(c, /* FMAX(FMIN(val, 1.0), 0.0) */
+ vir_FMIN(c, val, vir_uniform_f(c, 1.0)), /* upper bound taken from a 1.0 uniform */
+ vir_uniform_f(c, 0.0)); /* lower bound taken from a 0.0 uniform */
+}
+
+static struct qreg
+ntq_umul(struct v3d_compile *c, struct qreg src0, struct qreg src1)
+{
+ vir_MULTOP(c, src0, src1); /* NOTE(review): presumably sets up state the UMUL24 below consumes - confirm against the QPU docs */
+ return vir_UMUL24(c, src0, src1); /* only this second instruction's result is returned */
+}
+
+static struct qreg
+ntq_minify(struct v3d_compile *c, struct qreg size, struct qreg level)
+{
+ return vir_MAX(c, vir_SHR(c, size, level), vir_uniform_ui(c, 1)); /* MAX(size >> level, 1) */
+}
+
+static void
+ntq_emit_txs(struct v3d_compile *c, nir_tex_instr *instr)
+{
+ unsigned unit = instr->texture_index;
+ int lod_index = nir_tex_instr_src_index(instr, nir_tex_src_lod);
+ int dest_size = nir_tex_instr_dest_size(instr);
+ /* Without an LOD source, lod stays the undef register. */
+ struct qreg lod = c->undef;
+ if (lod_index != -1)
+ lod = ntq_get_src(c, instr->src[lod_index].src, 0);
+ /* One size uniform is loaded and stored per destination component. */
+ for (int i = 0; i < dest_size; i++) {
+ assert(i < 3);
+ enum quniform_contents contents;
+ /* For array textures the last component is the array size. */
+ if (instr->is_array && i == dest_size - 1)
+ contents = QUNIFORM_TEXTURE_ARRAY_SIZE;
+ else
+ contents = QUNIFORM_TEXTURE_WIDTH + i;
+ /* NOTE(review): "+ i" assumes the per-axis size enums are consecutive - confirm. */
+ struct qreg size = vir_uniform(c, contents, unit);
+
+ switch (instr->sampler_dim) {
+ case GLSL_SAMPLER_DIM_1D:
+ case GLSL_SAMPLER_DIM_2D:
+ case GLSL_SAMPLER_DIM_3D:
+ case GLSL_SAMPLER_DIM_CUBE:
+ /* Don't minify the array size. */
+ if (!(instr->is_array && i == dest_size - 1)) {
+ size = ntq_minify(c, size, lod);
+ }
+ break;
+
+ case GLSL_SAMPLER_DIM_RECT:
+ /* There's no LOD field for rects */
+ break;
+
+ default:
+ unreachable("Bad sampler type");
+ }
+
+ ntq_store_dest(c, &instr->dest, i, size);
+ }
+}
+
+static void
+ntq_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
+{
+ unsigned unit = instr->texture_index;
+
+ /* Since each texture sampling op requires uploading uniforms to
+ * reference the texture, there's no HW support for texture size and
+ * you just upload uniforms containing the size.
+ */
+ switch (instr->op) {
+ case nir_texop_query_levels:
+ ntq_store_dest(c, &instr->dest, 0,
+ vir_uniform(c, QUNIFORM_TEXTURE_LEVELS, unit));
+ return;
+ case nir_texop_txs:
+ ntq_emit_txs(c, instr);
+ return;
+ default:
+ break;
+ }
+ /* Every other op is a lookup: start filling in the P0 config fields. */
+ struct V3D33_TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1 p0_unpacked = {
+ V3D33_TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1_header,
+
+ .fetch_sample_mode = instr->op == nir_texop_txf,
+ };
+
+ switch (instr->sampler_dim) {
+ case GLSL_SAMPLER_DIM_1D:
+ if (instr->is_array)
+ p0_unpacked.lookup_type = TEXTURE_1D_ARRAY;
+ else
+ p0_unpacked.lookup_type = TEXTURE_1D;
+ break;
+ case GLSL_SAMPLER_DIM_2D:
+ case GLSL_SAMPLER_DIM_RECT:
+ if (instr->is_array)
+ p0_unpacked.lookup_type = TEXTURE_2D_ARRAY;
+ else
+ p0_unpacked.lookup_type = TEXTURE_2D;
+ break;
+ case GLSL_SAMPLER_DIM_3D:
+ p0_unpacked.lookup_type = TEXTURE_3D;
+ break;
+ case GLSL_SAMPLER_DIM_CUBE:
+ p0_unpacked.lookup_type = TEXTURE_CUBE_MAP;
+ break;
+ default:
+ unreachable("Bad sampler type");
+ }
+ /* Operands in the order they are written below. NOTE(review): confirm no source mix exceeds 5 slots. */
+ struct qreg coords[5];
+ int next_coord = 0;
+ for (unsigned i = 0; i < instr->num_srcs; i++) {
+ switch (instr->src[i].src_type) {
+ case nir_tex_src_coord:
+ for (int j = 0; j < instr->coord_components; j++) {
+ coords[next_coord++] =
+ ntq_get_src(c, instr->src[i].src, j);
+ }
+ if (instr->coord_components < 2)
+ coords[next_coord++] = vir_uniform_f(c, 0.5);
+ break;
+ case nir_tex_src_bias:
+ coords[next_coord++] =
+ ntq_get_src(c, instr->src[i].src, 0);
+
+ p0_unpacked.bias_supplied = true;
+ break;
+ case nir_tex_src_lod:
+ /* XXX: Needs base level addition */
+ coords[next_coord++] =
+ ntq_get_src(c, instr->src[i].src, 0);
+
+ if (instr->op != nir_texop_txf &&
+ instr->op != nir_texop_tg4) {
+ p0_unpacked.disable_autolod_use_bias_only = true;
+ }
+ break;
+ case nir_tex_src_comparator:
+ coords[next_coord++] =
+ ntq_get_src(c, instr->src[i].src, 0);
+
+ p0_unpacked.shadow = true;
+ break;
+
+ case nir_tex_src_offset: {
+ nir_const_value *offset =
+ nir_src_as_const_value(instr->src[i].src);
+ p0_unpacked.texel_offset_for_s_coordinate =
+ offset->i32[0];
+ /* NOTE(review): offset is dereferenced unchecked - assumes the source is always a constant; confirm. */
+ if (instr->coord_components >= 2)
+ p0_unpacked.texel_offset_for_t_coordinate =
+ offset->i32[1];
+
+ if (instr->coord_components >= 3)
+ p0_unpacked.texel_offset_for_r_coordinate =
+ offset->i32[2];
+ break;
+ }
+
+ default:
+ unreachable("unknown texture source");
+ }
+ }
+ /* Pack the P0 fields into one 32-bit word, used as uniform data below. */
+ uint32_t p0_packed;
+ V3D33_TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1_pack(NULL,
+ (uint8_t *)&p0_packed,
+ &p0_unpacked);
+
+ /* There is no native support for GL texture rectangle coordinates, so
+ * we have to rescale from ([0, width], [0, height]) to ([0, 1], [0,
+ * 1]).
+ */
+ if (instr->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
+ coords[0] = vir_FMUL(c, coords[0],
+ vir_uniform(c, QUNIFORM_TEXRECT_SCALE_X,
+ unit));
+ coords[1] = vir_FMUL(c, coords[1],
+ vir_uniform(c, QUNIFORM_TEXRECT_SCALE_Y,
+ unit));
+ }
+
+ struct qreg texture_u[] = {
+ vir_uniform(c, QUNIFORM_TEXTURE_CONFIG_P0_0 + unit, p0_packed),
+ vir_uniform(c, QUNIFORM_TEXTURE_CONFIG_P1, unit),
+ };
+ uint32_t next_texture_u = 0;
+ /* Write each operand to the TMU; the last write goes to TMUL instead. */
+ for (int i = 0; i < next_coord; i++) {
+ struct qreg dst;
+
+ if (i == next_coord - 1)
+ dst = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_TMUL);
+ else
+ dst = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_TMU);
+
+ struct qinst *tmu = vir_MOV_dest(c, dst, coords[i]);
+ /* The first two writes each carry one of the two config uniforms. */
+ if (i < 2) {
+ tmu->has_implicit_uniform = true;
+ tmu->src[vir_get_implicit_uniform_src(tmu)] =
+ texture_u[next_texture_u++];
+ }
+ }
+ /* With 16-bit returns (or shadow), two channels share each returned word. */
+ bool return_16 = (c->key->tex[unit].return_size == 16 ||
+ p0_unpacked.shadow);
+
+ struct qreg return_values[4];
+ for (int i = 0; i < c->key->tex[unit].return_channels; i++)
+ return_values[i] = vir_LDTMU(c);
+ /* Swizzling .zw of an RG texture should give undefined results, not
+ * crash the compiler.
+ */
+ for (int i = c->key->tex[unit].return_channels; i < 4; i++)
+ return_values[i] = c->undef;
+
+ for (int i = 0; i < nir_tex_instr_dest_size(instr); i++) {
+ struct qreg chan;
+
+ if (return_16) {
+ STATIC_ASSERT(PIPE_SWIZZLE_X == 0);
+ chan = return_values[i / 2];
+
+ enum v3d_qpu_input_unpack unpack;
+ if (i & 1)
+ unpack = V3D_QPU_UNPACK_H;
+ else
+ unpack = V3D_QPU_UNPACK_L;
+
+ chan = vir_FMOV(c, chan);
+ vir_set_unpack(c->defs[chan.index], 0, unpack);
+ } else {
+ chan = vir_MOV(c, return_values[i]);
+ }
+ ntq_store_dest(c, &instr->dest, i, chan);
+ }
+}
+
+static struct qreg
+ntq_fsincos(struct v3d_compile *c, struct qreg src, bool is_cos)
+{
+ struct qreg input = vir_FMUL(c, src, vir_uniform_f(c, 1.0f / M_PI)); /* rescale: one unit of input == pi radians */
+ if (is_cos)
+ input = vir_FADD(c, input, vir_uniform_f(c, 0.5)); /* cosine is computed as sine advanced by half a unit */
+ /* Split off the rounded part; only the remainder is fed to the SFU. */
+ struct qreg periods = vir_FROUND(c, input);
+ struct qreg sin_output = vir_SFU(c, V3D_QPU_WADDR_SIN,
+ vir_FSUB(c, input, periods));
+ return vir_XOR(c, sin_output, vir_SHL(c, /* NOTE(review): presumably flips the sign bit for odd period counts - confirm */
+ vir_FTOIN(c, periods),
+ vir_uniform_ui(c, -1))); /* NOTE(review): a shift count of -1 presumably acts as 31 - confirm */
+}
+
+static struct qreg
+ntq_fsign(struct v3d_compile *c, struct qreg src)
+{
+ struct qreg t = vir_get_temp(c);
+ /* Build the float sign in t: start from 0.0, then overwrite conditionally. */
+ vir_MOV_dest(c, t, vir_uniform_f(c, 0.0));
+ vir_PF(c, vir_FMOV(c, src), V3D_QPU_PF_PUSHZ); /* flags from a zero test of src */
+ vir_MOV_cond(c, V3D_QPU_COND_IFNA, t, vir_uniform_f(c, 1.0)); /* presumably taken when src is non-zero */
+ vir_PF(c, vir_FMOV(c, src), V3D_QPU_PF_PUSHN); /* flags from a negative test of src */
+ vir_MOV_cond(c, V3D_QPU_COND_IFA, t, vir_uniform_f(c, -1.0)); /* presumably taken when src is negative */
+ return vir_MOV(c, t);
+}
+
+static struct qreg
+ntq_isign(struct v3d_compile *c, struct qreg src)
+{
+ struct qreg t = vir_get_temp(c);
+ /* Integer counterpart of ntq_fsign(): same sequence with MOV and integer uniforms. */
+ vir_MOV_dest(c, t, vir_uniform_ui(c, 0));
+ vir_PF(c, vir_MOV(c, src), V3D_QPU_PF_PUSHZ); /* flags from a zero test of src */
+ vir_MOV_cond(c, V3D_QPU_COND_IFNA, t, vir_uniform_ui(c, 1)); /* presumably taken when src is non-zero */
+ vir_PF(c, vir_MOV(c, src), V3D_QPU_PF_PUSHN); /* flags from a negative test of src */
+ vir_MOV_cond(c, V3D_QPU_COND_IFA, t, vir_uniform_ui(c, -1)); /* presumably taken when src is negative; -1 is passed as an unsigned uniform */
+ return vir_MOV(c, t);
+}
+
+static void
+emit_fragcoord_input(struct v3d_compile *c, int attr)
+{
+ c->inputs[attr * 4 + 0] = vir_FXCD(c);
+ c->inputs[attr * 4 + 1] = vir_FYCD(c);
+ c->inputs[attr * 4 + 2] = c->payload_z;
+ c->inputs[attr * 4 + 3] = vir_SFU(c, V3D_QPU_WADDR_RECIP,
+ c->payload_w);
+}
+
+static struct qreg
+emit_fragment_varying(struct v3d_compile *c, nir_variable *var,
+ uint8_t swizzle)
+{
+ struct qreg vary = vir_reg(QFILE_VARY, ~0);
+ struct qreg r5 = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R5);
+
+ /* For gl_PointCoord input or distance along a line, we'll be called
+ * with no nir_variable, and we don't count toward VPM size so we
+ * don't track an input slot.
+ */
+ if (!var) {
+ return vir_FADD(c, vir_FMUL(c, vary, c->payload_w), r5);
+ }
+
+ int i = c->num_inputs++;
+ c->input_slots[i] = v3d_slot_from_slot_and_component(var->data.location,
+ swizzle);
+
+ switch (var->data.interpolation) {
+ case INTERP_MODE_NONE:
+ /* If a gl_FrontColor or gl_BackColor input has no interp
+ * qualifier, then flag it for glShadeModel() handling by the
+ * driver.
+ */
+ switch (var->data.location) {
+ case VARYING_SLOT_COL0:
+ case VARYING_SLOT_COL1:
+ case VARYING_SLOT_BFC0:
+ case VARYING_SLOT_BFC1:
+ BITSET_SET(c->shade_model_flags, i);
+ break;
+ default:
+ break;
+ }
+ /* FALLTHROUGH */
+ case INTERP_MODE_SMOOTH:
+ if (var->data.centroid) {
+ return vir_FADD(c, vir_FMUL(c, vary,
+ c->payload_w_centroid), r5);
+ } else {
+ return vir_FADD(c, vir_FMUL(c, vary, c->payload_w), r5);
+ }
+ case INTERP_MODE_NOPERSPECTIVE:
+ /* C appears after the mov from the varying.
+ XXX: improve ldvary setup.
+ */
+ return vir_FADD(c, vir_MOV(c, vary), r5);
+ case INTERP_MODE_FLAT:
+ BITSET_SET(c->flat_shade_flags, i);
+ vir_MOV_dest(c, c->undef, vary);
+ return vir_MOV(c, r5);
+ default:
+ unreachable("Bad interp mode");
+ }
+}
+
+static void
+emit_fragment_input(struct v3d_compile *c, int attr, nir_variable *var)
+{
+ for (int i = 0; i < glsl_get_vector_elements(var->type); i++) {
+ c->inputs[attr * 4 + i] =
+ emit_fragment_varying(c, var, i);
+ }
+}
+
+static void
+add_output(struct v3d_compile *c,
+ uint32_t decl_offset,
+ uint8_t slot,
+ uint8_t swizzle)
+{
+ uint32_t old_array_size = c->outputs_array_size;
+ resize_qreg_array(c, &c->outputs, &c->outputs_array_size,
+ decl_offset + 1);
+
+ if (old_array_size != c->outputs_array_size) {
+ c->output_slots = reralloc(c,
+ c->output_slots,
+ struct v3d_varying_slot,
+ c->outputs_array_size);
+ }
+
+ c->output_slots[decl_offset] =
+ v3d_slot_from_slot_and_component(slot, swizzle);
+}
+
+static void
+declare_uniform_range(struct v3d_compile *c, uint32_t start, uint32_t size)
+{
+ unsigned array_id = c->num_ubo_ranges++;
+ if (array_id >= c->ubo_ranges_array_size) {
+ c->ubo_ranges_array_size = MAX2(c->ubo_ranges_array_size * 2,
+ array_id + 1);
+ c->ubo_ranges = reralloc(c, c->ubo_ranges,
+ struct v3d_ubo_range,
+ c->ubo_ranges_array_size);
+ c->ubo_range_used = reralloc(c, c->ubo_range_used,
+ bool,
+ c->ubo_ranges_array_size);
+ }
+
+ c->ubo_ranges[array_id].dst_offset = 0;
+ c->ubo_ranges[array_id].src_offset = start;
+ c->ubo_ranges[array_id].size = size;
+ c->ubo_range_used[array_id] = false;
+}
+
+/**
+ * If compare_instr is a supported comparison, emits it, stores in *dest the
+ * value that sel_instr produces for that comparison's result, and returns
+ * true. Returns false, leaving *dest unwritten, for any other opcode.
+ */
+static bool
+ntq_emit_comparison(struct v3d_compile *c, struct qreg *dest,
+ nir_alu_instr *compare_instr,
+ nir_alu_instr *sel_instr)
+{
+ struct qreg src0 = ntq_get_alu_src(c, compare_instr, 0);
+ struct qreg src1 = ntq_get_alu_src(c, compare_instr, 1);
+ bool cond_invert = false;
+
+ switch (compare_instr->op) {
+ case nir_op_feq:
+ case nir_op_seq:
+ vir_PF(c, vir_FCMP(c, src0, src1), V3D_QPU_PF_PUSHZ);
+ break;
+ case nir_op_ieq:
+ vir_PF(c, vir_XOR(c, src0, src1), V3D_QPU_PF_PUSHZ);
+ break;
+
+ case nir_op_fne:
+ case nir_op_sne:
+ vir_PF(c, vir_FCMP(c, src0, src1), V3D_QPU_PF_PUSHZ);
+ cond_invert = true;
+ break;
+ case nir_op_ine:
+ vir_PF(c, vir_XOR(c, src0, src1), V3D_QPU_PF_PUSHZ);
+ cond_invert = true;
+ break;
+
+ case nir_op_fge:
+ case nir_op_sge:
+ vir_PF(c, vir_FCMP(c, src1, src0), V3D_QPU_PF_PUSHC);
+ break;
+ case nir_op_ige:
+ vir_PF(c, vir_MIN(c, src1, src0), V3D_QPU_PF_PUSHC);
+ cond_invert = true;
+ break;
+ case nir_op_uge:
+ vir_PF(c, vir_SUB(c, src0, src1), V3D_QPU_PF_PUSHC);
+ cond_invert = true;
+ break;
+
+ case nir_op_slt:
+ case nir_op_flt:
+ vir_PF(c, vir_FCMP(c, src0, src1), V3D_QPU_PF_PUSHN);
+ break;
+ case nir_op_ilt:
+ vir_PF(c, vir_MIN(c, src1, src0), V3D_QPU_PF_PUSHC);
+ break;
+ case nir_op_ult:
+ vir_PF(c, vir_SUB(c, src0, src1), V3D_QPU_PF_PUSHC);
+ break;
+
+ default:
+ return false;
+ }
+
+ enum v3d_qpu_cond cond = (cond_invert ?
+ V3D_QPU_COND_IFNA :
+ V3D_QPU_COND_IFA);
+
+ switch (sel_instr->op) {
+ case nir_op_seq:
+ case nir_op_sne:
+ case nir_op_sge:
+ case nir_op_slt:
+ *dest = vir_SEL(c, cond,
+ vir_uniform_f(c, 1.0), vir_uniform_f(c, 0.0));
+ break;
+
+ case nir_op_bcsel:
+ *dest = vir_SEL(c, cond,
+ ntq_get_alu_src(c, sel_instr, 1),
+ ntq_get_alu_src(c, sel_instr, 2));
+ break;
+
+ default:
+ *dest = vir_SEL(c, cond,
+ vir_uniform_ui(c, ~0), vir_uniform_ui(c, 0));
+ break;
+ }
+
+ /* Make the temporary for nir_store_dest(). */
+ *dest = vir_MOV(c, *dest);
+
+ return true;
+}
+
+/**
+ * Attempts to fold a comparison generating a boolean result into the
+ * condition code for selecting between two values, instead of comparing the
+ * boolean result against 0 to generate the condition code.
+ */
+static struct qreg ntq_emit_bcsel(struct v3d_compile *c, nir_alu_instr *instr,
+ struct qreg *src)
+{
+ if (!instr->src[0].src.is_ssa)
+ goto out;
+ if (instr->src[0].src.ssa->parent_instr->type != nir_instr_type_alu)
+ goto out;
+ nir_alu_instr *compare =
+ nir_instr_as_alu(instr->src[0].src.ssa->parent_instr);
+ if (!compare)
+ goto out;
+
+ struct qreg dest;
+ if (ntq_emit_comparison(c, &dest, compare, instr))
+ return dest;
+
+out:
+ vir_PF(c, src[0], V3D_QPU_PF_PUSHZ);
+ return vir_MOV(c, vir_SEL(c, V3D_QPU_COND_IFNA, src[1], src[2]));
+}
+
+
+static void
+ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr)
+{
+ /* This should always be lowered to ALU operations for V3D. */
+ assert(!instr->dest.saturate);
+
+ /* Vectors are special in that they have non-scalarized writemasks,
+ * and just take the first swizzle channel for each argument in order
+ * into each writemask channel.
+ */
+ if (instr->op == nir_op_vec2 ||
+ instr->op == nir_op_vec3 ||
+ instr->op == nir_op_vec4) {
+ struct qreg srcs[4];
+ for (int i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
+ srcs[i] = ntq_get_src(c, instr->src[i].src,
+ instr->src[i].swizzle[0]);
+ for (int i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
+ ntq_store_dest(c, &instr->dest.dest, i,
+ vir_MOV(c, srcs[i]));
+ return;
+ }
+
+ /* General case: We can just grab the one used channel per src. */
+ struct qreg src[nir_op_infos[instr->op].num_inputs];
+ for (int i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
+ src[i] = ntq_get_alu_src(c, instr, i);
+ }
+
+ struct qreg result;
+
+ switch (instr->op) {
+ case nir_op_fmov:
+ case nir_op_imov:
+ result = vir_MOV(c, src[0]);
+ break;
+ case nir_op_fmul:
+ result = vir_FMUL(c, src[0], src[1]);
+ break;
+ case nir_op_fadd:
+ result = vir_FADD(c, src[0], src[1]);
+ break;
+ case nir_op_fsub:
+ result = vir_FSUB(c, src[0], src[1]);
+ break;
+ case nir_op_fmin:
+ result = vir_FMIN(c, src[0], src[1]);
+ break;
+ case nir_op_fmax:
+ result = vir_FMAX(c, src[0], src[1]);
+ break;
+
+ case nir_op_f2i32:
+ result = vir_FTOIZ(c, src[0]);
+ break;
+ case nir_op_f2u32:
+ result = vir_FTOUZ(c, src[0]);
+ break;
+ case nir_op_i2f32:
+ result = vir_ITOF(c, src[0]);
+ break;
+ case nir_op_u2f32:
+ result = vir_UTOF(c, src[0]);
+ break;
+ case nir_op_b2f:
+ result = vir_AND(c, src[0], vir_uniform_f(c, 1.0));
+ break;
+ case nir_op_b2i:
+ result = vir_AND(c, src[0], vir_uniform_ui(c, 1));
+ break;
+ case nir_op_i2b:
+ case nir_op_f2b:
+ vir_PF(c, src[0], V3D_QPU_PF_PUSHZ);
+ result = vir_MOV(c, vir_SEL(c, V3D_QPU_COND_IFNA,
+ vir_uniform_ui(c, ~0),
+ vir_uniform_ui(c, 0)));
+ break;
+
+ case nir_op_iadd:
+ result = vir_ADD(c, src[0], src[1]);
+ break;
+ case nir_op_ushr:
+ result = vir_SHR(c, src[0], src[1]);
+ break;
+ case nir_op_isub:
+ result = vir_SUB(c, src[0], src[1]);
+ break;
+ case nir_op_ishr:
+ result = vir_ASR(c, src[0], src[1]);
+ break;
+ case nir_op_ishl:
+ result = vir_SHL(c, src[0], src[1]);
+ break;
+ case nir_op_imin:
+ result = vir_MIN(c, src[0], src[1]);
+ break;
+ case nir_op_umin:
+ result = vir_UMIN(c, src[0], src[1]);
+ break;
+ case nir_op_imax:
+ result = vir_MAX(c, src[0], src[1]);
+ break;
+ case nir_op_umax:
+ result = vir_UMAX(c, src[0], src[1]);
+ break;
+ case nir_op_iand:
+ result = vir_AND(c, src[0], src[1]);
+ break;
+ case nir_op_ior:
+ result = vir_OR(c, src[0], src[1]);
+ break;
+ case nir_op_ixor:
+ result = vir_XOR(c, src[0], src[1]);
+ break;
+ case nir_op_inot:
+ result = vir_NOT(c, src[0]);
+ break;
+
+ case nir_op_imul:
+ result = ntq_umul(c, src[0], src[1]);
+ break;
+
+ case nir_op_seq:
+ case nir_op_sne:
+ case nir_op_sge:
+ case nir_op_slt:
+ case nir_op_feq:
+ case nir_op_fne:
+ case nir_op_fge:
+ case nir_op_flt:
+ case nir_op_ieq:
+ case nir_op_ine:
+ case nir_op_ige:
+ case nir_op_uge:
+ case nir_op_ilt:
+ case nir_op_ult:
+ if (!ntq_emit_comparison(c, &result, instr, instr)) {
+ fprintf(stderr, "Bad comparison instruction\n");
+ }
+ break;
+
+ case nir_op_bcsel:
+ result = ntq_emit_bcsel(c, instr, src);
+ break;
+ case nir_op_fcsel:
+ vir_PF(c, src[0], V3D_QPU_PF_PUSHZ);
+ result = vir_MOV(c, vir_SEL(c, V3D_QPU_COND_IFNA,
+ src[1], src[2]));
+ break;
+
+ case nir_op_frcp:
+ result = vir_SFU(c, V3D_QPU_WADDR_RECIP, src[0]);
+ break;
+ case nir_op_frsq:
+ result = vir_SFU(c, V3D_QPU_WADDR_RSQRT, src[0]);
+ break;
+ case nir_op_fexp2:
+ result = vir_SFU(c, V3D_QPU_WADDR_EXP, src[0]);
+ break;
+ case nir_op_flog2:
+ result = vir_SFU(c, V3D_QPU_WADDR_LOG, src[0]);
+ break;
+
+ case nir_op_fceil:
+ result = vir_FCEIL(c, src[0]);
+ break;
+ case nir_op_ffloor:
+ result = vir_FFLOOR(c, src[0]);
+ break;
+ case nir_op_fround_even:
+ result = vir_FROUND(c, src[0]);
+ break;
+ case nir_op_ftrunc:
+ result = vir_FTRUNC(c, src[0]);
+ break;
+ case nir_op_ffract:
+ result = vir_FSUB(c, src[0], vir_FFLOOR(c, src[0]));
+ break;
+
+ case nir_op_fsin:
+ result = ntq_fsincos(c, src[0], false);
+ break;
+ case nir_op_fcos:
+ result = ntq_fsincos(c, src[0], true);
+ break;
+
+ case nir_op_fsign:
+ result = ntq_fsign(c, src[0]);
+ break;
+ case nir_op_isign:
+ result = ntq_isign(c, src[0]);
+ break;
+
+ case nir_op_fabs: {
+ result = vir_FMOV(c, src[0]);
+ vir_set_unpack(c->defs[result.index], 0, V3D_QPU_UNPACK_ABS);
+ break;
+ }
+
+ case nir_op_iabs:
+ result = vir_MAX(c, src[0],
+ vir_SUB(c, vir_uniform_ui(c, 0), src[0]));
+ break;
+
+ case nir_op_fddx:
+ case nir_op_fddx_coarse:
+ case nir_op_fddx_fine:
+ result = vir_FDX(c, src[0]);
+ break;
+
+ case nir_op_fddy:
+ case nir_op_fddy_coarse:
+ case nir_op_fddy_fine:
+ result = vir_FDY(c, src[0]);
+ break;
+
+ default:
+ fprintf(stderr, "unknown NIR ALU inst: ");
+ nir_print_instr(&instr->instr, stderr);
+ fprintf(stderr, "\n");
+ abort();
+ }
+
+ /* We have a scalar result, so the instruction should only have a
+ * single channel written to.
+ */
+ assert(util_is_power_of_two(instr->dest.write_mask));
+ ntq_store_dest(c, &instr->dest.dest,
+ ffs(instr->dest.write_mask) - 1, result);
+}
+
+/* Each TLB read/write setup (a render target or depth buffer) takes an 8-bit
+ * specifier. They come from a register that's preloaded with 0xffffffff
+ * (0xff gets you normal vec4 f16 RT0 writes), and when one is needed the low
+ * 8 bits are shifted off the bottom and 0xff shifted in from the top.
+ */
+#define TLB_TYPE_F16_COLOR (3 << 6)
+#define TLB_TYPE_I32_COLOR (1 << 6)
+#define TLB_TYPE_F32_COLOR (0 << 6)
+#define TLB_RENDER_TARGET_SHIFT 3 /* Reversed! 7 = RT 0, 0 = RT 7. */
+#define TLB_SAMPLE_MODE_PER_SAMPLE (0 << 2)
+#define TLB_SAMPLE_MODE_PER_PIXEL (1 << 2)
+#define TLB_F16_SWAP_HI_LO (1 << 1)
+#define TLB_VEC_SIZE_4_F16 (1 << 0)
+#define TLB_VEC_SIZE_2_F16 (0 << 0)
+#define TLB_VEC_SIZE_MINUS_1_SHIFT 0
+
+/* Triggers Z/Stencil testing, used when the shader state's "FS modifies Z"
+ * flag is set.
+ */
+#define TLB_TYPE_DEPTH ((2 << 6) | (0 << 4))
+#define TLB_DEPTH_TYPE_INVARIANT (0 << 2) /* Unmodified sideband input used */
+#define TLB_DEPTH_TYPE_PER_PIXEL (1 << 2) /* QPU result used */
+
+/* Stencil is a single 32-bit write. */
+#define TLB_TYPE_STENCIL_ALPHA ((2 << 6) | (1 << 4))
+
+/* Emits the fragment shader's TLB writes: Z if needed, then each color RT. */
+static void
+emit_frag_end(struct v3d_compile *c)
+{
+        /* XXX: The sample mask output is not written yet:
+         * if (c->output_sample_mask_index != -1)
+         *         vir_MS_MASK(c, c->outputs[c->output_sample_mask_index]);
+         */
+
+        if (c->output_position_index != -1) {
+                struct qinst *inst = vir_MOV_dest(c,
+                                                  vir_reg(QFILE_TLBU, 0),
+                                                  c->outputs[c->output_position_index]);
+
+                inst->src[vir_get_implicit_uniform_src(inst)] =
+                        vir_uniform_ui(c,
+                                       TLB_TYPE_DEPTH |
+                                       TLB_DEPTH_TYPE_PER_PIXEL |
+                                       0xffffff00);
+        } else if (c->s->info.fs.uses_discard) {
+                struct qinst *inst = vir_MOV_dest(c,
+                                                  vir_reg(QFILE_TLBU, 0),
+                                                  vir_reg(QFILE_NULL, 0));
+
+                inst->src[vir_get_implicit_uniform_src(inst)] =
+                        vir_uniform_ui(c,
+                                       TLB_TYPE_DEPTH |
+                                       TLB_DEPTH_TYPE_INVARIANT |
+                                       0xffffff00);
+        }
+
+        /* XXX: Performance improvement: Merge Z write and color writes TLB
+         * uniform setup
+         */
+
+        for (int rt = 0; rt < c->fs_key->nr_cbufs; rt++) {
+                if (!c->output_color_var[rt])
+                        continue;
+
+                nir_variable *var = c->output_color_var[rt];
+                struct qreg *color = &c->outputs[var->data.driver_location * 4];
+                int num_components = glsl_get_vector_elements(var->type);
+                uint32_t conf = 0xffffff00;
+                struct qinst *inst;
+
+                conf |= TLB_SAMPLE_MODE_PER_PIXEL;
+                conf |= (7 - rt) << TLB_RENDER_TARGET_SHIFT;
+
+                assert(num_components != 0);
+                switch (glsl_get_base_type(var->type)) {
+                case GLSL_TYPE_UINT:
+                case GLSL_TYPE_INT:
+                        conf |= TLB_TYPE_I32_COLOR;
+                        conf |= ((num_components - 1) <<
+                                 TLB_VEC_SIZE_MINUS_1_SHIFT);
+
+                        inst = vir_MOV_dest(c, vir_reg(QFILE_TLBU, 0), color[0]);
+                        inst->src[vir_get_implicit_uniform_src(inst)] =
+                                vir_uniform_ui(c, conf);
+
+                        for (int i = 1; i < num_components; i++) {
+                                inst = vir_MOV_dest(c, vir_reg(QFILE_TLB, 0),
+                                                    color[i]);
+                        }
+                        break;
+
+                default: {
+                        struct qreg r = color[0];
+                        struct qreg g = color[1];
+                        struct qreg b = color[2];
+                        struct qreg a = color[3];
+                        /* f32_color_rb is a per-RT bitmask, as tested below. */
+                        if (c->fs_key->f32_color_rb & (1 << rt)) {
+                                conf |= TLB_TYPE_F32_COLOR;
+                                conf |= ((num_components - 1) <<
+                                         TLB_VEC_SIZE_MINUS_1_SHIFT);
+                        } else {
+                                conf |= TLB_TYPE_F16_COLOR;
+                                conf |= TLB_F16_SWAP_HI_LO;
+                                if (num_components >= 3)
+                                        conf |= TLB_VEC_SIZE_4_F16;
+                                else
+                                        conf |= TLB_VEC_SIZE_2_F16;
+                        }
+
+                        if (c->fs_key->swap_color_rb & (1 << rt)) {
+                                r = color[2];
+                                b = color[0];
+                        }
+
+                        if (c->fs_key->f32_color_rb & (1 << rt)) {
+                                inst = vir_MOV_dest(c, vir_reg(QFILE_TLBU, 0), color[0]);
+                                inst->src[vir_get_implicit_uniform_src(inst)] =
+                                        vir_uniform_ui(c, conf);
+
+                                for (int i = 1; i < num_components; i++) {
+                                        inst = vir_MOV_dest(c, vir_reg(QFILE_TLB, 0),
+                                                            color[i]);
+                                }
+                        } else {
+                                inst = vir_VFPACK_dest(c, vir_reg(QFILE_TLB, 0), r, g);
+                                if (conf != ~0) {
+                                        inst->dst.file = QFILE_TLBU;
+                                        inst->src[vir_get_implicit_uniform_src(inst)] =
+                                                vir_uniform_ui(c, conf);
+                                }
+
+                                inst = vir_VFPACK_dest(c, vir_reg(QFILE_TLB, 0), b, a);
+                        }
+                        break;
+                }
+                }
+        }
+}
+
+static void
+emit_scaled_viewport_write(struct v3d_compile *c, struct qreg rcp_w)
+{
+ for (int i = 0; i < 2; i++) {
+ struct qreg coord = c->outputs[c->output_position_index + i];
+ coord = vir_FMUL(c, coord,
+ vir_uniform(c, QUNIFORM_VIEWPORT_X_SCALE + i,
+ 0));
+ coord = vir_FMUL(c, coord, rcp_w);
+ vir_FTOIN_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_VPM),
+ coord);
+ }
+
+}
+
+static void
+emit_zs_write(struct v3d_compile *c, struct qreg rcp_w)
+{
+ struct qreg zscale = vir_uniform(c, QUNIFORM_VIEWPORT_Z_SCALE, 0);
+ struct qreg zoffset = vir_uniform(c, QUNIFORM_VIEWPORT_Z_OFFSET, 0);
+
+ vir_FADD_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_VPM),
+ vir_FMUL(c, vir_FMUL(c,
+ c->outputs[c->output_position_index + 2],
+ zscale),
+ rcp_w),
+ zoffset);
+}
+
+static void
+emit_rcp_wc_write(struct v3d_compile *c, struct qreg rcp_w)
+{
+ vir_VPM_WRITE(c, rcp_w);
+}
+
+static void
+emit_point_size_write(struct v3d_compile *c)
+{
+ struct qreg point_size;
+
+ if (c->output_point_size_index != -1)
+ point_size = c->outputs[c->output_point_size_index];
+ else
+ point_size = vir_uniform_f(c, 1.0);
+
+ /* Workaround: HW-2726 PTB does not handle zero-size points (BCM2835,
+ * BCM21553).
+ */
+ point_size = vir_FMAX(c, point_size, vir_uniform_f(c, .125));
+
+ vir_VPM_WRITE(c, point_size);
+}
+
+static void
+emit_vpm_write_setup(struct v3d_compile *c)
+{
+ uint32_t packed;
+ struct V3D33_VPM_GENERIC_BLOCK_WRITE_SETUP unpacked = {
+ V3D33_VPM_GENERIC_BLOCK_WRITE_SETUP_header,
+
+ .horiz = true,
+ .laned = false,
+ .segs = true,
+ .stride = 1,
+ .size = VPM_SETUP_SIZE_32_BIT,
+ .addr = 0,
+ };
+
+ V3D33_VPM_GENERIC_BLOCK_WRITE_SETUP_pack(NULL,
+ (uint8_t *)&packed,
+ &unpacked);
+ vir_VPMSETUP(c, vir_uniform_ui(c, packed));
+}
+
+static void
+emit_vert_end(struct v3d_compile *c)
+{
+ struct qreg rcp_w = vir_SFU(c, V3D_QPU_WADDR_RECIP,
+ c->outputs[c->output_position_index + 3]);
+
+ emit_vpm_write_setup(c);
+
+ if (c->vs_key->is_coord) {
+ for (int i = 0; i < 4; i++)
+ vir_VPM_WRITE(c, c->outputs[c->output_position_index + i]);
+ emit_scaled_viewport_write(c, rcp_w);
+ if (c->vs_key->per_vertex_point_size) {
+ emit_point_size_write(c);
+ /* emit_rcp_wc_write(c, rcp_w); */
+ }
+ /* XXX: Z-only rendering */
+ if (0)
+ emit_zs_write(c, rcp_w);
+ } else {
+ emit_scaled_viewport_write(c, rcp_w);
+ emit_zs_write(c, rcp_w);
+ emit_rcp_wc_write(c, rcp_w);
+ if (c->vs_key->per_vertex_point_size)
+ emit_point_size_write(c);
+ }
+
+ for (int i = 0; i < c->vs_key->num_fs_inputs; i++) {
+ struct v3d_varying_slot input = c->vs_key->fs_inputs[i];
+ int j;
+
+ for (j = 0; j < c->num_outputs; j++) {
+ struct v3d_varying_slot output = c->output_slots[j];
+
+ if (!memcmp(&input, &output, sizeof(input))) {
+ vir_VPM_WRITE(c, c->outputs[j]);
+ break;
+ }
+ }
+ /* Emit padding if we didn't find a declared VS output for
+ * this FS input.
+ */
+ if (j == c->num_outputs)
+ vir_VPM_WRITE(c, vir_uniform_f(c, 0.0));
+ }
+}
+
+void
+v3d_optimize_nir(struct nir_shader *s)
+{
+ bool progress;
+
+ do {
+ progress = false;
+
+ NIR_PASS_V(s, nir_lower_vars_to_ssa);
+ NIR_PASS(progress, s, nir_lower_alu_to_scalar);
+ NIR_PASS(progress, s, nir_lower_phis_to_scalar);
+ NIR_PASS(progress, s, nir_copy_prop);
+ NIR_PASS(progress, s, nir_opt_remove_phis);
+ NIR_PASS(progress, s, nir_opt_dce);
+ NIR_PASS(progress, s, nir_opt_dead_cf);
+ NIR_PASS(progress, s, nir_opt_cse);
+ NIR_PASS(progress, s, nir_opt_peephole_select, 8);
+ NIR_PASS(progress, s, nir_opt_algebraic);
+ NIR_PASS(progress, s, nir_opt_constant_folding);
+ NIR_PASS(progress, s, nir_opt_undef);
+ } while (progress);
+}
+
+static int
+driver_location_compare(const void *in_a, const void *in_b)
+{
+ const nir_variable *const *a = in_a;
+ const nir_variable *const *b = in_b;
+
+ return (*a)->data.driver_location - (*b)->data.driver_location;
+}
+
+static struct qreg
+ntq_emit_vpm_read(struct v3d_compile *c,
+ uint32_t *num_components_queued,
+ uint32_t *remaining,
+ uint32_t vpm_index)
+{
+ struct qreg vpm = vir_reg(QFILE_VPM, vpm_index);
+
+ if (*num_components_queued != 0) {
+ (*num_components_queued)--;
+ c->num_inputs++;
+ return vir_MOV(c, vpm);
+ }
+
+ uint32_t num_components = MIN2(*remaining, 32);
+
+ struct V3D33_VPM_GENERIC_BLOCK_READ_SETUP unpacked = {
+ V3D33_VPM_GENERIC_BLOCK_READ_SETUP_header,
+
+ .horiz = true,
+ .laned = false,
+ /* If the field is 0, that means a read count of 32. */
+ .num = num_components & 31,
+ .segs = true,
+ .stride = 1,
+ .size = VPM_SETUP_SIZE_32_BIT,
+ .addr = c->num_inputs,
+ };
+
+ uint32_t packed;
+ V3D33_VPM_GENERIC_BLOCK_READ_SETUP_pack(NULL,
+ (uint8_t *)&packed,
+ &unpacked);
+ vir_VPMSETUP(c, vir_uniform_ui(c, packed));
+
+ *num_components_queued = num_components - 1;
+ *remaining -= num_components;
+ c->num_inputs++;
+
+ return vir_MOV(c, vpm);
+}
+
+static void
+ntq_setup_inputs(struct v3d_compile *c)
+{
+ unsigned num_entries = 0;
+ unsigned num_components = 0;
+ nir_foreach_variable(var, &c->s->inputs) {
+ num_entries++;
+ num_components += glsl_get_components(var->type);
+ }
+
+ nir_variable *vars[num_entries];
+
+ unsigned i = 0;
+ nir_foreach_variable(var, &c->s->inputs)
+ vars[i++] = var;
+
+ /* Sort the variables so that we emit the input setup in
+ * driver_location order. This is required for VPM reads, whose data
+ * is fetched into the VPM in driver_location (TGSI register index)
+ * order.
+ */
+ qsort(&vars, num_entries, sizeof(*vars), driver_location_compare);
+
+ uint32_t vpm_components_queued = 0;
+ if (c->s->info.stage == MESA_SHADER_VERTEX) {
+ bool uses_iid = c->s->info.system_values_read &
+ (1ull << SYSTEM_VALUE_INSTANCE_ID);
+ bool uses_vid = c->s->info.system_values_read &
+ (1ull << SYSTEM_VALUE_VERTEX_ID);
+
+ num_components += uses_iid;
+ num_components += uses_vid;
+
+ if (uses_iid) {
+ c->iid = ntq_emit_vpm_read(c, &vpm_components_queued,
+ &num_components, ~0);
+ }
+
+ if (uses_vid) {
+ c->vid = ntq_emit_vpm_read(c, &vpm_components_queued,
+ &num_components, ~0);
+ }
+ }
+
+ for (unsigned i = 0; i < num_entries; i++) {
+ nir_variable *var = vars[i];
+ unsigned array_len = MAX2(glsl_get_length(var->type), 1);
+ unsigned loc = var->data.driver_location;
+
+ assert(array_len == 1);
+ (void)array_len;
+ resize_qreg_array(c, &c->inputs, &c->inputs_array_size,
+ (loc + 1) * 4);
+
+ if (c->s->info.stage == MESA_SHADER_FRAGMENT) {
+ if (var->data.location == VARYING_SLOT_POS) {
+ emit_fragcoord_input(c, loc);
+ } else if (var->data.location == VARYING_SLOT_PNTC ||
+ (var->data.location >= VARYING_SLOT_VAR0 &&
+ (c->fs_key->point_sprite_mask &
+ (1 << (var->data.location -
+ VARYING_SLOT_VAR0))))) {
+ c->inputs[loc * 4 + 0] = c->point_x;
+ c->inputs[loc * 4 + 1] = c->point_y;
+ } else {
+ emit_fragment_input(c, loc, var);
+ }
+ } else {
+ int var_components = glsl_get_components(var->type);
+
+ for (int i = 0; i < var_components; i++) {
+ c->inputs[loc * 4 + i] =
+ ntq_emit_vpm_read(c,
+ &vpm_components_queued,
+ &num_components,
+ loc * 4 + i);
+
+ }
+ c->vattr_sizes[loc] = var_components;
+ }
+ }
+
+ if (c->s->info.stage == MESA_SHADER_VERTEX) {
+ assert(vpm_components_queued == 0);
+ assert(num_components == 0);
+ }
+}
+
+static void
+ntq_setup_outputs(struct v3d_compile *c)
+{
+ nir_foreach_variable(var, &c->s->outputs) {
+ unsigned array_len = MAX2(glsl_get_length(var->type), 1);
+ unsigned loc = var->data.driver_location * 4;
+
+ assert(array_len == 1);
+ (void)array_len;
+
+ for (int i = 0; i < 4; i++)
+ add_output(c, loc + i, var->data.location, i);
+
+ if (c->s->info.stage == MESA_SHADER_FRAGMENT) {
+ switch (var->data.location) {
+ case FRAG_RESULT_COLOR:
+ c->output_color_var[0] = var;
+ c->output_color_var[1] = var;
+ c->output_color_var[2] = var;
+ c->output_color_var[3] = var;
+ break;
+ case FRAG_RESULT_DATA0:
+ case FRAG_RESULT_DATA1:
+ case FRAG_RESULT_DATA2:
+ case FRAG_RESULT_DATA3:
+ c->output_color_var[var->data.location -
+ FRAG_RESULT_DATA0] = var;
+ break;
+ case FRAG_RESULT_DEPTH:
+ c->output_position_index = loc;
+ break;
+ case FRAG_RESULT_SAMPLE_MASK:
+ c->output_sample_mask_index = loc;
+ break;
+ }
+ } else {
+ switch (var->data.location) {
+ case VARYING_SLOT_POS:
+ c->output_position_index = loc;
+ break;
+ case VARYING_SLOT_PSIZ:
+ c->output_point_size_index = loc;
+ break;
+ }
+ }
+ }
+}
+
+static void
+ntq_setup_uniforms(struct v3d_compile *c)
+{
+ nir_foreach_variable(var, &c->s->uniforms) {
+ uint32_t vec4_count = glsl_count_attribute_slots(var->type,
+ false);
+ unsigned vec4_size = 4 * sizeof(float);
+
+ declare_uniform_range(c, var->data.driver_location * vec4_size,
+ vec4_count * vec4_size);
+
+ }
+}
+
+/**
+ * Sets up the mapping from nir_register to struct qreg *.
+ *
+ * Each nir_register gets a struct qreg per 32-bit component being stored.
+ */
+static void
+ntq_setup_registers(struct v3d_compile *c, struct exec_list *list)
+{
+ foreach_list_typed(nir_register, nir_reg, node, list) {
+ unsigned array_len = MAX2(nir_reg->num_array_elems, 1);
+ struct qreg *qregs = ralloc_array(c->def_ht, struct qreg,
+ array_len *
+ nir_reg->num_components);
+
+ _mesa_hash_table_insert(c->def_ht, nir_reg, qregs);
+
+ for (int i = 0; i < array_len * nir_reg->num_components; i++)
+ qregs[i] = vir_get_temp(c);
+ }
+}
+
+static void
+ntq_emit_load_const(struct v3d_compile *c, nir_load_const_instr *instr)
+{
+ struct qreg *qregs = ntq_init_ssa_def(c, &instr->def);
+ for (int i = 0; i < instr->def.num_components; i++)
+ qregs[i] = vir_uniform_ui(c, instr->value.u32[i]);
+
+ _mesa_hash_table_insert(c->def_ht, &instr->def, qregs);
+}
+
+static void
+ntq_emit_ssa_undef(struct v3d_compile *c, nir_ssa_undef_instr *instr)
+{
+ struct qreg *qregs = ntq_init_ssa_def(c, &instr->def);
+
+ /* VIR needs there to be *some* value, so pick 0 (ntq_setup_registers(),
+ * by contrast, allocates a fresh temp for each component).
+ */
+ for (int i = 0; i < instr->def.num_components; i++)
+ qregs[i] = vir_uniform_ui(c, 0);
+}
+
+/**
+ * Emits the VIR for one NIR intrinsic.
+ *
+ * Loads are written to the instruction's destination with ntq_store_dest(),
+ * store_output records its sources in c->outputs, and the discard intrinsics
+ * emit a (possibly conditional) SETMSF of 0. An intrinsic not handled here
+ * is printed to stderr and otherwise ignored.
+ */
+static void
+ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
+{
+        nir_const_value *const_offset;
+        unsigned offset;
+
+        switch (instr->intrinsic) {
+        case nir_intrinsic_load_uniform:
+                assert(instr->num_components == 1);
+                /* A constant offset becomes a direct QUNIFORM_UNIFORM
+                 * reference; anything else uses indirect_uniform_load().
+                 */
+                const_offset = nir_src_as_const_value(instr->src[0]);
+                if (const_offset) {
+                        offset = nir_intrinsic_base(instr) + const_offset->u32[0];
+                        assert(offset % 4 == 0);
+                        /* We need dwords */
+                        offset = offset / 4;
+                        ntq_store_dest(c, &instr->dest, 0,
+                                       vir_uniform(c, QUNIFORM_UNIFORM,
+                                                   offset));
+                } else {
+                        ntq_store_dest(c, &instr->dest, 0,
+                                       indirect_uniform_load(c, instr));
+                }
+                break;
+
+        case nir_intrinsic_load_ubo:
+                /* One lookup per component: the address is written to the
+                 * TMUA register and the result is read back with LDTMU.
+                 */
+                for (int i = 0; i < instr->num_components; i++) {
+                        int ubo = nir_src_as_const_value(instr->src[0])->u32[0];
+
+                        /* Adjust for where we stored the TGSI register base. */
+                        vir_ADD_dest(c,
+                                     vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_TMUA),
+                                     vir_uniform(c, QUNIFORM_UBO_ADDR, 1 + ubo),
+                                     vir_ADD(c,
+                                             ntq_get_src(c, instr->src[1], 0),
+                                             vir_uniform_ui(c, i * 4)));
+
+                        ntq_store_dest(c, &instr->dest, i, vir_LDTMU(c));
+                }
+                break;
+
+        case nir_intrinsic_load_user_clip_plane:
+                for (int i = 0; i < instr->num_components; i++) {
+                        ntq_store_dest(c, &instr->dest, i,
+                                       vir_uniform(c, QUNIFORM_USER_CLIP_PLANE,
+                                                   nir_intrinsic_ucp_id(instr) *
+                                                   4 + i));
+                }
+                break;
+
+        case nir_intrinsic_load_alpha_ref_float:
+                ntq_store_dest(c, &instr->dest, 0,
+                               vir_uniform(c, QUNIFORM_ALPHA_REF, 0));
+                break;
+
+        case nir_intrinsic_load_sample_mask_in:
+                ntq_store_dest(c, &instr->dest, 0,
+                               vir_uniform(c, QUNIFORM_SAMPLE_MASK, 0));
+                break;
+
+        case nir_intrinsic_load_front_face:
+                /* The register contains 0 (front) or 1 (back), and we need to
+                 * turn it into a NIR bool where true means front.
+                 */
+                ntq_store_dest(c, &instr->dest, 0,
+                               vir_ADD(c,
+                                       vir_uniform_ui(c, -1),
+                                       vir_REVF(c)));
+                break;
+
+        case nir_intrinsic_load_instance_id:
+                ntq_store_dest(c, &instr->dest, 0, vir_MOV(c, c->iid));
+                break;
+
+        case nir_intrinsic_load_vertex_id:
+                ntq_store_dest(c, &instr->dest, 0, vir_MOV(c, c->vid));
+                break;
+
+        case nir_intrinsic_load_input:
+                const_offset = nir_src_as_const_value(instr->src[0]);
+                assert(const_offset && "v3d doesn't support indirect inputs");
+                for (int i = 0; i < instr->num_components; i++) {
+                        offset = nir_intrinsic_base(instr) + const_offset->u32[0];
+                        int comp = nir_intrinsic_component(instr) + i;
+                        ntq_store_dest(c, &instr->dest, i,
+                                       vir_MOV(c, c->inputs[offset * 4 + comp]));
+                }
+                break;
+
+        case nir_intrinsic_store_output:
+                const_offset = nir_src_as_const_value(instr->src[1]);
+                assert(const_offset && "v3d doesn't support indirect outputs");
+                /* c->outputs is indexed as 4 channels per location. */
+                offset = ((nir_intrinsic_base(instr) +
+                           const_offset->u32[0]) * 4 +
+                          nir_intrinsic_component(instr));
+
+                for (int i = 0; i < instr->num_components; i++) {
+                        c->outputs[offset + i] =
+                                vir_MOV(c, ntq_get_src(c, instr->src[0], i));
+                }
+                c->num_outputs = MAX2(c->num_outputs,
+                                      offset + instr->num_components);
+                break;
+
+        case nir_intrinsic_discard:
+                if (c->execute.file != QFILE_NULL) {
+                        vir_PF(c, c->execute, V3D_QPU_PF_PUSHZ);
+                        vir_set_cond(vir_SETMSF_dest(c, vir_reg(QFILE_NULL, 0),
+                                                     vir_uniform_ui(c, 0)),
+                                     V3D_QPU_COND_IFA);
+                } else {
+                        vir_SETMSF_dest(c, vir_reg(QFILE_NULL, 0),
+                                        vir_uniform_ui(c, 0));
+                }
+                break;
+
+        case nir_intrinsic_discard_if: {
+                /* true (~0) if we're discarding */
+                struct qreg cond = ntq_get_src(c, instr->src[0], 0);
+
+                if (c->execute.file != QFILE_NULL) {
+                        /* execute == 0 means the channel is active. Invert
+                         * the condition so that we can use zero as "executing
+                         * and discarding."
+                         */
+                        vir_PF(c, vir_AND(c, c->execute, vir_NOT(c, cond)),
+                               V3D_QPU_PF_PUSHZ);
+                        vir_set_cond(vir_SETMSF_dest(c, vir_reg(QFILE_NULL, 0),
+                                                     vir_uniform_ui(c, 0)),
+                                     V3D_QPU_COND_IFA);
+                } else {
+                        vir_PF(c, cond, V3D_QPU_PF_PUSHZ);
+                        vir_set_cond(vir_SETMSF_dest(c, vir_reg(QFILE_NULL, 0),
+                                                     vir_uniform_ui(c, 0)),
+                                     V3D_QPU_COND_IFNA);
+                }
+
+                break;
+        }
+
+        default:
+                fprintf(stderr, "Unknown intrinsic: ");
+                nir_print_instr(&instr->instr, stderr);
+                fprintf(stderr, "\n");
+                break;
+        }
+}
+
+/* Clears (activates) the execute flags for any channels whose jump target
+ * matches this block.
+ */
+static void
+ntq_activate_execute_for_block(struct v3d_compile *c)
+{
+ vir_PF(c, vir_SUB(c, c->execute, vir_uniform_ui(c, c->cur_block->index)),
+ V3D_QPU_PF_PUSHZ);
+
+ vir_MOV_cond(c, V3D_QPU_COND_IFA, c->execute, vir_uniform_ui(c, 0));
+}
+
+/* Emits VIR for a NIR if statement.
+ *
+ * Control flow is tracked per channel in c->execute: 0 means the channel is
+ * executing, any other value is the index of the block the channel is
+ * waiting to reach.  When the else list is a single empty block, no ELSE
+ * block is created and the ENDIF block doubles as the else target.
+ */
+static void
+ntq_emit_if(struct v3d_compile *c, nir_if *if_stmt)
+{
+        nir_block *first_else = nir_if_first_else_block(if_stmt);
+        bool has_else =
+                first_else != nir_if_last_else_block(if_stmt) ||
+                !exec_list_is_empty(&first_else->instr_list);
+
+        /* Blocks are allocated in the order THEN, ENDIF, ELSE (if any). */
+        struct qblock *then_block = vir_new_block(c);
+        struct qblock *after_block = vir_new_block(c);
+        struct qblock *else_block =
+                has_else ? vir_new_block(c) : after_block;
+
+        /* Outside of any control flow there is no execute value yet, so
+         * create one with all channels at 0.
+         */
+        bool was_top_level = (c->execute.file == QFILE_NULL);
+        if (was_top_level)
+                c->execute = vir_MOV(c, vir_uniform_ui(c, 0));
+
+        /* Set A for executing (execute == 0) and jumping (if->condition ==
+         * 0) channels, and then point the execute value of those channels
+         * at the ELSE block.
+         */
+        struct qreg condition = ntq_get_src(c, if_stmt->condition, 0);
+        vir_PF(c, vir_OR(c, c->execute, condition), V3D_QPU_PF_PUSHZ);
+        vir_MOV_cond(c, V3D_QPU_COND_IFA, c->execute,
+                     vir_uniform_ui(c, else_block->index));
+
+        /* Branch to ELSE when no channel is active for THEN; otherwise fall
+         * through into THEN.
+         */
+        vir_PF(c, c->execute, V3D_QPU_PF_PUSHZ);
+        vir_BRANCH(c, V3D_QPU_BRANCH_COND_ALLNA);
+        vir_link_blocks(c->cur_block, else_block);
+        vir_link_blocks(c->cur_block, then_block);
+
+        /* THEN side. */
+        vir_set_emit_block(c, then_block);
+        ntq_emit_cf_list(c, &if_stmt->then_list);
+
+        if (has_else) {
+                /* End of THEN: the channels that are still active update
+                 * their execute value to point at ENDIF.
+                 */
+                vir_PF(c, c->execute, V3D_QPU_PF_PUSHZ);
+                vir_MOV_cond(c, V3D_QPU_COND_IFA, c->execute,
+                             vir_uniform_ui(c, after_block->index));
+
+                /* If every channel points at ENDIF, jump there directly. */
+                struct qreg endif_index =
+                        vir_uniform_ui(c, after_block->index);
+                vir_PF(c, vir_SUB(c, c->execute, endif_index),
+                       V3D_QPU_PF_PUSHZ);
+                vir_BRANCH(c, V3D_QPU_BRANCH_COND_ALLA);
+                vir_link_blocks(c->cur_block, after_block);
+                vir_link_blocks(c->cur_block, else_block);
+
+                /* ELSE side. */
+                vir_set_emit_block(c, else_block);
+                ntq_activate_execute_for_block(c);
+                ntq_emit_cf_list(c, &if_stmt->else_list);
+        }
+
+        vir_link_blocks(c->cur_block, after_block);
+        vir_set_emit_block(c, after_block);
+
+        /* Back at top level the execute value is dropped again; otherwise
+         * wake up the channels that were waiting for ENDIF.
+         */
+        if (!was_top_level)
+                ntq_activate_execute_for_block(c);
+        else
+                c->execute = c->undef;
+}
+
+/* Emits a NIR break or continue.
+ *
+ * No branch instruction is generated here.  The Z flag of c->execute is
+ * pushed and, on the channels selected by IFA, c->execute is overwritten
+ * with the index of the current loop's break or continue block.
+ *
+ * nir_jump_return is not supported: returns must have been lowered before
+ * this point.
+ */
+static void
+ntq_emit_jump(struct v3d_compile *c, nir_jump_instr *jump)
+{
+        switch (jump->type) {
+        case nir_jump_break:
+                vir_PF(c, c->execute, V3D_QPU_PF_PUSHZ);
+                vir_MOV_cond(c, V3D_QPU_COND_IFA, c->execute,
+                             vir_uniform_ui(c, c->loop_break_block->index));
+                break;
+
+        case nir_jump_continue:
+                vir_PF(c, c->execute, V3D_QPU_PF_PUSHZ);
+                vir_MOV_cond(c, V3D_QPU_COND_IFA, c->execute,
+                             vir_uniform_ui(c, c->loop_cont_block->index));
+                break;
+
+        case nir_jump_return:
+                unreachable("All returns should be lowered\n");
+        }
+}
+
+/* Dispatches one NIR instruction to the emitter for its type.
+ *
+ * An instruction type without an emitter is printed to stderr and the
+ * process is aborted.
+ */
+static void
+ntq_emit_instr(struct v3d_compile *c, nir_instr *instr)
+{
+        switch (instr->type) {
+        case nir_instr_type_alu:
+                ntq_emit_alu(c, nir_instr_as_alu(instr));
+                return;
+        case nir_instr_type_intrinsic:
+                ntq_emit_intrinsic(c, nir_instr_as_intrinsic(instr));
+                return;
+        case nir_instr_type_load_const:
+                ntq_emit_load_const(c, nir_instr_as_load_const(instr));
+                return;
+        case nir_instr_type_ssa_undef:
+                ntq_emit_ssa_undef(c, nir_instr_as_ssa_undef(instr));
+                return;
+        case nir_instr_type_tex:
+                ntq_emit_tex(c, nir_instr_as_tex(instr));
+                return;
+        case nir_instr_type_jump:
+                ntq_emit_jump(c, nir_instr_as_jump(instr));
+                return;
+        default:
+                break;
+        }
+
+        /* Only reached for instruction types that have no emitter. */
+        fprintf(stderr, "Unknown NIR instr type: ");
+        nir_print_instr(instr, stderr);
+        fprintf(stderr, "\n");
+        abort();
+}
+
+/* Emits VIR for each instruction nir_foreach_instr() visits in the block. */
+static void
+ntq_emit_block(struct v3d_compile *c, nir_block *block)
+{
+        nir_foreach_instr(cur_instr, block)
+                ntq_emit_instr(c, cur_instr);
+}
+
+static void ntq_emit_cf_list(struct v3d_compile *c, struct exec_list *list);
+
+/* Emits VIR for a NIR loop.
+ *
+ * Two new blocks are created: the continue block, which holds the loop body
+ * and is the target of the back edge, and the break block that follows the
+ * loop.  The enclosing loop's continue/break blocks are saved on entry and
+ * restored on exit so that nested loops work.
+ */
+static void
+ntq_emit_loop(struct v3d_compile *c, nir_loop *loop)
+{
+        /* Outside of any control flow there is no execute value yet, so
+         * create one with all channels at 0.
+         */
+        bool was_top_level = (c->execute.file == QFILE_NULL);
+        if (was_top_level)
+                c->execute = vir_MOV(c, vir_uniform_ui(c, 0));
+
+        struct qblock *outer_cont_block = c->loop_cont_block;
+        struct qblock *outer_break_block = c->loop_break_block;
+
+        c->loop_cont_block = vir_new_block(c);
+        c->loop_break_block = vir_new_block(c);
+
+        /* Enter the loop body. */
+        vir_link_blocks(c->cur_block, c->loop_cont_block);
+        vir_set_emit_block(c, c->loop_cont_block);
+        ntq_activate_execute_for_block(c);
+
+        ntq_emit_cf_list(c, &loop->body);
+
+        /* Re-enable any previous continues now, so our ANYA check below
+         * works.
+         *
+         * XXX: Use the .ORZ flags update, instead.
+         */
+        struct qreg cont_index = vir_uniform_ui(c, c->loop_cont_block->index);
+        vir_PF(c, vir_SUB(c, c->execute, cont_index), V3D_QPU_PF_PUSHZ);
+        vir_MOV_cond(c, V3D_QPU_COND_IFA, c->execute, vir_uniform_ui(c, 0));
+
+        /* Branch back to the top of the loop if ANYA holds for the pushed Z
+         * flag of execute.
+         */
+        vir_PF(c, c->execute, V3D_QPU_PF_PUSHZ);
+        vir_BRANCH(c, V3D_QPU_BRANCH_COND_ANYA);
+        vir_link_blocks(c->cur_block, c->loop_cont_block);
+        vir_link_blocks(c->cur_block, c->loop_break_block);
+
+        /* Continue emitting after the loop. */
+        vir_set_emit_block(c, c->loop_break_block);
+        if (!was_top_level)
+                ntq_activate_execute_for_block(c);
+        else
+                c->execute = c->undef;
+
+        c->loop_break_block = outer_break_block;
+        c->loop_cont_block = outer_cont_block;
+}
+
+/* Placeholder for nir_cf_node_function nodes: nothing is emitted, a
+ * message is written to stderr and the process is aborted.  Both parameters
+ * are unused.
+ */
+static void
+ntq_emit_function(struct v3d_compile *c, nir_function_impl *func)
+{
+ fprintf(stderr, "FUNCTIONS not handled.\n");
+ abort();
+}
+
+/* Walks a NIR control-flow list and emits each node with the emitter for
+ * its kind (block, if, loop or function).  An unknown node type aborts the
+ * process after printing a message to stderr.
+ */
+static void
+ntq_emit_cf_list(struct v3d_compile *c, struct exec_list *list)
+{
+        foreach_list_typed(nir_cf_node, cf_node, node, list) {
+                switch (cf_node->type) {
+                case nir_cf_node_block:
+                        ntq_emit_block(c, nir_cf_node_as_block(cf_node));
+                        break;
+                case nir_cf_node_if:
+                        ntq_emit_if(c, nir_cf_node_as_if(cf_node));
+                        break;
+                case nir_cf_node_loop:
+                        ntq_emit_loop(c, nir_cf_node_as_loop(cf_node));
+                        break;
+                case nir_cf_node_function:
+                        ntq_emit_function(c,
+                                          nir_cf_node_as_function(cf_node));
+                        break;
+                default:
+                        fprintf(stderr, "Unknown NIR node type\n");
+                        abort();
+                }
+        }
+}
+
+/* Emits one NIR function implementation: sets up the registers listed in
+ * impl->registers, then emits the implementation's control-flow body.
+ */
+static void
+ntq_emit_impl(struct v3d_compile *c, nir_function_impl *impl)
+{
+ ntq_setup_registers(c, &impl->registers);
+ ntq_emit_cf_list(c, &impl->body);
+}
+
+/* Translates the NIR shader in c->s into VIR instructions.
+ *
+ * For fragment shaders the payload values are copied out of registers 0..2
+ * first, and extra fragment varyings are emitted for point and line
+ * primitives.  Inputs, outputs, uniforms and shader-level registers are then
+ * set up before the function bodies are emitted.
+ */
+static void
+nir_to_vir(struct v3d_compile *c)
+{
+ if (c->s->info.stage == MESA_SHADER_FRAGMENT) {
+ /* Copy the payload out of QFILE_REG 0, 1 and 2. */
+ c->payload_w = vir_MOV(c, vir_reg(QFILE_REG, 0));
+ c->payload_w_centroid = vir_MOV(c, vir_reg(QFILE_REG, 1));
+ c->payload_z = vir_MOV(c, vir_reg(QFILE_REG, 2));
+
+ /* Points get two extra varyings (x then y), lines get one. */
+ if (c->fs_key->is_points) {
+ c->point_x = emit_fragment_varying(c, NULL, 0);
+ c->point_y = emit_fragment_varying(c, NULL, 0);
+ } else if (c->fs_key->is_lines) {
+ c->line_x = emit_fragment_varying(c, NULL, 0);
+ }
+ }
+
+ ntq_setup_inputs(c);
+ ntq_setup_outputs(c);
+ ntq_setup_uniforms(c);
+ ntq_setup_registers(c, &c->s->registers);
+
+ /* Find the main function and emit the body. */
+ nir_foreach_function(function, c->s) {
+ /* Every function in the shader is asserted to be "main" and to
+ * have an implementation.
+ */
+ assert(strcmp(function->name, "main") == 0);
+ assert(function->impl);
+ ntq_emit_impl(c, function->impl);
+ }
+}
+
+/* NIR compiler options exported for the V3D backend.
+ *
+ * NOTE(review): going by the field names, each lower_* flag presumably asks
+ * NIR to lower that operation before it reaches this backend -- confirm
+ * against the nir_shader_compiler_options documentation.
+ */
+const nir_shader_compiler_options v3d_nir_options = {
+ .lower_extract_byte = true,
+ .lower_extract_word = true,
+ .lower_bitfield_insert = true,
+ .lower_bitfield_extract = true,
+ .lower_pack_unorm_2x16 = true,
+ .lower_pack_snorm_2x16 = true,
+ .lower_pack_unorm_4x8 = true,
+ .lower_pack_snorm_4x8 = true,
+ .lower_ffma = true,
+ .lower_flrp32 = true,
+ .lower_fpow = true,
+ .lower_fsat = true,
+ .lower_fsqrt = true,
+ .lower_negate = true,
+ .native_integers = true,
+};
+
+
+#if 0
+/* Counts the instructions in every function of the shader that has an
+ * implementation.  Currently compiled out by the surrounding #if 0.
+ */
+static int
+count_nir_instrs(nir_shader *nir)
+{
+        int total = 0;
+
+        nir_foreach_function(func, nir) {
+                if (func->impl) {
+                        nir_foreach_block(blk, func->impl) {
+                                nir_foreach_instr(cur, blk)
+                                        total += 1;
+                        }
+                }
+        }
+
+        return total;
+}
+#endif
+
+/* Compiles the NIR shader in c->s: NIR -> VIR, stage-specific epilogue, VIR
+ * optimization and uniform lowering, then VIR -> QPU code.
+ *
+ * When V3D_DEBUG has the NIR/VIR bit or the bit for this shader stage set,
+ * the NIR and the VIR (before and after optimization) are dumped to stderr.
+ */
+void
+v3d_nir_to_vir(struct v3d_compile *c)
+{
+        const bool dump_nir =
+                V3D_DEBUG &
+                (V3D_DEBUG_NIR |
+                 v3d_debug_flag_for_shader_stage(c->s->info.stage));
+        if (dump_nir) {
+                fprintf(stderr, "%s prog %d/%d NIR:\n",
+                        vir_get_stage_name(c),
+                        c->program_id, c->variant_id);
+                nir_print_shader(c->s, stderr);
+        }
+
+        nir_to_vir(c);
+
+        /* Only fragment and vertex shaders have an epilogue emitter. */
+        if (c->s->info.stage == MESA_SHADER_FRAGMENT)
+                emit_frag_end(c);
+        else if (c->s->info.stage == MESA_SHADER_VERTEX)
+                emit_vert_end(c);
+        else
+                unreachable("bad stage");
+
+        const bool dump_pre_opt_vir =
+                V3D_DEBUG &
+                (V3D_DEBUG_VIR |
+                 v3d_debug_flag_for_shader_stage(c->s->info.stage));
+        if (dump_pre_opt_vir) {
+                fprintf(stderr, "%s prog %d/%d pre-opt VIR:\n",
+                        vir_get_stage_name(c),
+                        c->program_id, c->variant_id);
+                vir_dump(c);
+                fprintf(stderr, "\n");
+        }
+
+        vir_optimize(c);
+        vir_lower_uniforms(c);
+
+        /* XXX: vir_schedule_instructions(c); */
+
+        const bool dump_vir =
+                V3D_DEBUG &
+                (V3D_DEBUG_VIR |
+                 v3d_debug_flag_for_shader_stage(c->s->info.stage));
+        if (dump_vir) {
+                fprintf(stderr, "%s prog %d/%d VIR:\n",
+                        vir_get_stage_name(c),
+                        c->program_id, c->variant_id);
+                vir_dump(c);
+                fprintf(stderr, "\n");
+        }
+
+        v3d_vir_to_qpu(c);
+}
diff -Nru mesa-17.2.4/src/broadcom/compiler/qpu_schedule.c mesa-17.3.3/src/broadcom/compiler/qpu_schedule.c
--- mesa-17.2.4/src/broadcom/compiler/qpu_schedule.c 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/broadcom/compiler/qpu_schedule.c 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,1365 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * @file
+ *
+ * The basic model of the list scheduler is to take a basic block, compute a
+ * DAG of the dependencies, and make a list of the DAG heads. Heuristically
+ * pick a DAG head, then put all the children that are now DAG heads into the
+ * list of things to schedule.
+ *
+ * The goal of scheduling here is to pack pairs of operations together in a
+ * single QPU instruction.
+ */
+
+#include "qpu/qpu_disasm.h"
+#include "v3d_compiler.h"
+#include "util/ralloc.h"
+
+static bool debug;
+
+struct schedule_node_child;
+
+/* One instruction in the scheduler's dependency DAG. */
+struct schedule_node {
+ /* List linkage; this is the "link" member used with
+ * list_for_each_entry() and list_add().
+ */
+ struct list_head link;
+ /* The instruction this node stands for. */
+ struct qinst *inst;
+ /* Outgoing dependency edges; the array is grown with reralloc() in
+ * add_dep().
+ */
+ struct schedule_node_child *children;
+ /* Number of entries of children[] in use. */
+ uint32_t child_count;
+ /* Number of entries children[] has room for. */
+ uint32_t child_array_size;
+ /* Number of edges pointing at this node.  It is decremented in
+ * mark_instruction_scheduled(), which adds the node to the schedule
+ * list once it reaches 0.
+ */
+ uint32_t parent_count;
+
+ /* Longest cycles + instruction_latency() of any parent of this node. */
+ uint32_t unblocked_time;
+
+ /**
+ * Minimum number of cycles from scheduling this instruction until the
+ * end of the program, based on the slowest dependency chain through
+ * the children.
+ */
+ uint32_t delay;
+
+ /**
+ * cycles between this instruction being scheduled and when its result
+ * can be consumed.
+ */
+ uint32_t latency;
+};
+
+/* One outgoing dependency edge of a schedule_node. */
+struct schedule_node_child {
+ /* The dependent node; set to NULL by mark_instruction_scheduled() once
+ * the edge has been processed.
+ */
+ struct schedule_node *node;
+ /* True for edges created by add_dep() for a read dependency while
+ * walking in the reverse direction.
+ */
+ bool write_after_read;
+};
+
+/* When walking the instructions in reverse, we need to swap before/after in
+ * add_dep().
+ *
+ * F is set by calculate_forward_deps() and R by calculate_reverse_deps().
+ */
+enum direction { F, R };
+
+/* Dependency-tracking state for one walk over a block's instructions.  Each
+ * last_* member points at the most recent node recorded for that resource
+ * by add_write_dep(), or is NULL if there has been none yet.
+ */
+struct schedule_state {
+ /* Indexed by (mux - V3D_QPU_MUX_R0) / (waddr - V3D_QPU_WADDR_R0). */
+ struct schedule_node *last_r[6];
+ /* Indexed by raddr_a/raddr_b for reads and by non-magic waddr for
+ * writes.
+ */
+ struct schedule_node *last_rf[64];
+ /* Last node that updated the flags (pf/uf updates, FLBPOP, thrsw). */
+ struct schedule_node *last_sf;
+ struct schedule_node *last_vpm_read;
+ struct schedule_node *last_tmu_write;
+ struct schedule_node *last_tlb;
+ struct schedule_node *last_vpm;
+ struct schedule_node *last_unif;
+ /* Last MULTOP/UMUL24 node; see calculate_deps(). */
+ struct schedule_node *last_rtop;
+ /* Walk direction; see enum direction. */
+ enum direction dir;
+ /* Estimated cycle when the current instruction would start. */
+ uint32_t time;
+};
+
+/* Records that "after" depends on "before".
+ *
+ * Nothing happens when either node is NULL.  In the reverse walk the two
+ * nodes trade places, and an edge added for a read (write == false) is
+ * tagged as write-after-read.  An edge with the same target and the same
+ * tag is never stored twice.
+ */
+static void
+add_dep(struct schedule_state *state,
+        struct schedule_node *before,
+        struct schedule_node *after,
+        bool write)
+{
+        const bool reversed = (state->dir == R);
+        const bool war = reversed && !write;
+
+        if (before == NULL || after == NULL)
+                return;
+
+        assert(before != after);
+
+        struct schedule_node *parent = reversed ? after : before;
+        struct schedule_node *child = reversed ? before : after;
+
+        /* Skip the edge if it has already been recorded. */
+        for (int i = 0; i < parent->child_count; i++) {
+                const struct schedule_node_child *edge = &parent->children[i];
+
+                if (edge->node == child && edge->write_after_read == war)
+                        return;
+        }
+
+        /* Grow the edge array (doubling, at least 16 entries) when full. */
+        if (parent->child_count >= parent->child_array_size) {
+                parent->child_array_size =
+                        MAX2(parent->child_array_size * 2, 16);
+                parent->children = reralloc(parent, parent->children,
+                                            struct schedule_node_child,
+                                            parent->child_array_size);
+        }
+
+        struct schedule_node_child *slot =
+                &parent->children[parent->child_count];
+        slot->node = child;
+        slot->write_after_read = war;
+        parent->child_count++;
+        child->parent_count++;
+}
+
+/* Adds a read dependency: add_dep() with write == false.  The tracking
+ * state itself is left unchanged.
+ */
+static void
+add_read_dep(struct schedule_state *state,
+ struct schedule_node *before,
+ struct schedule_node *after)
+{
+ add_dep(state, before, after, false);
+}
+
+/* Adds a write dependency between *before and after, then stores "after" in
+ * *before so that the tracking slot now refers to this node.
+ */
+static void
+add_write_dep(struct schedule_state *state,
+ struct schedule_node **before,
+ struct schedule_node *after)
+{
+ add_dep(state, *before, after, true);
+ *before = after;
+}
+
+/* Returns true for an ALU instruction whose add or mul half does a magic
+ * write to the TLB or TLBU address.
+ */
+static bool
+qpu_inst_is_tlb(const struct v3d_qpu_instr *inst)
+{
+        if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
+                return false;
+
+        const bool add_writes_tlb =
+                inst->alu.add.magic_write &&
+                (inst->alu.add.waddr == V3D_QPU_WADDR_TLB ||
+                 inst->alu.add.waddr == V3D_QPU_WADDR_TLBU);
+
+        const bool mul_writes_tlb =
+                inst->alu.mul.magic_write &&
+                (inst->alu.mul.waddr == V3D_QPU_WADDR_TLB ||
+                 inst->alu.mul.waddr == V3D_QPU_WADDR_TLBU);
+
+        return add_writes_tlb || mul_writes_tlb;
+}
+
+/* Adds a read dependency for one input mux of n: mux A and mux B read the
+ * register file entry named by raddr_a / raddr_b, every other mux value is
+ * treated as an accumulator index relative to V3D_QPU_MUX_R0.
+ */
+static void
+process_mux_deps(struct schedule_state *state, struct schedule_node *n,
+                 enum v3d_qpu_mux mux)
+{
+        struct schedule_node *last_writer;
+
+        if (mux == V3D_QPU_MUX_A)
+                last_writer = state->last_rf[n->inst->qpu.raddr_a];
+        else if (mux == V3D_QPU_MUX_B)
+                last_writer = state->last_rf[n->inst->qpu.raddr_b];
+        else
+                last_writer = state->last_r[mux - V3D_QPU_MUX_R0];
+
+        add_read_dep(state, last_writer, n);
+}
+
+
+/* Adds the write dependency implied by one write address of n.
+ *
+ * Non-magic writes go to the register file.  Magic writes are routed by
+ * destination: TMU, accumulators r0-r5, VPM or TLB.  SFU writes and NOP add
+ * nothing here, and an unknown magic address aborts.
+ */
+static void
+process_waddr_deps(struct schedule_state *state, struct schedule_node *n,
+                   uint32_t waddr, bool magic)
+{
+        if (!magic) {
+                add_write_dep(state, &state->last_rf[waddr], n);
+                return;
+        }
+
+        if (v3d_qpu_magic_waddr_is_tmu(waddr)) {
+                add_write_dep(state, &state->last_tmu_write, n);
+                return;
+        }
+
+        /* Handled by v3d_qpu_writes_r4() check. */
+        if (v3d_qpu_magic_waddr_is_sfu(waddr))
+                return;
+
+        switch (waddr) {
+        case V3D_QPU_WADDR_R0:
+        case V3D_QPU_WADDR_R1:
+        case V3D_QPU_WADDR_R2:
+        case V3D_QPU_WADDR_R3:
+        case V3D_QPU_WADDR_R4:
+        case V3D_QPU_WADDR_R5:
+                add_write_dep(state,
+                              &state->last_r[waddr - V3D_QPU_WADDR_R0], n);
+                return;
+
+        case V3D_QPU_WADDR_VPM:
+        case V3D_QPU_WADDR_VPMU:
+                add_write_dep(state, &state->last_vpm, n);
+                return;
+
+        case V3D_QPU_WADDR_TLB:
+        case V3D_QPU_WADDR_TLBU:
+                add_write_dep(state, &state->last_tlb, n);
+                return;
+
+        case V3D_QPU_WADDR_NOP:
+                return;
+
+        default:
+                fprintf(stderr, "Unknown waddr %d\n", waddr);
+                abort();
+        }
+}
+
+/* A conditional operation reads the flags: add a read dependency on the
+ * last flags writer unless the condition is NONE.
+ */
+static void
+process_cond_deps(struct schedule_state *state, struct schedule_node *n,
+                  enum v3d_qpu_cond cond)
+{
+        if (cond == V3D_QPU_COND_NONE)
+                return;
+
+        add_read_dep(state, state->last_sf, n);
+}
+
+/* A flag push writes the flags: add a write dependency on last_sf unless
+ * pf is NONE.
+ */
+static void
+process_pf_deps(struct schedule_state *state, struct schedule_node *n,
+                enum v3d_qpu_pf pf)
+{
+        if (pf == V3D_QPU_PF_NONE)
+                return;
+
+        add_write_dep(state, &state->last_sf, n);
+}
+
+/* A flag update writes the flags: add a write dependency on last_sf unless
+ * uf is NONE.
+ */
+static void
+process_uf_deps(struct schedule_state *state, struct schedule_node *n,
+                enum v3d_qpu_uf uf)
+{
+        if (uf == V3D_QPU_UF_NONE)
+                return;
+
+        add_write_dep(state, &state->last_sf, n);
+}
+
+/**
+ * Common code for dependencies that need to be tracked both forward and
+ * backward.
+ *
+ * This is for things like "all reads of r4 have to happen between the r4
+ * writes that surround them".
+ *
+ * Adds the dependency edges for node n against the resources tracked in
+ * state, and updates the tracked "last writer" slots as it goes (every
+ * add_write_dep() call stores n in the slot it is given).
+ */
+static void
+calculate_deps(struct schedule_state *state, struct schedule_node *n)
+{
+ struct qinst *qinst = n->inst;
+ struct v3d_qpu_instr *inst = &qinst->qpu;
+
+ /* Branches only depend on the flags (when conditional) and on the
+ * uniform stream.
+ */
+ if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH) {
+ if (inst->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS)
+ add_read_dep(state, state->last_sf, n);
+
+ /* XXX: BDI */
+ /* XXX: BDU */
+ /* XXX: ub */
+ /* XXX: raddr_a */
+
+ add_write_dep(state, &state->last_unif, n);
+ return;
+ }
+
+ assert(inst->type == V3D_QPU_INSTR_TYPE_ALU);
+
+ /* XXX: LOAD_IMM */
+
+ /* Source operands of the add and mul halves. */
+ if (v3d_qpu_add_op_num_src(inst->alu.add.op) > 0)
+ process_mux_deps(state, n, inst->alu.add.a);
+ if (v3d_qpu_add_op_num_src(inst->alu.add.op) > 1)
+ process_mux_deps(state, n, inst->alu.add.b);
+
+ if (v3d_qpu_mul_op_num_src(inst->alu.mul.op) > 0)
+ process_mux_deps(state, n, inst->alu.mul.a);
+ if (v3d_qpu_mul_op_num_src(inst->alu.mul.op) > 1)
+ process_mux_deps(state, n, inst->alu.mul.b);
+
+ /* Add ops with implicit VPM, TLB or flags dependencies. */
+ switch (inst->alu.add.op) {
+ case V3D_QPU_A_VPMSETUP:
+ /* Could distinguish read/write by unpacking the uniform. */
+ add_write_dep(state, &state->last_vpm, n);
+ add_write_dep(state, &state->last_vpm_read, n);
+ break;
+
+ case V3D_QPU_A_STVPMV:
+ case V3D_QPU_A_STVPMD:
+ case V3D_QPU_A_STVPMP:
+ add_write_dep(state, &state->last_vpm, n);
+ break;
+
+ case V3D_QPU_A_MSF:
+ add_read_dep(state, state->last_tlb, n);
+ break;
+
+ case V3D_QPU_A_SETMSF:
+ case V3D_QPU_A_SETREVF:
+ add_write_dep(state, &state->last_tlb, n);
+ break;
+
+ case V3D_QPU_A_FLAPUSH:
+ case V3D_QPU_A_FLBPUSH:
+ case V3D_QPU_A_VFLA:
+ case V3D_QPU_A_VFLNA:
+ case V3D_QPU_A_VFLB:
+ case V3D_QPU_A_VFLNB:
+ add_read_dep(state, state->last_sf, n);
+ break;
+
+ case V3D_QPU_A_FLBPOP:
+ add_write_dep(state, &state->last_sf, n);
+ break;
+
+ default:
+ break;
+ }
+
+ switch (inst->alu.mul.op) {
+ case V3D_QPU_M_MULTOP:
+ case V3D_QPU_M_UMUL24:
+ /* MULTOP sets rtop, and UMUL24 implicitly reads rtop and
+ * resets it to 0. We could possibly reorder umul24s relative
+ * to each other, but for now just keep all the MUL parts in
+ * order.
+ */
+ add_write_dep(state, &state->last_rtop, n);
+ break;
+ default:
+ break;
+ }
+
+ /* Destinations of the add and mul halves. */
+ if (inst->alu.add.op != V3D_QPU_A_NOP) {
+ process_waddr_deps(state, n, inst->alu.add.waddr,
+ inst->alu.add.magic_write);
+ }
+ if (inst->alu.mul.op != V3D_QPU_M_NOP) {
+ process_waddr_deps(state, n, inst->alu.mul.waddr,
+ inst->alu.mul.magic_write);
+ }
+
+ /* Accumulator writes reported by the v3d_qpu_writes_r*() helpers. */
+ if (v3d_qpu_writes_r3(inst))
+ add_write_dep(state, &state->last_r[3], n);
+ if (v3d_qpu_writes_r4(inst))
+ add_write_dep(state, &state->last_r[4], n);
+ if (v3d_qpu_writes_r5(inst))
+ add_write_dep(state, &state->last_r[5], n);
+
+ if (inst->sig.thrsw) {
+ /* All accumulator contents and flags are undefined after the
+ * switch.
+ */
+ for (int i = 0; i < ARRAY_SIZE(state->last_r); i++)
+ add_write_dep(state, &state->last_r[i], n);
+ add_write_dep(state, &state->last_sf, n);
+
+ /* Scoreboard-locking operations have to stay after the last
+ * thread switch.
+ */
+ add_write_dep(state, &state->last_tlb, n);
+
+ add_write_dep(state, &state->last_tmu_write, n);
+ }
+
+ if (inst->sig.ldtmu) {
+ /* TMU loads are coming from a FIFO, so ordering is important.
+ */
+ add_write_dep(state, &state->last_tmu_write, n);
+ }
+
+ if (inst->sig.ldtlb | inst->sig.ldtlbu)
+ add_read_dep(state, state->last_tlb, n);
+
+ if (inst->sig.ldvpm)
+ add_write_dep(state, &state->last_vpm_read, n);
+
+ /* inst->sig.ldunif or sideband uniform read */
+ if (qinst->uniform != ~0)
+ add_write_dep(state, &state->last_unif, n);
+
+ /* Flags: conditions read them, pf/uf updates write them. */
+ process_cond_deps(state, n, inst->flags.ac);
+ process_cond_deps(state, n, inst->flags.mc);
+ process_pf_deps(state, n, inst->flags.apf);
+ process_pf_deps(state, n, inst->flags.mpf);
+ process_uf_deps(state, n, inst->flags.auf);
+ process_uf_deps(state, n, inst->flags.muf);
+}
+
+/* Runs calculate_deps() over the schedule list from first to last node,
+ * starting from an all-zero tracking state in the forward direction.  The
+ * compile context c is not used.
+ */
+static void
+calculate_forward_deps(struct v3d_compile *c, struct list_head *schedule_list)
+{
+        struct schedule_state fwd_state = { .dir = F };
+
+        list_for_each_entry(struct schedule_node, cur, schedule_list, link) {
+                calculate_deps(&fwd_state, cur);
+        }
+}
+
+/* Runs calculate_deps() over the schedule list from last to first node,
+ * starting from an all-zero tracking state in the reverse direction.  The
+ * compile context c is not used.
+ *
+ * The list is walked with list_for_each_entry_rev() so that each node is
+ * recovered from its "link" member, rather than by casting the list_head
+ * pointer (which is only valid while "link" is the first member of
+ * struct schedule_node).
+ */
+static void
+calculate_reverse_deps(struct v3d_compile *c, struct list_head *schedule_list)
+{
+        struct schedule_state state;
+
+        memset(&state, 0, sizeof(state));
+        state.dir = R;
+
+        list_for_each_entry_rev(struct schedule_node, node, schedule_list,
+                                link) {
+                calculate_deps(&state, node);
+        }
+}
+
+/* State consulted while choosing instructions, describing what has already
+ * been chosen.
+ */
+struct choose_scoreboard {
+ /* Current position; the *_tick members below are compared against it. */
+ int tick;
+ /* Tick at which an instruction with a magic SFU write was last chosen. */
+ int last_sfu_write_tick;
+ /* Tick at which an instruction with the ldvary signal was last chosen. */
+ int last_ldvary_tick;
+ int last_uniforms_reset_tick;
+ /* Non-magic write addresses of the add and mul halves of the last
+ * chosen instruction, or ~0 when that half did no such write.
+ */
+ uint32_t last_waddr_add, last_waddr_mul;
+ /* Set once an instruction for which qpu_inst_is_tlb() holds has been
+ * chosen.
+ */
+ bool tlb_locked;
+};
+
+/* Returns true if reading the given mux now would come too soon after the
+ * write recorded in the scoreboard:
+ *  - mux A/B: raddr_a/raddr_b equals a register file address written by
+ *    the previously chosen instruction;
+ *  - r4: at most 2 ticks after the last SFU write;
+ *  - r5: at most 1 tick after the last ldvary.
+ */
+static bool
+mux_reads_too_soon(struct choose_scoreboard *scoreboard,
+                   const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux)
+{
+        switch (mux) {
+        case V3D_QPU_MUX_A:
+                return scoreboard->last_waddr_add == inst->raddr_a ||
+                       scoreboard->last_waddr_mul == inst->raddr_a;
+
+        case V3D_QPU_MUX_B:
+                return scoreboard->last_waddr_add == inst->raddr_b ||
+                       scoreboard->last_waddr_mul == inst->raddr_b;
+
+        case V3D_QPU_MUX_R4:
+                return scoreboard->tick -
+                       scoreboard->last_sfu_write_tick <= 2;
+
+        case V3D_QPU_MUX_R5:
+                return scoreboard->tick -
+                       scoreboard->last_ldvary_tick <= 1;
+
+        default:
+                return false;
+        }
+}
+
+/* Returns true if any source operand of the instruction's add or mul half
+ * would be read too soon according to mux_reads_too_soon().  Branches are
+ * never rejected here.
+ */
+static bool
+reads_too_soon_after_write(struct choose_scoreboard *scoreboard,
+                           struct qinst *qinst)
+{
+        const struct v3d_qpu_instr *inst = &qinst->qpu;
+
+        /* XXX: Branching off of raddr. */
+        if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH)
+                return false;
+
+        assert(inst->type == V3D_QPU_INSTR_TYPE_ALU);
+
+        if (inst->alu.add.op != V3D_QPU_A_NOP) {
+                const int add_srcs = v3d_qpu_add_op_num_src(inst->alu.add.op);
+
+                if (add_srcs > 0 &&
+                    mux_reads_too_soon(scoreboard, inst, inst->alu.add.a))
+                        return true;
+                if (add_srcs > 1 &&
+                    mux_reads_too_soon(scoreboard, inst, inst->alu.add.b))
+                        return true;
+        }
+
+        if (inst->alu.mul.op != V3D_QPU_M_NOP) {
+                const int mul_srcs = v3d_qpu_mul_op_num_src(inst->alu.mul.op);
+
+                if (mul_srcs > 0 &&
+                    mux_reads_too_soon(scoreboard, inst, inst->alu.mul.a))
+                        return true;
+                if (mul_srcs > 1 &&
+                    mux_reads_too_soon(scoreboard, inst, inst->alu.mul.b))
+                        return true;
+        }
+
+        /* XXX: imm */
+
+        return false;
+}
+
+/* Returns true if the instruction writes r4 fewer than 2 ticks after the
+ * last SFU write.
+ */
+static bool
+writes_too_soon_after_write(struct choose_scoreboard *scoreboard,
+                            struct qinst *qinst)
+{
+        const struct v3d_qpu_instr *inst = &qinst->qpu;
+        const int ticks_since_sfu =
+                scoreboard->tick - scoreboard->last_sfu_write_tick;
+
+        /* Don't schedule any other r4 write too soon after an SFU write.
+         * This would normally be prevented by dependency tracking, but might
+         * occur if a dead SFU computation makes it to scheduling.
+         */
+        if (ticks_since_sfu >= 2)
+                return false;
+
+        return v3d_qpu_writes_r4(inst) ? true : false;
+}
+
+/* Returns true when the scoreboard is still at tick 0 and the instruction
+ * is a TLB access according to qpu_inst_is_tlb().
+ */
+static bool
+pixel_scoreboard_too_soon(struct choose_scoreboard *scoreboard,
+ const struct v3d_qpu_instr *inst)
+{
+ return (scoreboard->tick == 0 && qpu_inst_is_tlb(inst));
+}
+
+/* Returns the scheduling priority class of an instruction.  The caller
+ * prefers candidates with a larger value.
+ */
+static int
+get_instruction_priority(const struct v3d_qpu_instr *inst)
+{
+        enum {
+                /* Schedule TLB operations as late as possible, to get more
+                 * parallelism between shaders.
+                 */
+                PRIO_TLB = 0,
+                /* Schedule texture read results collection late to hide
+                 * latency.
+                 */
+                PRIO_LDTMU = 1,
+                /* Default score for things that aren't otherwise special. */
+                PRIO_DEFAULT = 2,
+                /* Schedule texture read setup early to hide their latency
+                 * better.
+                 */
+                PRIO_TMU_SETUP = 3,
+        };
+
+        if (qpu_inst_is_tlb(inst))
+                return PRIO_TLB;
+
+        if (inst->sig.ldtmu)
+                return PRIO_LDTMU;
+
+        const bool writes_tmu =
+                inst->type == V3D_QPU_INSTR_TYPE_ALU &&
+                ((inst->alu.add.magic_write &&
+                  v3d_qpu_magic_waddr_is_tmu(inst->alu.add.waddr)) ||
+                 (inst->alu.mul.magic_write &&
+                  v3d_qpu_magic_waddr_is_tmu(inst->alu.mul.waddr)));
+
+        return writes_tmu ? PRIO_TMU_SETUP : PRIO_DEFAULT;
+}
+
+/* Returns true if the magic write address belongs to one of the units
+ * checked below: TMU, SFU, TLB, VPM or TSY.
+ */
+static bool
+qpu_magic_waddr_is_periph(enum v3d_qpu_waddr waddr)
+{
+        if (v3d_qpu_magic_waddr_is_tmu(waddr))
+                return true;
+        if (v3d_qpu_magic_waddr_is_sfu(waddr))
+                return true;
+        if (v3d_qpu_magic_waddr_is_tlb(waddr))
+                return true;
+        if (v3d_qpu_magic_waddr_is_vpm(waddr))
+                return true;
+        if (v3d_qpu_magic_waddr_is_tsy(waddr))
+                return true;
+
+        return false;
+}
+
+/* Returns true if the instruction accesses a peripheral: through one of the
+ * ldvpm/ldtmu/ldtlb/ldtlbu signals, through a VPMSETUP add op, or through a
+ * magic write of a non-NOP add/mul op to a peripheral address.
+ */
+static bool
+qpu_accesses_peripheral(const struct v3d_qpu_instr *inst)
+{
+        if (inst->sig.ldvpm || inst->sig.ldtmu ||
+            inst->sig.ldtlb || inst->sig.ldtlbu)
+                return true;
+
+        if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
+                return false;
+
+        if (inst->alu.add.op == V3D_QPU_A_VPMSETUP)
+                return true;
+
+        const bool add_hits_periph =
+                inst->alu.add.op != V3D_QPU_A_NOP &&
+                inst->alu.add.magic_write &&
+                qpu_magic_waddr_is_periph(inst->alu.add.waddr);
+        if (add_hits_periph)
+                return true;
+
+        const bool mul_hits_periph =
+                inst->alu.mul.op != V3D_QPU_M_NOP &&
+                inst->alu.mul.magic_write &&
+                qpu_magic_waddr_is_periph(inst->alu.mul.waddr);
+
+        return mul_hits_periph;
+}
+
+/* Tries to combine ALU instructions a and b into a single instruction.
+ *
+ * The merge starts from a copy of a and takes b's add half, mul half and
+ * raddr_a/raddr_b where b uses them; the signal bits of both are ORed.
+ * Returns false without touching *result when either instruction is not an
+ * ALU instruction, both access a peripheral, both use the same ALU half, or
+ * both use the same raddr mux with different addresses.  Otherwise *result
+ * receives the merged instruction and the return value is whether
+ * v3d_qpu_instr_pack() accepted it.
+ */
+static bool
+qpu_merge_inst(const struct v3d_device_info *devinfo,
+               struct v3d_qpu_instr *result,
+               const struct v3d_qpu_instr *a,
+               const struct v3d_qpu_instr *b)
+{
+        if (a->type != V3D_QPU_INSTR_TYPE_ALU)
+                return false;
+        if (b->type != V3D_QPU_INSTR_TYPE_ALU)
+                return false;
+
+        /* Can't do more than one peripheral access in an instruction. */
+        if (qpu_accesses_peripheral(a) && qpu_accesses_peripheral(b))
+                return false;
+
+        /* Each ALU half can only come from one of the two instructions. */
+        const bool take_add = b->alu.add.op != V3D_QPU_A_NOP;
+        if (take_add && a->alu.add.op != V3D_QPU_A_NOP)
+                return false;
+
+        const bool take_mul = b->alu.mul.op != V3D_QPU_M_NOP;
+        if (take_mul && a->alu.mul.op != V3D_QPU_M_NOP)
+                return false;
+
+        /* A shared raddr is fine only if both want the same address. */
+        const bool take_raddr_a = v3d_qpu_uses_mux(b, V3D_QPU_MUX_A);
+        if (take_raddr_a &&
+            v3d_qpu_uses_mux(a, V3D_QPU_MUX_A) &&
+            a->raddr_a != b->raddr_a)
+                return false;
+
+        const bool take_raddr_b = v3d_qpu_uses_mux(b, V3D_QPU_MUX_B);
+        if (take_raddr_b &&
+            v3d_qpu_uses_mux(a, V3D_QPU_MUX_B) &&
+            a->raddr_b != b->raddr_b)
+                return false;
+
+        struct v3d_qpu_instr merge = *a;
+
+        if (take_add) {
+                merge.alu.add = b->alu.add;
+                merge.flags.ac = b->flags.ac;
+                merge.flags.apf = b->flags.apf;
+                merge.flags.auf = b->flags.auf;
+        }
+
+        if (take_mul) {
+                merge.alu.mul = b->alu.mul;
+                merge.flags.mc = b->flags.mc;
+                merge.flags.mpf = b->flags.mpf;
+                merge.flags.muf = b->flags.muf;
+        }
+
+        if (take_raddr_a)
+                merge.raddr_a = b->raddr_a;
+        if (take_raddr_b)
+                merge.raddr_b = b->raddr_b;
+
+        merge.sig.thrsw |= b->sig.thrsw;
+        merge.sig.ldunif |= b->sig.ldunif;
+        merge.sig.ldtmu |= b->sig.ldtmu;
+        merge.sig.ldvary |= b->sig.ldvary;
+        merge.sig.ldvpm |= b->sig.ldvpm;
+        merge.sig.small_imm |= b->sig.small_imm;
+        merge.sig.ldtlb |= b->sig.ldtlb;
+        merge.sig.ldtlbu |= b->sig.ldtlbu;
+        merge.sig.ucb |= b->sig.ucb;
+        merge.sig.rotate |= b->sig.rotate;
+        merge.sig.wrtmuc |= b->sig.wrtmuc;
+
+        /* The packed encoding is discarded; packing only serves as the
+         * validity check for the merged instruction.
+         */
+        uint64_t packed;
+        const bool ok = v3d_qpu_instr_pack(devinfo, &merge, &packed);
+
+        *result = merge;
+        /* No modifying the real instructions on failure. */
+        assert(ok || (a != result && b != result));
+
+        return ok;
+}
+
+/* Picks the next node to schedule from schedule_list, or returns NULL if no
+ * candidate qualifies.
+ *
+ * When prev_inst is non-NULL the caller is looking for an instruction to
+ * pair with prev_inst, and only candidates that qpu_merge_inst() can merge
+ * with it are considered.  Among the valid candidates, the one with the
+ * highest get_instruction_priority() wins, with the larger delay used to
+ * break ties.
+ */
+static struct schedule_node *
+choose_instruction_to_schedule(const struct v3d_device_info *devinfo,
+ struct choose_scoreboard *scoreboard,
+ struct list_head *schedule_list,
+ struct schedule_node *prev_inst)
+{
+ struct schedule_node *chosen = NULL;
+ int chosen_prio = 0;
+
+ /* Don't pair up anything with a thread switch signal -- emit_thrsw()
+ * will handle pairing it along with filling the delay slots.
+ */
+ if (prev_inst) {
+ if (prev_inst->inst->qpu.sig.thrsw)
+ return NULL;
+ }
+
+ list_for_each_entry(struct schedule_node, n, schedule_list, link) {
+ const struct v3d_qpu_instr *inst = &n->inst->qpu;
+
+ /* Don't choose the branch instruction until it's the last one
+ * left. We'll move it up to fit its delay slots after we
+ * choose it.
+ */
+ if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH &&
+ !list_is_singular(schedule_list)) {
+ continue;
+ }
+
+ /* "An instruction must not read from a location in physical
+ * regfile A or B that was written to by the previous
+ * instruction."
+ */
+ if (reads_too_soon_after_write(scoreboard, n->inst))
+ continue;
+
+ if (writes_too_soon_after_write(scoreboard, n->inst))
+ continue;
+
+ /* "A scoreboard wait must not occur in the first two
+ * instructions of a fragment shader. This is either the
+ * explicit Wait for Scoreboard signal or an implicit wait
+ * with the first tile-buffer read or write instruction."
+ */
+ if (pixel_scoreboard_too_soon(scoreboard, inst))
+ continue;
+
+ /* ldunif and ldvary both write r5, but ldunif does so a tick
+ * sooner. If the ldvary's r5 wasn't used, then ldunif might
+ * otherwise get scheduled so ldunif and ldvary try to update
+ * r5 in the same tick.
+ */
+ if (inst->sig.ldunif &&
+ scoreboard->tick == scoreboard->last_ldvary_tick + 1) {
+ continue;
+ }
+
+ /* If we're trying to pair with another instruction, check
+ * that they're compatible.
+ */
+ if (prev_inst) {
+ /* Don't pair up a thread switch signal -- we'll
+ * handle pairing it when we pick it on its own.
+ */
+ if (inst->sig.thrsw)
+ continue;
+
+ /* Skip the pair when both instructions carry a uniform
+ * (uniform != -1).
+ */
+ if (prev_inst->inst->uniform != -1 &&
+ n->inst->uniform != -1)
+ continue;
+
+ /* Don't merge in something that will lock the TLB.
+ * Hopefully what we have in inst will release some
+ * other instructions, allowing us to delay the
+ * TLB-locking instruction until later.
+ */
+ if (!scoreboard->tlb_locked && qpu_inst_is_tlb(inst))
+ continue;
+
+ /* The merged instruction is only used as a feasibility
+ * check here; its contents are discarded.
+ */
+ struct v3d_qpu_instr merged_inst;
+ if (!qpu_merge_inst(devinfo, &merged_inst,
+ &prev_inst->inst->qpu, inst)) {
+ continue;
+ }
+ }
+
+ int prio = get_instruction_priority(inst);
+
+ /* Found a valid instruction. If nothing better comes along,
+ * this one works.
+ */
+ if (!chosen) {
+ chosen = n;
+ chosen_prio = prio;
+ continue;
+ }
+
+ /* A higher priority class always wins; a lower one always
+ * loses.
+ */
+ if (prio > chosen_prio) {
+ chosen = n;
+ chosen_prio = prio;
+ } else if (prio < chosen_prio) {
+ continue;
+ }
+
+ /* Same priority class: prefer the node with the larger delay. */
+ if (n->delay > chosen->delay) {
+ chosen = n;
+ chosen_prio = prio;
+ } else if (n->delay < chosen->delay) {
+ continue;
+ }
+ }
+
+ return chosen;
+}
+
+/* Records the current tick as the last SFU write when the magic write
+ * address is an SFU address; other addresses leave the scoreboard alone.
+ */
+static void
+update_scoreboard_for_magic_waddr(struct choose_scoreboard *scoreboard,
+                                  enum v3d_qpu_waddr waddr)
+{
+        if (!v3d_qpu_magic_waddr_is_sfu(waddr))
+                return;
+
+        scoreboard->last_sfu_write_tick = scoreboard->tick;
+}
+
+/* Updates the scoreboard for an instruction that has just been chosen.
+ *
+ * The last-written register file addresses are reset to ~0 first; branches
+ * change nothing else.  For ALU instructions each non-NOP half records
+ * either its magic write (via update_scoreboard_for_magic_waddr()) or its
+ * register file write address.  ldvary and TLB accesses are recorded too.
+ */
+static void
+update_scoreboard_for_chosen(struct choose_scoreboard *scoreboard,
+                             const struct v3d_qpu_instr *inst)
+{
+        scoreboard->last_waddr_add = ~0;
+        scoreboard->last_waddr_mul = ~0;
+
+        if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH)
+                return;
+
+        assert(inst->type == V3D_QPU_INSTR_TYPE_ALU);
+
+        const bool add_active = inst->alu.add.op != V3D_QPU_A_NOP;
+        if (add_active && inst->alu.add.magic_write) {
+                update_scoreboard_for_magic_waddr(scoreboard,
+                                                  inst->alu.add.waddr);
+        } else if (add_active) {
+                scoreboard->last_waddr_add = inst->alu.add.waddr;
+        }
+
+        const bool mul_active = inst->alu.mul.op != V3D_QPU_M_NOP;
+        if (mul_active && inst->alu.mul.magic_write) {
+                update_scoreboard_for_magic_waddr(scoreboard,
+                                                  inst->alu.mul.waddr);
+        } else if (mul_active) {
+                scoreboard->last_waddr_mul = inst->alu.mul.waddr;
+        }
+
+        if (inst->sig.ldvary)
+                scoreboard->last_ldvary_tick = scoreboard->tick;
+
+        if (qpu_inst_is_tlb(inst))
+                scoreboard->tlb_locked = true;
+}
+
+/* Debug helper: prints every node of the schedule list to stderr with its
+ * unblocked time, followed by one line per remaining (non-NULL) child with
+ * the child's parent count and the edge kind ('w' for write-after-read
+ * edges, 'r' otherwise).
+ */
+static void
+dump_state(const struct v3d_device_info *devinfo,
+           struct list_head *schedule_list)
+{
+        list_for_each_entry(struct schedule_node, parent, schedule_list, link) {
+                fprintf(stderr, " t=%4d: ", parent->unblocked_time);
+                v3d_qpu_dump(devinfo, &parent->inst->qpu);
+                fprintf(stderr, "\n");
+
+                for (int i = 0; i < parent->child_count; i++) {
+                        const struct schedule_node_child *edge =
+                                &parent->children[i];
+
+                        if (edge->node == NULL)
+                                continue;
+
+                        const char edge_kind =
+                                edge->write_after_read ? 'w' : 'r';
+
+                        fprintf(stderr, " - ");
+                        v3d_qpu_dump(devinfo, &edge->node->inst->qpu);
+                        fprintf(stderr, " (%d parents, %c)\n",
+                                edge->node->parent_count, edge_kind);
+                }
+        }
+}
+
+/* Returns the latency to assume between an instruction doing a magic write
+ * to waddr and the dependent instruction "after": 100 for a TMU write
+ * followed by an ldtmu, 3 for an SFU write, and 1 otherwise.
+ */
+static uint32_t
+magic_waddr_latency(enum v3d_qpu_waddr waddr,
+                    const struct v3d_qpu_instr *after)
+{
+        uint32_t latency;
+
+        if (v3d_qpu_magic_waddr_is_tmu(waddr) && after->sig.ldtmu) {
+                /* Apply some huge latency between texture fetch requests
+                 * and getting their results back.
+                 *
+                 * FIXME: This is actually pretty bogus.  If we do:
+                 *
+                 * mov tmu0_s, a
+                 * <a bit of math>
+                 * mov tmu0_s, b
+                 * load_tmu0
+                 * <more math>
+                 * load_tmu0
+                 *
+                 * we count that as worse than
+                 *
+                 * mov tmu0_s, a
+                 * mov tmu0_s, b
+                 * <lots of math>
+                 * load_tmu0
+                 * <more math>
+                 * load_tmu0
+                 *
+                 * because we associate the first load_tmu0 with the
+                 * *second* tmu0_s.
+                 */
+                latency = 100;
+        } else if (v3d_qpu_magic_waddr_is_sfu(waddr)) {
+                /* Assume that anything depending on us is consuming the
+                 * SFU result.
+                 */
+                latency = 3;
+        } else {
+                latency = 1;
+        }
+
+        return latency;
+}
+
+/* Returns the latency to assume between "before" and a dependent "after".
+ *
+ * The result is 1 unless both are ALU instructions and "before" does a
+ * magic write, in which case it is the largest magic_waddr_latency() of its
+ * add and mul halves (never less than 1).
+ */
+static uint32_t
+instruction_latency(struct schedule_node *before, struct schedule_node *after)
+{
+        const struct v3d_qpu_instr *producer = &before->inst->qpu;
+        const struct v3d_qpu_instr *consumer = &after->inst->qpu;
+        uint32_t add_latency = 1;
+        uint32_t mul_latency = 1;
+
+        if (producer->type != V3D_QPU_INSTR_TYPE_ALU ||
+            consumer->type != V3D_QPU_INSTR_TYPE_ALU)
+                return 1;
+
+        if (producer->alu.add.magic_write) {
+                add_latency = MAX2(add_latency,
+                                   magic_waddr_latency(producer->alu.add.waddr,
+                                                       consumer));
+        }
+
+        if (producer->alu.mul.magic_write) {
+                mul_latency = MAX2(mul_latency,
+                                   magic_waddr_latency(producer->alu.mul.waddr,
+                                                       consumer));
+        }
+
+        return MAX2(add_latency, mul_latency);
+}
+
+/**
+ * Recursively fills in the delay member of a node.
+ *
+ * A node without children gets a delay of 1.  Otherwise the delay is raised
+ * to the largest (child delay + latency to that child), computing a child's
+ * delay first when it is still 0.
+ */
+static void
+compute_delay(struct schedule_node *n)
+{
+        if (n->child_count == 0) {
+                n->delay = 1;
+                return;
+        }
+
+        for (int i = 0; i < n->child_count; i++) {
+                struct schedule_node *child = n->children[i].node;
+
+                if (child->delay == 0)
+                        compute_delay(child);
+
+                const uint32_t via_child =
+                        child->delay + instruction_latency(n, child);
+                if (via_child > n->delay)
+                        n->delay = via_child;
+        }
+}
+
+/* Releases the children of a node that has just been scheduled at "time".
+ *
+ * Edges are processed from last to first.  With war_only set, only
+ * write-after-read edges are released and no latency is added.  Each
+ * released child has its unblocked time raised and its parent count
+ * dropped, and is added to schedule_list once it has no parents left.
+ * A processed edge has its node pointer cleared.  A NULL node is ignored.
+ */
+static void
+mark_instruction_scheduled(struct list_head *schedule_list,
+                           uint32_t time,
+                           struct schedule_node *node,
+                           bool war_only)
+{
+        if (node == NULL)
+                return;
+
+        for (int i = node->child_count - 1; i >= 0; i--) {
+                struct schedule_node_child *edge = &node->children[i];
+                struct schedule_node *child = edge->node;
+
+                if (child == NULL)
+                        continue;
+
+                if (war_only && !edge->write_after_read)
+                        continue;
+
+                /* If the requirement is only that the node not appear before
+                 * the last read of its destination, then it can be scheduled
+                 * immediately after (or paired with!) the thing reading the
+                 * destination.
+                 */
+                const uint32_t latency =
+                        war_only ? 0 : instruction_latency(node, child);
+                const uint32_t ready_time = time + latency;
+
+                if (ready_time > child->unblocked_time)
+                        child->unblocked_time = ready_time;
+
+                child->parent_count--;
+                if (child->parent_count == 0)
+                        list_add(&child->link, schedule_list);
+
+                edge->node = NULL;
+        }
+}
+
+/* Builds an add-ALU NOP qinst whose dst and both sources are QFILE_NULL. */
+static struct qinst *
+vir_nop(void)
+{
+        struct qreg undef = { QFILE_NULL, 0 };
+
+        return vir_add_inst(V3D_QPU_A_NOP, undef, undef, undef);
+}
+
+#if 0 /* Disabled: the only call to emit_thrsw() is commented out in schedule_instructions(). */
+static struct qinst *
+nop_after(struct qinst *inst)
+{
+ struct qinst *q = vir_nop();
+
+ list_add(&q->link, &inst->link); /* presumably links q right after inst — confirm list_add() semantics */
+
+ return q;
+}
+
+/**
+ * Emits a THRSW/LTHRSW signal in the stream, trying to move it up to pair
+ * with another instruction.
+ */
+static void
+emit_thrsw(struct v3d_compile *c,
+ struct choose_scoreboard *scoreboard,
+ const struct v3d_qpu_instr *inst)
+{
+ /* There should be nothing in a thrsw inst being scheduled other than
+ * the signal bits.
+ */
+ assert(inst->type == V3D_QPU_INSTR_TYPE_ALU);
+ assert(inst->alu.add.op == V3D_QPU_A_NOP);
+ assert(inst->alu.mul.op == V3D_QPU_M_NOP);
+
+ /* Try to find an earlier scheduled instruction that we can merge the
+ * thrsw into.
+ */
+ int thrsw_ip = c->qpu_inst_count; /* "No merge candidate" until the loop finds one. */
+ for (int i = 1; i <= MIN2(c->qpu_inst_count, 3); i++) { /* Look at up to the 3 most recent instructions. */
+ uint64_t prev_instr = c->qpu_insts[c->qpu_inst_count - i];
+ uint32_t prev_sig = QPU_GET_FIELD(prev_instr, QPU_SIG);
+
+ if (prev_sig == QPU_SIG_NONE)
+ thrsw_ip = c->qpu_inst_count - i; /* Overwritten each time, so the farthest-back match wins. */
+ }
+
+ if (thrsw_ip != c->qpu_inst_count) {
+ /* Merge the thrsw into the existing instruction. */
+ c->qpu_insts[thrsw_ip] =
+ QPU_UPDATE_FIELD(c->qpu_insts[thrsw_ip], sig, QPU_SIG); /* NOTE(review): `sig` is not declared in this function — fix before re-enabling. */
+ } else {
+ qpu_serialize_one_inst(c, inst);
+ update_scoreboard_for_chosen(scoreboard, inst);
+ }
+
+ /* Fill the delay slots. */
+ while (c->qpu_inst_count < thrsw_ip + 3) {
+ update_scoreboard_for_chosen(scoreboard, v3d_qpu_nop()); /* NOTE(review): v3d_qpu_nop() returns a struct by value; live callers pass a pointer — confirm. */
+ qpu_serialize_one_inst(c, v3d_qpu_nop());
+ }
+}
+#endif
+
+static uint32_t /* Returns the final value of the local "time" counter. */
+schedule_instructions(struct v3d_compile *c,
+ struct choose_scoreboard *scoreboard,
+ struct qblock *block,
+ struct list_head *schedule_list,
+ enum quniform_contents *orig_uniform_contents,
+ uint32_t *orig_uniform_data,
+ uint32_t *next_uniform)
+{
+ const struct v3d_device_info *devinfo = c->devinfo;
+ uint32_t time = 0; /* +1 per emitted instruction; also bumped up to a chosen node's unblocked_time. */
+
+ if (debug) {
+ fprintf(stderr, "initial deps:\n");
+ dump_state(devinfo, schedule_list);
+ fprintf(stderr, "\n");
+ }
+
+ /* Remove non-DAG heads from the list. */
+ list_for_each_entry_safe(struct schedule_node, n, schedule_list, link) {
+ if (n->parent_count != 0)
+ list_del(&n->link);
+ }
+
+ while (!list_empty(schedule_list)) {
+ struct schedule_node *chosen =
+ choose_instruction_to_schedule(devinfo,
+ scoreboard,
+ schedule_list,
+ NULL); /* NULL: nothing chosen yet to pair against. */
+ struct schedule_node *merge = NULL;
+
+ /* If there are no valid instructions to schedule, drop a NOP
+ * in.
+ */
+ struct qinst *qinst = chosen ? chosen->inst : vir_nop();
+ struct v3d_qpu_instr *inst = &qinst->qpu;
+
+ if (debug) {
+ fprintf(stderr, "t=%4d: current list:\n",
+ time);
+ dump_state(devinfo, schedule_list);
+ fprintf(stderr, "t=%4d: chose: ", time);
+ v3d_qpu_dump(devinfo, inst);
+ fprintf(stderr, "\n");
+ }
+
+ /* Schedule this instruction onto the QPU list. Also try to
+ * find an instruction to pair with it.
+ */
+ if (chosen) {
+ time = MAX2(chosen->unblocked_time, time);
+ list_del(&chosen->link);
+ mark_instruction_scheduled(schedule_list, time,
+ chosen, true); /* WAR-only release first, so those children can be picked as the pair below. */
+
+ merge = choose_instruction_to_schedule(devinfo,
+ scoreboard,
+ schedule_list,
+ chosen);
+ if (merge) {
+ time = MAX2(merge->unblocked_time, time);
+ list_del(&merge->link);
+ (void)qpu_merge_inst(devinfo, inst,
+ inst, &merge->inst->qpu); /* NOTE(review): result ignored — presumably the chooser already verified the merge; confirm. */
+ if (merge->inst->uniform != -1) {
+ chosen->inst->uniform =
+ merge->inst->uniform;
+ }
+
+ if (debug) {
+ fprintf(stderr, "t=%4d: merging: ",
+ time);
+ v3d_qpu_dump(devinfo, &merge->inst->qpu);
+ fprintf(stderr, "\n");
+ fprintf(stderr, " result: ");
+ v3d_qpu_dump(devinfo, inst);
+ fprintf(stderr, "\n");
+ }
+ }
+ }
+
+ /* Update the uniform index for the rewritten location --
+ * branch target updating will still need to change
+ * c->uniform_data[] using this index.
+ */
+ if (qinst->uniform != -1) { /* -1 means the instruction reads no uniform. */
+ if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH)
+ block->branch_uniform = *next_uniform;
+
+ c->uniform_data[*next_uniform] =
+ orig_uniform_data[qinst->uniform];
+ c->uniform_contents[*next_uniform] =
+ orig_uniform_contents[qinst->uniform];
+ qinst->uniform = *next_uniform;
+ (*next_uniform)++;
+ }
+
+ if (debug) {
+ fprintf(stderr, "\n");
+ }
+
+ /* Now that we've scheduled a new instruction, some of its
+ * children can be promoted to the list of instructions ready to
+ * be scheduled. Update the children's unblocked time for this
+ * DAG edge as we do so.
+ */
+ mark_instruction_scheduled(schedule_list, time, chosen, false); /* No-op when chosen is NULL (NOP case). */
+
+ if (merge) {
+ mark_instruction_scheduled(schedule_list, time, merge,
+ false);
+
+ /* The merged VIR instruction doesn't get re-added to the
+ * block, so free it now.
+ */
+ free(merge->inst);
+ }
+
+ if (0 && inst->sig.thrsw) { /* Always false: thrsw currently takes the normal path below. */
+ /* XXX emit_thrsw(c, scoreboard, qinst); */
+ } else {
+ c->qpu_inst_count++;
+ list_addtail(&qinst->link, &block->instructions);
+ update_scoreboard_for_chosen(scoreboard, inst);
+ }
+
+ scoreboard->tick++;
+ time++;
+
+ if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH ||
+ inst->sig.thrsw /* XXX */) {
+ block->branch_qpu_ip = c->qpu_inst_count - 1; /* NOTE(review): also set for thrsw, yet qpu_set_branch_targets() asserts a BRANCH three slots back — confirm. */
+ /* Fill the delay slots.
+ *
+ * We should fill these with actual instructions,
+ * instead, but that will probably need to be done
+ * after this, once we know what the leading
+ * instructions of the successors are (so we can
+ * handle A/B register file write latency)
+ */
+ /* XXX: scoreboard */
+ int slots = (inst->type == V3D_QPU_INSTR_TYPE_BRANCH ?
+ 3 : 2); /* 3 NOPs after a branch, 2 after a thrsw. */
+ for (int i = 0; i < slots; i++) {
+ struct qinst *nop = vir_nop();
+ list_addtail(&nop->link, &block->instructions);
+
+ update_scoreboard_for_chosen(scoreboard,
+ &nop->qpu);
+ c->qpu_inst_count++;
+ scoreboard->tick++;
+ time++;
+ }
+ }
+ }
+
+ return time;
+}
+
+/**
+ * Schedules one block: wraps its instructions in schedule_nodes, builds the
+ * dependency edges and delays, then hands the list to
+ * schedule_instructions() and returns that function's result.
+ */
+static uint32_t
+qpu_schedule_instructions_block(struct v3d_compile *c,
+                                struct choose_scoreboard *scoreboard,
+                                struct qblock *block,
+                                enum quniform_contents *orig_uniform_contents,
+                                uint32_t *orig_uniform_data,
+                                uint32_t *next_uniform)
+{
+        struct list_head pending;
+        list_inithead(&pending);
+
+        /* Every schedule_node is allocated from this context. */
+        void *node_ctx = ralloc_context(NULL);
+
+        /* Drain the block, front to back, into the pending list. */
+        while (!list_empty(&block->instructions)) {
+                struct qinst *head = (struct qinst *)block->instructions.next;
+                struct schedule_node *wrapper =
+                        rzalloc(node_ctx, struct schedule_node);
+
+                list_del(&head->link);
+                wrapper->inst = head;
+                list_addtail(&wrapper->link, &pending);
+        }
+
+        calculate_forward_deps(c, &pending);
+        calculate_reverse_deps(c, &pending);
+
+        list_for_each_entry(struct schedule_node, node, &pending, link)
+                compute_delay(node);
+        const uint32_t cycles =
+                schedule_instructions(c, scoreboard, block, &pending,
+                                      orig_uniform_contents,
+                                      orig_uniform_data, next_uniform);
+        ralloc_free(node_ctx);
+        return cycles;
+}
+
+static void /* Fills in each branch's instruction offset and uniform-stream offset once block IPs are known. */
+qpu_set_branch_targets(struct v3d_compile *c)
+{
+ vir_for_each_block(block, c) {
+ /* The end block of the program has no branch. */
+ if (!block->successors[0])
+ continue;
+
+ /* If there was no branch instruction, then the successor
+ * block must follow immediately after this one.
+ */
+ if (block->branch_qpu_ip == ~0) { /* ~0 is the "no branch" marker set in v3d_qpu_schedule_instructions(). */
+ assert(block->end_qpu_ip + 1 ==
+ block->successors[0]->start_qpu_ip);
+ continue;
+ }
+
+ /* Walk back through the delay slots to find the branch
+ * instr.
+ */
+ struct list_head *entry = block->instructions.prev; /* Last instruction in the block. */
+ for (int i = 0; i < 3; i++) /* Three delay-slot NOPs follow a branch (see schedule_instructions()). */
+ entry = entry->prev;
+ struct qinst *branch = container_of(entry, branch, link);
+ assert(branch->qpu.type == V3D_QPU_INSTR_TYPE_BRANCH);
+
+ /* Make sure that the if-we-don't-jump
+ * successor was scheduled just after the
+ * delay slots.
+ */
+ assert(!block->successors[1] ||
+ block->successors[1]->start_qpu_ip ==
+ block->branch_qpu_ip + 4); /* Branch itself plus 3 delay slots. */
+
+ branch->qpu.branch.offset =
+ ((block->successors[0]->start_qpu_ip -
+ (block->branch_qpu_ip + 4)) *
+ sizeof(uint64_t)); /* Instruction-count delta scaled by sizeof(uint64_t), measured from the IP after the delay slots. */
+
+ /* Set up the relative offset to jump in the
+ * uniform stream.
+ *
+ * Use a temporary here, because
+ * uniform_data[inst->uniform] may be shared
+ * between multiple instructions.
+ */
+ assert(c->uniform_contents[branch->uniform] == QUNIFORM_CONSTANT);
+ c->uniform_data[branch->uniform] =
+ (block->successors[0]->start_uniform -
+ (block->branch_uniform + 1)) * 4; /* Uniform-index delta times 4, relative to the uniform after the branch's own. */
+ }
+}
+
+uint32_t /* Returns the sum of the per-block results of qpu_schedule_instructions_block(). */
+v3d_qpu_schedule_instructions(struct v3d_compile *c)
+{
+ const struct v3d_device_info *devinfo = c->devinfo;
+
+ /* We reorder the uniforms as we schedule instructions, so save the
+ * old data off and replace it.
+ */
+ uint32_t *uniform_data = c->uniform_data;
+ enum quniform_contents *uniform_contents = c->uniform_contents;
+ c->uniform_contents = ralloc_array(c, enum quniform_contents,
+ c->num_uniforms);
+ c->uniform_data = ralloc_array(c, uint32_t, c->num_uniforms);
+ c->uniform_array_size = c->num_uniforms;
+ uint32_t next_uniform = 0; /* Next free slot in the new uniform arrays. */
+
+ struct choose_scoreboard scoreboard;
+ memset(&scoreboard, 0, sizeof(scoreboard));
+ scoreboard.last_waddr_add = ~0;
+ scoreboard.last_waddr_mul = ~0;
+ scoreboard.last_ldvary_tick = -10; /* presumably far enough back that no hazard window is open at tick 0 — confirm */
+ scoreboard.last_sfu_write_tick = -10;
+ scoreboard.last_uniforms_reset_tick = -10;
+
+ if (debug) {
+ fprintf(stderr, "Pre-schedule instructions\n");
+ vir_for_each_block(block, c) {
+ fprintf(stderr, "BLOCK %d\n", block->index);
+ list_for_each_entry(struct qinst, qinst,
+ &block->instructions, link) {
+ v3d_qpu_dump(devinfo, &qinst->qpu);
+ fprintf(stderr, "\n");
+ }
+ }
+ fprintf(stderr, "\n");
+ }
+
+ uint32_t cycles = 0;
+ vir_for_each_block(block, c) {
+ block->start_qpu_ip = c->qpu_inst_count;
+ block->branch_qpu_ip = ~0; /* "No branch" until schedule_instructions() records one. */
+ block->start_uniform = next_uniform;
+
+ cycles += qpu_schedule_instructions_block(c,
+ &scoreboard,
+ block,
+ uniform_contents,
+ uniform_data,
+ &next_uniform);
+
+ block->end_qpu_ip = c->qpu_inst_count - 1;
+ }
+
+ qpu_set_branch_targets(c); /* Needs every block's start_qpu_ip/start_uniform, so it runs after the loop. */
+
+ assert(next_uniform == c->num_uniforms); /* The rewritten stream must fill the new arrays exactly. */
+
+ return cycles;
+}
diff -Nru mesa-17.2.4/src/broadcom/compiler/qpu_validate.c mesa-17.3.3/src/broadcom/compiler/qpu_validate.c
--- mesa-17.2.4/src/broadcom/compiler/qpu_validate.c 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/broadcom/compiler/qpu_validate.c 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,208 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * @file
+ *
+ * Validates the QPU instruction sequence after register allocation and
+ * scheduling.
+ */
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "v3d_compiler.h"
+#include "qpu/qpu_disasm.h"
+
+struct v3d_qpu_validate_state { /* Running state threaded through the validation walk. */
+ struct v3d_compile *c; /* Program being validated. */
+ const struct v3d_qpu_instr *last; /* Previously visited instruction; NULL before the first. */
+ int ip; /* Index of the instruction currently being checked. */
+ int last_sfu_write; /* ip of the most recent instruction with an SFU magic write. */
+};
+
+/* Prints msg and every instruction (marking the one at state->ip), then aborts. */
+static void
+fail_instr(struct v3d_qpu_validate_state *state, const char *msg)
+{
+        struct v3d_compile *c = state->c;
+        int cur_ip = 0;
+
+        fprintf(stderr, "v3d_qpu_validate at ip %d: %s:\n", state->ip, msg);
+
+        vir_for_each_inst_inorder(inst, c) {
+                v3d_qpu_dump(c->devinfo, &inst->qpu);
+                if (cur_ip == state->ip)
+                        fprintf(stderr, " *** ERROR ***");
+                fprintf(stderr, "\n");
+                cur_ip++;
+        }
+
+        fprintf(stderr, "\n");
+        abort();
+}
+
+/* True if an active add or mul op magic-writes a waddr accepted by predicate. */
+static bool
+qpu_magic_waddr_matches(const struct v3d_qpu_instr *inst,
+                        bool (*predicate)(enum v3d_qpu_waddr waddr))
+{
+        /* The alu fields read below are only valid for ALU instructions. */
+        if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
+                return false;
+        if (inst->alu.add.op != V3D_QPU_A_NOP &&
+            inst->alu.add.magic_write &&
+            predicate(inst->alu.add.waddr))
+                return true;
+
+        if (inst->alu.mul.op != V3D_QPU_M_NOP &&
+            inst->alu.mul.magic_write &&
+            predicate(inst->alu.mul.waddr))
+                return true;
+
+        return false;
+}
+
+static void /* Checks one instruction against the hazards below; any violation goes to fail_instr(), which aborts. */
+qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
+{
+ const struct v3d_qpu_instr *inst = &qinst->qpu;
+
+ if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
+ return; /* Only ALU instructions are checked. */
+
+ /* LDVARY writes r5 two instructions later and LDUNIF writes
+ * r5 one instruction later, which is illegal to have
+ * together.
+ */
+ if (state->last && state->last->sig.ldvary && inst->sig.ldunif) {
+ fail_instr(state, "LDUNIF after a LDVARY");
+ }
+
+ int tmu_writes = 0; /* Per-unit counts of magic writes made by this one instruction. */
+ int sfu_writes = 0;
+ int vpm_writes = 0;
+ int tlb_writes = 0;
+ int tsy_writes = 0;
+
+ if (inst->alu.add.op != V3D_QPU_A_NOP) { /* Count the add ALU's magic write, if any. */
+ if (inst->alu.add.magic_write) {
+ if (v3d_qpu_magic_waddr_is_tmu(inst->alu.add.waddr))
+ tmu_writes++;
+ if (v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr))
+ sfu_writes++;
+ if (v3d_qpu_magic_waddr_is_vpm(inst->alu.add.waddr))
+ vpm_writes++;
+ if (v3d_qpu_magic_waddr_is_tlb(inst->alu.add.waddr))
+ tlb_writes++;
+ if (v3d_qpu_magic_waddr_is_tsy(inst->alu.add.waddr))
+ tsy_writes++;
+ }
+ }
+
+ if (inst->alu.mul.op != V3D_QPU_M_NOP) { /* Same counting for the mul ALU. */
+ if (inst->alu.mul.magic_write) {
+ if (v3d_qpu_magic_waddr_is_tmu(inst->alu.mul.waddr))
+ tmu_writes++;
+ if (v3d_qpu_magic_waddr_is_sfu(inst->alu.mul.waddr))
+ sfu_writes++;
+ if (v3d_qpu_magic_waddr_is_vpm(inst->alu.mul.waddr))
+ vpm_writes++;
+ if (v3d_qpu_magic_waddr_is_tlb(inst->alu.mul.waddr))
+ tlb_writes++;
+ if (v3d_qpu_magic_waddr_is_tsy(inst->alu.mul.waddr))
+ tsy_writes++;
+ }
+ }
+
+ (void)qpu_magic_waddr_matches; /* XXX: helper is otherwise unused; presumably referenced to silence the warning. */
+
+ /* SFU r4 results come back two instructions later. No doing
+ * r4 read/writes or other SFU lookups until it's done.
+ */
+ if (state->ip - state->last_sfu_write < 2) { /* last_sfu_write starts at -10, so this is false until an SFU write is seen. */
+ if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_R4))
+ fail_instr(state, "R4 read too soon after SFU");
+
+ if (v3d_qpu_writes_r4(inst))
+ fail_instr(state, "R4 write too soon after SFU");
+
+ if (sfu_writes)
+ fail_instr(state, "SFU write too soon after SFU");
+ }
+
+ /* XXX: The docs say VPM can happen with the others, but the simulator
+ * disagrees.
+ */
+ if (tmu_writes +
+ sfu_writes +
+ vpm_writes +
+ tlb_writes +
+ tsy_writes +
+ inst->sig.ldtmu +
+ inst->sig.ldtlb +
+ inst->sig.ldvpm +
+ inst->sig.ldtlbu > 1) {
+ fail_instr(state,
+ "Only one of [TMU, SFU, TSY, TLB read, VPM] allowed");
+ }
+
+ if (sfu_writes)
+ state->last_sfu_write = state->ip; /* Opens the 2-instruction SFU hazard window checked above. */
+}
+
+/* Validates each instruction of block in order, advancing state->ip and state->last. */
+static void
+qpu_validate_block(struct v3d_qpu_validate_state *state, struct qblock *block)
+{
+        vir_for_each_inst(cur, block) {
+                qpu_validate_inst(state, cur);
+                state->ip += 1;
+                state->last = &cur->qpu;
+        }
+}
+
+/**
+ * Walks every block checking the instruction restrictions from page 37
+ * ("Summary of Instruction Restrictions").
+ */
+void
+qpu_validate(struct v3d_compile *c)
+{
+        /* Release builds bail out immediately; the checks below are still
+         * compiled there so that they cannot silently bit-rot.
+         */
+#ifndef DEBUG
+        return;
+#endif
+
+        struct v3d_qpu_validate_state walk = {
+                .c = c,
+                .ip = 0,
+                /* Far enough back that the SFU hazard window starts closed. */
+                .last_sfu_write = -10,
+        };
+
+        vir_for_each_block(blk, c) {
+                qpu_validate_block(&walk, blk);
+        }
+}
diff -Nru mesa-17.2.4/src/broadcom/compiler/v3d_compiler.h mesa-17.3.3/src/broadcom/compiler/v3d_compiler.h
--- mesa-17.2.4/src/broadcom/compiler/v3d_compiler.h 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/broadcom/compiler/v3d_compiler.h 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,934 @@
+/*
+ * Copyright © 2016 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef V3D_COMPILER_H
+#define V3D_COMPILER_H
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "util/macros.h"
+#include "common/v3d_debug.h"
+#include "compiler/nir/nir.h"
+#include "util/list.h"
+#include "util/u_math.h"
+
+#include "qpu/qpu_instr.h"
+#include "pipe/p_state.h"
+
+#define V3D_MAX_TEXTURE_SAMPLERS 32
+#define V3D_MAX_SAMPLES 4
+#define V3D_MAX_FS_INPUTS 64
+#define V3D_MAX_VS_INPUTS 64
+
+struct nir_builder;
+
+struct v3d_fs_inputs {
+ /**
+ * Array of the meanings of the VPM inputs this shader needs.
+ *
+ * It doesn't include those that aren't part of the VPM, like
+ * point/line coordinates.
+ */
+ struct v3d_varying_slot *input_slots;
+ uint32_t num_inputs; /* presumably the number of entries in input_slots — confirm */
+};
+
+enum qfile { /* Register file selector stored in struct qreg.file. */
+ /** An unused source or destination register. */
+ QFILE_NULL,
+
+ /** A physical register, such as the W coordinate payload. */
+ QFILE_REG,
+ /** One of the registers for fixed function interactions. */
+ QFILE_MAGIC,
+
+ /**
+ * A virtual register, that will be allocated to actual accumulator
+ * or physical registers later.
+ */
+ QFILE_TEMP,
+ QFILE_VARY,
+ QFILE_UNIF,
+ QFILE_TLB,
+ QFILE_TLBU,
+
+ /**
+ * VPM reads use this with an index value to say what part of the VPM
+ * is being read.
+ */
+ QFILE_VPM,
+
+ /**
+ * Stores an immediate value in the index field that will be used
+ * directly by qpu_load_imm().
+ */
+ QFILE_LOAD_IMM,
+
+ /**
+ * Stores an immediate value in the index field that can be turned
+ * into a small immediate field by qpu_encode_small_immediate().
+ */
+ QFILE_SMALL_IMM,
+};
+
+/**
+ * A reference to a QPU register or a virtual temp register.
+ */
+struct qreg {
+ enum qfile file; /* Which register file the reference is in. */
+ uint32_t index; /* Index within that file; an immediate value for the *_IMM files. */
+};
+
+static inline struct qreg vir_reg(enum qfile file, uint32_t index)
+{
+        return (struct qreg){ .file = file, .index = index };
+}
+
+/**
+ * A reference to an actual register at the QPU level, for register
+ * allocation.
+ */
+struct qpu_reg {
+ bool magic; /* presumably selects the magic-waddr space rather than the register file — confirm */
+ int index;
+};
+
+struct qinst {
+ /** Entry in qblock->instructions */
+ struct list_head link; /* First member: qpu_schedule.c casts a list_head pointer straight to struct qinst *. */
+
+ /**
+ * The instruction being wrapped. Its condition codes, pack flags,
+ * signals, etc. will all be used, with just the register references
+ * being replaced by the contents of qinst->dst and qinst->src[].
+ */
+ struct v3d_qpu_instr qpu;
+
+ /* Pre-register-allocation references to src/dst registers */
+ struct qreg dst;
+ struct qreg src[3];
+ bool cond_is_exec_mask;
+ bool has_implicit_uniform;
+
+ /* After vir_to_qpu.c: If instr reads a uniform, which uniform from
+ * the uncompiled stream it is.
+ */
+ int uniform; /* -1 when the instruction reads no uniform (the scheduler tests != -1). */
+};
+
+enum quniform_contents {
+ /**
+ * Indicates that a constant 32-bit value is copied from the program's
+ * uniform contents.
+ */
+ QUNIFORM_CONSTANT,
+ /**
+ * Indicates that the program's uniform contents are used as an index
+ * into the GL uniform storage.
+ */
+ QUNIFORM_UNIFORM,
+
+ /** @{
+ * Scaling factors from clip coordinates to relative to the viewport
+ * center.
+ *
+ * This is used by the coordinate and vertex shaders to produce the
+ * 32-bit entry consisting of 2 16-bit fields with 12.4 signed fixed
+ * point offsets from the viewport center.
+ */
+ QUNIFORM_VIEWPORT_X_SCALE,
+ QUNIFORM_VIEWPORT_Y_SCALE,
+ /** @} */
+
+ QUNIFORM_VIEWPORT_Z_OFFSET,
+ QUNIFORM_VIEWPORT_Z_SCALE,
+
+ QUNIFORM_USER_CLIP_PLANE,
+
+ /**
+ * A reference to a texture config parameter 0 uniform.
+ *
+ * This is a uniform implicitly loaded with a QPU_W_TMU* write, which
+ * defines texture type, miplevels, and such. It will be found as a
+ * parameter to the first QOP_TEX_[STRB] instruction in a sequence.
+ */
+ QUNIFORM_TEXTURE_CONFIG_P0_0,
+ QUNIFORM_TEXTURE_CONFIG_P0_1,
+ QUNIFORM_TEXTURE_CONFIG_P0_2,
+ QUNIFORM_TEXTURE_CONFIG_P0_3,
+ QUNIFORM_TEXTURE_CONFIG_P0_4,
+ QUNIFORM_TEXTURE_CONFIG_P0_5,
+ QUNIFORM_TEXTURE_CONFIG_P0_6,
+ QUNIFORM_TEXTURE_CONFIG_P0_7,
+ QUNIFORM_TEXTURE_CONFIG_P0_8,
+ QUNIFORM_TEXTURE_CONFIG_P0_9,
+ QUNIFORM_TEXTURE_CONFIG_P0_10,
+ QUNIFORM_TEXTURE_CONFIG_P0_11,
+ QUNIFORM_TEXTURE_CONFIG_P0_12,
+ QUNIFORM_TEXTURE_CONFIG_P0_13,
+ QUNIFORM_TEXTURE_CONFIG_P0_14,
+ QUNIFORM_TEXTURE_CONFIG_P0_15,
+ QUNIFORM_TEXTURE_CONFIG_P0_16,
+ QUNIFORM_TEXTURE_CONFIG_P0_17,
+ QUNIFORM_TEXTURE_CONFIG_P0_18,
+ QUNIFORM_TEXTURE_CONFIG_P0_19,
+ QUNIFORM_TEXTURE_CONFIG_P0_20,
+ QUNIFORM_TEXTURE_CONFIG_P0_21,
+ QUNIFORM_TEXTURE_CONFIG_P0_22,
+ QUNIFORM_TEXTURE_CONFIG_P0_23,
+ QUNIFORM_TEXTURE_CONFIG_P0_24,
+ QUNIFORM_TEXTURE_CONFIG_P0_25,
+ QUNIFORM_TEXTURE_CONFIG_P0_26,
+ QUNIFORM_TEXTURE_CONFIG_P0_27,
+ QUNIFORM_TEXTURE_CONFIG_P0_28,
+ QUNIFORM_TEXTURE_CONFIG_P0_29,
+ QUNIFORM_TEXTURE_CONFIG_P0_30,
+ QUNIFORM_TEXTURE_CONFIG_P0_31,
+ QUNIFORM_TEXTURE_CONFIG_P0_32, /* NOTE(review): 33 P0 entries vs. V3D_MAX_TEXTURE_SAMPLERS == 32 — confirm the extra one is intended. */
+
+ /**
+ * A reference to a texture config parameter 1 uniform.
+ *
+ * This is a uniform implicitly loaded with a QPU_W_TMU* write, which
+ * defines texture width, height, filters, and wrap modes. It will be
+ * found as a parameter to the second QOP_TEX_[STRB] instruction in a
+ * sequence.
+ */
+ QUNIFORM_TEXTURE_CONFIG_P1,
+
+ QUNIFORM_TEXTURE_FIRST_LEVEL,
+
+ QUNIFORM_TEXTURE_WIDTH,
+ QUNIFORM_TEXTURE_HEIGHT,
+ QUNIFORM_TEXTURE_DEPTH,
+ QUNIFORM_TEXTURE_ARRAY_SIZE,
+ QUNIFORM_TEXTURE_LEVELS,
+
+ QUNIFORM_TEXTURE_MSAA_ADDR,
+
+ QUNIFORM_UBO_ADDR,
+
+ QUNIFORM_TEXRECT_SCALE_X,
+ QUNIFORM_TEXRECT_SCALE_Y,
+
+ QUNIFORM_TEXTURE_BORDER_COLOR,
+
+ QUNIFORM_STENCIL,
+
+ QUNIFORM_ALPHA_REF,
+ QUNIFORM_SAMPLE_MASK,
+};
+
+struct v3d_varying_slot {
+ uint8_t slot_and_component; /* Packed as (slot << 2) + component; see the v3d_slot_*() helpers. */
+};
+
+static inline struct v3d_varying_slot
+v3d_slot_from_slot_and_component(uint8_t slot, uint8_t component)
+{
+        assert(slot < 255 / 4); /* slot has to fit above the two component bits */
+        return (struct v3d_varying_slot){ .slot_and_component = (slot << 2) + component };
+}
+
+static inline uint8_t v3d_slot_get_slot(struct v3d_varying_slot slot)
+{
+        return slot.slot_and_component / 4; /* upper six bits */
+}
+
+static inline uint8_t v3d_slot_get_component(struct v3d_varying_slot slot)
+{
+        return slot.slot_and_component % 4; /* lower two bits */
+}
+
+struct v3d_ubo_range {
+ /**
+ * offset in bytes from the start of the ubo where this range is
+ * uploaded.
+ *
+ * Only set once the range is marked used (presumably via ubo_range_used[] — confirm).
+ */
+ uint32_t dst_offset;
+
+ /**
+ * offset in bytes from the start of the gallium uniforms where the
+ * data comes from.
+ */
+ uint32_t src_offset;
+
+ /** size in bytes of this ubo range */
+ uint32_t size;
+};
+
+struct v3d_key { /* Common head of v3d_fs_key and v3d_vs_key (both embed it as "base"). */
+ void *shader_state;
+ struct {
+ uint8_t swizzle[4];
+ uint8_t return_size;
+ uint8_t return_channels;
+ union { /* Two alternative views of the same storage. */
+ struct {
+ unsigned compare_mode:1;
+ unsigned compare_func:3;
+ unsigned wrap_s:3;
+ unsigned wrap_t:3;
+ };
+ struct {
+ uint16_t msaa_width, msaa_height;
+ };
+ };
+ } tex[V3D_MAX_TEXTURE_SAMPLERS]; /* One entry per texture sampler. */
+ uint8_t ucp_enables; /* presumably a bitmask of enabled user clip planes — confirm */
+};
+
+struct v3d_fs_key {
+ struct v3d_key base; /* Fields shared with v3d_vs_key. */
+ bool depth_enabled;
+ bool is_points;
+ bool is_lines;
+ bool alpha_test;
+ bool point_coord_upper_left;
+ bool light_twoside;
+ bool msaa;
+ bool sample_coverage;
+ bool sample_alpha_to_coverage;
+ bool sample_alpha_to_one;
+ bool clamp_color;
+ uint8_t nr_cbufs;
+ uint8_t swap_color_rb; /* presumably a per-render-target mask like f32_color_rb — confirm */
+ /* Mask of which render targets need to be written as 32-bit floats */
+ uint8_t f32_color_rb;
+ uint8_t alpha_test_func;
+ uint8_t logicop_func;
+ uint32_t point_sprite_mask;
+
+ struct pipe_rt_blend_state blend;
+};
+
+struct v3d_vs_key {
+ struct v3d_key base; /* Fields shared with v3d_fs_key. */
+
+ struct v3d_varying_slot fs_inputs[V3D_MAX_FS_INPUTS];
+ uint8_t num_fs_inputs; /* presumably the number of valid entries in fs_inputs[] — confirm */
+
+ bool is_coord;
+ bool per_vertex_point_size;
+ bool clamp_color;
+};
+
+/** A basic block of VIR instructions. */
+struct qblock {
+ struct list_head link;
+
+ struct list_head instructions; /* List of struct qinst, linked through qinst->link. */
+
+ struct set *predecessors;
+ struct qblock *successors[2]; /* [0]: branch target (NULL in the end block); [1]: the if-we-don't-jump successor, if any. */
+
+ int index;
+
+ /* Instruction IPs for the first and last instruction of the block.
+ * Set by qpu_schedule.c.
+ */
+ uint32_t start_qpu_ip;
+ uint32_t end_qpu_ip;
+
+ /* Instruction IP for the branch instruction of the block. Set by
+ * qpu_schedule.c.
+ */
+ uint32_t branch_qpu_ip; /* ~0 when the block has no branch. */
+
+ /** Offset within the uniform stream at the start of the block. */
+ uint32_t start_uniform;
+ /** Offset within the uniform stream of the branch instruction */
+ uint32_t branch_uniform;
+
+ /** @{ used by v3d_vir_live_variables.c */
+ BITSET_WORD *def;
+ BITSET_WORD *use;
+ BITSET_WORD *live_in;
+ BITSET_WORD *live_out;
+ int start_ip, end_ip;
+ /** @} */
+};
+
+/**
+ * Compiler state saved across compiler invocations, for any expensive global
+ * setup.
+ */
+struct v3d_compiler {
+ const struct v3d_device_info *devinfo; /* presumably the devinfo passed to v3d_compiler_init() — confirm */
+ struct ra_regs *regs;
+ unsigned int reg_class[3];
+};
+
+struct v3d_compile { /* State for compiling a single shader. */
+ const struct v3d_device_info *devinfo;
+ nir_shader *s;
+ nir_function_impl *impl;
+ struct exec_list *cf_node_list;
+ const struct v3d_compiler *compiler;
+
+ /**
+ * Mapping from nir_register * or nir_ssa_def * to array of struct
+ * qreg for the values.
+ */
+ struct hash_table *def_ht;
+
+ /* For each temp, the instruction generating its value. */
+ struct qinst **defs;
+ uint32_t defs_array_size;
+
+ /**
+ * Inputs to the shader, arranged by TGSI declaration order.
+ *
+ * Not all fragment shader QFILE_VARY reads are present in this array.
+ */
+ struct qreg *inputs;
+ struct qreg *outputs;
+ bool msaa_per_sample_output;
+ struct qreg color_reads[V3D_MAX_SAMPLES];
+ struct qreg sample_colors[V3D_MAX_SAMPLES];
+ uint32_t inputs_array_size;
+ uint32_t outputs_array_size;
+ uint32_t uniforms_array_size; /* NOTE(review): distinct from uniform_array_size further down — confirm both are needed. */
+
+ /* Booleans for whether the corresponding QFILE_VARY[i] is
+ * flat-shaded. This doesn't count gl_FragColor flat-shading, which is
+ * controlled by shader->color_inputs and rasterizer->flatshade in the
+ * gallium driver.
+ */
+ BITSET_WORD flat_shade_flags[BITSET_WORDS(V3D_MAX_FS_INPUTS)];
+
+ /* Booleans for whether the corresponding QFILE_VARY[i] uses the
+ * default glShadeModel() behavior.
+ */
+ BITSET_WORD shade_model_flags[BITSET_WORDS(V3D_MAX_FS_INPUTS)];
+
+ struct v3d_ubo_range *ubo_ranges;
+ bool *ubo_range_used;
+ uint32_t ubo_ranges_array_size;
+ /** Number of uniform areas tracked in ubo_ranges. */
+ uint32_t num_ubo_ranges;
+ uint32_t next_ubo_dst_offset;
+
+ /* State for whether we're executing on each channel currently. 0 if
+ * yes, otherwise a block number + 1 that the channel jumped to.
+ */
+ struct qreg execute;
+
+ struct qreg line_x, point_x, point_y;
+
+ /**
+ * Instance ID, which comes in before the vertex attribute payload if
+ * the shader record requests it.
+ */
+ struct qreg iid;
+
+ /**
+ * Vertex ID, which comes in before the vertex attribute payload
+ * (after Instance ID) if the shader record requests it.
+ */
+ struct qreg vid;
+
+ /* Fragment shader payload regs. */
+ struct qreg payload_w, payload_w_centroid, payload_z;
+
+ uint8_t vattr_sizes[V3D_MAX_VS_INPUTS];
+ uint32_t num_vpm_writes;
+
+ /**
+ * Array of the VARYING_SLOT_* of all FS QFILE_VARY reads.
+ *
+ * This includes those that aren't part of the VPM varyings, like
+ * point/line coordinates.
+ */
+ struct v3d_varying_slot input_slots[V3D_MAX_FS_INPUTS];
+
+ /**
+ * An entry per outputs[] in the VS indicating what the VARYING_SLOT_*
+ * of the output is. Used to emit from the VS in the order that the
+ * FS needs.
+ */
+ struct v3d_varying_slot *output_slots;
+
+ struct pipe_shader_state *shader_state;
+ struct v3d_key *key;
+ struct v3d_fs_key *fs_key;
+ struct v3d_vs_key *vs_key;
+
+ /* Live ranges of temps. */
+ int *temp_start, *temp_end;
+
+ uint32_t *uniform_data; /* Parallel to uniform_contents; replaced with reordered arrays by the QPU scheduler. */
+ enum quniform_contents *uniform_contents;
+ uint32_t uniform_array_size; /* Allocated length of the two arrays above. */
+ uint32_t num_uniforms;
+ uint32_t num_outputs;
+ uint32_t output_position_index;
+ nir_variable *output_color_var[4];
+ uint32_t output_point_size_index;
+ uint32_t output_sample_mask_index;
+
+ struct qreg undef;
+ uint32_t num_temps;
+
+ struct list_head blocks;
+ int next_block_index;
+ struct qblock *cur_block;
+ struct qblock *loop_cont_block;
+ struct qblock *loop_break_block;
+
+ uint64_t *qpu_insts;
+ uint32_t qpu_inst_count; /* Incremented once per instruction the QPU scheduler emits. */
+ uint32_t qpu_inst_size;
+
+ /* For the FS, the number of varying inputs not counting the
+ * point/line varyings payload
+ */
+ uint32_t num_inputs;
+
+ /**
+ * Number of inputs from num_inputs remaining to be queued to the read
+ * FIFO in the VS/CS.
+ */
+ uint32_t num_inputs_remaining;
+
+ /* Number of inputs currently in the read FIFO for the VS/CS */
+ uint32_t num_inputs_in_fifo;
+
+ /** Next offset in the VPM to read from in the VS/CS */
+ uint32_t vpm_read_offset;
+
+ uint32_t program_id;
+ uint32_t variant_id;
+
+ /* Set to compile program in threaded FS mode, where SIG_THREAD_SWITCH
+ * is used to hide texturing latency at the cost of limiting ourselves
+ * to the bottom half of physical reg space.
+ */
+ bool fs_threaded;
+
+ bool last_thrsw_at_top_level;
+
+ bool failed;
+};
+
+struct v3d_uniform_list {
+ enum quniform_contents *contents;
+ uint32_t *data;
+ uint32_t count; /* presumably the length of both contents[] and data[] — confirm */
+};
+
+struct v3d_prog_data { /* Common head of v3d_vs_prog_data and v3d_fs_prog_data (both embed it as "base"). */
+ struct v3d_uniform_list uniforms;
+
+ struct v3d_ubo_range *ubo_ranges;
+ uint32_t num_ubo_ranges; /* presumably the number of entries in ubo_ranges — confirm */
+ uint32_t ubo_size;
+
+ uint8_t num_inputs;
+
+};
+
+struct v3d_vs_prog_data {
+ struct v3d_prog_data base;
+
+ bool uses_iid, uses_vid;
+
+ /* Number of components read from each vertex attribute. */
+ uint8_t vattr_sizes[32]; /* NOTE(review): 32 here, but struct v3d_compile sizes vattr_sizes with V3D_MAX_VS_INPUTS (64) — confirm. */
+
+ /* Total number of components read, for the shader state record. */
+ uint32_t vpm_input_size;
+
+ /* Total number of components written, for the shader state record. */
+ uint32_t vpm_output_size;
+};
+
+struct v3d_fs_prog_data {
+ struct v3d_prog_data base;
+
+ struct v3d_varying_slot input_slots[V3D_MAX_FS_INPUTS]; /* Same type and size as v3d_compile.input_slots. */
+
+ /* Bitmask for whether the corresponding input is flat-shaded,
+ * independent of rasterizer (gl_FragColor) flat-shading.
+ */
+ BITSET_WORD flat_shade_flags[BITSET_WORDS(V3D_MAX_FS_INPUTS)];
+ /* Bitmask for whether the corresponding input uses the default
+ * glShadeModel() behavior.
+ */
+ BITSET_WORD shade_model_flags[BITSET_WORDS(V3D_MAX_FS_INPUTS)];
+
+ bool writes_z;
+ bool discard;
+};
+
+/* Special nir_load_input intrinsic index for loading the current TLB
+ * destination color.
+ */
+#define V3D_NIR_TLB_COLOR_READ_INPUT 2000000000
+
+#define V3D_NIR_MS_MASK_OUTPUT 2000000000
+
+extern const nir_shader_compiler_options v3d_nir_options;
+
+const struct v3d_compiler *v3d_compiler_init(const struct v3d_device_info *devinfo);
+void v3d_compiler_free(const struct v3d_compiler *compiler);
+void v3d_optimize_nir(struct nir_shader *s);
+
+uint64_t *v3d_compile_vs(const struct v3d_compiler *compiler,
+ struct v3d_vs_key *key,
+ struct v3d_vs_prog_data *prog_data,
+ nir_shader *s,
+ int program_id, int variant_id,
+ uint32_t *final_assembly_size);
+
+uint64_t *v3d_compile_fs(const struct v3d_compiler *compiler,
+ struct v3d_fs_key *key,
+ struct v3d_fs_prog_data *prog_data,
+ nir_shader *s,
+ int program_id, int variant_id,
+ uint32_t *final_assembly_size);
+
+void v3d_nir_to_vir(struct v3d_compile *c);
+
+void vir_compile_destroy(struct v3d_compile *c);
+const char *vir_get_stage_name(struct v3d_compile *c);
+struct qblock *vir_new_block(struct v3d_compile *c);
+void vir_set_emit_block(struct v3d_compile *c, struct qblock *block);
+void vir_link_blocks(struct qblock *predecessor, struct qblock *successor);
+struct qblock *vir_entry_block(struct v3d_compile *c);
+struct qblock *vir_exit_block(struct v3d_compile *c);
+struct qinst *vir_add_inst(enum v3d_qpu_add_op op, struct qreg dst,
+ struct qreg src0, struct qreg src1);
+struct qinst *vir_mul_inst(enum v3d_qpu_mul_op op, struct qreg dst,
+ struct qreg src0, struct qreg src1);
+struct qinst *vir_branch_inst(enum v3d_qpu_branch_cond cond, struct qreg src0);
+void vir_remove_instruction(struct v3d_compile *c, struct qinst *qinst);
+struct qreg vir_uniform(struct v3d_compile *c,
+ enum quniform_contents contents,
+ uint32_t data);
+void vir_schedule_instructions(struct v3d_compile *c);
+struct v3d_qpu_instr v3d_qpu_nop(void);
+
+struct qreg vir_emit_def(struct v3d_compile *c, struct qinst *inst);
+struct qinst *vir_emit_nondef(struct v3d_compile *c, struct qinst *inst);
+void vir_set_cond(struct qinst *inst, enum v3d_qpu_cond cond);
+void vir_set_pf(struct qinst *inst, enum v3d_qpu_pf pf);
+void vir_set_unpack(struct qinst *inst, int src,
+ enum v3d_qpu_input_unpack unpack);
+
+struct qreg vir_get_temp(struct v3d_compile *c);
+void vir_calculate_live_intervals(struct v3d_compile *c);
+bool vir_has_implicit_uniform(struct qinst *inst);
+int vir_get_implicit_uniform_src(struct qinst *inst);
+int vir_get_non_sideband_nsrc(struct qinst *inst);
+int vir_get_nsrc(struct qinst *inst);
+bool vir_has_side_effects(struct v3d_compile *c, struct qinst *inst);
+bool vir_get_add_op(struct qinst *inst, enum v3d_qpu_add_op *op);
+bool vir_get_mul_op(struct qinst *inst, enum v3d_qpu_mul_op *op);
+bool vir_is_raw_mov(struct qinst *inst);
+bool vir_is_tex(struct qinst *inst);
+bool vir_is_add(struct qinst *inst);
+bool vir_is_mul(struct qinst *inst);
+bool vir_is_float_input(struct qinst *inst);
+bool vir_depends_on_flags(struct qinst *inst);
+bool vir_writes_r3(struct qinst *inst);
+bool vir_writes_r4(struct qinst *inst);
+struct qreg vir_follow_movs(struct v3d_compile *c, struct qreg reg);
+uint8_t vir_channels_written(struct qinst *inst);
+
+void vir_dump(struct v3d_compile *c);
+void vir_dump_inst(struct v3d_compile *c, struct qinst *inst);
+
+void vir_validate(struct v3d_compile *c);
+
+void vir_optimize(struct v3d_compile *c);
+bool vir_opt_algebraic(struct v3d_compile *c);
+bool vir_opt_constant_folding(struct v3d_compile *c);
+bool vir_opt_copy_propagate(struct v3d_compile *c);
+bool vir_opt_dead_code(struct v3d_compile *c);
+bool vir_opt_peephole_sf(struct v3d_compile *c);
+bool vir_opt_small_immediates(struct v3d_compile *c);
+bool vir_opt_vpm(struct v3d_compile *c);
+void v3d_nir_lower_blend(nir_shader *s, struct v3d_compile *c);
+void v3d_nir_lower_io(nir_shader *s, struct v3d_compile *c);
+void v3d_nir_lower_txf_ms(nir_shader *s, struct v3d_compile *c);
+void vir_lower_uniforms(struct v3d_compile *c);
+
+void v3d_vir_to_qpu(struct v3d_compile *c);
+uint32_t v3d_qpu_schedule_instructions(struct v3d_compile *c);
+void qpu_validate(struct v3d_compile *c);
+struct qpu_reg *v3d_register_allocate(struct v3d_compile *c);
+bool vir_init_reg_sets(struct v3d_compiler *compiler);
+
+void vir_PF(struct v3d_compile *c, struct qreg src, enum v3d_qpu_pf pf);
+
+static inline bool
+quniform_contents_is_texture_p0(enum quniform_contents contents)
+{
+        if (contents < QUNIFORM_TEXTURE_CONFIG_P0_0) /* below the P0 range */
+                return false;
+        return contents < (QUNIFORM_TEXTURE_CONFIG_P0_0 + V3D_MAX_TEXTURE_SAMPLERS);
+}
+
+static inline struct qreg
+vir_uniform_ui(struct v3d_compile *c, uint32_t ui)
+{
+ return vir_uniform(c, QUNIFORM_CONSTANT, ui); /* ui becomes the data of a constant uniform */
+}
+
+static inline struct qreg
+vir_uniform_f(struct v3d_compile *c, float f)
+{
+        return vir_uniform_ui(c, fui(f)); /* constant uniform holding fui(f) */
+}
+
+#define VIR_ALU0(name, vir_inst, op) \
+static inline struct qreg \
+vir_##name(struct v3d_compile *c) \
+{ \
+ return vir_emit_def(c, vir_inst(op, c->undef, \
+ c->undef, c->undef)); \
+} \
+static inline struct qinst * \
+vir_##name##_dest(struct v3d_compile *c, struct qreg dest) \
+{ \
+ return vir_emit_nondef(c, vir_inst(op, dest, \
+ c->undef, c->undef)); \
+}
+
+#define VIR_ALU1(name, vir_inst, op) \
+static inline struct qreg \
+vir_##name(struct v3d_compile *c, struct qreg a) \
+{ \
+ return vir_emit_def(c, vir_inst(op, c->undef, \
+ a, c->undef)); \
+} \
+static inline struct qinst * \
+vir_##name##_dest(struct v3d_compile *c, struct qreg dest, \
+ struct qreg a) \
+{ \
+ return vir_emit_nondef(c, vir_inst(op, dest, a, \
+ c->undef)); \
+}
+
+#define VIR_ALU2(name, vir_inst, op) \
+static inline struct qreg \
+vir_##name(struct v3d_compile *c, struct qreg a, struct qreg b) \
+{ \
+ return vir_emit_def(c, vir_inst(op, c->undef, a, b)); \
+} \
+static inline struct qinst * \
+vir_##name##_dest(struct v3d_compile *c, struct qreg dest, \
+ struct qreg a, struct qreg b) \
+{ \
+ return vir_emit_nondef(c, vir_inst(op, dest, a, b)); \
+}
+
+#define VIR_NODST_1(name, vir_inst, op) \
+static inline struct qinst * \
+vir_##name(struct v3d_compile *c, struct qreg a) \
+{ \
+ return vir_emit_nondef(c, vir_inst(op, c->undef, \
+ a, c->undef)); \
+}
+
+#define VIR_NODST_2(name, vir_inst, op) \
+static inline struct qinst * \
+vir_##name(struct v3d_compile *c, struct qreg a, struct qreg b) \
+{ \
+ return vir_emit_nondef(c, vir_inst(op, c->undef, \
+ a, b)); \
+}
+
+#define VIR_A_ALU2(name) VIR_ALU2(name, vir_add_inst, V3D_QPU_A_##name)
+#define VIR_M_ALU2(name) VIR_ALU2(name, vir_mul_inst, V3D_QPU_M_##name)
+#define VIR_A_ALU1(name) VIR_ALU1(name, vir_add_inst, V3D_QPU_A_##name)
+#define VIR_M_ALU1(name) VIR_ALU1(name, vir_mul_inst, V3D_QPU_M_##name)
+#define VIR_A_ALU0(name) VIR_ALU0(name, vir_add_inst, V3D_QPU_A_##name)
+#define VIR_M_ALU0(name) VIR_ALU0(name, vir_mul_inst, V3D_QPU_M_##name)
+#define VIR_A_NODST_2(name) VIR_NODST_2(name, vir_add_inst, V3D_QPU_A_##name)
+#define VIR_M_NODST_2(name) VIR_NODST_2(name, vir_mul_inst, V3D_QPU_M_##name)
+#define VIR_A_NODST_1(name) VIR_NODST_1(name, vir_add_inst, V3D_QPU_A_##name)
+#define VIR_M_NODST_1(name) VIR_NODST_1(name, vir_mul_inst, V3D_QPU_M_##name)
+
+VIR_A_ALU2(FADD)
+VIR_A_ALU2(VFPACK)
+VIR_A_ALU2(FSUB)
+VIR_A_ALU2(FMIN)
+VIR_A_ALU2(FMAX)
+
+VIR_A_ALU2(ADD)
+VIR_A_ALU2(SUB)
+VIR_A_ALU2(SHL)
+VIR_A_ALU2(SHR)
+VIR_A_ALU2(ASR)
+VIR_A_ALU2(ROR)
+VIR_A_ALU2(MIN)
+VIR_A_ALU2(MAX)
+VIR_A_ALU2(UMIN)
+VIR_A_ALU2(UMAX)
+VIR_A_ALU2(AND)
+VIR_A_ALU2(OR)
+VIR_A_ALU2(XOR)
+VIR_A_ALU2(VADD)
+VIR_A_ALU2(VSUB)
+VIR_A_ALU1(NOT)
+VIR_A_ALU1(NEG)
+VIR_A_ALU1(FLAPUSH)
+VIR_A_ALU1(FLBPUSH)
+VIR_A_ALU1(FLBPOP)
+VIR_A_ALU1(SETMSF)
+VIR_A_ALU1(SETREVF)
+VIR_A_ALU1(TIDX)
+VIR_A_ALU1(EIDX)
+
+VIR_A_ALU0(FXCD)
+VIR_A_ALU0(XCD)
+VIR_A_ALU0(FYCD)
+VIR_A_ALU0(YCD)
+VIR_A_ALU0(MSF)
+VIR_A_ALU0(REVF)
+VIR_A_NODST_1(VPMSETUP)
+VIR_A_ALU2(FCMP)
+VIR_A_ALU2(VFMAX)
+
+VIR_A_ALU1(FROUND)
+VIR_A_ALU1(FTOIN)
+VIR_A_ALU1(FTRUNC)
+VIR_A_ALU1(FTOIZ)
+VIR_A_ALU1(FFLOOR)
+VIR_A_ALU1(FTOUZ)
+VIR_A_ALU1(FCEIL)
+VIR_A_ALU1(FTOC)
+
+VIR_A_ALU1(FDX)
+VIR_A_ALU1(FDY)
+
+VIR_A_ALU1(ITOF)
+VIR_A_ALU1(CLZ)
+VIR_A_ALU1(UTOF)
+
+VIR_M_ALU2(UMUL24)
+VIR_M_ALU2(FMUL)
+VIR_M_ALU2(SMUL24)
+VIR_M_NODST_2(MULTOP)
+
+VIR_M_ALU1(MOV)
+VIR_M_ALU1(FMOV)
+
+static inline struct qinst *
+vir_MOV_cond(struct v3d_compile *c, enum v3d_qpu_cond cond,
+             struct qreg dest, struct qreg src)
+{
+        struct qinst *inst = vir_MOV_dest(c, dest, src);
+        vir_set_cond(inst, cond); /* predicate the MOV that was just emitted */
+        return inst;
+}
+
+static inline struct qreg
+vir_SEL(struct v3d_compile *c, enum v3d_qpu_cond cond, struct qreg src0, struct qreg src1)
+{
+        /* Unconditionally load src1, then overwrite with src0 under cond. */
+        struct qreg result = vir_get_temp(c);
+        vir_MOV_dest(c, result, src1);
+        vir_set_cond(vir_MOV_dest(c, result, src0), cond);
+        return result;
+}
+
+static inline void
+vir_VPM_WRITE(struct v3d_compile *c, struct qreg val)
+{
+ vir_MOV_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_VPM), val); /* MOV val to the VPM magic write address */
+}
+
+static inline struct qinst *
+vir_NOP(struct v3d_compile *c)
+{
+        struct qinst *nop = vir_add_inst(V3D_QPU_A_NOP, c->undef, c->undef, c->undef);
+        return vir_emit_nondef(c, nop); /* emitted without allocating a temp */
+}
+/*
+static inline struct qreg
+vir_LOAD_IMM(struct v3d_compile *c, uint32_t val)
+{
+ return vir_emit_def(c, vir_inst(QOP_LOAD_IMM, c->undef,
+ vir_reg(QFILE_LOAD_IMM, val), c->undef));
+}
+
+static inline struct qreg
+vir_LOAD_IMM_U2(struct v3d_compile *c, uint32_t val)
+{
+ return vir_emit_def(c, vir_inst(QOP_LOAD_IMM_U2, c->undef,
+ vir_reg(QFILE_LOAD_IMM, val),
+ c->undef));
+}
+static inline struct qreg
+vir_LOAD_IMM_I2(struct v3d_compile *c, uint32_t val)
+{
+ return vir_emit_def(c, vir_inst(QOP_LOAD_IMM_I2, c->undef,
+ vir_reg(QFILE_LOAD_IMM, val),
+ c->undef));
+}
+*/
+
+static inline struct qinst *
+vir_BRANCH(struct v3d_compile *c, enum v3d_qpu_branch_cond cond)
+{
+        /* Placeholder uniform: the actual uniform_data value is set at scheduling time. */
+        return vir_emit_nondef(c, vir_branch_inst(cond, vir_uniform_ui(c, 0)));
+}
+
+#define vir_for_each_block(block, c)                                    \
+        list_for_each_entry(struct qblock, block, &(c)->blocks, link)
+
+#define vir_for_each_block_rev(block, c)                                \
+        list_for_each_entry_rev(struct qblock, block, &(c)->blocks, link)
+
+/* Loop over the non-NULL members of the successors array (block is evaluated repeatedly). */
+#define vir_for_each_successor(succ, block)                             \
+        for (struct qblock *succ = (block)->successors[0];              \
+             succ != NULL;                                              \
+             succ = (succ == (block)->successors[1] ? NULL :            \
+                     (block)->successors[1]))
+
+#define vir_for_each_inst(inst, block)                                  \
+        list_for_each_entry(struct qinst, inst, &(block)->instructions, link)
+
+#define vir_for_each_inst_rev(inst, block)                              \
+        list_for_each_entry_rev(struct qinst, inst, &(block)->instructions, link)
+
+#define vir_for_each_inst_safe(inst, block)                             \
+        list_for_each_entry_safe(struct qinst, inst, &(block)->instructions, link)
+
+#define vir_for_each_inst_inorder(inst, c)                              \
+        vir_for_each_block(_block, c)                                   \
+                vir_for_each_inst(inst, _block)
+
+#endif /* V3D_COMPILER_H */
diff -Nru mesa-17.2.4/src/broadcom/compiler/v3d_nir_lower_io.c mesa-17.3.3/src/broadcom/compiler/v3d_nir_lower_io.c
--- mesa-17.2.4/src/broadcom/compiler/v3d_nir_lower_io.c 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/broadcom/compiler/v3d_nir_lower_io.c 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,176 @@
+/*
+ * Copyright © 2015 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "compiler/v3d_compiler.h"
+#include "compiler/nir/nir_builder.h"
+
+/**
+ * Walks the NIR generated by TGSI-to-NIR or GLSL-to-NIR to lower its io
+ * intrinsics into something amenable to the V3D architecture.
+ *
+ * Currently, it splits VS inputs and uniforms into scalars, drops any
+ * non-position outputs in coordinate shaders, and fixes up the addressing on
+ * indirect uniform loads. FS input and VS output scalarization is handled by
+ * nir_lower_io_to_scalar().
+ */
+
+static void
+replace_intrinsic_with_vec(nir_builder *b, nir_intrinsic_instr *intr,
+                           nir_ssa_def **comps)
+{
+        /* Gather the scalar components back into a single vector value; the
+         * ALU scalarization pass that runs later will split it apart again.
+         */
+        nir_ssa_def *rebuilt = nir_vec(b, comps, intr->num_components);
+        nir_src rebuilt_src = nir_src_for_ssa(rebuilt);
+
+        /* Point every user of the old intrinsic's result at the rebuilt
+         * vector, then remove the intrinsic itself.
+         */
+        nir_ssa_def_rewrite_uses(&intr->dest.ssa, rebuilt_src);
+        nir_instr_remove(&intr->instr);
+}
+
+/**
+ * Removes a VS store_output whose varying slot is neither position, point
+ * size, nor one of the slots listed in the VS key's FS inputs.  Stores in
+ * non-VS shaders are left untouched.
+ */
+static void
+v3d_nir_lower_output(struct v3d_compile *c, nir_builder *b,
+                     nir_intrinsic_instr *intr)
+{
+        /* Find the output variable that this store writes to. */
+        nir_variable *output_var = NULL;
+        nir_foreach_variable(var, &c->s->outputs) {
+                if (var->data.driver_location == nir_intrinsic_base(intr)) {
+                        output_var = var;
+                        break;
+                }
+        }
+        assert(output_var);
+
+        /* Only compiles with a VS key get their outputs filtered. */
+        if (!c->vs_key)
+                return;
+
+        const int slot = output_var->data.location;
+
+        /* Position and point size are always kept. */
+        if (slot == VARYING_SLOT_PSIZ || slot == VARYING_SLOT_POS)
+                return;
+
+        /* Any other output is kept only if the key lists it as an FS input. */
+        for (int i = 0; i < c->vs_key->num_fs_inputs; i++) {
+                if (v3d_slot_get_slot(c->vs_key->fs_inputs[i]) == slot)
+                        return;
+        }
+
+        nir_instr_remove(&intr->instr);
+}
+
+static void
+v3d_nir_lower_uniform(struct v3d_compile *c, nir_builder *b,
+                      nir_intrinsic_instr *intr)
+{
+        /* dests[] below only has room for four components. */
+        assert(intr->num_components <= 4);
+        b->cursor = nir_before_instr(&intr->instr);
+
+        /* Convert the uniform offset to bytes.  The value is the same for
+         * every component, so the shift is emitted once.  If it happens to
+         * be a constant, constant-folding will clean up the shift for us.
+         */
+        nir_ssa_def *byte_offset = nir_ishl(b, intr->src[0].ssa,
+                                            nir_imm_int(b, 4));
+
+        /* Generate scalar loads equivalent to the original vector. */
+        nir_ssa_def *dests[4];
+        for (unsigned i = 0; i < intr->num_components; i++) {
+                nir_intrinsic_instr *intr_comp =
+                        nir_intrinsic_instr_create(c->s, intr->intrinsic);
+                intr_comp->num_components = 1;
+                nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, 32, NULL);
+
+                /* The base is rescaled too: 16 per original unit, 4 per component. */
+                nir_intrinsic_set_base(intr_comp, nir_intrinsic_base(intr) * 16 + i * 4);
+                intr_comp->src[0] = nir_src_for_ssa(byte_offset);
+
+                dests[i] = &intr_comp->dest.ssa;
+                nir_builder_instr_insert(b, &intr_comp->instr);
+        }
+
+        replace_intrinsic_with_vec(b, intr, dests);
+}
+
+/* Hands a single instruction to the lowering helper for its intrinsic, if
+ * there is one.
+ */
+static void
+v3d_nir_lower_io_instr(struct v3d_compile *c, nir_builder *b,
+                       struct nir_instr *instr)
+{
+        /* Only intrinsics are candidates for lowering here. */
+        if (instr->type != nir_instr_type_intrinsic)
+                return;
+
+        nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+
+        if (intr->intrinsic == nir_intrinsic_store_output) {
+                /* May remove the store entirely. */
+                v3d_nir_lower_output(c, b, intr);
+        } else if (intr->intrinsic == nir_intrinsic_load_uniform) {
+                /* Splits the load into per-component scalar loads. */
+                v3d_nir_lower_uniform(c, b, intr);
+        }
+
+        /* Everything else, including load_input and load_user_clip_plane,
+         * is left as-is.
+         */
+}
+
+static bool
+v3d_nir_lower_io_impl(struct v3d_compile *c, nir_function_impl *impl)
+{
+        nir_builder builder;
+        nir_builder_init(&builder, impl);
+
+        /* Use the _safe iterator: the lowering helpers may remove the
+         * instruction that is currently being visited.
+         */
+        nir_foreach_block(blk, impl) {
+                nir_foreach_instr_safe(cur, blk)
+                        v3d_nir_lower_io_instr(c, &builder, cur);
+        }
+        nir_metadata_preserve(impl, nir_metadata_block_index | nir_metadata_dominance);
+        return true; /* returned unconditionally */
+}
+
+void
+v3d_nir_lower_io(nir_shader *s, struct v3d_compile *c)
+{
+        /* Lower every function that has an implementation. */
+        nir_foreach_function(fn, s)
+                if (fn->impl != NULL)
+                        v3d_nir_lower_io_impl(c, fn->impl);
+}
diff -Nru mesa-17.2.4/src/broadcom/compiler/vir.c mesa-17.3.3/src/broadcom/compiler/vir.c
--- mesa-17.2.4/src/broadcom/compiler/vir.c 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/broadcom/compiler/vir.c 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,898 @@
+/*
+ * Copyright © 2016-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "v3d_compiler.h"
+
+int
+vir_get_non_sideband_nsrc(struct qinst *inst)
+{
+        /* Branches, and any other non-ALU instruction type, report zero. */
+        if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU)
+                return 0;
+
+        /* The add op decides the source count unless it is a NOP, in which
+         * case the mul op does.
+         */
+        const bool uses_add = inst->qpu.alu.add.op != V3D_QPU_A_NOP;
+
+        return uses_add ? v3d_qpu_add_op_num_src(inst->qpu.alu.add.op)
+                        : v3d_qpu_mul_op_num_src(inst->qpu.alu.mul.op);
+}
+
+int
+vir_get_nsrc(struct qinst *inst)
+{
+        /* The implicit uniform, when present, counts as one extra source on
+         * top of the regular ALU operands.
+         */
+        const int alu_srcs = vir_get_non_sideband_nsrc(inst);
+
+        return vir_has_implicit_uniform(inst) ? alu_srcs + 1 : alu_srcs;
+}
+
+bool
+vir_has_implicit_uniform(struct qinst *inst)
+{
+        /* Every branch has one. */
+        if (inst->qpu.type == V3D_QPU_INSTR_TYPE_BRANCH)
+                return true;
+
+        /* Instruction types other than branch and ALU never do. */
+        if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU)
+                return false;
+
+        /* For ALU instructions a TLBU destination implies one; otherwise
+         * defer to the flag stored on the instruction.
+         */
+        return inst->dst.file == QFILE_TLBU || inst->has_implicit_uniform;
+}
+
+/* Index of the implicit (sideband) uniform, which is stored as the last
+ * source, after the normal ALU arguments; see vir_has_implicit_uniform().
+ */
+int
+vir_get_implicit_uniform_src(struct qinst *inst)
+{
+ return vir_get_nsrc(inst) - 1;
+}
+
+/**
+ * Reports whether the instruction does something beyond writing its
+ * destination and so must be kept even when that destination is unused:
+ * any branch, the SETREVF/SETMSF/VPMSETUP add ops, the MULTOP mul op, or
+ * anything carrying the ldtmu signal.  The compile context is not consulted.
+ */
+bool
+vir_has_side_effects(struct v3d_compile *c, struct qinst *inst)
+{
+        if (inst->qpu.type == V3D_QPU_INSTR_TYPE_BRANCH)
+                return true;
+
+        if (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
+                const enum v3d_qpu_add_op add_op = inst->qpu.alu.add.op;
+                const enum v3d_qpu_mul_op mul_op = inst->qpu.alu.mul.op;
+
+                /* Add ops listed as having side effects. */
+                if (add_op == V3D_QPU_A_SETREVF ||
+                    add_op == V3D_QPU_A_SETMSF ||
+                    add_op == V3D_QPU_A_VPMSETUP)
+                        return true;
+
+                /* Mul ops listed as having side effects. */
+                if (mul_op == V3D_QPU_M_MULTOP)
+                        return true;
+        }
+
+        /* Checked for every instruction that falls through to here. */
+        if (inst->qpu.sig.ldtmu)
+                return true;
+
+        return false;
+}
+
+/* Reports whether the instruction's op is one of the (so far incomplete) set
+ * treated as taking floating-point sources.  Branches and any other non-ALU
+ * instruction never are.
+ */
+bool
+vir_is_float_input(struct qinst *inst)
+{
+        /* XXX: More instrs */
+        if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU)
+                return false;
+
+        switch (inst->qpu.alu.add.op) {
+        case V3D_QPU_A_FADD:
+        case V3D_QPU_A_FSUB:
+        case V3D_QPU_A_FMIN:
+        case V3D_QPU_A_FMAX:
+        case V3D_QPU_A_FTOIN:
+                return true;
+        default:
+                break;
+        }
+
+        const enum v3d_qpu_mul_op mul_op = inst->qpu.alu.mul.op;
+
+        if (mul_op == V3D_QPU_M_FMOV ||
+            mul_op == V3D_QPU_M_VFMUL ||
+            mul_op == V3D_QPU_M_FMUL)
+                return true;
+
+        return false;
+}
+
+bool
+vir_is_raw_mov(struct qinst *inst)
+{
+        /* Must be an ALU instruction whose mul op is MOV or FMOV... */
+        if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU)
+                return false;
+
+        const enum v3d_qpu_mul_op mul_op = inst->qpu.alu.mul.op;
+        if (mul_op != V3D_QPU_M_FMOV && mul_op != V3D_QPU_M_MOV)
+                return false;
+
+        /* ...with no output packing on either ALU half... */
+        const bool packs = inst->qpu.alu.add.output_pack != V3D_QPU_PACK_NONE ||
+                           inst->qpu.alu.mul.output_pack != V3D_QPU_PACK_NONE;
+
+        /* ...and no condition on either ALU half. */
+        const bool predicated = inst->qpu.flags.ac != V3D_QPU_COND_NONE ||
+                                inst->qpu.flags.mc != V3D_QPU_COND_NONE;
+        return !packs && !predicated;
+}
+
+bool
+vir_is_add(struct qinst *inst)
+{
+        bool is_alu = (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU);
+        return is_alu && inst->qpu.alu.add.op != V3D_QPU_A_NOP; /* add op is not a NOP */
+}
+
+bool
+vir_is_mul(struct qinst *inst)
+{
+        bool is_alu = (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU);
+        return is_alu && inst->qpu.alu.mul.op != V3D_QPU_M_NOP; /* mul op is not a NOP */
+}
+
+bool
+vir_is_tex(struct qinst *inst)
+{
+        /* Only writes to a magic address can target the TMU. */
+        if (inst->dst.file != QFILE_MAGIC)
+                return false;
+        return v3d_qpu_magic_waddr_is_tmu(inst->dst.index);
+}
+
+bool
+vir_depends_on_flags(struct qinst *inst)
+{
+        /* Conditional branches read the flags... */
+        if (inst->qpu.type == V3D_QPU_INSTR_TYPE_BRANCH)
+                return inst->qpu.branch.cond != V3D_QPU_BRANCH_COND_ALWAYS;
+        /* ...as does an op with a cond on either half; vir_set_cond() sets just one. */
+        return (inst->qpu.flags.ac != V3D_QPU_COND_NONE ||
+                inst->qpu.flags.mc != V3D_QPU_COND_NONE);
+}
+
+bool
+vir_writes_r3(struct qinst *inst)
+{
+        const int nsrc = vir_get_nsrc(inst);
+
+        /* A source in the VARY or VPM file is what counts as an r3 write. */
+        for (int s = 0; s < nsrc; s++) {
+                const struct qreg *src = &inst->src[s];
+
+                if (src->file == QFILE_VARY || src->file == QFILE_VPM)
+                        return true;
+        }
+
+        return false;
+}
+
+/* Reports whether the instruction is considered to write r4: it targets one
+ * of the RECIP/RSQRT/EXP/LOG/SIN magic addresses, or has ldtmu set.
+ */
+bool
+vir_writes_r4(struct qinst *inst)
+{
+        /* Only magic destinations are inspected by index. */
+        if (inst->dst.file == QFILE_MAGIC) {
+                switch (inst->dst.index) {
+                case V3D_QPU_WADDR_RECIP:
+                case V3D_QPU_WADDR_RSQRT:
+                case V3D_QPU_WADDR_EXP:
+                case V3D_QPU_WADDR_LOG:
+                case V3D_QPU_WADDR_SIN:
+                        return true;
+                default:
+                        break;
+                }
+        }
+
+        /* Otherwise the answer is simply whether ldtmu is signalled. */
+        return inst->qpu.sig.ldtmu ? true : false;
+}
+
+void
+vir_set_unpack(struct qinst *inst, int src,
+               enum v3d_qpu_input_unpack unpack)
+{
+        assert(src == 0 || src == 1); /* 0 selects operand a, 1 operand b */
+        if (vir_is_add(inst)) {
+                if (src != 0)
+                        inst->qpu.alu.add.b_unpack = unpack;
+                else
+                        inst->qpu.alu.add.a_unpack = unpack;
+                return;
+        }
+
+        assert(vir_is_mul(inst));
+        if (src != 0)
+                inst->qpu.alu.mul.b_unpack = unpack;
+        else
+                inst->qpu.alu.mul.a_unpack = unpack;
+}
+
+void
+vir_set_cond(struct qinst *inst, enum v3d_qpu_cond cond)
+{
+        if (vir_is_add(inst)) { /* add op in use: condition goes on ac */
+                inst->qpu.flags.ac = cond;
+                return;
+        }
+        assert(vir_is_mul(inst)); /* otherwise it must be a mul */
+        inst->qpu.flags.mc = cond;
+}
+
+void
+vir_set_pf(struct qinst *inst, enum v3d_qpu_pf pf)
+{
+        if (vir_is_add(inst)) { /* add op in use: flag update goes on apf */
+                inst->qpu.flags.apf = pf;
+                return;
+        }
+        assert(vir_is_mul(inst)); /* otherwise it must be a mul */
+        inst->qpu.flags.mpf = pf;
+}
+
+#if 0
+uint8_t
+vir_channels_written(struct qinst *inst)
+{
+ if (vir_is_mul(inst)) {
+ switch (inst->dst.pack) {
+ case QPU_PACK_MUL_NOP:
+ case QPU_PACK_MUL_8888:
+ return 0xf;
+ case QPU_PACK_MUL_8A:
+ return 0x1;
+ case QPU_PACK_MUL_8B:
+ return 0x2;
+ case QPU_PACK_MUL_8C:
+ return 0x4;
+ case QPU_PACK_MUL_8D:
+ return 0x8;
+ }
+ } else {
+ switch (inst->dst.pack) {
+ case QPU_PACK_A_NOP:
+ case QPU_PACK_A_8888:
+ case QPU_PACK_A_8888_SAT:
+ case QPU_PACK_A_32_SAT:
+ return 0xf;
+ case QPU_PACK_A_8A:
+ case QPU_PACK_A_8A_SAT:
+ return 0x1;
+ case QPU_PACK_A_8B:
+ case QPU_PACK_A_8B_SAT:
+ return 0x2;
+ case QPU_PACK_A_8C:
+ case QPU_PACK_A_8C_SAT:
+ return 0x4;
+ case QPU_PACK_A_8D:
+ case QPU_PACK_A_8D_SAT:
+ return 0x8;
+ case QPU_PACK_A_16A:
+ case QPU_PACK_A_16A_SAT:
+ return 0x3;
+ case QPU_PACK_A_16B:
+ case QPU_PACK_A_16B_SAT:
+ return 0xc;
+ }
+ }
+ unreachable("Bad pack field");
+}
+#endif
+
+struct qreg
+vir_get_temp(struct v3d_compile *c)
+{
+        const uint32_t capacity = c->defs_array_size;
+        struct qreg temp;
+        temp.file = QFILE_TEMP;
+        temp.index = c->num_temps++;
+
+        /* Keep c->defs indexable by temp number, zeroing the new tail. */
+        if (c->num_temps > capacity) {
+                const uint32_t grown = MAX2(capacity * 2, 16);
+                c->defs = reralloc(c, c->defs, struct qinst *, grown);
+                memset(&c->defs[capacity], 0,
+                       sizeof(c->defs[0]) * (grown - capacity));
+                c->defs_array_size = grown;
+        }
+
+        return temp;
+}
+
+struct qinst *
+vir_add_inst(enum v3d_qpu_add_op op, struct qreg dst, struct qreg src0, struct qreg src1)
+{
+        struct qinst *add = calloc(1, sizeof(struct qinst));
+
+        /* Start from a NOP encoding and fill in only the add op. */
+        add->qpu = v3d_qpu_nop();
+        add->qpu.alu.add.op = op;
+
+        add->uniform = ~0; /* all-ones; presumably means "none assigned" */
+        add->src[0] = src0;
+        add->src[1] = src1;
+        add->dst = dst;
+        return add;
+}
+
+struct qinst *
+vir_mul_inst(enum v3d_qpu_mul_op op, struct qreg dst, struct qreg src0, struct qreg src1)
+{
+        struct qinst *mul = calloc(1, sizeof(struct qinst));
+
+        /* Start from a NOP encoding and fill in only the mul op. */
+        mul->qpu = v3d_qpu_nop();
+        mul->qpu.alu.mul.op = op;
+
+        mul->uniform = ~0; /* all-ones; presumably means "none assigned" */
+        mul->src[0] = src0;
+        mul->src[1] = src1;
+        mul->dst = dst;
+        return mul;
+}
+
+struct qinst *
+vir_branch_inst(enum v3d_qpu_branch_cond cond, struct qreg src)
+{
+        struct qinst *br = calloc(1, sizeof(struct qinst));
+
+        /* Turn a NOP encoding into a branch on cond with relative destinations. */
+        br->qpu = v3d_qpu_nop();
+        br->qpu.type = V3D_QPU_INSTR_TYPE_BRANCH;
+        br->qpu.branch.cond = cond;
+        br->qpu.branch.msfign = V3D_QPU_MSFIGN_NONE;
+        br->qpu.branch.bdi = V3D_QPU_BRANCH_DEST_REL;
+        br->qpu.branch.bdu = V3D_QPU_BRANCH_DEST_REL;
+        br->qpu.branch.ub = true;
+
+        br->dst = vir_reg(QFILE_NULL, 0);
+        br->src[0] = src;
+        br->uniform = ~0;
+        return br;
+}
+
+static void
+vir_emit(struct v3d_compile *c, struct qinst *inst)
+{
+        const bool writes_vpm = inst->dst.file == QFILE_MAGIC &&
+                                inst->dst.index == V3D_QPU_WADDR_VPM;
+        list_addtail(&inst->link, &c->cur_block->instructions);
+        if (writes_vpm)
+                c->num_vpm_writes++; /* later used to size the VPM output segment */
+}
+
+/* Gives inst a new temp as its destination, records inst as that temp's def, emits it, and returns the temp. */
+struct qreg
+vir_emit_def(struct v3d_compile *c, struct qinst *inst)
+{
+        assert(inst->dst.file == QFILE_NULL);
+
+        const struct qreg result = vir_get_temp(c);
+        inst->dst = result;
+
+        if (result.file == QFILE_TEMP)
+                c->defs[result.index] = inst;
+
+        vir_emit(c, inst);
+        return inst->dst;
+}
+
+struct qinst *
+vir_emit_nondef(struct v3d_compile *c, struct qinst *inst)
+{
+        /* Forget any def recorded for a temp destination, then emit. */
+        const struct qreg *dst = &inst->dst;
+        if (dst->file == QFILE_TEMP)
+                c->defs[dst->index] = NULL;
+        vir_emit(c, inst);
+        return inst;
+}
+
+struct qblock *
+vir_new_block(struct v3d_compile *c)
+{
+        /* The block is allocated with the compile as its ralloc parent, and
+         * the predecessor set with the block as its parent.
+         */
+        struct qblock *blk = rzalloc(c, struct qblock);
+
+        blk->index = c->next_block_index++;
+        list_inithead(&blk->instructions);
+        blk->predecessors = _mesa_set_create(blk, _mesa_hash_pointer,
+                                             _mesa_key_pointer_equal);
+
+        return blk;
+}
+
+void
+vir_set_emit_block(struct v3d_compile *c, struct qblock *block)
+{
+        list_addtail(&block->link, &c->blocks); /* the block also joins c->blocks */
+        c->cur_block = block;
+}
+
+struct qblock *
+vir_entry_block(struct v3d_compile *c)
+{
+ return list_first_entry(&c->blocks, struct qblock, link); /* first block on c->blocks */
+}
+
+struct qblock *
+vir_exit_block(struct v3d_compile *c)
+{
+ return list_last_entry(&c->blocks, struct qblock, link); /* last block on c->blocks */
+}
+
+void
+vir_link_blocks(struct qblock *predecessor, struct qblock *successor)
+{
+        /* Use the first free successor slot; the second must still be empty. */
+        const int slot = predecessor->successors[0] ? 1 : 0;
+
+        _mesa_set_add(successor->predecessors, predecessor);
+        if (slot == 1)
+                assert(!predecessor->successors[1]);
+        predecessor->successors[slot] = successor;
+}
+
+const struct v3d_compiler *
+v3d_compiler_init(const struct v3d_device_info *devinfo)
+{
+        struct v3d_compiler *compiler = rzalloc(NULL, struct v3d_compiler);
+
+        if (compiler == NULL)
+                return NULL;
+        compiler->devinfo = devinfo;
+
+        /* If the register sets cannot be set up, undo the allocation. */
+        if (vir_init_reg_sets(compiler))
+                return compiler;
+
+        ralloc_free(compiler);
+        return NULL;
+}
+
+void
+v3d_compiler_free(const struct v3d_compiler *compiler)
+{
+ ralloc_free((void *)compiler); /* the cast drops const for ralloc_free() */
+}
+
+static struct v3d_compile *
+vir_compile_init(const struct v3d_compiler *compiler,
+                 struct v3d_key *key,
+                 nir_shader *s,
+                 int program_id, int variant_id)
+{
+        struct v3d_compile *c = rzalloc(NULL, struct v3d_compile);
+
+        c->compiler = compiler;
+        c->devinfo = compiler->devinfo;
+        c->key = key;
+        c->program_id = program_id;
+        c->variant_id = variant_id;
+
+        /* Work on a clone of the shader, allocated under the compile. */
+        c->s = nir_shader_clone(c, s);
+
+        /* Start emitting into a fresh first block. */
+        list_inithead(&c->blocks);
+        vir_set_emit_block(c, vir_new_block(c));
+
+        c->output_sample_mask_index = -1;
+        c->output_point_size_index = -1;
+        c->output_position_index = -1;
+
+        c->def_ht = _mesa_hash_table_create(c, _mesa_hash_pointer,
+                                            _mesa_key_pointer_equal);
+        return c;
+}
+
+static void
+v3d_lower_nir(struct v3d_compile *c)
+{
+        struct nir_lower_tex_options opts = {
+                .lower_rect = false, /* XXX */
+                .lower_txp = ~0,
+                /* Apply swizzles to all samplers. */
+                .swizzle_result = ~0,
+        };
+
+        /* Copy each texture's 4-channel swizzle from the key; this lowers the
+         * format swizzle and (for 32-bit returns) ARB_texture_swizzle-style swizzle.
+         */
+        for (int unit = 0; unit < ARRAY_SIZE(c->key->tex); unit++) {
+                for (int chan = 0; chan < 4; chan++)
+                        opts.swizzles[unit][chan] = c->key->tex[unit].swizzle[chan];
+        }
+
+        NIR_PASS_V(c->s, nir_lower_tex, &opts);
+}
+
+static void
+v3d_lower_nir_late(struct v3d_compile *c)
+{
+ NIR_PASS_V(c->s, v3d_nir_lower_io, c); /* scalarizes uniform loads, drops unused VS outputs */
+ NIR_PASS_V(c->s, nir_lower_idiv);
+}
+
+static void
+v3d_set_prog_data_uniforms(struct v3d_compile *c,
+                           struct v3d_prog_data *prog_data)
+{
+        struct v3d_uniform_list *ulist = &prog_data->uniforms;
+        const int n = c->num_uniforms;
+
+        /* Copy both parallel uniform arrays into storage allocated under prog_data. */
+        ulist->count = n;
+        ulist->data = ralloc_array(prog_data, uint32_t, n);
+        ulist->contents = ralloc_array(prog_data, enum quniform_contents, n);
+
+        memcpy(ulist->data, c->uniform_data, n * sizeof(ulist->data[0]));
+        memcpy(ulist->contents, c->uniform_contents, n * sizeof(ulist->contents[0]));
+}
+
+/* Copy the compiler UBO range state to the compiled shader, dropping out
+ * arrays that were never referenced by an indirect load.
+ *
+ * (Note that VIR dead code elimination of an array access still leaves that
+ * array alive, though)
+ */
+static void
+v3d_set_prog_data_ubo(struct v3d_compile *c,
+                      struct v3d_prog_data *prog_data)
+{
+        if (!c->num_ubo_ranges)
+                return;
+
+        /* Start both the range count and the size total from zero. */
+        prog_data->num_ubo_ranges = 0;
+        prog_data->ubo_size = 0;
+        prog_data->ubo_ranges = ralloc_array(prog_data, struct v3d_ubo_range,
+                                             c->num_ubo_ranges);
+        for (int i = 0; i < c->num_ubo_ranges; i++) {
+                if (!c->ubo_range_used[i])
+                        continue;
+
+                struct v3d_ubo_range *range = &c->ubo_ranges[i];
+                prog_data->ubo_ranges[prog_data->num_ubo_ranges++] = *range;
+                prog_data->ubo_size += range->size;
+        }
+
+        if (prog_data->ubo_size && (V3D_DEBUG & V3D_DEBUG_SHADERDB)) {
+                fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d UBO uniforms\n",
+                        vir_get_stage_name(c),
+                        c->program_id, c->variant_id,
+                        prog_data->ubo_size / 4);
+        }
+}
+
+static void
+v3d_set_prog_data(struct v3d_compile *c,
+ struct v3d_prog_data *prog_data)
+{
+ v3d_set_prog_data_uniforms(c, prog_data); /* uniform contents/data arrays */
+ v3d_set_prog_data_ubo(c, prog_data); /* UBO ranges that were marked used */
+}
+
+/* Copies the generated QPU instructions into a malloc()ed buffer owned by the
+ * caller and destroys the compile.  Returns NULL if the allocation fails.
+ */
+static uint64_t *
+v3d_return_qpu_insts(struct v3d_compile *c, uint32_t *final_assembly_size)
+{
+        *final_assembly_size = c->qpu_inst_count * sizeof(uint64_t);
+        uint64_t *qpu_insts = malloc(*final_assembly_size);
+        if (qpu_insts)
+                memcpy(qpu_insts, c->qpu_insts, *final_assembly_size);
+
+        /* Always release the compile, including on allocation failure. */
+        vir_compile_destroy(c);
+        return qpu_insts;
+}
+
+/* Compiles one vertex shader variant from a clone of s, fills in prog_data,
+ * and returns a malloc()ed copy of the QPU code (NULL if that allocation
+ * fails), storing its size in bytes in *final_assembly_size.
+ */
+uint64_t *v3d_compile_vs(const struct v3d_compiler *compiler,
+                         struct v3d_vs_key *key,
+                         struct v3d_vs_prog_data *prog_data,
+                         nir_shader *s,
+                         int program_id, int variant_id,
+                         uint32_t *final_assembly_size)
+{
+        struct v3d_compile *c = vir_compile_init(compiler, &key->base, s,
+                                                 program_id, variant_id);
+
+        c->vs_key = key;
+
+        v3d_lower_nir(c);
+
+        if (key->clamp_color)
+                NIR_PASS_V(c->s, nir_lower_clamp_color_outputs);
+
+        if (key->base.ucp_enables)
+                NIR_PASS_V(c->s, nir_lower_clip_vs, key->base.ucp_enables);
+
+        /* Note: VS output scalarizing must happen after nir_lower_clip_vs. */
+        NIR_PASS_V(c->s, nir_lower_io_to_scalar, nir_var_shader_out);
+
+        v3d_lower_nir_late(c);
+        v3d_optimize_nir(c->s);
+        NIR_PASS_V(c->s, nir_convert_from_ssa, true);
+
+        v3d_nir_to_vir(c);
+        v3d_set_prog_data(c, &prog_data->base);
+        prog_data->base.num_inputs = c->num_inputs;
+
+        /* The vertex data gets format converted by the VPM so that
+         * each attribute channel takes up a VPM column.  Precompute
+         * the sizes for the shader record, summing from zero rather than
+         * from whatever prog_data->vpm_input_size held on entry.
+         */
+        uint32_t vpm_input_components = 0;
+        for (int i = 0; i < ARRAY_SIZE(prog_data->vattr_sizes); i++) {
+                prog_data->vattr_sizes[i] = c->vattr_sizes[i];
+                vpm_input_components += c->vattr_sizes[i];
+        }
+
+        /* Input/output segment size are in 8x32-bit multiples. */
+        prog_data->vpm_input_size = align(vpm_input_components, 8) / 8;
+        prog_data->vpm_output_size = align(c->num_vpm_writes, 8) / 8;
+
+        prog_data->uses_vid = (s->info.system_values_read &
+                               (1ull << SYSTEM_VALUE_VERTEX_ID));
+        prog_data->uses_iid = (s->info.system_values_read &
+                               (1ull << SYSTEM_VALUE_INSTANCE_ID));
+
+        return v3d_return_qpu_insts(c, final_assembly_size);
+}
+
+static void
+v3d_set_fs_prog_data_inputs(struct v3d_compile *c,
+                            struct v3d_fs_prog_data *prog_data)
+{
+        const size_t slot_bytes = c->num_inputs * sizeof(*c->input_slots);
+
+        /* Copy the first num_inputs input slots, then both flag bitsets in
+         * full. */
+        prog_data->base.num_inputs = c->num_inputs;
+        memcpy(prog_data->input_slots, c->input_slots, slot_bytes);
+        memcpy(prog_data->flat_shade_flags, c->flat_shade_flags, sizeof(c->flat_shade_flags));
+        memcpy(prog_data->shade_model_flags, c->shade_model_flags, sizeof(c->shade_model_flags));
+}
+
+/**
+ * Compiles one fragment shader variant.
+ *
+ * Creates a compile context for the NIR shader s, applies the NIR lowering
+ * passes selected by the key, translates the result to VIR, fills in
+ * prog_data, and returns whatever v3d_return_qpu_insts() produces (which
+ * also receives final_assembly_size).
+ */
+uint64_t *v3d_compile_fs(const struct v3d_compiler *compiler,
+                         struct v3d_fs_key *key,
+                         struct v3d_fs_prog_data *prog_data,
+                         nir_shader *s,
+                         int program_id, int variant_id,
+                         uint32_t *final_assembly_size)
+{
+        struct v3d_compile *c =
+                vir_compile_init(compiler, &key->base, s,
+                                 program_id, variant_id);
+
+        c->fs_key = key;
+
+        v3d_lower_nir(c);
+
+        /* Key-dependent lowering of fixed-function state into the shader. */
+        if (key->light_twoside)
+                NIR_PASS_V(c->s, nir_lower_two_sided_color);
+        if (key->clamp_color)
+                NIR_PASS_V(c->s, nir_lower_clamp_color_outputs);
+        if (key->alpha_test)
+                NIR_PASS_V(c->s, nir_lower_alpha_test, key->alpha_test_func,
+                           false);
+        if (key->base.ucp_enables)
+                NIR_PASS_V(c->s, nir_lower_clip_fs, key->base.ucp_enables);
+
+        /* Note: FS input scalarizing must happen after
+         * nir_lower_two_sided_color, which only handles a vec4 at a time.
+         */
+        NIR_PASS_V(c->s, nir_lower_io_to_scalar, nir_var_shader_in);
+
+        v3d_lower_nir_late(c);
+        v3d_optimize_nir(c->s);
+        NIR_PASS_V(c->s, nir_convert_from_ssa, true);
+
+        v3d_nir_to_vir(c);
+
+        /* Publish the results of compilation to the caller's prog_data. */
+        v3d_set_prog_data(c, &prog_data->base);
+        v3d_set_fs_prog_data_inputs(c, prog_data);
+        prog_data->discard = c->s->info.fs.uses_discard;
+        prog_data->writes_z = (c->s->info.outputs_written &
+                               (1 << FRAG_RESULT_DEPTH));
+
+        return v3d_return_qpu_insts(c, final_assembly_size);
+}
+
+/**
+ * Unlinks qinst from the instruction list it is on and frees it. When the
+ * instruction's destination is a temp, that temp's c->defs[] entry is
+ * cleared too.
+ */
+void
+vir_remove_instruction(struct v3d_compile *c, struct qinst *qinst)
+{
+        const bool writes_temp = (qinst->dst.file == QFILE_TEMP);
+
+        list_del(&qinst->link);
+
+        if (writes_temp)
+                c->defs[qinst->dst.index] = NULL;
+
+        free(qinst);
+}
+
+struct qreg
+vir_follow_movs(struct v3d_compile *c, struct qreg reg)
+{
+ /* XXX: Walking a chain of MOVs back to the original source is not
+ * enabled, so this currently returns reg unchanged. The disabled
+ * implementation is kept below for reference:
+ *
+ int pack = reg.pack;
+
+ while (reg.file == QFILE_TEMP &&
+ c->defs[reg.index] &&
+ (c->defs[reg.index]->op == QOP_MOV ||
+ c->defs[reg.index]->op == QOP_FMOV) &&
+ !c->defs[reg.index]->dst.pack &&
+ !c->defs[reg.index]->src[0].pack) {
+ reg = c->defs[reg.index]->src[0];
+ }
+
+ reg.pack = pack;
+ */
+ return reg;
+}
+
+/**
+ * Destroys a compile context: every instruction still linked into a block
+ * is removed and freed, then c itself is released with ralloc_free().
+ */
+void
+vir_compile_destroy(struct v3d_compile *c)
+{
+        vir_for_each_block(block, c) {
+                /* Keep removing the head instruction until none are left. */
+                while (!list_empty(&block->instructions)) {
+                        vir_remove_instruction(c,
+                                               list_first_entry(&block->instructions,
+                                                                struct qinst,
+                                                                link));
+                }
+        }
+
+        ralloc_free(c);
+}
+
+/**
+ * Returns a QFILE_UNIF register for the uniform described by
+ * (contents, data).
+ *
+ * If a uniform slot with the same contents and data already exists it is
+ * reused; otherwise a new slot is appended, growing the uniform arrays when
+ * they are full.
+ */
+struct qreg
+vir_uniform(struct v3d_compile *c,
+            enum quniform_contents contents,
+            uint32_t data)
+{
+        /* Reuse an identical uniform if one was already allocated. */
+        for (int i = 0; i < c->num_uniforms; i++) {
+                if (c->uniform_contents[i] != contents)
+                        continue;
+                if (c->uniform_data[i] != data)
+                        continue;
+
+                return vir_reg(QFILE_UNIF, i);
+        }
+
+        uint32_t slot = c->num_uniforms++;
+
+        /* The two arrays are grown together: to at least 16 entries, at
+         * least enough to hold the new slot, and otherwise by doubling.
+         */
+        if (slot >= c->uniform_array_size) {
+                c->uniform_array_size = MAX2(MAX2(16, slot + 1),
+                                             c->uniform_array_size * 2);
+
+                c->uniform_contents = reralloc(c, c->uniform_contents,
+                                               enum quniform_contents,
+                                               c->uniform_array_size);
+                c->uniform_data = reralloc(c, c->uniform_data,
+                                           uint32_t,
+                                           c->uniform_array_size);
+        }
+
+        c->uniform_data[slot] = data;
+        c->uniform_contents[slot] = contents;
+
+        return vir_reg(QFILE_UNIF, slot);
+}
+
+/**
+ * Applies the flag-update mode pf to the instruction producing src.
+ *
+ * If src is a temp whose recorded definition is the last instruction of the
+ * current block, pf is set on that instruction. Otherwise a MOV of src to
+ * the null register is emitted and pf is set on the block's new last
+ * instruction.
+ */
+void
+vir_PF(struct v3d_compile *c, struct qreg src, enum v3d_qpu_pf pf)
+{
+        struct qinst *last_inst = NULL;
+
+        if (!list_empty(&c->cur_block->instructions))
+                last_inst = (struct qinst *)c->cur_block->instructions.prev;
+
+        const bool src_is_last_def = (src.file == QFILE_TEMP &&
+                                      c->defs[src.index] &&
+                                      last_inst == c->defs[src.index]);
+
+        if (!src_is_last_def) {
+                /* XXX: Make the MOV be the appropriate type */
+                vir_MOV_dest(c, vir_reg(QFILE_NULL, 0), src);
+                last_inst = (struct qinst *)c->cur_block->instructions.prev;
+        }
+
+        vir_set_pf(last_inst, pf);
+}
+
+/* Runs the optimization pass func on c. If the pass reports progress, the
+ * surrounding "progress" flag is set and, when print_opt_debug is enabled,
+ * the pass name and the current "pass" number are logged to stderr.
+ *
+ * Relies on c, progress, print_opt_debug and pass being in scope where the
+ * macro is used.
+ */
+#define OPTPASS(func)                                                   \
+        do {                                                            \
+                if (func(c)) {                                          \
+                        progress = true;                                \
+                        if (print_opt_debug) {                          \
+                                fprintf(stderr,                         \
+                                        "VIR opt pass %2d: %s progress\n", \
+                                        pass, #func);                   \
+                        }                                               \
+                        /*XXX vir_validate(c);*/                        \
+                }                                                       \
+        } while (0)
+
+/**
+ * Runs the VIR optimization passes repeatedly until a complete round makes
+ * no further progress.
+ */
+void
+vir_optimize(struct v3d_compile *c)
+{
+        bool print_opt_debug = false;
+        bool progress;
+        int pass = 0;
+
+        do {
+                /* Rounds are numbered from 1 for the debug output. */
+                pass++;
+                progress = false;
+
+                OPTPASS(vir_opt_copy_propagate);
+                OPTPASS(vir_opt_dead_code);
+        } while (progress);
+}
+
+/**
+ * Returns a printable name for the shader stage being compiled. A vertex
+ * shader key with is_coord set is reported as "MESA_SHADER_COORD"; anything
+ * else uses gl_shader_stage_name() for the NIR shader's stage.
+ */
+const char *
+vir_get_stage_name(struct v3d_compile *c)
+{
+        const bool is_coord_shader = c->vs_key && c->vs_key->is_coord;
+
+        return is_coord_shader ? "MESA_SHADER_COORD"
+                               : gl_shader_stage_name(c->s->info.stage);
+}
diff -Nru mesa-17.2.4/src/broadcom/compiler/vir_dump.c mesa-17.3.3/src/broadcom/compiler/vir_dump.c
--- mesa-17.2.4/src/broadcom/compiler/vir_dump.c 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/broadcom/compiler/vir_dump.c 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,339 @@
+/*
+ * Copyright © 2016-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "v3d_compiler.h"
+
+/* Prints to stderr the parenthesized annotation describing what the uniform
+ * slot reg.index holds, based on c->uniform_contents[] and
+ * c->uniform_data[].
+ */
+static void
+vir_print_uniform_contents(struct v3d_compile *c, struct qreg reg)
+{
+        static const char *quniform_names[] = {
+                [QUNIFORM_VIEWPORT_X_SCALE] = "vp_x_scale",
+                [QUNIFORM_VIEWPORT_Y_SCALE] = "vp_y_scale",
+                [QUNIFORM_VIEWPORT_Z_OFFSET] = "vp_z_offset",
+                [QUNIFORM_VIEWPORT_Z_SCALE] = "vp_z_scale",
+        };
+        enum quniform_contents kind = c->uniform_contents[reg.index];
+        uint32_t data = c->uniform_data[reg.index];
+
+        switch (kind) {
+        case QUNIFORM_CONSTANT:
+                fprintf(stderr, " (0x%08x / %f)", data, uif(data));
+                break;
+
+        case QUNIFORM_UNIFORM:
+                fprintf(stderr, " (push[%d])", data);
+                break;
+
+        case QUNIFORM_TEXTURE_CONFIG_P1:
+                fprintf(stderr, " (tex[%d].p1)", data);
+                break;
+
+        case QUNIFORM_TEXTURE_WIDTH:
+                fprintf(stderr, " (tex[%d].width)", data);
+                break;
+        case QUNIFORM_TEXTURE_HEIGHT:
+                fprintf(stderr, " (tex[%d].height)", data);
+                break;
+        case QUNIFORM_TEXTURE_DEPTH:
+                fprintf(stderr, " (tex[%d].depth)", data);
+                break;
+        case QUNIFORM_TEXTURE_ARRAY_SIZE:
+                fprintf(stderr, " (tex[%d].array_size)", data);
+                break;
+        case QUNIFORM_TEXTURE_LEVELS:
+                fprintf(stderr, " (tex[%d].levels)", data);
+                break;
+
+        case QUNIFORM_UBO_ADDR:
+                fprintf(stderr, " (ubo[%d])", data);
+                break;
+
+        default:
+                /* Texture P0 configs, named viewport uniforms, and finally
+                 * a raw dump for anything else.
+                 */
+                if (quniform_contents_is_texture_p0(kind)) {
+                        fprintf(stderr, " (tex[%d].p0: 0x%08x)",
+                                kind - QUNIFORM_TEXTURE_CONFIG_P0_0,
+                                data);
+                } else if (kind < ARRAY_SIZE(quniform_names)) {
+                        fprintf(stderr, " (%s)",
+                                quniform_names[kind]);
+                } else {
+                        fprintf(stderr, " (%d / 0x%08x)", kind,
+                                data);
+                }
+        }
+}
+
+/* Prints a human-readable form of reg to stderr. The format depends on the
+ * register file; uniforms additionally get an annotation describing their
+ * contents.
+ */
+static void
+vir_print_reg(struct v3d_compile *c, struct qreg reg)
+{
+        static const char *files[] = {
+                [QFILE_TEMP] = "t",
+                [QFILE_VARY] = "v",
+                [QFILE_UNIF] = "u",
+                [QFILE_TLB] = "tlb",
+                [QFILE_TLBU] = "tlbu",
+        };
+
+        switch (reg.file) {
+        case QFILE_NULL:
+                fprintf(stderr, "null");
+                break;
+
+        case QFILE_LOAD_IMM:
+                fprintf(stderr, "0x%08x (%f)", reg.index, uif(reg.index));
+                break;
+
+        case QFILE_REG:
+                fprintf(stderr, "rf%d", reg.index);
+                break;
+
+        case QFILE_MAGIC:
+                fprintf(stderr, "%s", v3d_qpu_magic_waddr_name(reg.index));
+                break;
+
+        case QFILE_SMALL_IMM:
+                /* Values in [-16, 15] print as integers, the rest as the
+                 * float with the same bit pattern.
+                 */
+                if ((int)reg.index >= -16 && (int)reg.index <= 15)
+                        fprintf(stderr, "%d", reg.index);
+                else
+                        fprintf(stderr, "%f", uif(reg.index));
+                break;
+
+        case QFILE_VPM:
+                fprintf(stderr, "vpm%d.%d",
+                        reg.index / 4, reg.index % 4);
+                break;
+
+        case QFILE_TLB:
+                fprintf(stderr, "%s", files[reg.file]);
+                break;
+
+        case QFILE_UNIF:
+                fprintf(stderr, "%s%d", files[reg.file], reg.index);
+                vir_print_uniform_contents(c, reg);
+                break;
+
+        default:
+                fprintf(stderr, "%s%d", files[reg.file], reg.index);
+                break;
+        }
+}
+
+/* Prints a "; <name>" suffix to stderr for each signal bit that is set on
+ * inst, in a fixed order.
+ */
+static void
+vir_dump_sig(struct v3d_compile *c, struct qinst *inst)
+{
+        struct v3d_qpu_sig *sig = &inst->qpu.sig;
+        const struct {
+                bool set;
+                const char *text;
+        } signals[] = {
+                { sig->thrsw,  "; thrsw" },
+                { sig->ldvary, "; ldvary" },
+                { sig->ldvpm,  "; ldvpm" },
+                { sig->ldtmu,  "; ldtmu" },
+                { sig->ldunif, "; ldunif" },
+                { sig->wrtmuc, "; wrtmuc" },
+        };
+
+        for (unsigned i = 0; i < ARRAY_SIZE(signals); i++) {
+                if (signals[i].set)
+                        fputs(signals[i].text, stderr);
+        }
+}
+
+/* Prints an ALU instruction to stderr: the opcode of whichever unit is in
+ * use (add unless its op is NOP, otherwise mul) with its condition and flag
+ * suffixes, the destination with its output pack, every source (with an
+ * unpack suffix for the non-sideband ones), and finally the signal bits.
+ */
+static void
+vir_dump_alu(struct v3d_compile *c, struct qinst *inst)
+{
+        struct v3d_qpu_instr *instr = &inst->qpu;
+        int unpack_nsrc = vir_get_non_sideband_nsrc(inst);
+        int total_nsrc = vir_get_nsrc(inst);
+        enum v3d_qpu_input_unpack unpack[2];
+
+        if (instr->alu.add.op == V3D_QPU_A_NOP) {
+                /* Nothing on the add unit, so describe the mul unit. */
+                fprintf(stderr, "%s", v3d_qpu_mul_op_name(instr->alu.mul.op));
+                fprintf(stderr, "%s", v3d_qpu_cond_name(instr->flags.mc));
+                fprintf(stderr, "%s", v3d_qpu_pf_name(instr->flags.mpf));
+                fprintf(stderr, "%s", v3d_qpu_uf_name(instr->flags.muf));
+                fprintf(stderr, " ");
+
+                vir_print_reg(c, inst->dst);
+                fprintf(stderr, "%s",
+                        v3d_qpu_pack_name(instr->alu.mul.output_pack));
+
+                unpack[0] = instr->alu.mul.a_unpack;
+                unpack[1] = instr->alu.mul.b_unpack;
+        } else {
+                fprintf(stderr, "%s", v3d_qpu_add_op_name(instr->alu.add.op));
+                fprintf(stderr, "%s", v3d_qpu_cond_name(instr->flags.ac));
+                fprintf(stderr, "%s", v3d_qpu_pf_name(instr->flags.apf));
+                fprintf(stderr, "%s", v3d_qpu_uf_name(instr->flags.auf));
+                fprintf(stderr, " ");
+
+                vir_print_reg(c, inst->dst);
+                fprintf(stderr, "%s",
+                        v3d_qpu_pack_name(instr->alu.add.output_pack));
+
+                unpack[0] = instr->alu.add.a_unpack;
+                unpack[1] = instr->alu.add.b_unpack;
+        }
+
+        for (int i = 0; i < total_nsrc; i++) {
+                fprintf(stderr, ", ");
+                vir_print_reg(c, inst->src[i]);
+
+                /* Only the non-sideband sources have an unpack mode. */
+                if (i < unpack_nsrc)
+                        fprintf(stderr, "%s", v3d_qpu_unpack_name(unpack[i]));
+        }
+
+        vir_dump_sig(c, inst);
+}
+
+/* Prints a branch instruction to stderr: mnemonic with condition and msfign
+ * suffixes, the branch destination, the uniform-branch destination when
+ * branch.ub is set, and the implicit uniform source if the instruction has
+ * one.
+ */
+static void
+vir_dump_branch(struct v3d_compile *c, struct qinst *inst)
+{
+        struct v3d_qpu_instr *instr = &inst->qpu;
+
+        fprintf(stderr, "b");
+        if (instr->branch.ub)
+                fprintf(stderr, "u");
+
+        fprintf(stderr, "%s",
+                v3d_qpu_branch_cond_name(instr->branch.cond));
+        fprintf(stderr, "%s", v3d_qpu_msfign_name(instr->branch.msfign));
+
+        switch (instr->branch.bdi) {
+        case V3D_QPU_BRANCH_DEST_ABS:
+                fprintf(stderr, " zero_addr+0x%08x", instr->branch.offset);
+                break;
+
+        case V3D_QPU_BRANCH_DEST_REL:
+                fprintf(stderr, " %d", instr->branch.offset);
+                break;
+
+        case V3D_QPU_BRANCH_DEST_LINK_REG:
+                fprintf(stderr, " lri");
+                break;
+
+        case V3D_QPU_BRANCH_DEST_REGFILE:
+                fprintf(stderr, " rf%d", instr->branch.raddr_a);
+                break;
+        }
+
+        if (instr->branch.ub) {
+                switch (instr->branch.bdu) {
+                case V3D_QPU_BRANCH_DEST_ABS:
+                        fprintf(stderr, ", a:unif");
+                        break;
+
+                case V3D_QPU_BRANCH_DEST_REL:
+                        fprintf(stderr, ", r:unif");
+                        break;
+
+                case V3D_QPU_BRANCH_DEST_LINK_REG:
+                        fprintf(stderr, ", lri");
+                        break;
+
+                case V3D_QPU_BRANCH_DEST_REGFILE:
+                        fprintf(stderr, ", rf%d", instr->branch.raddr_a);
+                        break;
+                }
+        }
+
+        if (vir_has_implicit_uniform(inst)) {
+                fprintf(stderr, " ");
+                vir_print_reg(c, inst->src[vir_get_implicit_uniform_src(inst)]);
+        }
+}
+
+/**
+ * Prints a single VIR instruction to stderr (without a trailing newline),
+ * dispatching on whether it is an ALU or a branch instruction.
+ */
+void
+vir_dump_inst(struct v3d_compile *c, struct qinst *inst)
+{
+        switch (inst->qpu.type) {
+        case V3D_QPU_INSTR_TYPE_ALU:
+                vir_dump_alu(c, inst);
+                break;
+        case V3D_QPU_INSTR_TYPE_BRANCH:
+                vir_dump_branch(c, inst);
+                break;
+        }
+}
+
+void
+vir_dump(struct v3d_compile *c)
+{
+ int ip = 0;
+ /* Dumps the whole program to stderr, block by block. When
+ * c->temp_start / c->temp_end are set, each instruction line is
+ * prefixed with the temps whose recorded start ("S") or end ("E")
+ * equals that instruction's position. ip counts instructions
+ * across all blocks so it can be compared against those arrays.
+ */
+ vir_for_each_block(block, c) {
+ fprintf(stderr, "BLOCK %d:\n", block->index);
+ vir_for_each_inst(inst, block) {
+ if (c->temp_start) {
+ bool first = true; /* no "S" entry printed yet on this line */
+
+ for (int i = 0; i < c->num_temps; i++) {
+ if (c->temp_start[i] != ip)
+ continue;
+
+ if (first) {
+ first = false;
+ } else {
+ fprintf(stderr, ", ");
+ }
+ fprintf(stderr, "S%4d", i);
+ }
+ /* NOTE(review): both branches print the same
+ * string as shown here; presumably this was
+ * column padding of different widths -- confirm
+ * against the upstream file.
+ */
+ if (first)
+ fprintf(stderr, " ");
+ else
+ fprintf(stderr, " ");
+ }
+ /* Same listing for the temps whose interval ends here. */
+ if (c->temp_end) {
+ bool first = true; /* no "E" entry printed yet on this line */
+
+ for (int i = 0; i < c->num_temps; i++) {
+ if (c->temp_end[i] != ip)
+ continue;
+
+ if (first) {
+ first = false;
+ } else {
+ fprintf(stderr, ", ");
+ }
+ fprintf(stderr, "E%4d", i);
+ }
+
+ if (first)
+ fprintf(stderr, " ");
+ else
+ fprintf(stderr, " ");
+ }
+ /* The instruction itself, one per line. */
+ vir_dump_inst(c, inst);
+ fprintf(stderr, "\n");
+ ip++;
+ }
+ if (block->successors[1]) { /* two successors: print both */
+ fprintf(stderr, "-> BLOCK %d, %d\n",
+ block->successors[0]->index,
+ block->successors[1]->index);
+ } else if (block->successors[0]) { /* single successor */
+ fprintf(stderr, "-> BLOCK %d\n",
+ block->successors[0]->index);
+ }
+ }
+}
diff -Nru mesa-17.2.4/src/broadcom/compiler/vir_live_variables.c mesa-17.3.3/src/broadcom/compiler/vir_live_variables.c
--- mesa-17.2.4/src/broadcom/compiler/vir_live_variables.c 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/broadcom/compiler/vir_live_variables.c 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,340 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ * Copyright © 2016 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#define MAX_INSTRUCTION (1 << 30) /* initial temp_start[] value, lowered by MIN2() against the ips where the temp appears */
+
+#include "util/ralloc.h"
+#include "util/register_allocate.h"
+#include "v3d_compiler.h"
+
+struct partial_update_state { /* per-temp tracking of partial (conditional/packed) writes within a block */
+ struct qinst *insts[4]; /* per channel: last conditional instruction seen writing it */
+ uint8_t channels; /* bitmask of channels written unconditionally; 0xf marks the temp as defined */
+};
+
+/* Hash callback for tables whose keys point at an int: hashes the
+ * sizeof(int) bytes that key points to.
+ */
+static uint32_t
+int_hash(const void *key)
+{
+        const size_t key_bytes = sizeof(int);
+
+        return _mesa_hash_data(key, key_bytes);
+}
+
+/* Key-equality callback matching int_hash(): two keys are equal when the
+ * ints they point to are equal.
+ */
+static bool
+int_compare(const void *key1, const void *key2)
+{
+        const int lhs = *(const int *)key1;
+        const int rhs = *(const int *)key2;
+
+        return lhs == rhs;
+}
+
+/* Maps a register to its liveness variable number: the temp index for
+ * QFILE_TEMP registers, or -1 for every other file.
+ */
+static int
+vir_reg_to_var(struct qreg reg)
+{
+        return (reg.file == QFILE_TEMP) ? (int)reg.index : -1;
+}
+
+/* Records a read of src at instruction ip within block. Non-temp sources
+ * are ignored. For temps, the live range is widened to include ip and the
+ * block's use[] bit is set unless the block has already defined the temp.
+ */
+static void
+vir_setup_use(struct v3d_compile *c, struct qblock *block, int ip,
+              struct qreg src)
+{
+        const int var = vir_reg_to_var(src);
+
+        if (var != -1) {
+                c->temp_end[var] = MAX2(c->temp_end[var], ip);
+                c->temp_start[var] = MIN2(c->temp_start[var], ip);
+
+                /* The use[] bitset marks when the block makes
+                 * use of a variable without having completely
+                 * defined that variable within the block.
+                 */
+                if (!BITSET_TEST(block->def, var))
+                        BITSET_SET(block->use, var);
+        }
+}
+
+/* Returns the partial-update tracking state for inst's destination index,
+ * creating a zeroed entry in partial_update_ht on first use. The table is
+ * keyed by a pointer to inst->dst.index.
+ */
+static struct partial_update_state *
+get_partial_update_state(struct hash_table *partial_update_ht,
+                         struct qinst *inst)
+{
+        struct hash_entry *entry =
+                _mesa_hash_table_search(partial_update_ht,
+                                        &inst->dst.index);
+
+        if (!entry) {
+                struct partial_update_state *fresh =
+                        rzalloc(partial_update_ht,
+                                struct partial_update_state);
+
+                _mesa_hash_table_insert(partial_update_ht,
+                                        &inst->dst.index, fresh);
+                return fresh;
+        }
+
+        return entry->data;
+}
+
+static void
+vir_setup_def(struct v3d_compile *c, struct qblock *block, int ip,
+ struct hash_table *partial_update_ht, struct qinst *inst)
+{
+ if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU)
+ return;
+
+ /* Records inst's write to its destination temp at instruction ip:
+ * the temp's live range is widened to include ip, and the block's
+ * def[] bit is set once the temp is known to be fully written.
+ *
+ * The def[] bitset marks when an initialization in a
+ * block completely screens off previous updates of
+ * that variable.
+ */
+ int var = vir_reg_to_var(inst->dst);
+ if (var == -1)
+ return;
+ /* Any write to the temp, full or partial, extends its live range. */
+ c->temp_start[var] = MIN2(c->temp_start[var], ip);
+ c->temp_end[var] = MAX2(c->temp_end[var], ip);
+
+ /* If we've already tracked this as a def, or already used it within
+ * the block, there's nothing to do.
+ */
+ if (BITSET_TEST(block->use, var) || BITSET_TEST(block->def, var))
+ return;
+
+ /* Easy, common case: unconditional full register update.
+ *
+ * We treat conditioning on the exec mask as the same as not being
+ * conditional. This makes sure that if the register gets set on
+ * either side of an if, it is treated as being screened off before
+ * the if. Otherwise, if there was no intervening def, its live
+ * interval would extend back to the start of the program, and if too
+ * many registers did that we'd fail to register allocate.
+ */
+ if (((inst->qpu.flags.ac == V3D_QPU_COND_NONE &&
+ inst->qpu.flags.mc == V3D_QPU_COND_NONE) ||
+ inst->cond_is_exec_mask) &&
+ inst->qpu.alu.add.output_pack == V3D_QPU_PACK_NONE &&
+ inst->qpu.alu.mul.output_pack == V3D_QPU_PACK_NONE) {
+ BITSET_SET(block->def, var);
+ return;
+ }
+
+ /* Finally, look at the condition code and packing and mark it as a
+ * def. We need to make sure that we understand sequences of
+ * instructions like:
+ *
+ * mov.zs t0, t1
+ * mov.zc t0, t2
+ *
+ * or:
+ *
+ * mmov t0.8a, t1
+ * mmov t0.8b, t2
+ * mmov t0.8c, t3
+ * mmov t0.8d, t4
+ *
+ * as defining the temp within the block, because otherwise dst's live
+ * range will get extended up the control flow to the top of the
+ * program.
+ */
+ struct partial_update_state *state =
+ get_partial_update_state(partial_update_ht, inst);
+ uint8_t mask = 0xf; /* XXX vir_channels_written(inst); until then every write counts as covering all four channels */
+
+ if (inst->qpu.flags.ac == V3D_QPU_COND_NONE &&
+ inst->qpu.flags.mc == V3D_QPU_COND_NONE) {
+ state->channels |= mask;
+ } else {
+ for (int i = 0; i < 4; i++) {
+ if (!(mask & (1 << i)))
+ continue;
+
+ /* XXX: Pairing a conditional write with an earlier
+ * write under the complementary condition is
+ * disabled, so only the latest conditional writer of
+ * each channel is remembered:
+ *
+ if (state->insts[i] &&
+ state->insts[i]->cond ==
+ qpu_cond_complement(inst->cond))
+ state->channels |= 1 << i;
+ else
+ */
+ state->insts[i] = inst;
+ }
+ }
+ /* All four channels written: the temp is fully defined in this block. */
+ if (state->channels == 0xf)
+ BITSET_SET(block->def, var);
+}
+
+/* Forgets, for every tracked temp, the per-channel writer instructions that
+ * are conditional (add or mul condition other than V3D_QPU_COND_NONE).
+ */
+static void
+sf_state_clear(struct hash_table *partial_update_ht)
+{
+        struct hash_entry *entry;
+
+        hash_table_foreach(partial_update_ht, entry) {
+                struct partial_update_state *state = entry->data;
+
+                for (int chan = 0; chan < 4; chan++) {
+                        struct qinst *writer = state->insts[chan];
+
+                        if (!writer)
+                                continue;
+
+                        const bool conditional =
+                                writer->qpu.flags.ac != V3D_QPU_COND_NONE ||
+                                writer->qpu.flags.mc != V3D_QPU_COND_NONE;
+
+                        if (conditional)
+                                state->insts[chan] = NULL;
+                }
+        }
+}
+
+/* Sets up the def/use arrays for when variables are used-before-defined or
+ * defined-before-used in the block.
+ *
+ * Also initializes the temp_start/temp_end to cover just the instruction IPs
+ * where the variable is used, which will be extended later in
+ * vir_compute_start_end(), and records each block's start_ip/end_ip.
+ */
+static void
+vir_setup_def_use(struct v3d_compile *c)
+{
+        struct hash_table *partial_update_ht =
+                _mesa_hash_table_create(c, int_hash, int_compare);
+        int ip = 0;
+
+        vir_for_each_block(block, c) {
+                block->start_ip = ip;
+
+                /* Partial-update tracking does not carry across blocks. */
+                _mesa_hash_table_clear(partial_update_ht, NULL);
+
+                vir_for_each_inst(inst, block) {
+                        /* Sources are processed before the destination. */
+                        for (int i = 0; i < vir_get_nsrc(inst); i++)
+                                vir_setup_use(c, block, ip, inst->src[i]);
+
+                        vir_setup_def(c, block, ip, partial_update_ht, inst);
+
+                        if (false /* XXX inst->uf */)
+                                sf_state_clear(partial_update_ht);
+
+                        /* Payload registers: r0/1/2 contain W, centroid W,
+                         * and Z at program start. Register allocation will
+                         * force their nodes to R0/1/2.
+                         *
+                         * Only a temp destination has a temp_start[] slot;
+                         * for any other register file dst.index is not a
+                         * temp number and must not be used as one.
+                         */
+                        if (inst->dst.file == QFILE_TEMP &&
+                            inst->src[0].file == QFILE_REG) {
+                                switch (inst->src[0].index) {
+                                case 0:
+                                case 1:
+                                case 2:
+                                        c->temp_start[inst->dst.index] = 0;
+                                        break;
+                                }
+                        }
+
+                        ip++;
+                }
+                block->end_ip = ip;
+        }
+
+        _mesa_hash_table_destroy(partial_update_ht, NULL);
+}
+
+/* Performs one sweep of the liveness dataflow over all blocks in reverse
+ * order, growing each block's live_out from its successors' live_in and its
+ * live_in from use | (live_out & ~def).
+ *
+ * Returns true if any bit was added, meaning another sweep is needed.
+ */
+static bool
+vir_live_variables_dataflow(struct v3d_compile *c, int bitset_words)
+{
+        bool changed = false;
+
+        vir_for_each_block_rev(block, c) {
+                /* Update live_out: Any successor using the variable
+                 * on entrance needs us to have the variable live on
+                 * exit.
+                 */
+                vir_for_each_successor(succ, block) {
+                        for (int w = 0; w < bitset_words; w++) {
+                                BITSET_WORD missing = (succ->live_in[w] &
+                                                       ~block->live_out[w]);
+
+                                if (!missing)
+                                        continue;
+
+                                block->live_out[w] |= missing;
+                                changed = true;
+                        }
+                }
+
+                /* Update live_in */
+                for (int w = 0; w < bitset_words; w++) {
+                        BITSET_WORD wanted = (block->use[w] |
+                                              (block->live_out[w] &
+                                               ~block->def[w]));
+
+                        if (!(wanted & ~block->live_in[w]))
+                                continue;
+
+                        block->live_in[w] |= wanted;
+                        changed = true;
+                }
+        }
+
+        return changed;
+}
+
+/* Widens the live range of temp var so that it includes instruction ip. */
+static void
+vir_live_range_include(struct v3d_compile *c, int var, int ip)
+{
+        c->temp_start[var] = MIN2(c->temp_start[var], ip);
+        c->temp_end[var] = MAX2(c->temp_end[var], ip);
+}
+
+/**
+ * Extend the start/end ranges for each variable to account for the
+ * new information calculated from control flow: a variable live into a
+ * block covers that block's start_ip, and one live out of a block covers
+ * its end_ip.
+ */
+static void
+vir_compute_start_end(struct v3d_compile *c, int num_vars)
+{
+        vir_for_each_block(block, c) {
+                for (int var = 0; var < num_vars; var++) {
+                        if (BITSET_TEST(block->live_in, var))
+                                vir_live_range_include(c, var,
+                                                       block->start_ip);
+
+                        if (BITSET_TEST(block->live_out, var))
+                                vir_live_range_include(c, var,
+                                                       block->end_ip);
+                }
+        }
+}
+
+/**
+ * Computes c->temp_start[] / c->temp_end[] for every temp.
+ *
+ * Allocates the interval arrays and the per-block def/use/live_in/live_out
+ * bitsets, gathers the per-block def/use information, iterates the liveness
+ * dataflow until nothing changes, and finally widens the intervals with the
+ * resulting block-level liveness.
+ */
+void
+vir_calculate_live_intervals(struct v3d_compile *c)
+{
+        int bitset_words = BITSET_WORDS(c->num_temps);
+        bool changed;
+
+        /* If we called this function more than once, then we should be
+         * freeing the previous arrays.
+         */
+        assert(!c->temp_start);
+
+        c->temp_start = rzalloc_array(c, int, c->num_temps);
+        c->temp_end = rzalloc_array(c, int, c->num_temps);
+
+        /* Start from an empty interval for every temp. */
+        for (int temp = 0; temp < c->num_temps; temp++) {
+                c->temp_end[temp] = -1;
+                c->temp_start[temp] = MAX_INSTRUCTION;
+        }
+
+        vir_for_each_block(block, c) {
+                block->def = rzalloc_array(c, BITSET_WORD, bitset_words);
+                block->use = rzalloc_array(c, BITSET_WORD, bitset_words);
+                block->live_in = rzalloc_array(c, BITSET_WORD, bitset_words);
+                block->live_out = rzalloc_array(c, BITSET_WORD, bitset_words);
+        }
+
+        vir_setup_def_use(c);
+
+        /* Iterate to a fixed point. */
+        do {
+                changed = vir_live_variables_dataflow(c, bitset_words);
+        } while (changed);
+
+        vir_compute_start_end(c, c->num_temps);
+}
diff -Nru mesa-17.2.4/src/broadcom/compiler/vir_lower_uniforms.c mesa-17.3.3/src/broadcom/compiler/vir_lower_uniforms.c
--- mesa-17.2.4/src/broadcom/compiler/vir_lower_uniforms.c 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/broadcom/compiler/vir_lower_uniforms.c 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,209 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * @file vir_lower_uniforms.c
+ *
+ * This is the pre-code-generation pass for fixing up instructions that try to
+ * read from multiple uniform values.
+ */
+
+#include "v3d_compiler.h"
+#include "util/hash_table.h"
+#include "util/u_math.h"
+
+/* Hash callback for tables whose key is an integer stored directly in the
+ * key pointer: the hash is that integer, converted to uint32_t.
+ */
+static inline uint32_t
+index_hash(const void *key)
+{
+        const uintptr_t value = (uintptr_t)key;
+
+        return value;
+}
+
+/* Key-equality callback matching index_hash(): keys are equal when the
+ * pointer values themselves are equal.
+ */
+static inline bool
+index_compare(const void *a, const void *b)
+{
+        const bool same_key = (a == b);
+
+        return same_key;
+}
+
+/* Adds one reference to the uniform reg.index in the table ht.
+ *
+ * The key is the uniform index plus one stored directly in the key pointer
+ * (presumably so that index 0 never yields a NULL key), and the entry's data
+ * pointer holds the reference count as an integer.
+ */
+static void
+add_uniform(struct hash_table *ht, struct qreg reg)
+{
+        struct hash_entry *entry;
+        void *key = (void *)(uintptr_t)(reg.index + 1);
+
+        entry = _mesa_hash_table_search(ht, key);
+        if (entry) {
+                /* Bump the count through uintptr_t, mirroring
+                 * remove_uniform(); incrementing a void pointer directly is
+                 * a GNU extension rather than standard C.
+                 */
+                entry->data = (void *)(((uintptr_t) entry->data) + 1);
+        } else {
+                _mesa_hash_table_insert(ht, key, (void *)(uintptr_t)1);
+        }
+}
+
+/* Drops one reference to the uniform reg.index from the table ht, removing
+ * the entry once its count reaches zero. The uniform must be present.
+ */
+static void
+remove_uniform(struct hash_table *ht, struct qreg reg)
+{
+        void *key = (void *)(uintptr_t)(reg.index + 1);
+        struct hash_entry *entry = _mesa_hash_table_search(ht, key);
+
+        assert(entry);
+
+        const uintptr_t remaining = (uintptr_t)entry->data - 1;
+
+        entry->data = (void *)remaining;
+        if (entry->data == NULL)
+                _mesa_hash_table_remove(ht, entry);
+}
+
+/* Returns whether source i of inst is a uniform that may be replaced by a
+ * temp: it must be in QFILE_UNIF and must not be the instruction's implicit
+ * uniform source, if it has one.
+ */
+static bool
+is_lowerable_uniform(struct qinst *inst, int i)
+{
+        return inst->src[i].file == QFILE_UNIF &&
+               (!vir_has_implicit_uniform(inst) ||
+                i != vir_get_implicit_uniform_src(inst));
+}
+
+/* Returns the number of different uniform values referenced by the
+ * instruction. Sources that name the same uniform index are counted once.
+ */
+static uint32_t
+vir_get_instruction_uniform_count(struct qinst *inst)
+{
+        uint32_t distinct = 0;
+
+        for (int i = 0; i < vir_get_nsrc(inst); i++) {
+                if (inst->src[i].file != QFILE_UNIF)
+                        continue;
+
+                /* Skip this source if an earlier one already referenced
+                 * the same uniform.
+                 */
+                bool seen_before = false;
+                for (int j = 0; j < i && !seen_before; j++) {
+                        seen_before =
+                                inst->src[j].file == QFILE_UNIF &&
+                                inst->src[j].index == inst->src[i].index;
+                }
+
+                distinct += seen_before ? 0 : 1;
+        }
+
+        return distinct;
+}
+
+void
+vir_lower_uniforms(struct v3d_compile *c)
+{
+ struct hash_table *ht =
+ _mesa_hash_table_create(c, index_hash, index_compare);
+
+ /* Rewrites instructions that reference more than one distinct
+ * uniform so that the extra uniforms are read through temps loaded
+ * by a MOV at the top of the block.
+ *
+ * Walk the instruction list, finding which instructions have more
+ * than one uniform referenced, and add those uniform values to the
+ * ht.
+ */
+ vir_for_each_inst_inorder(inst, c) {
+ uint32_t nsrc = vir_get_nsrc(inst);
+
+ if (vir_get_instruction_uniform_count(inst) <= 1)
+ continue;
+
+ for (int i = 0; i < nsrc; i++) {
+ if (is_lowerable_uniform(inst, i))
+ add_uniform(ht, inst->src[i]); /* one reference per lowerable source */
+ }
+ }
+ /* Lower one uniform per iteration until no references remain. */
+ while (ht->entries) {
+ /* Find the most commonly used uniform in instructions that
+ * need a uniform lowered.
+ */
+ uint32_t max_count = 0;
+ uint32_t max_index = 0;
+ struct hash_entry *entry;
+ hash_table_foreach(ht, entry) {
+ uint32_t count = (uintptr_t)entry->data; /* reference count */
+ uint32_t index = (uintptr_t)entry->key - 1; /* keys are stored as index + 1 */
+ if (count > max_count) {
+ max_count = count;
+ max_index = index;
+ }
+ }
+
+ struct qreg unif = vir_reg(QFILE_UNIF, max_index);
+
+ /* Now, find the instructions using this uniform and make them
+ * reference a temp instead.
+ */
+ vir_for_each_block(block, c) {
+ struct qinst *mov = NULL; /* this block's load of unif, created lazily */
+
+ vir_for_each_inst(inst, block) {
+ uint32_t nsrc = vir_get_nsrc(inst);
+
+ uint32_t count = vir_get_instruction_uniform_count(inst);
+
+ if (count <= 1)
+ continue;
+
+ /* If the block doesn't have a load of the
+ * uniform yet, add it. We could potentially
+ * do better and CSE MOVs from multiple blocks
+ * into dominating blocks, except that may
+ * cause troubles for register allocation.
+ *
+ * Note that the MOV is created for the first
+ * instruction in the block that still reads
+ * more than one uniform, whether or not that
+ * instruction reads this particular uniform.
+ */
+ if (!mov) {
+ mov = vir_mul_inst(V3D_QPU_M_MOV,
+ vir_get_temp(c),
+ unif, c->undef);
+ list_add(&mov->link,
+ &block->instructions);
+ c->defs[mov->dst.index] = mov;
+ }
+ /* Redirect every lowerable read of this uniform to the temp. */
+ bool removed = false;
+ for (int i = 0; i < nsrc; i++) {
+ if (is_lowerable_uniform(inst, i) &&
+ inst->src[i].index == max_index) {
+ inst->src[i].file =
+ mov->dst.file;
+ inst->src[i].index =
+ mov->dst.index;
+ remove_uniform(ht, unif);
+ removed = true;
+ }
+ }
+ if (removed)
+ count--; /* one fewer distinct uniform is read now */
+
+ /* If the instruction doesn't need lowering any more,
+ * then drop it from the list.
+ */
+ if (count <= 1) {
+ for (int i = 0; i < nsrc; i++) {
+ if (is_lowerable_uniform(inst, i))
+ remove_uniform(ht, inst->src[i]);
+ }
+ }
+ }
+ }
+ }
+
+ _mesa_hash_table_destroy(ht, NULL);
+}
diff -Nru mesa-17.2.4/src/broadcom/compiler/vir_opt_copy_propagate.c mesa-17.3.3/src/broadcom/compiler/vir_opt_copy_propagate.c
--- mesa-17.2.4/src/broadcom/compiler/vir_opt_copy_propagate.c 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/broadcom/compiler/vir_opt_copy_propagate.c 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,233 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * @file vir_opt_copy_propagate.c
+ *
+ * This implements simple copy propagation for VIR without control flow.
+ *
+ * For each temp, it keeps a qreg of which source it was MOVed from, if it
+ * was. If we see that used later, we can just reuse the source value, since
+ * we know we don't have control flow, and we have SSA for our values so
+ * there's no killing to worry about.
+ */
+
+#include "v3d_compiler.h"
+
+/**
+ * Returns whether inst is a plain copy that copy propagation may look
+ * through: an ALU instruction whose mul op is MOV or FMOV, writing a temp
+ * from a temp or uniform source, with no output packing and no condition on
+ * either ALU unit. A NULL inst is not a copy.
+ */
+static bool
+is_copy_mov(struct qinst *inst)
+{
+        if (!inst)
+                return false;
+
+        if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU ||
+            (inst->qpu.alu.mul.op != V3D_QPU_M_FMOV &&
+             inst->qpu.alu.mul.op != V3D_QPU_M_MOV)) {
+                return false;
+        }
+
+        if (inst->dst.file != QFILE_TEMP)
+                return false;
+
+        /* Only temps and uniforms are propagated. Rejecting every other
+         * source file here also covers the cases that must never be
+         * propagated: MOVs from the magic R3/R4/R5 registers (those exist to
+         * register allocate values produced into R3/4/5 to other regs) and
+         * MOVs from rf0/1/2 (which only track the live intervals for
+         * W/centroid W/Z). No separate check for them is needed after this
+         * filter.
+         */
+        if (inst->src[0].file != QFILE_TEMP &&
+            inst->src[0].file != QFILE_UNIF) {
+                return false;
+        }
+
+        if (inst->qpu.alu.add.output_pack != V3D_QPU_PACK_NONE ||
+            inst->qpu.alu.mul.output_pack != V3D_QPU_PACK_NONE) {
+                return false;
+        }
+
+        if (inst->qpu.flags.ac != V3D_QPU_COND_NONE ||
+            inst->qpu.flags.mc != V3D_QPU_COND_NONE) {
+                return false;
+        }
+
+        return true;
+}
+
+/* Returns whether input chan (0 for A, 1 for B) of inst has an unpack mode
+ * other than V3D_QPU_UNPACK_NONE, looking at the add or mul unit depending
+ * on vir_is_add().
+ */
+static bool
+vir_has_unpack(struct qinst *inst, int chan)
+{
+        enum v3d_qpu_input_unpack unpack;
+
+        assert(chan == 0 || chan == 1);
+
+        if (vir_is_add(inst)) {
+                unpack = (chan == 0) ? inst->qpu.alu.add.a_unpack
+                                     : inst->qpu.alu.add.b_unpack;
+        } else {
+                unpack = (chan == 0) ? inst->qpu.alu.mul.a_unpack
+                                     : inst->qpu.alu.mul.b_unpack;
+        }
+
+        return unpack != V3D_QPU_UNPACK_NONE;
+}
+
+/* Finds the MOV that a read of the temp src may be propagated through, or
+ * returns NULL if there is none.
+ *
+ * We have two ways of finding MOVs we can copy propagate from. One is if
+ * it's in the "movs" array tracked within the block: then we know the
+ * sources for it haven't been changed since we saw the instruction within
+ * our block. Alternatively, if it's an SSA def, we can reuse it from any
+ * block in the program, as long as its source is also an SSA def.
+ */
+static struct qinst *
+vir_find_copy_source(struct v3d_compile *c, struct qinst **movs,
+                     struct qreg src)
+{
+        struct qinst *mov = movs[src.index];
+
+        if (mov)
+                return mov;
+
+        mov = c->defs[src.index];
+        if (!is_copy_mov(mov))
+                return NULL;
+
+        if (mov->src[0].file == QFILE_TEMP &&
+            !c->defs[mov->src[0].index])
+                return NULL;
+
+        return mov;
+}
+
+/* Tries to replace each temp source of inst with the source of the MOV that
+ * produced it, carrying over the MOV's unpack mode when it has one.
+ *
+ * Returns true if any source was rewritten.
+ */
+static bool
+try_copy_prop(struct v3d_compile *c, struct qinst *inst, struct qinst **movs)
+{
+        bool debug = false;
+        bool progress = false;
+
+        for (int i = 0; i < vir_get_nsrc(inst); i++) {
+                if (inst->src[i].file != QFILE_TEMP)
+                        continue;
+
+                struct qinst *mov = vir_find_copy_source(c, movs,
+                                                         inst->src[i]);
+                if (!mov)
+                        continue;
+
+                const bool mov_unpacks = vir_has_unpack(mov, 0);
+
+                if (mov_unpacks) {
+                        /* Make sure that the meaning of the unpack
+                         * would be the same between the two
+                         * instructions.
+                         */
+                        if (vir_is_float_input(inst) !=
+                            vir_is_float_input(mov)) {
+                                continue;
+                        }
+                        /* No composing the unpacks. */
+                        if (vir_has_unpack(inst, i))
+                                continue;
+                }
+
+                if (debug) {
+                        fprintf(stderr, "Copy propagate: ");
+                        vir_dump_inst(c, inst);
+                        fprintf(stderr, "\n");
+                }
+
+                inst->src[i] = mov->src[0];
+                if (mov_unpacks)
+                        vir_set_unpack(inst, i, mov->qpu.alu.mul.a_unpack);
+
+                if (debug) {
+                        fprintf(stderr, "to: ");
+                        vir_dump_inst(c, inst);
+                        fprintf(stderr, "\n");
+                }
+
+                progress = true;
+        }
+
+        return progress;
+}
+
+/* Invalidates the tracked MOVs that inst's write makes stale: any MOV whose
+ * destination index, or whose temp source index, equals the temp written by
+ * inst. Instructions that do not write a temp kill nothing.
+ */
+static void
+apply_kills(struct v3d_compile *c, struct qinst **movs, struct qinst *inst)
+{
+        if (inst->dst.file != QFILE_TEMP)
+                return;
+
+        for (int i = 0; i < c->num_temps; i++) {
+                struct qinst *mov = movs[i];
+
+                if (!mov)
+                        continue;
+
+                const bool dst_overwritten =
+                        mov->dst.index == inst->dst.index;
+                const bool src_overwritten =
+                        mov->src[0].file == QFILE_TEMP &&
+                        mov->src[0].index == inst->dst.index;
+
+                if (dst_overwritten || src_overwritten)
+                        movs[i] = NULL;
+        }
+}
+
+/**
+ * Copy propagation pass over the whole program.
+ *
+ * Walks every instruction of every block, rewriting temp sources that come
+ * from copy MOVs, and keeps a per-block table of the MOVs that are still
+ * valid. Returns true if any instruction was changed; returns false without
+ * doing anything if the table cannot be allocated.
+ */
+bool
+vir_opt_copy_propagate(struct v3d_compile *c)
+{
+        struct qinst **movs = ralloc_array(c, struct qinst *, c->num_temps);
+        bool progress = false;
+
+        if (!movs)
+                return false;
+
+        vir_for_each_block(block, c) {
+                /* The MOVs array tracks only available movs within the
+                 * block.
+                 */
+                memset(movs, 0, c->num_temps * sizeof(*movs));
+
+                vir_for_each_inst(inst, block) {
+                        if (try_copy_prop(c, inst, movs))
+                                progress = true;
+
+                        apply_kills(c, movs, inst);
+
+                        /* Track the instruction itself only after its own
+                         * write has been applied as a kill.
+                         */
+                        if (is_copy_mov(inst))
+                                movs[inst->dst.index] = inst;
+                }
+        }
+
+        ralloc_free(movs);
+
+        return progress;
+}
diff -Nru mesa-17.2.4/src/broadcom/compiler/vir_opt_dead_code.c mesa-17.3.3/src/broadcom/compiler/vir_opt_dead_code.c
--- mesa-17.2.4/src/broadcom/compiler/vir_opt_dead_code.c 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/broadcom/compiler/vir_opt_dead_code.c 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,162 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * @file vir_opt_dead_code.c
+ *
+ * This is a simple dead code eliminator for SSA values in VIR.
+ *
+ * It walks all the instructions finding what temps are used, then walks again
+ * to remove instructions writing unused temps.
+ *
+ * This is an inefficient implementation if you have long chains of
+ * instructions where the entire chain is dead, but we expect those to have
+ * been eliminated at the NIR level, and here we're just cleaning up small
+ * problems produced by NIR->VIR.
+ */
+
+#include "v3d_compiler.h"
+
+static bool debug;
+/* Deletes inst, which must not update flags; traced when debug is set. */
+static void
+dce(struct v3d_compile *c, struct qinst *inst)
+{
+        if (debug) {
+                fputs("Removing: ", stderr);
+                vir_dump_inst(c, inst);
+                fputc('\n', stderr);
+        }
+        assert(inst->qpu.flags.apf == V3D_QPU_PF_NONE);
+        assert(inst->qpu.flags.mpf == V3D_QPU_PF_NONE);
+        vir_remove_instruction(c, inst);
+}
+
+/* Returns true if inst reads a VPM or varying source that dead code
+ * elimination must leave in place.
+ */
+static bool
+has_nonremovable_reads(struct v3d_compile *c, struct qinst *inst)
+{
+        for (int s = 0; s < vir_get_nsrc(inst); s++) {
+                /* Varyings are tricky to dead-code eliminate, so all of
+                 * that is expected to have happened at the NIR level.
+                 */
+                if (inst->src[s].file == QFILE_VARY)
+                        return true;
+                if (inst->src[s].file != QFILE_VPM)
+                        continue;
+
+                /* Instance ID, Vertex ID: NIR should have removed these. */
+                if (inst->src[s].index == ~0)
+                        return true;
+
+                uint32_t attr = inst->src[s].index / 4;
+                uint32_t offset = inst->src[s].index % 4;
+                if (c->vattr_sizes[attr] != offset)
+                        return true;
+
+                /* The last remaining VPM read must stay, or the simulator
+                 * (at least) throws an error.
+                 */
+                uint32_t total_size = 0;
+                for (uint32_t a = 0; a < ARRAY_SIZE(c->vattr_sizes); a++)
+                        total_size += c->vattr_sizes[a];
+                if (total_size == 1)
+                        return true;
+        }
+
+        return false;
+}
+
+/* Removes instructions whose results are unused, or only their unused temp
+ * destination when they must stay.  Returns true if the VIR changed.
+ */
+bool
+vir_opt_dead_code(struct v3d_compile *c)
+{
+        bool progress = false;
+        bool *used = calloc(c->num_temps, sizeof(bool));
+
+        /* Bail out on allocation failure; a NULL for zero temps is fine. */
+        if (!used && c->num_temps != 0)
+                return false;
+
+        /* Pass 1: record every temp that is read anywhere in the shader. */
+        vir_for_each_inst_inorder(inst, c) {
+                for (int i = 0; i < vir_get_nsrc(inst); i++)
+                        if (inst->src[i].file == QFILE_TEMP)
+                                used[inst->src[i].index] = true;
+        }
+
+        /* Pass 2: visit instructions writing nothing or an unread temp. */
+        vir_for_each_block(block, c) {
+                vir_for_each_inst_safe(inst, block) {
+                        if (inst->dst.file != QFILE_NULL &&
+                            !(inst->dst.file == QFILE_TEMP &&
+                              !used[inst->dst.index]))
+                                continue;
+
+                        if (vir_has_side_effects(c, inst))
+                                continue;
+
+                        if (inst->qpu.flags.apf != V3D_QPU_PF_NONE ||
+                            inst->qpu.flags.mpf != V3D_QPU_PF_NONE ||
+                            has_nonremovable_reads(c, inst)) {
+                                /* The instruction must stay, but an unused
+                                 * dst can go; it only clutters VIR dumps.
+                                 */
+                                if (inst->dst.file == QFILE_TEMP) {
+                                        if (debug) {
+                                                fprintf(stderr,
+                                                        "Removing dst from: ");
+                                                vir_dump_inst(c, inst);
+                                                fprintf(stderr, "\n");
+                                        }
+                                        c->defs[inst->dst.index] = NULL;
+                                        inst->dst.file = QFILE_NULL;
+                                        progress = true;
+                                }
+                                continue;
+                        }
+
+                        for (int i = 0; i < vir_get_nsrc(inst); i++) {
+                                if (inst->src[i].file != QFILE_VPM)
+                                        continue;
+                                uint32_t attr = inst->src[i].index / 4;
+                                uint32_t offset = (inst->src[i].index % 4);
+                                if (c->vattr_sizes[attr] == offset) {
+                                        c->num_inputs--;
+                                        c->vattr_sizes[attr]--;
+                                }
+                        }
+
+                        dce(c, inst);
+                        progress = true;
+                }
+        }
+
+        free(used);
+
+        return progress;
+}
diff -Nru mesa-17.2.4/src/broadcom/compiler/vir_register_allocate.c mesa-17.3.3/src/broadcom/compiler/vir_register_allocate.c
--- mesa-17.2.4/src/broadcom/compiler/vir_register_allocate.c 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/broadcom/compiler/vir_register_allocate.c 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,254 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "util/ralloc.h"
+#include "util/register_allocate.h"
+#include "v3d_compiler.h"
+
+#define QPU_R(i) { .magic = false, .index = i }
+/* RA register numbering: the accumulators first, then the physical regfile. */
+#define ACC_INDEX 0
+#define ACC_COUNT 5
+#define PHYS_INDEX (ACC_INDEX + ACC_COUNT)
+#define PHYS_COUNT 64
+
+/* Creates the RA register set with one class per fragment shader threading
+ * split of the physical regfile.  Returns false if the set can't be created.
+ */
+bool
+vir_init_reg_sets(struct v3d_compiler *compiler)
+{
+        const int num_regs = PHYS_INDEX + PHYS_COUNT;
+
+        compiler->regs = ra_alloc_reg_set(compiler, num_regs, true);
+        if (!compiler->regs)
+                return false;
+
+        for (int threads = 0; threads < 3; threads++) {
+                /* PHYS_COUNT >> threads regfile entries, plus every acc. */
+                const int phys_end = PHYS_INDEX + (PHYS_COUNT >> threads);
+
+                compiler->reg_class[threads] =
+                        ra_alloc_reg_class(compiler->regs);
+
+                for (int reg = PHYS_INDEX; reg < phys_end; reg++)
+                        ra_class_add_reg(compiler->regs,
+                                         compiler->reg_class[threads], reg);
+
+                for (int reg = ACC_INDEX; reg < ACC_INDEX + ACC_COUNT; reg++)
+                        ra_class_add_reg(compiler->regs,
+                                         compiler->reg_class[threads], reg);
+        }
+
+        ra_set_finalize(compiler->regs, NULL);
+        return true;
+}
+
+struct node_to_temp_map {
+ uint32_t temp; /* QFILE_TEMP index */
+ uint32_t priority; /* temp_end - temp_start of that temp; the qsort() key */
+};
+
+static int
+node_to_temp_priority(const void *in_a, const void *in_b)
+{
+        const struct node_to_temp_map *lhs = in_a, *rhs = in_b;
+
+        /* qsort() comparator: the sign of the priority difference. */
+        return lhs->priority - rhs->priority;
+}
+
+#define CLASS_BIT_PHYS (1 << 0) /* class_bits flags: files a temp may still be allocated to */
+#define CLASS_BIT_R0_R2 (1 << 1)
+#define CLASS_BIT_R3 (1 << 2)
+#define CLASS_BIT_R4 (1 << 3)
+
+/**
+ * Returns a mapping from QFILE_TEMP indices to struct qpu_regs.
+ *
+ * The return value should be freed by the caller.  If allocation fails,
+ * c->failed is set and NULL is returned.
+ */
+struct qpu_reg *
+v3d_register_allocate(struct v3d_compile *c)
+{
+        struct node_to_temp_map map[c->num_temps];
+        uint32_t temp_to_node[c->num_temps];
+        uint8_t class_bits[c->num_temps];
+        struct qpu_reg *temp_registers = calloc(c->num_temps,
+                                                sizeof(*temp_registers));
+        int acc_nodes[ACC_COUNT];
+
+        struct ra_graph *g = ra_alloc_interference_graph(c->compiler->regs,
+                                                         c->num_temps +
+                                                         ARRAY_SIZE(acc_nodes));
+
+        /* Make some fixed nodes for the accumulators, which we will need to
+         * interfere with when ops have implied r3/r4 writes or for the thread
+         * switches. We could represent these as classes for the nodes to
+         * live in, but the classes take up a lot of memory to set up, so we
+         * don't want to make too many.
+         */
+        for (int i = 0; i < ARRAY_SIZE(acc_nodes); i++) {
+                acc_nodes[i] = c->num_temps + i;
+                ra_set_node_reg(g, acc_nodes[i], ACC_INDEX + i);
+        }
+
+        /* Compute the live ranges so we can figure out interference. */
+        vir_calculate_live_intervals(c);
+
+        for (uint32_t i = 0; i < c->num_temps; i++) {
+                map[i].temp = i;
+                map[i].priority = c->temp_end[i] - c->temp_start[i];
+        }
+        qsort(map, c->num_temps, sizeof(map[0]), node_to_temp_priority);
+        for (uint32_t i = 0; i < c->num_temps; i++)
+                temp_to_node[map[i].temp] = i;
+
+        /* Figure out our register classes and preallocated registers. We
+         * start with any temp being able to be in any file, then instructions
+         * incrementally remove bits that the temp definitely can't be in.
+         */
+        memset(class_bits,
+               CLASS_BIT_PHYS | CLASS_BIT_R0_R2 | CLASS_BIT_R3 | CLASS_BIT_R4,
+               sizeof(class_bits));
+
+        int ip = 0;
+        vir_for_each_inst_inorder(inst, c) {
+                /* If the instruction writes r3/r4 (and optionally moves its
+                 * result to a temp), nothing else can be stored in r3/r4 across
+                 * it.
+                 */
+                if (vir_writes_r3(inst)) {
+                        for (int i = 0; i < c->num_temps; i++) {
+                                if (c->temp_start[i] < ip &&
+                                    c->temp_end[i] > ip) {
+                                        ra_add_node_interference(g,
+                                                                 temp_to_node[i],
+                                                                 acc_nodes[3]);
+                                }
+                        }
+                }
+                if (vir_writes_r4(inst)) {
+                        for (int i = 0; i < c->num_temps; i++) {
+                                if (c->temp_start[i] < ip &&
+                                    c->temp_end[i] > ip) {
+                                        ra_add_node_interference(g,
+                                                                 temp_to_node[i],
+                                                                 acc_nodes[4]);
+                                }
+                        }
+                }
+
+                if (inst->src[0].file == QFILE_REG) {
+                        switch (inst->src[0].index) {
+                        case 0:
+                        case 1:
+                        case 2:
+                                /* Payload setup instructions: Force allocate
+                                 * the dst to the given register (so the MOV
+                                 * will disappear).
+                                 */
+                                assert(inst->qpu.alu.mul.op == V3D_QPU_M_MOV);
+                                assert(inst->dst.file == QFILE_TEMP);
+                                ra_set_node_reg(g,
+                                                temp_to_node[inst->dst.index],
+                                                PHYS_INDEX +
+                                                inst->src[0].index);
+                                break;
+                        }
+                }
+
+#if 0
+                switch (inst->op) {
+                case QOP_THRSW:
+                        /* All accumulators are invalidated across a thread
+                         * switch.
+                         */
+                        for (int i = 0; i < c->num_temps; i++) {
+                                if (c->temp_start[i] < ip && c->temp_end[i] > ip)
+                                        class_bits[i] &= ~(CLASS_BIT_R0_R3 |
+                                                           CLASS_BIT_R4);
+                        }
+                        break;
+
+                default:
+                        break;
+                }
+#endif
+
+                ip++;
+        }
+
+        for (uint32_t i = 0; i < c->num_temps; i++)
+                ra_set_node_class(g, temp_to_node[i],
+                                  c->compiler->reg_class[c->fs_threaded]);
+
+        for (uint32_t i = 0; i < c->num_temps; i++) {
+                for (uint32_t j = i + 1; j < c->num_temps; j++) {
+                        if (!(c->temp_start[i] >= c->temp_end[j] ||
+                              c->temp_start[j] >= c->temp_end[i])) {
+                                ra_add_node_interference(g,
+                                                         temp_to_node[i],
+                                                         temp_to_node[j]);
+                        }
+                }
+        }
+
+        bool ok = ra_allocate(g);
+        if (!ok) {
+                if (!c->fs_threaded) {
+                        fprintf(stderr, "Failed to register allocate:\n");
+                        vir_dump(c);
+                }
+                /* Both the graph and the mapping are ours to release. */
+                c->failed = true;
+                ralloc_free(g);
+                free(temp_registers);
+                return NULL;
+        }
+
+        for (uint32_t i = 0; i < c->num_temps; i++) {
+                int ra_reg = ra_get_node_reg(g, temp_to_node[i]);
+                if (ra_reg < PHYS_INDEX) {
+                        temp_registers[i].magic = true;
+                        temp_registers[i].index = (V3D_QPU_WADDR_R0 +
+                                                   ra_reg - ACC_INDEX);
+                } else {
+                        temp_registers[i].magic = false;
+                        temp_registers[i].index = ra_reg - PHYS_INDEX;
+                }
+
+                /* If the value's never used, just write to the NOP register
+                 * for clarity in debug output.
+                 */
+                if (c->temp_start[i] == c->temp_end[i]) {
+                        temp_registers[i].magic = true;
+                        temp_registers[i].index = V3D_QPU_WADDR_NOP;
+                }
+        }
+
+        ralloc_free(g);
+
+        return temp_registers;
+}
diff -Nru mesa-17.2.4/src/broadcom/compiler/vir_to_qpu.c mesa-17.3.3/src/broadcom/compiler/vir_to_qpu.c
--- mesa-17.2.4/src/broadcom/compiler/vir_to_qpu.c 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/broadcom/compiler/vir_to_qpu.c 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,359 @@
+/*
+ * Copyright © 2016 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "compiler/v3d_compiler.h"
+#include "qpu/qpu_instr.h"
+#include "qpu/qpu_disasm.h"
+
+/**
+ * Builds the qpu_reg that names non-magic register @index (a plain
+ * register number rather than a magic write address).
+ */
+static inline struct qpu_reg
+qpu_reg(int index)
+{
+        return (struct qpu_reg){ .magic = false, .index = index };
+}
+
+/**
+ * Builds the qpu_reg that names magic write address @waddr (such as an
+ * accumulator, NOP, VPM or the TLB).
+ */
+static inline struct qpu_reg
+qpu_magic(enum v3d_qpu_waddr waddr)
+{
+        return (struct qpu_reg){ .magic = true, .index = waddr };
+}
+
+static inline struct qpu_reg
+qpu_acc(int acc)
+{
+ return qpu_magic(V3D_QPU_WADDR_R0 + acc); /* accumulator r<acc>, addressed as a magic waddr */
+}
+
+/**
+ * Returns an ALU instruction whose add and mul halves are both NOPs that
+ * write to the magic NOP address; every other field is zero.
+ */
+struct v3d_qpu_instr
+v3d_qpu_nop(void)
+{
+        const struct v3d_qpu_instr nop = {
+                .type = V3D_QPU_INSTR_TYPE_ALU,
+
+                .alu.add.op = V3D_QPU_A_NOP,
+                .alu.add.waddr = V3D_QPU_WADDR_NOP,
+                .alu.add.magic_write = true,
+
+                .alu.mul.op = V3D_QPU_M_NOP,
+                .alu.mul.waddr = V3D_QPU_WADDR_NOP,
+                .alu.mul.magic_write = true,
+        };
+
+        return nop;
+}
+
+/* Creates a NOP qinst with QFILE_NULL for every register operand. */
+static struct qinst *
+vir_nop(void)
+{
+        const struct qreg none = { QFILE_NULL, 0 };
+
+        return vir_add_inst(V3D_QPU_A_NOP, none, none, none);
+}
+
+/* Creates a NOP and links it into the list immediately ahead of inst. */
+static struct qinst *
+new_qpu_nop_before(struct qinst *inst)
+{
+        struct qinst *nop = vir_nop();
+
+        list_addtail(&nop->link, &inst->link);
+        return nop;
+}
+
+/* Emits, before inst, a NOP with the ldunif signal for inst's src[i]. */
+static void
+new_ldunif_instr(struct qinst *inst, int i)
+{
+        struct qinst *load = new_qpu_nop_before(inst);
+        load->qpu.sig.ldunif = true;
+        assert(inst->src[i].file == QFILE_UNIF);
+        load->uniform = inst->src[i].index;
+}
+
+/**
+ * Routes one ALU input to src: a magic (accumulator) src is selected directly
+ * through the mux, while a regfile src goes through raddr_a, or raddr_b when
+ * raddr_a already holds a different register.
+ */
+static void
+set_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src)
+{
+        if (src.magic) {
+                assert(src.index >= V3D_QPU_WADDR_R0 &&
+                       src.index <= V3D_QPU_WADDR_R5);
+                *mux = src.index - V3D_QPU_WADDR_R0 + V3D_QPU_MUX_R0;
+                return;
+        }
+
+        /* raddr_a is taken once any ALU input already selects MUX_A. */
+        const bool raddr_a_taken = (instr->alu.add.a == V3D_QPU_MUX_A ||
+                                    instr->alu.add.b == V3D_QPU_MUX_A ||
+                                    instr->alu.mul.a == V3D_QPU_MUX_A ||
+                                    instr->alu.mul.b == V3D_QPU_MUX_A);
+
+        if (!raddr_a_taken || instr->raddr_a == src.index) {
+                instr->raddr_a = src.index;
+                *mux = V3D_QPU_MUX_A;
+                return;
+        }
+
+        assert(!(instr->alu.add.a == V3D_QPU_MUX_B &&
+                 instr->alu.add.b == V3D_QPU_MUX_B &&
+                 instr->alu.mul.a == V3D_QPU_MUX_B &&
+                 instr->alu.mul.b == V3D_QPU_MUX_B) ||
+               src.index == instr->raddr_b);
+        instr->raddr_b = src.index;
+        *mux = V3D_QPU_MUX_B;
+}
+/* Fills in the QPU mux/raddr/waddr fields of each qinst in block from its VIR srcs and dst. */
+static void
+v3d_generate_code_block(struct v3d_compile *c,
+ struct qblock *block,
+ struct qpu_reg *temp_registers)
+{
+ int last_vpm_read_index = -1;
+
+ vir_for_each_inst(qinst, block) {
+#if 0
+ fprintf(stderr, "translating qinst to qpu: ");
+ vir_dump_inst(c, qinst);
+ fprintf(stderr, "\n");
+#endif
+
+ struct qinst *temp;
+ /* An implicit uniform is recorded on qinst itself and counted; no extra instruction is emitted. */
+ if (vir_has_implicit_uniform(qinst)) {
+ int src = vir_get_implicit_uniform_src(qinst);
+ assert(qinst->src[src].file == QFILE_UNIF);
+ qinst->uniform = qinst->src[src].index;
+ c->num_uniforms++;
+ }
+
+ int nsrc = vir_get_non_sideband_nsrc(qinst);
+ struct qpu_reg src[ARRAY_SIZE(qinst->src)];
+ bool emitted_ldunif = false;
+ for (int i = 0; i < nsrc; i++) {
+ int index = qinst->src[i].index;
+ switch (qinst->src[i].file) {
+ case QFILE_REG:
+ src[i] = qpu_reg(qinst->src[i].index);
+ break;
+ case QFILE_MAGIC:
+ src[i] = qpu_magic(qinst->src[i].index);
+ break;
+ case QFILE_NULL:
+ case QFILE_LOAD_IMM:
+ src[i] = qpu_acc(0);
+ break;
+ case QFILE_TEMP:
+ src[i] = temp_registers[index];
+ break;
+ case QFILE_UNIF:
+ if (!emitted_ldunif) {
+ new_ldunif_instr(qinst, i);
+ c->num_uniforms++;
+ emitted_ldunif = true;
+ }
+ /* All uniform srcs of qinst share that one ldunif and read accumulator 5. */
+ src[i] = qpu_acc(5);
+ break;
+ case QFILE_VARY:
+ temp = new_qpu_nop_before(qinst);
+ temp->qpu.sig.ldvary = true;
+ /* The source itself is then read from accumulator 3. */
+ src[i] = qpu_acc(3);
+ break;
+ case QFILE_SMALL_IMM:
+ abort(); /* XXX */
+#if 0
+ src[i].mux = QPU_MUX_SMALL_IMM;
+ src[i].addr = qpu_encode_small_immediate(qinst->src[i].index);
+ /* This should only have returned a valid
+ * small immediate field, not ~0 for failure.
+ */
+ assert(src[i].addr <= 47);
+#endif
+ break;
+
+ case QFILE_VPM:
+ assert((int)qinst->src[i].index >=
+ last_vpm_read_index);
+ (void)last_vpm_read_index;
+ last_vpm_read_index = qinst->src[i].index;
+
+ temp = new_qpu_nop_before(qinst);
+ temp->qpu.sig.ldvpm = true;
+ /* As for varyings, the source is read from accumulator 3. */
+ src[i] = qpu_acc(3);
+ break;
+
+ case QFILE_TLB:
+ case QFILE_TLBU:
+ unreachable("bad vir src file");
+ }
+ }
+ /* Resolve dst to a regfile index or a magic write address. */
+ struct qpu_reg dst;
+ switch (qinst->dst.file) {
+ case QFILE_NULL:
+ dst = qpu_magic(V3D_QPU_WADDR_NOP);
+ break;
+
+ case QFILE_REG:
+ dst = qpu_reg(qinst->dst.index);
+ break;
+
+ case QFILE_MAGIC:
+ dst = qpu_magic(qinst->dst.index);
+ break;
+
+ case QFILE_TEMP:
+ dst = temp_registers[qinst->dst.index];
+ break;
+
+ case QFILE_VPM:
+ dst = qpu_magic(V3D_QPU_WADDR_VPM);
+ break;
+
+ case QFILE_TLB:
+ dst = qpu_magic(V3D_QPU_WADDR_TLB);
+ break;
+
+ case QFILE_TLBU:
+ dst = qpu_magic(V3D_QPU_WADDR_TLBU);
+ break;
+
+ case QFILE_VARY:
+ case QFILE_UNIF:
+ case QFILE_SMALL_IMM:
+ case QFILE_LOAD_IMM:
+ assert(!"not reached");
+ break;
+ }
+ /* An ALU qinst uses the add unit or the mul unit, never both. */
+ if (qinst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
+ if (qinst->qpu.alu.add.op != V3D_QPU_A_NOP) {
+ assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);
+ if (nsrc >= 1) {
+ set_src(&qinst->qpu,
+ &qinst->qpu.alu.add.a, src[0]);
+ }
+ if (nsrc >= 2) {
+ set_src(&qinst->qpu,
+ &qinst->qpu.alu.add.b, src[1]);
+ }
+
+ qinst->qpu.alu.add.waddr = dst.index;
+ qinst->qpu.alu.add.magic_write = dst.magic;
+ } else {
+ if (nsrc >= 1) {
+ set_src(&qinst->qpu,
+ &qinst->qpu.alu.mul.a, src[0]);
+ }
+ if (nsrc >= 2) {
+ set_src(&qinst->qpu,
+ &qinst->qpu.alu.mul.b, src[1]);
+ }
+
+ qinst->qpu.alu.mul.waddr = dst.index;
+ qinst->qpu.alu.mul.magic_write = dst.magic;
+ }
+ } else {
+ assert(qinst->qpu.type == V3D_QPU_INSTR_TYPE_BRANCH);
+ }
+ }
+}
+
+
+/* Dumps each packed QPU instruction and its disassembly to stderr.
+ * NOTE(review): confirm whether v3d_qpu_disasm()'s string must be freed. */
+static void
+v3d_dump_qpu(struct v3d_compile *c)
+{
+        fprintf(stderr, "%s prog %d/%d QPU:\n", vir_get_stage_name(c),
+                c->program_id, c->variant_id);
+        for (int n = 0; n < c->qpu_inst_count; n++) {
+                const char *str = v3d_qpu_disasm(c->devinfo, c->qpu_insts[n]);
+                fprintf(stderr, "0x%016"PRIx64" %s\n", c->qpu_insts[n], str);
+        }
+        fprintf(stderr, "\n");
+}
+
+/* Register allocates c, then lowers and packs its VIR into c->qpu_insts. */
+void
+v3d_vir_to_qpu(struct v3d_compile *c)
+{
+        struct qpu_reg *temp_registers = v3d_register_allocate(c);
+        struct qblock *end_block = list_last_entry(&c->blocks,
+                                                   struct qblock, link);
+
+        /* RA failed: c->failed is set and there is no mapping to index. */
+        if (!temp_registers && c->failed)
+                return;
+
+        /* Reset the uniform count to how many will be actually loaded by the
+         * generated QPU code.
+         */
+        c->num_uniforms = 0;
+        vir_for_each_block(block, c)
+                v3d_generate_code_block(c, block, temp_registers);
+
+        struct qinst *thrsw = vir_nop();
+        list_addtail(&thrsw->link, &end_block->instructions);
+        thrsw->qpu.sig.thrsw = true;
+
+        uint32_t cycles = v3d_qpu_schedule_instructions(c);
+        c->qpu_insts = rzalloc_array(c, uint64_t, c->qpu_inst_count);
+        int i = 0;
+        vir_for_each_inst_inorder(inst, c) {
+                bool ok = v3d_qpu_instr_pack(c->devinfo, &inst->qpu,
+                                             &c->qpu_insts[i++]);
+                assert(ok); (void) ok;
+        }
+        assert(i == c->qpu_inst_count);
+
+        if (V3D_DEBUG & V3D_DEBUG_SHADERDB) {
+                fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d estimated cycles\n",
+                        vir_get_stage_name(c), c->program_id, c->variant_id, cycles);
+        }
+
+        if (V3D_DEBUG & (V3D_DEBUG_QPU |
+                         v3d_debug_flag_for_shader_stage(c->s->info.stage))) {
+                v3d_dump_qpu(c);
+        }
+
+        qpu_validate(c);
+        free(temp_registers);
+}
diff -Nru mesa-17.2.4/src/broadcom/Makefile.am mesa-17.3.3/src/broadcom/Makefile.am
--- mesa-17.2.4/src/broadcom/Makefile.am 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/broadcom/Makefile.am 2018-01-18 21:30:28.000000000 +0000
@@ -20,6 +20,20 @@
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
+AM_CPPFLAGS = \
+ -I$(top_srcdir)/include \
+ -I$(top_builddir)/src \
+ -I$(top_srcdir)/src \
+ -I$(top_srcdir)/src/broadcom/ \
+ -I$(top_srcdir)/src/broadcom/include \
+ -I$(top_srcdir)/src/gallium/auxiliary \
+ -I$(top_srcdir)/src/gallium/include \
+ $(VALGRIND_CFLAGS) \
+ $(DEFINES)
+
+AM_CFLAGS = \
+ $(EXPAT_CFLAGS)
+
include Makefile.sources
lib_LTLIBRARIES =
@@ -38,5 +52,7 @@
PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)
include Makefile.genxml.am
+include Makefile.cle.am
+include Makefile.vc5.am
CLEANFILES += $(BUILT_SOURCES)
diff -Nru mesa-17.2.4/src/broadcom/Makefile.cle.am mesa-17.3.3/src/broadcom/Makefile.cle.am
--- mesa-17.2.4/src/broadcom/Makefile.cle.am 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/broadcom/Makefile.cle.am 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,6 @@
+noinst_LTLIBRARIES += cle/libbroadcom_cle.la
+
+cle_libbroadcom_cle_la_CFLAGS = \
+ -I$(top_builddir)/src/broadcom/cle \
+ $(AM_CFLAGS)
+cle_libbroadcom_cle_la_SOURCES = $(BROADCOM_DECODER_FILES)
diff -Nru mesa-17.2.4/src/broadcom/Makefile.genxml.am mesa-17.3.3/src/broadcom/Makefile.genxml.am
--- mesa-17.2.4/src/broadcom/Makefile.genxml.am 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/broadcom/Makefile.genxml.am 2018-01-18 21:30:28.000000000 +0000
@@ -32,6 +32,11 @@
$(MKDIR_GEN)
$(PYTHON_GEN) $(srcdir)/cle/gen_pack_header.py $< > $@ || ($(RM) $@; false)
+GEN_ZIPPED = $(srcdir)/../intel/genxml/gen_zipped_file.py
+cle/v3d_xml.h: $(GEN_ZIPPED) $(BROADCOM_GENXML_XML_FILES)
+ $(MKDIR_GEN)
+ $(PYTHON_GEN) $(GEN_ZIPPED) $(BROADCOM_GENXML_XML_FILES:%=$(srcdir)/%) > $@ || ($(RM) $@; false)
+
EXTRA_DIST += \
cle/gen_pack_header.py \
$()
diff -Nru mesa-17.2.4/src/broadcom/Makefile.in mesa-17.3.3/src/broadcom/Makefile.in
--- mesa-17.2.4/src/broadcom/Makefile.in 2017-10-30 14:49:58.000000000 +0000
+++ mesa-17.3.3/src/broadcom/Makefile.in 2018-01-18 21:30:39.000000000 +0000
@@ -136,11 +136,12 @@
host_triplet = @host@
target_triplet = @target@
noinst_PROGRAMS =
-check_PROGRAMS =
-TESTS =
+check_PROGRAMS = qpu/tests/qpu_disasm$(EXEEXT)
+@USE_VC5_SIMULATOR_TRUE@am__append_1 = $(VC5_SIMULATOR_CFLAGS)
subdir = src/broadcom
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_gnu_make.m4 \
+am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_compile_flag.m4 \
+ $(top_srcdir)/m4/ax_check_gnu_make.m4 \
$(top_srcdir)/m4/ax_check_python_mako_module.m4 \
$(top_srcdir)/m4/ax_gcc_builtin.m4 \
$(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
@@ -186,7 +187,42 @@
}
am__installdirs = "$(DESTDIR)$(libdir)"
LTLIBRARIES = $(lib_LTLIBRARIES) $(noinst_LTLIBRARIES)
+cle_libbroadcom_cle_la_LIBADD =
+am__dirstamp = $(am__leading_dot)dirstamp
+am__objects_1 = cle/cle_libbroadcom_cle_la-v3d_decoder.lo
+am_cle_libbroadcom_cle_la_OBJECTS = $(am__objects_1)
+cle_libbroadcom_cle_la_OBJECTS = $(am_cle_libbroadcom_cle_la_OBJECTS)
+AM_V_lt = $(am__v_lt_@AM_V@)
+am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
+am__v_lt_0 = --silent
+am__v_lt_1 =
+cle_libbroadcom_cle_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \
+ $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \
+ $(cle_libbroadcom_cle_la_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
+ $(LDFLAGS) -o $@
+libbroadcom_la_LIBADD =
+am__objects_2 = common/v3d_debug.lo clif/clif_dump.lo \
+ compiler/nir_to_vir.lo compiler/vir.lo compiler/vir_dump.lo \
+ compiler/vir_live_variables.lo compiler/vir_lower_uniforms.lo \
+ compiler/vir_opt_copy_propagate.lo \
+ compiler/vir_opt_dead_code.lo \
+ compiler/vir_register_allocate.lo compiler/vir_to_qpu.lo \
+ compiler/qpu_schedule.lo compiler/qpu_validate.lo \
+ compiler/v3d_nir_lower_io.lo qpu/qpu_disasm.lo \
+ qpu/qpu_instr.lo qpu/qpu_pack.lo
+am_libbroadcom_la_OBJECTS = $(am__objects_2)
+libbroadcom_la_OBJECTS = $(am_libbroadcom_la_OBJECTS)
+libbroadcom_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \
+ $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \
+ $(AM_CFLAGS) $(CFLAGS) $(libbroadcom_la_LDFLAGS) $(LDFLAGS) -o \
+ $@
PROGRAMS = $(noinst_PROGRAMS)
+qpu_tests_qpu_disasm_SOURCES = qpu/tests/qpu_disasm.c
+qpu_tests_qpu_disasm_OBJECTS = qpu/tests/qpu_disasm.$(OBJEXT)
+qpu_tests_qpu_disasm_LDADD = $(LDADD)
+qpu_tests_qpu_disasm_DEPENDENCIES = libbroadcom.la \
+ $(top_builddir)/src/compiler/nir/libnir.la \
+ $(top_builddir)/src/util/libmesautil.la
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
am__v_P_0 = false
@@ -199,8 +235,32 @@
am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
am__v_at_0 = @
am__v_at_1 =
-SOURCES =
-DIST_SOURCES =
+DEFAULT_INCLUDES = -I.@am__isrc@
+depcomp = $(SHELL) $(top_srcdir)/bin/depcomp
+am__depfiles_maybe = depfiles
+am__mv = mv -f
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
+ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
+ $(AM_CFLAGS) $(CFLAGS)
+AM_V_CC = $(am__v_CC_@AM_V@)
+am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
+am__v_CC_0 = @echo " CC " $@;
+am__v_CC_1 =
+CCLD = $(CC)
+LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+ $(AM_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_CCLD = $(am__v_CCLD_@AM_V@)
+am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
+am__v_CCLD_0 = @echo " CCLD " $@;
+am__v_CCLD_1 =
+SOURCES = $(cle_libbroadcom_cle_la_SOURCES) $(libbroadcom_la_SOURCES) \
+ qpu/tests/qpu_disasm.c
+DIST_SOURCES = $(cle_libbroadcom_cle_la_SOURCES) \
+ $(libbroadcom_la_SOURCES) qpu/tests/qpu_disasm.c
am__can_run_installinfo = \
case $$AM_UPDATE_INFO_DIR in \
n|no|NO) false;; \
@@ -209,6 +269,24 @@
DATA = $(noinst_DATA)
HEADERS = $(noinst_HEADERS)
am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates. Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+ BEGIN { nonempty = 0; } \
+ { items[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique. This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+ list='$(am__tagged_files)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | $(am__uniquify_input)`
+ETAGS = etags
+CTAGS = ctags
am__tty_colors_dummy = \
mgn= red= grn= lgn= blu= brg= std=; \
am__color_tests=no
@@ -368,12 +446,8 @@
AM_RECURSIVE_TARGETS = check recheck
TEST_SUITE_LOG = test-suite.log
TEST_EXTENSIONS = @EXEEXT@ .test
-am__test_logs1 = $(TESTS:=.log)
-am__test_logs2 = $(am__test_logs1:@EXEEXT@.log=.log)
-TEST_LOGS = $(am__test_logs2:.test.log=.log)
-TEST_LOG_DRIVER = $(SHELL) $(top_srcdir)/bin/test-driver
-TEST_LOG_COMPILE = $(TEST_LOG_COMPILER) $(AM_TEST_LOG_FLAGS) \
- $(TEST_LOG_FLAGS)
+LOG_DRIVER = $(SHELL) $(top_srcdir)/bin/test-driver
+LOG_COMPILE = $(LOG_COMPILER) $(AM_LOG_FLAGS) $(LOG_FLAGS)
am__set_b = \
case '$@' in \
*/*) \
@@ -384,8 +458,16 @@
*) \
b='$*';; \
esac
-am__DIST_COMMON = $(srcdir)/Makefile.genxml.am $(srcdir)/Makefile.in \
- $(srcdir)/Makefile.sources $(top_srcdir)/bin/test-driver
+am__test_logs1 = $(TESTS:=.log)
+am__test_logs2 = $(am__test_logs1:@EXEEXT@.log=.log)
+TEST_LOGS = $(am__test_logs2:.test.log=.log)
+TEST_LOG_DRIVER = $(SHELL) $(top_srcdir)/bin/test-driver
+TEST_LOG_COMPILE = $(TEST_LOG_COMPILER) $(AM_TEST_LOG_FLAGS) \
+ $(TEST_LOG_FLAGS)
+am__DIST_COMMON = $(srcdir)/Makefile.cle.am \
+ $(srcdir)/Makefile.genxml.am $(srcdir)/Makefile.in \
+ $(srcdir)/Makefile.sources $(srcdir)/Makefile.vc5.am \
+ $(top_srcdir)/bin/depcomp $(top_srcdir)/bin/test-driver
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
ACLOCAL = @ACLOCAL@
AMDGPU_CFLAGS = @AMDGPU_CFLAGS@
@@ -525,9 +607,9 @@
NVVIEUX_LIBS = @NVVIEUX_LIBS@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
-OMX_CFLAGS = @OMX_CFLAGS@
-OMX_LIBS = @OMX_LIBS@
-OMX_LIB_INSTALL_DIR = @OMX_LIB_INSTALL_DIR@
+OMX_BELLAGIO_CFLAGS = @OMX_BELLAGIO_CFLAGS@
+OMX_BELLAGIO_LIBS = @OMX_BELLAGIO_LIBS@
+OMX_BELLAGIO_LIB_INSTALL_DIR = @OMX_BELLAGIO_LIB_INSTALL_DIR@
OPENCL_LIBNAME = @OPENCL_LIBNAME@
OPENCL_VERSION = @OPENCL_VERSION@
OSMESA_LIB = @OSMESA_LIB@
@@ -581,6 +663,8 @@
VA_LIB_INSTALL_DIR = @VA_LIB_INSTALL_DIR@
VA_MAJOR = @VA_MAJOR@
VA_MINOR = @VA_MINOR@
+VC5_SIMULATOR_CFLAGS = @VC5_SIMULATOR_CFLAGS@
+VC5_SIMULATOR_LIBS = @VC5_SIMULATOR_LIBS@
VDPAU_CFLAGS = @VDPAU_CFLAGS@
VDPAU_LIBS = @VDPAU_LIBS@
VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@
@@ -592,12 +676,15 @@
VL_CFLAGS = @VL_CFLAGS@
VL_LIBS = @VL_LIBS@
VULKAN_ICD_INSTALL_DIR = @VULKAN_ICD_INSTALL_DIR@
-WAYLAND_CFLAGS = @WAYLAND_CFLAGS@
-WAYLAND_LIBS = @WAYLAND_LIBS@
+WAYLAND_CLIENT_CFLAGS = @WAYLAND_CLIENT_CFLAGS@
+WAYLAND_CLIENT_LIBS = @WAYLAND_CLIENT_LIBS@
WAYLAND_PROTOCOLS_DATADIR = @WAYLAND_PROTOCOLS_DATADIR@
WAYLAND_SCANNER = @WAYLAND_SCANNER@
WAYLAND_SCANNER_CFLAGS = @WAYLAND_SCANNER_CFLAGS@
WAYLAND_SCANNER_LIBS = @WAYLAND_SCANNER_LIBS@
+WAYLAND_SERVER_CFLAGS = @WAYLAND_SERVER_CFLAGS@
+WAYLAND_SERVER_LIBS = @WAYLAND_SERVER_LIBS@
+WNO_OVERRIDE_INIT = @WNO_OVERRIDE_INIT@
X11_INCLUDES = @X11_INCLUDES@
XA_MAJOR = @XA_MAJOR@
XA_MINOR = @XA_MINOR@
@@ -681,24 +768,67 @@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
+AM_CPPFLAGS = \
+ -I$(top_srcdir)/include \
+ -I$(top_builddir)/src \
+ -I$(top_srcdir)/src \
+ -I$(top_srcdir)/src/broadcom/ \
+ -I$(top_srcdir)/src/broadcom/include \
+ -I$(top_srcdir)/src/gallium/auxiliary \
+ -I$(top_srcdir)/src/gallium/include \
+ $(VALGRIND_CFLAGS) \
+ $(DEFINES)
+
+AM_CFLAGS = $(EXPAT_CFLAGS) $(am__append_1)
BROADCOM_GENXML_GENERATED_FILES = \
cle/v3d_packet_v21_pack.h \
+ cle/v3d_packet_v33_pack.h \
+ cle/v3d_xml.h \
$()
BROADCOM_GENXML_XML_FILES = \
cle/v3d_packet_v21.xml \
+ cle/v3d_packet_v33.xml \
$()
BROADCOM_FILES = \
cle/v3d_packet_helpers.h \
+ common/v3d_debug.c \
+ common/v3d_debug.h \
+ clif/clif_dump.c \
+ clif/clif_dump.h \
common/v3d_device_info.h \
+ compiler/nir_to_vir.c \
+ compiler/vir.c \
+ compiler/vir_dump.c \
+ compiler/vir_live_variables.c \
+ compiler/vir_lower_uniforms.c \
+ compiler/vir_opt_copy_propagate.c \
+ compiler/vir_opt_dead_code.c \
+ compiler/vir_register_allocate.c \
+ compiler/vir_to_qpu.c \
+ compiler/qpu_schedule.c \
+ compiler/qpu_validate.c \
+ compiler/v3d_compiler.h \
+ compiler/v3d_nir_lower_io.c \
+ qpu/qpu_disasm.c \
+ qpu/qpu_disasm.h \
+ qpu/qpu_instr.c \
+ qpu/qpu_instr.h \
+ qpu/qpu_pack.c \
+ $()
+
+BROADCOM_DECODER_FILES = \
+ cle/v3d_decoder.c \
+ cle/v3d_decoder.h \
$()
lib_LTLIBRARIES =
check_LTLIBRARIES =
noinst_DATA =
noinst_HEADERS =
-noinst_LTLIBRARIES =
+noinst_LTLIBRARIES = cle/libbroadcom_cle.la libbroadcom.la
+TESTS = $(check_PROGRAMS)
BUILT_SOURCES = $(BROADCOM_GENXML_GENERATED_FILES)
CLEANFILES = $(BUILT_SOURCES)
EXTRA_DIST = $(BROADCOM_FILES) $(BROADCOM_GENXML_XML_FILES) \
@@ -706,12 +836,26 @@
MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)
SUFFIXES = _pack.h .xml
+GEN_ZIPPED = $(srcdir)/../intel/genxml/gen_zipped_file.py
+cle_libbroadcom_cle_la_CFLAGS = \
+ -I$(top_builddir)/src/broadcom/cle \
+ $(AM_CFLAGS)
+
+cle_libbroadcom_cle_la_SOURCES = $(BROADCOM_DECODER_FILES)
+@USE_VC5_SIMULATOR_TRUE@libbroadcom_la_LDFLAGS = $(VC5_SIMULATOR_LIBS)
+libbroadcom_la_SOURCES = $(BROADCOM_FILES)
+LDADD = \
+ libbroadcom.la \
+ $(top_builddir)/src/compiler/nir/libnir.la \
+ $(top_builddir)/src/util/libmesautil.la \
+ $(NULL)
+
all: $(BUILT_SOURCES)
$(MAKE) $(AM_MAKEFLAGS) all-am
.SUFFIXES:
-.SUFFIXES: _pack.h .xml .log .test .test$(EXEEXT) .trs
-$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(srcdir)/Makefile.sources $(srcdir)/Makefile.genxml.am $(am__configure_deps)
+.SUFFIXES: _pack.h .xml .c .lo .log .o .obj .test .test$(EXEEXT) .trs
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(srcdir)/Makefile.sources $(srcdir)/Makefile.genxml.am $(srcdir)/Makefile.cle.am $(srcdir)/Makefile.vc5.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
@@ -731,7 +875,7 @@
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
esac;
-$(srcdir)/Makefile.sources $(srcdir)/Makefile.genxml.am $(am__empty):
+$(srcdir)/Makefile.sources $(srcdir)/Makefile.genxml.am $(srcdir)/Makefile.cle.am $(srcdir)/Makefile.vc5.am $(am__empty):
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
@@ -798,6 +942,74 @@
echo rm -f $${locs}; \
rm -f $${locs}; \
}
+cle/$(am__dirstamp):
+ @$(MKDIR_P) cle
+ @: > cle/$(am__dirstamp)
+cle/$(DEPDIR)/$(am__dirstamp):
+ @$(MKDIR_P) cle/$(DEPDIR)
+ @: > cle/$(DEPDIR)/$(am__dirstamp)
+cle/cle_libbroadcom_cle_la-v3d_decoder.lo: cle/$(am__dirstamp) \
+ cle/$(DEPDIR)/$(am__dirstamp)
+
+cle/libbroadcom_cle.la: $(cle_libbroadcom_cle_la_OBJECTS) $(cle_libbroadcom_cle_la_DEPENDENCIES) $(EXTRA_cle_libbroadcom_cle_la_DEPENDENCIES) cle/$(am__dirstamp)
+ $(AM_V_CCLD)$(cle_libbroadcom_cle_la_LINK) $(cle_libbroadcom_cle_la_OBJECTS) $(cle_libbroadcom_cle_la_LIBADD) $(LIBS)
+common/$(am__dirstamp):
+ @$(MKDIR_P) common
+ @: > common/$(am__dirstamp)
+common/$(DEPDIR)/$(am__dirstamp):
+ @$(MKDIR_P) common/$(DEPDIR)
+ @: > common/$(DEPDIR)/$(am__dirstamp)
+common/v3d_debug.lo: common/$(am__dirstamp) \
+ common/$(DEPDIR)/$(am__dirstamp)
+clif/$(am__dirstamp):
+ @$(MKDIR_P) clif
+ @: > clif/$(am__dirstamp)
+clif/$(DEPDIR)/$(am__dirstamp):
+ @$(MKDIR_P) clif/$(DEPDIR)
+ @: > clif/$(DEPDIR)/$(am__dirstamp)
+clif/clif_dump.lo: clif/$(am__dirstamp) clif/$(DEPDIR)/$(am__dirstamp)
+compiler/$(am__dirstamp):
+ @$(MKDIR_P) compiler
+ @: > compiler/$(am__dirstamp)
+compiler/$(DEPDIR)/$(am__dirstamp):
+ @$(MKDIR_P) compiler/$(DEPDIR)
+ @: > compiler/$(DEPDIR)/$(am__dirstamp)
+compiler/nir_to_vir.lo: compiler/$(am__dirstamp) \
+ compiler/$(DEPDIR)/$(am__dirstamp)
+compiler/vir.lo: compiler/$(am__dirstamp) \
+ compiler/$(DEPDIR)/$(am__dirstamp)
+compiler/vir_dump.lo: compiler/$(am__dirstamp) \
+ compiler/$(DEPDIR)/$(am__dirstamp)
+compiler/vir_live_variables.lo: compiler/$(am__dirstamp) \
+ compiler/$(DEPDIR)/$(am__dirstamp)
+compiler/vir_lower_uniforms.lo: compiler/$(am__dirstamp) \
+ compiler/$(DEPDIR)/$(am__dirstamp)
+compiler/vir_opt_copy_propagate.lo: compiler/$(am__dirstamp) \
+ compiler/$(DEPDIR)/$(am__dirstamp)
+compiler/vir_opt_dead_code.lo: compiler/$(am__dirstamp) \
+ compiler/$(DEPDIR)/$(am__dirstamp)
+compiler/vir_register_allocate.lo: compiler/$(am__dirstamp) \
+ compiler/$(DEPDIR)/$(am__dirstamp)
+compiler/vir_to_qpu.lo: compiler/$(am__dirstamp) \
+ compiler/$(DEPDIR)/$(am__dirstamp)
+compiler/qpu_schedule.lo: compiler/$(am__dirstamp) \
+ compiler/$(DEPDIR)/$(am__dirstamp)
+compiler/qpu_validate.lo: compiler/$(am__dirstamp) \
+ compiler/$(DEPDIR)/$(am__dirstamp)
+compiler/v3d_nir_lower_io.lo: compiler/$(am__dirstamp) \
+ compiler/$(DEPDIR)/$(am__dirstamp)
+qpu/$(am__dirstamp):
+ @$(MKDIR_P) qpu
+ @: > qpu/$(am__dirstamp)
+qpu/$(DEPDIR)/$(am__dirstamp):
+ @$(MKDIR_P) qpu/$(DEPDIR)
+ @: > qpu/$(DEPDIR)/$(am__dirstamp)
+qpu/qpu_disasm.lo: qpu/$(am__dirstamp) qpu/$(DEPDIR)/$(am__dirstamp)
+qpu/qpu_instr.lo: qpu/$(am__dirstamp) qpu/$(DEPDIR)/$(am__dirstamp)
+qpu/qpu_pack.lo: qpu/$(am__dirstamp) qpu/$(DEPDIR)/$(am__dirstamp)
+
+libbroadcom.la: $(libbroadcom_la_OBJECTS) $(libbroadcom_la_DEPENDENCIES) $(EXTRA_libbroadcom_la_DEPENDENCIES)
+ $(AM_V_CCLD)$(libbroadcom_la_LINK) $(libbroadcom_la_OBJECTS) $(libbroadcom_la_LIBADD) $(LIBS)
clean-checkPROGRAMS:
@list='$(check_PROGRAMS)'; test -n "$$list" || exit 0; \
@@ -816,18 +1028,150 @@
list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
echo " rm -f" $$list; \
rm -f $$list
+qpu/tests/$(am__dirstamp):
+ @$(MKDIR_P) qpu/tests
+ @: > qpu/tests/$(am__dirstamp)
+qpu/tests/$(DEPDIR)/$(am__dirstamp):
+ @$(MKDIR_P) qpu/tests/$(DEPDIR)
+ @: > qpu/tests/$(DEPDIR)/$(am__dirstamp)
+qpu/tests/qpu_disasm.$(OBJEXT): qpu/tests/$(am__dirstamp) \
+ qpu/tests/$(DEPDIR)/$(am__dirstamp)
+
+qpu/tests/qpu_disasm$(EXEEXT): $(qpu_tests_qpu_disasm_OBJECTS) $(qpu_tests_qpu_disasm_DEPENDENCIES) $(EXTRA_qpu_tests_qpu_disasm_DEPENDENCIES) qpu/tests/$(am__dirstamp)
+ @rm -f qpu/tests/qpu_disasm$(EXEEXT)
+ $(AM_V_CCLD)$(LINK) $(qpu_tests_qpu_disasm_OBJECTS) $(qpu_tests_qpu_disasm_LDADD) $(LIBS)
+
+mostlyclean-compile:
+ -rm -f *.$(OBJEXT)
+ -rm -f cle/*.$(OBJEXT)
+ -rm -f cle/*.lo
+ -rm -f clif/*.$(OBJEXT)
+ -rm -f clif/*.lo
+ -rm -f common/*.$(OBJEXT)
+ -rm -f common/*.lo
+ -rm -f compiler/*.$(OBJEXT)
+ -rm -f compiler/*.lo
+ -rm -f qpu/*.$(OBJEXT)
+ -rm -f qpu/*.lo
+ -rm -f qpu/tests/*.$(OBJEXT)
+
+distclean-compile:
+ -rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@cle/$(DEPDIR)/cle_libbroadcom_cle_la-v3d_decoder.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@clif/$(DEPDIR)/clif_dump.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@common/$(DEPDIR)/v3d_debug.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@compiler/$(DEPDIR)/nir_to_vir.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@compiler/$(DEPDIR)/qpu_schedule.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@compiler/$(DEPDIR)/qpu_validate.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@compiler/$(DEPDIR)/v3d_nir_lower_io.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@compiler/$(DEPDIR)/vir.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@compiler/$(DEPDIR)/vir_dump.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@compiler/$(DEPDIR)/vir_live_variables.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@compiler/$(DEPDIR)/vir_lower_uniforms.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@compiler/$(DEPDIR)/vir_opt_copy_propagate.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@compiler/$(DEPDIR)/vir_opt_dead_code.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@compiler/$(DEPDIR)/vir_register_allocate.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@compiler/$(DEPDIR)/vir_to_qpu.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@qpu/$(DEPDIR)/qpu_disasm.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@qpu/$(DEPDIR)/qpu_instr.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@qpu/$(DEPDIR)/qpu_pack.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@qpu/tests/$(DEPDIR)/qpu_disasm.Po@am__quote@
+
+.c.o:
+@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\
+@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\
+@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
+
+.c.obj:
+@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\
+@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\
+@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.c.lo:
+@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\
+@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\
+@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $<
+
+cle/cle_libbroadcom_cle_la-v3d_decoder.lo: cle/v3d_decoder.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(cle_libbroadcom_cle_la_CFLAGS) $(CFLAGS) -MT cle/cle_libbroadcom_cle_la-v3d_decoder.lo -MD -MP -MF cle/$(DEPDIR)/cle_libbroadcom_cle_la-v3d_decoder.Tpo -c -o cle/cle_libbroadcom_cle_la-v3d_decoder.lo `test -f 'cle/v3d_decoder.c' || echo '$(srcdir)/'`cle/v3d_decoder.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) cle/$(DEPDIR)/cle_libbroadcom_cle_la-v3d_decoder.Tpo cle/$(DEPDIR)/cle_libbroadcom_cle_la-v3d_decoder.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='cle/v3d_decoder.c' object='cle/cle_libbroadcom_cle_la-v3d_decoder.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(cle_libbroadcom_cle_la_CFLAGS) $(CFLAGS) -c -o cle/cle_libbroadcom_cle_la-v3d_decoder.lo `test -f 'cle/v3d_decoder.c' || echo '$(srcdir)/'`cle/v3d_decoder.c
mostlyclean-libtool:
-rm -f *.lo
clean-libtool:
-rm -rf .libs _libs
-tags TAGS:
-
-ctags CTAGS:
+ -rm -rf cle/.libs cle/_libs
+ -rm -rf clif/.libs clif/_libs
+ -rm -rf common/.libs common/_libs
+ -rm -rf compiler/.libs compiler/_libs
+ -rm -rf qpu/.libs qpu/_libs
+ -rm -rf qpu/tests/.libs qpu/tests/_libs
+
+ID: $(am__tagged_files)
+ $(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-am
+TAGS: tags
+
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+ set x; \
+ here=`pwd`; \
+ $(am__define_uniq_tagged_files); \
+ shift; \
+ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+ test -n "$$unique" || unique=$$empty_fix; \
+ if test $$# -gt 0; then \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ "$$@" $$unique; \
+ else \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$unique; \
+ fi; \
+ fi
+ctags: ctags-am
-cscope cscopelist:
+CTAGS: ctags
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+ $(am__define_uniq_tagged_files); \
+ test -z "$(CTAGS_ARGS)$$unique" \
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+ $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && $(am__cd) $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) "$$here"
+cscopelist: cscopelist-am
+
+cscopelist-am: $(am__tagged_files)
+ list='$(am__tagged_files)'; \
+ case "$(srcdir)" in \
+ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+ *) sdir=$(subdir)/$(srcdir) ;; \
+ esac; \
+ for i in $$list; do \
+ if test -f "$$i"; then \
+ echo "$(subdir)/$$i"; \
+ else \
+ echo "$$sdir/$$i"; \
+ fi; \
+ done >> $(top_builddir)/cscope.files
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
# Recover from deleted '.trs' file; this should ensure that
# "rm -f foo.log; make foo.trs" re-run 'foo.test', and re-create
@@ -970,6 +1314,13 @@
am__force_recheck=am--force-recheck \
TEST_LOGS="$$log_list"; \
exit $$?
+qpu/tests/qpu_disasm.log: qpu/tests/qpu_disasm$(EXEEXT)
+ @p='qpu/tests/qpu_disasm$(EXEEXT)'; \
+ b='qpu/tests/qpu_disasm'; \
+ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \
+ --log-file $$b.log --trs-file $$b.trs \
+ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \
+ "$$tst" $(AM_TESTS_FD_REDIRECT)
.test.log:
@p='$<'; \
$(am__set_b); \
@@ -1056,6 +1407,18 @@
distclean-generic:
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+ -rm -f cle/$(DEPDIR)/$(am__dirstamp)
+ -rm -f cle/$(am__dirstamp)
+ -rm -f clif/$(DEPDIR)/$(am__dirstamp)
+ -rm -f clif/$(am__dirstamp)
+ -rm -f common/$(DEPDIR)/$(am__dirstamp)
+ -rm -f common/$(am__dirstamp)
+ -rm -f compiler/$(DEPDIR)/$(am__dirstamp)
+ -rm -f compiler/$(am__dirstamp)
+ -rm -f qpu/$(DEPDIR)/$(am__dirstamp)
+ -rm -f qpu/$(am__dirstamp)
+ -rm -f qpu/tests/$(DEPDIR)/$(am__dirstamp)
+ -rm -f qpu/tests/$(am__dirstamp)
maintainer-clean-generic:
@echo "This command is intended for maintainers to use"
@@ -1068,8 +1431,10 @@
clean-noinstPROGRAMS mostlyclean-am
distclean: distclean-am
+ -rm -rf cle/$(DEPDIR) clif/$(DEPDIR) common/$(DEPDIR) compiler/$(DEPDIR) qpu/$(DEPDIR) qpu/tests/$(DEPDIR)
-rm -f Makefile
-distclean-am: clean-am distclean-generic
+distclean-am: clean-am distclean-compile distclean-generic \
+ distclean-tags
dvi: dvi-am
@@ -1112,12 +1477,14 @@
installcheck-am:
maintainer-clean: maintainer-clean-am
+ -rm -rf cle/$(DEPDIR) clif/$(DEPDIR) common/$(DEPDIR) compiler/$(DEPDIR) qpu/$(DEPDIR) qpu/tests/$(DEPDIR)
-rm -f Makefile
maintainer-clean-am: distclean-am maintainer-clean-generic
mostlyclean: mostlyclean-am
-mostlyclean-am: mostlyclean-generic mostlyclean-libtool
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+ mostlyclean-libtool
pdf: pdf-am
@@ -1131,20 +1498,22 @@
.MAKE: all check check-am install install-am install-strip
-.PHONY: all all-am check check-TESTS check-am clean \
+.PHONY: CTAGS GTAGS TAGS all all-am check check-TESTS check-am clean \
clean-checkLTLIBRARIES clean-checkPROGRAMS clean-generic \
clean-libLTLIBRARIES clean-libtool clean-noinstLTLIBRARIES \
- clean-noinstPROGRAMS cscopelist-am ctags-am distclean \
- distclean-generic distclean-libtool distdir dvi dvi-am html \
- html-am info info-am install install-am install-data \
- install-data-am install-dvi install-dvi-am install-exec \
- install-exec-am install-html install-html-am install-info \
- install-info-am install-libLTLIBRARIES install-man install-pdf \
- install-pdf-am install-ps install-ps-am install-strip \
- installcheck installcheck-am installdirs maintainer-clean \
- maintainer-clean-generic mostlyclean mostlyclean-generic \
- mostlyclean-libtool pdf pdf-am ps ps-am recheck tags-am \
- uninstall uninstall-am uninstall-libLTLIBRARIES
+ clean-noinstPROGRAMS cscopelist-am ctags ctags-am distclean \
+ distclean-compile distclean-generic distclean-libtool \
+ distclean-tags distdir dvi dvi-am html html-am info info-am \
+ install install-am install-data install-data-am install-dvi \
+ install-dvi-am install-exec install-exec-am install-html \
+ install-html-am install-info install-info-am \
+ install-libLTLIBRARIES install-man install-pdf install-pdf-am \
+ install-ps install-ps-am install-strip installcheck \
+ installcheck-am installdirs maintainer-clean \
+ maintainer-clean-generic mostlyclean mostlyclean-compile \
+ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+ recheck tags tags-am uninstall uninstall-am \
+ uninstall-libLTLIBRARIES
.PRECIOUS: Makefile
@@ -1154,6 +1523,9 @@
.xml_pack.h:
$(MKDIR_GEN)
$(PYTHON_GEN) $(srcdir)/cle/gen_pack_header.py $< > $@ || ($(RM) $@; false)
+cle/v3d_xml.h: $(GEN_ZIPPED) $(BROADCOM_GENXML_XML_FILES)
+ $(MKDIR_GEN)
+ $(PYTHON_GEN) $(GEN_ZIPPED) $(BROADCOM_GENXML_XML_FILES:%=$(srcdir)/%) > $@ || ($(RM) $@; false)
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
diff -Nru mesa-17.2.4/src/broadcom/Makefile.sources mesa-17.3.3/src/broadcom/Makefile.sources
--- mesa-17.2.4/src/broadcom/Makefile.sources 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/broadcom/Makefile.sources 2018-01-18 21:30:28.000000000 +0000
@@ -1,13 +1,43 @@
BROADCOM_GENXML_GENERATED_FILES = \
cle/v3d_packet_v21_pack.h \
+ cle/v3d_packet_v33_pack.h \
+ cle/v3d_xml.h \
$()
BROADCOM_GENXML_XML_FILES = \
cle/v3d_packet_v21.xml \
+ cle/v3d_packet_v33.xml \
$()
BROADCOM_FILES = \
cle/v3d_packet_helpers.h \
+ common/v3d_debug.c \
+ common/v3d_debug.h \
+ clif/clif_dump.c \
+ clif/clif_dump.h \
common/v3d_device_info.h \
+ compiler/nir_to_vir.c \
+ compiler/vir.c \
+ compiler/vir_dump.c \
+ compiler/vir_live_variables.c \
+ compiler/vir_lower_uniforms.c \
+ compiler/vir_opt_copy_propagate.c \
+ compiler/vir_opt_dead_code.c \
+ compiler/vir_register_allocate.c \
+ compiler/vir_to_qpu.c \
+ compiler/qpu_schedule.c \
+ compiler/qpu_validate.c \
+ compiler/v3d_compiler.h \
+ compiler/v3d_nir_lower_io.c \
+ qpu/qpu_disasm.c \
+ qpu/qpu_disasm.h \
+ qpu/qpu_instr.c \
+ qpu/qpu_instr.h \
+ qpu/qpu_pack.c \
+ $()
+
+BROADCOM_DECODER_FILES = \
+ cle/v3d_decoder.c \
+ cle/v3d_decoder.h \
$()
diff -Nru mesa-17.2.4/src/broadcom/Makefile.vc5.am mesa-17.3.3/src/broadcom/Makefile.vc5.am
--- mesa-17.2.4/src/broadcom/Makefile.vc5.am 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/broadcom/Makefile.vc5.am 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,20 @@
+noinst_LTLIBRARIES += libbroadcom.la
+
+if USE_VC5_SIMULATOR
+AM_CFLAGS += $(VC5_SIMULATOR_CFLAGS)
+libbroadcom_la_LDFLAGS = $(VC5_SIMULATOR_LIBS)
+endif
+
+libbroadcom_la_SOURCES = $(BROADCOM_FILES)
+
+check_PROGRAMS += \
+ qpu/tests/qpu_disasm \
+ $(NULL)
+
+LDADD = \
+ libbroadcom.la \
+ $(top_builddir)/src/compiler/nir/libnir.la \
+ $(top_builddir)/src/util/libmesautil.la \
+ $(NULL)
+
+TESTS += $(check_PROGRAMS)
diff -Nru mesa-17.2.4/src/broadcom/meson.build mesa-17.3.3/src/broadcom/meson.build
--- mesa-17.2.4/src/broadcom/meson.build 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/broadcom/meson.build 2018-02-01 16:17:32.000000000 +0000
@@ -0,0 +1,44 @@
+# Copyright © 2017 Broadcom
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+inc_broadcom = include_directories('.', 'cle')
+
+subdir('cle')
+
+if with_gallium_vc5
+ subdir('compiler')
+ subdir('qpu')
+
+ libbroadcom_vc5 = static_library(
+ 'libbroadcom_vc5',
+ [
+ files(
+ 'common/v3d_debug.c',
+ 'clif/clif_dump.c',
+ ),
+ v3d_xml_pack,
+ ],
+ include_directories : [inc_common, inc_broadcom, inc_src],
+ c_args : [c_vis_args, no_override_init_args],
+ link_whole : [libbroadcom_compiler, libbroadcom_qpu],
+ build_by_default : false,
+ dependencies: dep_valgrind,
+ )
+endif
diff -Nru mesa-17.2.4/src/broadcom/qpu/meson.build mesa-17.3.3/src/broadcom/qpu/meson.build
--- mesa-17.2.4/src/broadcom/qpu/meson.build 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/broadcom/qpu/meson.build 2018-02-01 16:17:32.000000000 +0000
@@ -0,0 +1,39 @@
+# Copyright © 2017 Broadcom
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+libbroadcom_qpu_files = files(
+ 'qpu_disasm.c',
+ 'qpu_instr.c',
+ 'qpu_pack.c',
+)
+
+libbroadcom_qpu = static_library(
+  'broadcom_qpu',  # target name is a plain string, as for libbroadcom_vc5
+  [libbroadcom_qpu_files, v3d_xml_pack],  # v3d_xml_pack belongs with the sources
+  include_directories : [inc_common, inc_broadcom],
+  c_args : [c_vis_args, no_override_init_args],
+  dependencies : [dep_libdrm, dep_valgrind],
+  build_by_default : false,
+)
+
+test('qpu_disasm',
+ executable('qpu_disasm', 'tests/qpu_disasm.c',
+ link_with: [libbroadcom_qpu, libmesa_util],
+ include_directories: inc_common))
diff -Nru mesa-17.2.4/src/broadcom/qpu/qpu_disasm.c mesa-17.3.3/src/broadcom/qpu/qpu_disasm.c
--- mesa-17.2.4/src/broadcom/qpu/qpu_disasm.c 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/broadcom/qpu/qpu_disasm.c 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,298 @@
+/*
+ * Copyright © 2016 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include
+#include
+#include "util/ralloc.h"
+
+#include "broadcom/common/v3d_device_info.h"
+#include "qpu_instr.h"
+#include "qpu_disasm.h"
+
+struct disasm_state {
+ const struct v3d_device_info *devinfo; /* device info passed to v3d_qpu_decode() */
+ char *string; /* output text; starts as rzalloc_size(NULL, 1), grown by append() */
+ size_t offset; /* position handed to ralloc_vasprintf_rewrite_tail(); pad_to() compares against it */
+};
+
+/* printf-style helper: formats into disasm->string at disasm->offset. */
+static void
+append(struct disasm_state *disasm, const char *fmt, ...)
+{
+        va_list ap;
+
+        va_start(ap, fmt);
+        ralloc_vasprintf_rewrite_tail(&disasm->string, &disasm->offset, fmt, ap);
+        va_end(ap);
+}
+
+/* Pads with spaces up to offset n in a single append (no-op if already there). */
+static void
+pad_to(struct disasm_state *disasm, int n)
+{
+        if (n > 0 && disasm->offset < (size_t)n)
+                append(disasm, "%*s", (int)((size_t)n - disasm->offset), "");
+}
+
+
+/* Emits a source operand: "rfN" from raddr_a/raddr_b for mux A/B, else "rN". */
+static void
+v3d_qpu_disasm_raddr(struct disasm_state *disasm,
+                     const struct v3d_qpu_instr *instr, uint8_t mux)
+{
+        if (mux != V3D_QPU_MUX_A && mux != V3D_QPU_MUX_B) {
+                append(disasm, "r%d", mux);
+                return;
+        }
+        append(disasm, "rf%d",
+               mux == V3D_QPU_MUX_A ? instr->raddr_a : instr->raddr_b);
+}
+
+/* Emits a write address: "rfN" for a plain write, or the magic waddr's name
+ * (with a "waddr UNKNOWN" fallback when no name is returned) if magic is set. */
+static void
+v3d_qpu_disasm_waddr(struct disasm_state *disasm, uint32_t waddr, bool magic)
+{
+        const char *magic_name = magic ? v3d_qpu_magic_waddr_name(waddr) : NULL;
+
+        if (!magic)
+                append(disasm, "rf%d", waddr);
+        else if (magic_name != NULL)
+                append(disasm, "%s", magic_name);
+        else
+                append(disasm, "waddr UNKNOWN %d", waddr);
+}
+
+/* Appends the add-ALU half: op name plus condition/flag suffixes and a space,
+ * then the destination (when the op has one) and up to two sources. */
+static void
+v3d_qpu_disasm_add(struct disasm_state *disasm,
+                   const struct v3d_qpu_instr *instr)
+{
+        bool has_dst = v3d_qpu_add_op_has_dst(instr->alu.add.op);
+        int num_src = v3d_qpu_add_op_num_src(instr->alu.add.op);
+
+        append(disasm, "%s", v3d_qpu_add_op_name(instr->alu.add.op));
+        append(disasm, "%s", v3d_qpu_cond_name(instr->flags.ac));
+        append(disasm, "%s", v3d_qpu_pf_name(instr->flags.apf));
+        append(disasm, "%s ", v3d_qpu_uf_name(instr->flags.auf));
+        if (has_dst) {
+                v3d_qpu_disasm_waddr(disasm, instr->alu.add.waddr,
+                                     instr->alu.add.magic_write);
+                append(disasm, "%s", /* the name is data, not a format */
+                       v3d_qpu_pack_name(instr->alu.add.output_pack));
+        }
+
+        if (num_src >= 1) {
+                if (has_dst)
+                        append(disasm, ", ");
+                v3d_qpu_disasm_raddr(disasm, instr, instr->alu.add.a);
+                append(disasm, "%s",
+                       v3d_qpu_unpack_name(instr->alu.add.a_unpack));
+        }
+
+        if (num_src >= 2) {
+                append(disasm, ", ");
+                v3d_qpu_disasm_raddr(disasm, instr, instr->alu.add.b);
+                append(disasm, "%s",
+                       v3d_qpu_unpack_name(instr->alu.add.b_unpack));
+        }
+}
+
+/* Appends the mul-ALU half: pads the text to offset 21, then "; ", the op name
+ * and suffixes and, unless the op is V3D_QPU_M_NOP, destination and sources. */
+static void
+v3d_qpu_disasm_mul(struct disasm_state *disasm,
+                   const struct v3d_qpu_instr *instr)
+{
+        bool has_dst = v3d_qpu_mul_op_has_dst(instr->alu.mul.op);
+        int num_src = v3d_qpu_mul_op_num_src(instr->alu.mul.op);
+
+        pad_to(disasm, 21);
+        append(disasm, "; %s", v3d_qpu_mul_op_name(instr->alu.mul.op));
+        append(disasm, "%s", v3d_qpu_cond_name(instr->flags.mc));
+        append(disasm, "%s", v3d_qpu_pf_name(instr->flags.mpf));
+        append(disasm, "%s", v3d_qpu_uf_name(instr->flags.muf));
+
+        if (instr->alu.mul.op == V3D_QPU_M_NOP)
+                return;
+
+        append(disasm, " ");
+        if (has_dst) {
+                v3d_qpu_disasm_waddr(disasm, instr->alu.mul.waddr,
+                                     instr->alu.mul.magic_write);
+                append(disasm, "%s", /* the name is data, not a format */
+                       v3d_qpu_pack_name(instr->alu.mul.output_pack));
+        }
+
+        if (num_src >= 1) {
+                if (has_dst)
+                        append(disasm, ", ");
+                v3d_qpu_disasm_raddr(disasm, instr, instr->alu.mul.a);
+                append(disasm, "%s",
+                       v3d_qpu_unpack_name(instr->alu.mul.a_unpack));
+        }
+
+        if (num_src >= 2) {
+                append(disasm, ", ");
+                v3d_qpu_disasm_raddr(disasm, instr, instr->alu.mul.b);
+                append(disasm, "%s",
+                       v3d_qpu_unpack_name(instr->alu.mul.b_unpack));
+        }
+}
+
+/*
+ * Appends "; <name>" for every signal set in instr->sig, after padding the
+ * text to offset 41.  Nothing at all is emitted when no signal is set.
+ */
+static void
+v3d_qpu_disasm_sig(struct disasm_state *disasm,
+                   const struct v3d_qpu_instr *instr)
+{
+        const struct v3d_qpu_sig *sig = &instr->sig;
+        const bool any_sig = sig->thrsw || sig->ldvary || sig->ldvpm ||
+                             sig->ldtmu || sig->ldunif || sig->wrtmuc;
+
+        if (!any_sig)
+                return;
+
+        pad_to(disasm, 41);
+
+        if (sig->thrsw)
+                append(disasm, "; thrsw");
+        if (sig->ldvary)
+                append(disasm, "; ldvary");
+        if (sig->ldvpm)
+                append(disasm, "; ldvpm");
+        if (sig->ldtmu)
+                append(disasm, "; ldtmu");
+        if (sig->ldunif)
+                append(disasm, "; ldunif");
+        if (sig->wrtmuc)
+                append(disasm, "; wrtmuc");
+}
+
+static void
+v3d_qpu_disasm_alu(struct disasm_state *disasm,
+ const struct v3d_qpu_instr *instr)
+{
+ v3d_qpu_disasm_add(disasm, instr); /* add-ALU op and operands come first */
+ v3d_qpu_disasm_mul(disasm, instr); /* then "; " and the mul-ALU op */
+ v3d_qpu_disasm_sig(disasm, instr); /* then any "; <signal>" entries */
+}
+
+/* Appends a branch: "b" (or "bu" when branch.ub is set), the condition and
+ * msfign suffixes, the bdi destination and, for ub branches, the bdu one. */
+static void
+v3d_qpu_disasm_branch(struct disasm_state *disasm,
+                      const struct v3d_qpu_instr *instr)
+{
+        append(disasm, "%s", instr->branch.ub ? "bu" : "b");
+        append(disasm, "%s", v3d_qpu_branch_cond_name(instr->branch.cond));
+        append(disasm, "%s", v3d_qpu_msfign_name(instr->branch.msfign));
+
+        switch (instr->branch.bdi) {
+        case V3D_QPU_BRANCH_DEST_ABS:
+                append(disasm, " zero_addr+0x%08x", instr->branch.offset);
+                break;
+
+        case V3D_QPU_BRANCH_DEST_REL:
+                append(disasm, " %d", instr->branch.offset);
+                break;
+
+        case V3D_QPU_BRANCH_DEST_LINK_REG:
+                append(disasm, " lri");
+                break;
+
+        case V3D_QPU_BRANCH_DEST_REGFILE:
+                append(disasm, " rf%d", instr->branch.raddr_a);
+                break;
+        }
+
+        if (!instr->branch.ub)
+                return;
+        switch (instr->branch.bdu) {
+        case V3D_QPU_BRANCH_DEST_ABS:
+                append(disasm, ", a:unif");
+                break;
+
+        case V3D_QPU_BRANCH_DEST_REL:
+                append(disasm, ", r:unif");
+                break;
+
+        case V3D_QPU_BRANCH_DEST_LINK_REG:
+                append(disasm, ", lri");
+                break;
+
+        case V3D_QPU_BRANCH_DEST_REGFILE:
+                append(disasm, ", rf%d", instr->branch.raddr_a);
+                break;
+        }
+}
+
+/**
+ * Disassembles an already-unpacked instruction into a string that starts as
+ * rzalloc_size(NULL, 1) and is grown by append(); the caller owns the result.
+ */
+const char *
+v3d_qpu_decode(const struct v3d_device_info *devinfo,
+               const struct v3d_qpu_instr *instr)
+{
+        struct disasm_state disasm;
+
+        disasm.devinfo = devinfo;
+        disasm.string = rzalloc_size(NULL, 1);
+        disasm.offset = 0;
+
+        /* Any other instruction type leaves the string as allocated. */
+        if (instr->type == V3D_QPU_INSTR_TYPE_ALU)
+                v3d_qpu_disasm_alu(&disasm, instr);
+        else if (instr->type == V3D_QPU_INSTR_TYPE_BRANCH)
+                v3d_qpu_disasm_branch(&disasm, instr);
+
+        return disasm.string;
+}
+
+/**
+ * Unpacks a raw 64-bit QPU instruction and returns its disassembly as a
+ * string.  The caller owns the returned string and must release it with
+ * ralloc_free().
+ */
+const char *
+v3d_qpu_disasm(const struct v3d_device_info *devinfo, uint64_t inst)
+{
+        struct v3d_qpu_instr unpacked;
+        bool unpack_ok = v3d_qpu_instr_unpack(devinfo, inst, &unpacked);
+        assert(unpack_ok);
+        (void)unpack_ok; /* otherwise unused once assert() compiles away */
+        return v3d_qpu_decode(devinfo, &unpacked);
+}
+
+void
+v3d_qpu_dump(const struct v3d_device_info *devinfo,
+ const struct v3d_qpu_instr *instr)
+{
+ const char *decoded = v3d_qpu_decode(devinfo, instr); /* string owned by this function */
+ fprintf(stderr, "%s", decoded); /* written to stderr; no newline is added */
+ ralloc_free((char *)decoded); /* cast only drops const for the free */
+}
diff -Nru mesa-17.2.4/src/broadcom/qpu/qpu_disasm.h mesa-17.3.3/src/broadcom/qpu/qpu_disasm.h
--- mesa-17.2.4/src/broadcom/qpu/qpu_disasm.h 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/broadcom/qpu/qpu_disasm.h 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,39 @@
+/*
+ * Copyright © 2016 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef VC5_QPU_DISASM_H
+#define VC5_QPU_DISASM_H
+
+#include "broadcom/common/v3d_device_info.h"
+
+struct v3d_qpu_instr;
+
+const char *v3d_qpu_decode(const struct v3d_device_info *devinfo,
+                           const struct v3d_qpu_instr *instr);
+
+const char *v3d_qpu_disasm(const struct v3d_device_info *devinfo, uint64_t inst);
+
+void v3d_qpu_dump(const struct v3d_device_info *devinfo,
+                  const struct v3d_qpu_instr *instr);
+
+#endif /* VC5_QPU_DISASM_H */
diff -Nru mesa-17.2.4/src/broadcom/qpu/qpu_instr.c mesa-17.3.3/src/broadcom/qpu/qpu_instr.c
--- mesa-17.2.4/src/broadcom/qpu/qpu_instr.c 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/broadcom/qpu/qpu_instr.c 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,645 @@
+/*
+ * Copyright © 2016 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <stdlib.h>
+#include "util/macros.h"
+#include "qpu_instr.h"
+
+#ifndef QPU_MASK
+#define QPU_MASK(high, low) ((((uint64_t)1<<((high)-(low)+1))-1)<<(low))
+/* Using the GNU statement expression extension */
+#define QPU_SET_FIELD(value, field) \
+ ({ \
+ uint64_t fieldval = (uint64_t)(value) << field ## _SHIFT; \
+ assert((fieldval & ~ field ## _MASK) == 0); \
+ fieldval & field ## _MASK; \
+ })
+
+#define QPU_GET_FIELD(word, field) ((uint32_t)(((word) & field ## _MASK) >> field ## _SHIFT))
+
+#define QPU_UPDATE_FIELD(inst, value, field) \
+ (((inst) & ~(field ## _MASK)) | QPU_SET_FIELD(value, field))
+#endif /* QPU_MASK */
+
+#define VC5_QPU_OP_MUL_SHIFT 58
+#define VC5_QPU_OP_MUL_MASK QPU_MASK(63, 58)
+
+#define VC5_QPU_SIG_SHIFT 53
+#define VC5_QPU_SIG_MASK QPU_MASK(57, 53)
+# define VC5_QPU_SIG_THRSW_BIT 0x1
+# define VC5_QPU_SIG_LDUNIF_BIT 0x2
+# define VC5_QPU_SIG_LDTMU_BIT 0x4
+# define VC5_QPU_SIG_LDVARY_BIT 0x8
+
+#define VC5_QPU_COND_SHIFT 46
+#define VC5_QPU_COND_MASK QPU_MASK(52, 46)
+
+#define VC5_QPU_COND_IFA 0
+#define VC5_QPU_COND_IFB 1
+#define VC5_QPU_COND_IFNA 2
+#define VC5_QPU_COND_IFNB 3
+
+#define VC5_QPU_MM QPU_MASK(45, 45)
+#define VC5_QPU_MA QPU_MASK(44, 44)
+
+#define V3D_QPU_WADDR_M_SHIFT 38
+#define V3D_QPU_WADDR_M_MASK QPU_MASK(43, 38)
+
+#define VC5_QPU_BRANCH_ADDR_LOW_SHIFT 35
+#define VC5_QPU_BRANCH_ADDR_LOW_MASK QPU_MASK(55, 35)
+
+#define V3D_QPU_WADDR_A_SHIFT 32
+#define V3D_QPU_WADDR_A_MASK QPU_MASK(37, 32)
+
+#define VC5_QPU_BRANCH_COND_SHIFT 32
+#define VC5_QPU_BRANCH_COND_MASK QPU_MASK(34, 32)
+
+#define VC5_QPU_BRANCH_ADDR_HIGH_SHIFT 24
+#define VC5_QPU_BRANCH_ADDR_HIGH_MASK QPU_MASK(31, 24)
+
+#define VC5_QPU_OP_ADD_SHIFT 24
+#define VC5_QPU_OP_ADD_MASK QPU_MASK(31, 24)
+
+#define VC5_QPU_MUL_B_SHIFT 21
+#define VC5_QPU_MUL_B_MASK QPU_MASK(23, 21)
+
+#define VC5_QPU_BRANCH_MSFIGN_SHIFT 21
+#define VC5_QPU_BRANCH_MSFIGN_MASK QPU_MASK(22, 21)
+
+#define VC5_QPU_MUL_A_SHIFT 18
+#define VC5_QPU_MUL_A_MASK QPU_MASK(20, 18)
+
+#define VC5_QPU_ADD_B_SHIFT 15
+#define VC5_QPU_ADD_B_MASK QPU_MASK(17, 15)
+
+#define VC5_QPU_BRANCH_BDU_SHIFT 15
+#define VC5_QPU_BRANCH_BDU_MASK QPU_MASK(17, 15)
+
+#define VC5_QPU_BRANCH_UB QPU_MASK(14, 14)
+
+#define VC5_QPU_ADD_A_SHIFT 12
+#define VC5_QPU_ADD_A_MASK QPU_MASK(14, 12)
+
+#define VC5_QPU_BRANCH_BDI_SHIFT 12
+#define VC5_QPU_BRANCH_BDI_MASK QPU_MASK(13, 12)
+
+#define VC5_QPU_RADDR_A_SHIFT 6
+#define VC5_QPU_RADDR_A_MASK QPU_MASK(11, 6)
+
+#define VC5_QPU_RADDR_B_SHIFT 0
+#define VC5_QPU_RADDR_B_MASK QPU_MASK(5, 0)
+
+const char *
+v3d_qpu_magic_waddr_name(enum v3d_qpu_waddr waddr)
+{
+        static const char *waddr_magic[] = {
+                [V3D_QPU_WADDR_R0] = "r0",
+                [V3D_QPU_WADDR_R1] = "r1",
+                [V3D_QPU_WADDR_R2] = "r2",
+                [V3D_QPU_WADDR_R3] = "r3",
+                [V3D_QPU_WADDR_R4] = "r4",
+                [V3D_QPU_WADDR_R5] = "r5",
+                [V3D_QPU_WADDR_NOP] = "-",
+                [V3D_QPU_WADDR_TLB] = "tlb",
+                [V3D_QPU_WADDR_TLBU] = "tlbu",
+                [V3D_QPU_WADDR_TMU] = "tmu",
+                [V3D_QPU_WADDR_TMUL] = "tmul",
+                [V3D_QPU_WADDR_TMUD] = "tmud",
+                [V3D_QPU_WADDR_TMUA] = "tmua",
+                [V3D_QPU_WADDR_TMUAU] = "tmuau",
+                [V3D_QPU_WADDR_VPM] = "vpm",
+                [V3D_QPU_WADDR_VPMU] = "vpmu",
+                [V3D_QPU_WADDR_SYNC] = "sync",
+                [V3D_QPU_WADDR_SYNCU] = "syncu",
+                [V3D_QPU_WADDR_RECIP] = "recip",
+                [V3D_QPU_WADDR_RSQRT] = "rsqrt",
+                [V3D_QPU_WADDR_EXP] = "exp",
+                [V3D_QPU_WADDR_LOG] = "log",
+                [V3D_QPU_WADDR_SIN] = "sin",
+                [V3D_QPU_WADDR_RSQRT2] = "rsqrt2",
+        };
+        /* Reserved gaps and out-of-range values both yield NULL. */
+        return waddr < ARRAY_SIZE(waddr_magic) ? waddr_magic[waddr] : NULL;
+}
+
+const char *
+v3d_qpu_add_op_name(enum v3d_qpu_add_op op)
+{
+ static const char *op_names[] = {
+ [V3D_QPU_A_FADD] = "fadd",
+ [V3D_QPU_A_FADDNF] = "faddnf",
+ [V3D_QPU_A_VFPACK] = "vfpack",
+ [V3D_QPU_A_ADD] = "add",
+ [V3D_QPU_A_SUB] = "sub",
+ [V3D_QPU_A_FSUB] = "fsub",
+ [V3D_QPU_A_MIN] = "min",
+ [V3D_QPU_A_MAX] = "max",
+ [V3D_QPU_A_UMIN] = "umin",
+ [V3D_QPU_A_UMAX] = "umax",
+ [V3D_QPU_A_SHL] = "shl",
+ [V3D_QPU_A_SHR] = "shr",
+ [V3D_QPU_A_ASR] = "asr",
+ [V3D_QPU_A_ROR] = "ror",
+ [V3D_QPU_A_FMIN] = "fmin",
+ [V3D_QPU_A_FMAX] = "fmax",
+ [V3D_QPU_A_VFMIN] = "vfmin",
+ [V3D_QPU_A_AND] = "and",
+ [V3D_QPU_A_OR] = "or",
+ [V3D_QPU_A_XOR] = "xor",
+ [V3D_QPU_A_VADD] = "vadd",
+ [V3D_QPU_A_VSUB] = "vsub",
+ [V3D_QPU_A_NOT] = "not",
+ [V3D_QPU_A_NEG] = "neg",
+ [V3D_QPU_A_FLAPUSH] = "flapush",
+ [V3D_QPU_A_FLBPUSH] = "flbpush",
+ [V3D_QPU_A_FLBPOP] = "flbpop",
+ [V3D_QPU_A_SETMSF] = "setmsf",
+ [V3D_QPU_A_SETREVF] = "setrevf",
+ [V3D_QPU_A_NOP] = "nop",
+ [V3D_QPU_A_TIDX] = "tidx",
+ [V3D_QPU_A_EIDX] = "eidx",
+ [V3D_QPU_A_LR] = "lr",
+ [V3D_QPU_A_VFLA] = "vfla",
+ [V3D_QPU_A_VFLNA] = "vflna",
+ [V3D_QPU_A_VFLB] = "vflb",
+ [V3D_QPU_A_VFLNB] = "vflnb",
+ [V3D_QPU_A_FXCD] = "fxcd",
+ [V3D_QPU_A_XCD] = "xcd",
+ [V3D_QPU_A_FYCD] = "fycd",
+ [V3D_QPU_A_YCD] = "ycd",
+ [V3D_QPU_A_MSF] = "msf",
+ [V3D_QPU_A_REVF] = "revf",
+ [V3D_QPU_A_VDWWT] = "vdwwt",
+ [V3D_QPU_A_IID] = "iid",
+ [V3D_QPU_A_SAMPID] = "sampid",
+ [V3D_QPU_A_PATCHID] = "patchid",
+ [V3D_QPU_A_TMUWT] = "tmuwt",
+ [V3D_QPU_A_VPMSETUP] = "vpmsetup",
+ [V3D_QPU_A_VPMWT] = "vpmwt",
+ [V3D_QPU_A_LDVPMV] = "ldvpmv",
+ [V3D_QPU_A_LDVPMD] = "ldvpmd",
+ [V3D_QPU_A_LDVPMP] = "ldvpmp",
+ [V3D_QPU_A_LDVPMG] = "ldvpmg",
+ [V3D_QPU_A_FCMP] = "fcmp",
+ [V3D_QPU_A_VFMAX] = "vfmax",
+ [V3D_QPU_A_FROUND] = "fround",
+ [V3D_QPU_A_FTOIN] = "ftoin",
+ [V3D_QPU_A_FTRUNC] = "ftrunc",
+ [V3D_QPU_A_FTOIZ] = "ftoiz",
+ [V3D_QPU_A_FFLOOR] = "ffloor",
+ [V3D_QPU_A_FTOUZ] = "ftouz",
+ [V3D_QPU_A_FCEIL] = "fceil",
+ [V3D_QPU_A_FTOC] = "ftoc",
+ [V3D_QPU_A_FDX] = "fdx",
+ [V3D_QPU_A_FDY] = "fdy",
+ [V3D_QPU_A_STVPMV] = "stvpmv",
+ [V3D_QPU_A_STVPMD] = "stvpmd",
+ [V3D_QPU_A_STVPMP] = "stvpmp",
+ [V3D_QPU_A_ITOF] = "itof",
+ [V3D_QPU_A_CLZ] = "clz",
+ [V3D_QPU_A_UTOF] = "utof",
+ };
+
+ if (op >= ARRAY_SIZE(op_names))
+ return NULL;
+
+ return op_names[op];
+}
+
+const char *
+v3d_qpu_mul_op_name(enum v3d_qpu_mul_op op)
+{
+ static const char *op_names[] = {
+ [V3D_QPU_M_ADD] = "add",
+ [V3D_QPU_M_SUB] = "sub",
+ [V3D_QPU_M_UMUL24] = "umul24",
+ [V3D_QPU_M_VFMUL] = "vfmul",
+ [V3D_QPU_M_SMUL24] = "smul24",
+ [V3D_QPU_M_MULTOP] = "multop",
+ [V3D_QPU_M_FMOV] = "fmov",
+ [V3D_QPU_M_MOV] = "mov",
+ [V3D_QPU_M_NOP] = "nop",
+ [V3D_QPU_M_FMUL] = "fmul",
+ };
+
+ if (op >= ARRAY_SIZE(op_names))
+ return NULL;
+
+ return op_names[op];
+}
+
+const char *
+v3d_qpu_cond_name(enum v3d_qpu_cond cond)
+{
+ switch (cond) {
+ case V3D_QPU_COND_NONE:
+ return "";
+ case V3D_QPU_COND_IFA:
+ return ".ifa";
+ case V3D_QPU_COND_IFB:
+ return ".ifb";
+ case V3D_QPU_COND_IFNA:
+ return ".ifna";
+ case V3D_QPU_COND_IFNB:
+ return ".ifnb";
+ default:
+ unreachable("bad cond value");
+ }
+}
+
+const char *
+v3d_qpu_branch_cond_name(enum v3d_qpu_branch_cond cond)
+{
+ switch (cond) {
+ case V3D_QPU_BRANCH_COND_ALWAYS:
+ return "";
+ case V3D_QPU_BRANCH_COND_A0:
+ return ".a0";
+ case V3D_QPU_BRANCH_COND_NA0:
+ return ".na0";
+ case V3D_QPU_BRANCH_COND_ALLA:
+ return ".alla";
+ case V3D_QPU_BRANCH_COND_ANYNA:
+ return ".anyna";
+ case V3D_QPU_BRANCH_COND_ANYA:
+ return ".anya";
+ case V3D_QPU_BRANCH_COND_ALLNA:
+ return ".allna";
+ default:
+ unreachable("bad branch cond value");
+ }
+}
+
+const char *
+v3d_qpu_msfign_name(enum v3d_qpu_msfign msfign)
+{
+        switch (msfign) { /* short name of the msfign mode, "" for none */
+        case V3D_QPU_MSFIGN_NONE:
+                return "";
+        case V3D_QPU_MSFIGN_P:
+                return "p";
+        case V3D_QPU_MSFIGN_Q:
+                return "q";
+        default: /* anything outside enum v3d_qpu_msfign */
+                unreachable("bad msfign value");
+        }
+}
+
+const char *
+v3d_qpu_pf_name(enum v3d_qpu_pf pf)
+{
+ switch (pf) {
+ case V3D_QPU_PF_NONE:
+ return "";
+ case V3D_QPU_PF_PUSHZ:
+ return ".pushz";
+ case V3D_QPU_PF_PUSHN:
+ return ".pushn";
+ case V3D_QPU_PF_PUSHC:
+ return ".pushc";
+ default:
+ unreachable("bad pf value");
+ }
+}
+
+const char *
+v3d_qpu_uf_name(enum v3d_qpu_uf uf)
+{
+        switch (uf) { /* dotted suffix for the uf mode, "" for none */
+        case V3D_QPU_UF_NONE:
+                return "";
+        case V3D_QPU_UF_ANDZ:
+                return ".andz";
+        case V3D_QPU_UF_ANDNZ:
+                return ".andnz";
+        case V3D_QPU_UF_NORZ:
+                return ".norz";
+        case V3D_QPU_UF_NORNZ:
+                return ".nornz";
+        case V3D_QPU_UF_ANDN:
+                return ".andn";
+        case V3D_QPU_UF_ANDNN:
+                return ".andnn";
+        case V3D_QPU_UF_NORN:
+                return ".norn";
+        case V3D_QPU_UF_NORNN:
+                return ".nornn";
+        case V3D_QPU_UF_ANDC:
+                return ".andc";
+        case V3D_QPU_UF_ANDNC:
+                return ".andnc";
+        case V3D_QPU_UF_NORC:
+                return ".norc";
+        case V3D_QPU_UF_NORNC:
+                return ".nornc";
+        default: /* anything outside enum v3d_qpu_uf */
+                unreachable("bad uf value");
+        }
+}
+
+const char *
+v3d_qpu_pack_name(enum v3d_qpu_output_pack pack)
+{
+ switch (pack) {
+ case V3D_QPU_PACK_NONE:
+ return "";
+ case V3D_QPU_PACK_L:
+ return ".l";
+ case V3D_QPU_PACK_H:
+ return ".h";
+ default:
+ unreachable("bad pack value");
+ }
+}
+
+const char *
+v3d_qpu_unpack_name(enum v3d_qpu_input_unpack unpack)
+{
+ switch (unpack) {
+ case V3D_QPU_UNPACK_NONE:
+ return "";
+ case V3D_QPU_UNPACK_L:
+ return ".l";
+ case V3D_QPU_UNPACK_H:
+ return ".h";
+ case V3D_QPU_UNPACK_ABS:
+ return ".abs";
+ case V3D_QPU_UNPACK_REPLICATE_32F_16:
+ return ".ff";
+ case V3D_QPU_UNPACK_REPLICATE_L_16:
+ return ".ll";
+ case V3D_QPU_UNPACK_REPLICATE_H_16:
+ return ".hh";
+ case V3D_QPU_UNPACK_SWAP_16:
+ return ".swp";
+ default:
+ unreachable("bad unpack value");
+ }
+}
+
+/* Operand flags: D = has a destination, A / B = reads source a / b. */
+#define D 1
+#define A 2
+#define B 4
+/* Operand flags of each add-ALU opcode, indexed by enum v3d_qpu_add_op. */
+static const uint8_t add_op_args[] = {
+        [V3D_QPU_A_FADD] = D | A | B,
+        [V3D_QPU_A_FADDNF] = D | A | B,
+        [V3D_QPU_A_VFPACK] = D | A | B,
+        [V3D_QPU_A_ADD] = D | A | B,
+        [V3D_QPU_A_SUB] = D | A | B,
+        [V3D_QPU_A_FSUB] = D | A | B,
+        [V3D_QPU_A_MIN] = D | A | B,
+        [V3D_QPU_A_MAX] = D | A | B,
+        [V3D_QPU_A_UMIN] = D | A | B,
+        [V3D_QPU_A_UMAX] = D | A | B,
+        [V3D_QPU_A_SHL] = D | A | B,
+        [V3D_QPU_A_SHR] = D | A | B,
+        [V3D_QPU_A_ASR] = D | A | B,
+        [V3D_QPU_A_ROR] = D | A | B,
+        [V3D_QPU_A_FMIN] = D | A | B,
+        [V3D_QPU_A_FMAX] = D | A | B,
+        [V3D_QPU_A_VFMIN] = D | A | B,
+
+        [V3D_QPU_A_AND] = D | A | B,
+        [V3D_QPU_A_OR] = D | A | B,
+        [V3D_QPU_A_XOR] = D | A | B,
+
+        [V3D_QPU_A_VADD] = D | A | B,
+        [V3D_QPU_A_VSUB] = D | A | B,
+        [V3D_QPU_A_NOT] = D | A,
+        [V3D_QPU_A_NEG] = D | A,
+        [V3D_QPU_A_FLAPUSH] = D | A,
+        [V3D_QPU_A_FLBPUSH] = D | A,
+        [V3D_QPU_A_FLBPOP] = D | A,
+        [V3D_QPU_A_SETMSF] = D | A,
+        [V3D_QPU_A_SETREVF] = D | A,
+        [V3D_QPU_A_NOP] = 0,
+        [V3D_QPU_A_TIDX] = D,
+        [V3D_QPU_A_EIDX] = D,
+        [V3D_QPU_A_LR] = D,
+        [V3D_QPU_A_VFLA] = D,
+        [V3D_QPU_A_VFLNA] = D,
+        [V3D_QPU_A_VFLB] = D,
+        [V3D_QPU_A_VFLNB] = D,
+
+        [V3D_QPU_A_FXCD] = D,
+        [V3D_QPU_A_XCD] = D,
+        [V3D_QPU_A_FYCD] = D,
+        [V3D_QPU_A_YCD] = D,
+
+        [V3D_QPU_A_MSF] = D,
+        [V3D_QPU_A_REVF] = D,
+        [V3D_QPU_A_VDWWT] = D,
+        [V3D_QPU_A_IID] = D,
+        [V3D_QPU_A_SAMPID] = D,
+        [V3D_QPU_A_PATCHID] = D,
+        [V3D_QPU_A_TMUWT] = D,
+        [V3D_QPU_A_VPMWT] = D,
+
+        [V3D_QPU_A_VPMSETUP] = D | A,
+
+        [V3D_QPU_A_LDVPMV] = D | A,
+        [V3D_QPU_A_LDVPMD] = D | A,
+        [V3D_QPU_A_LDVPMP] = D | A,
+        [V3D_QPU_A_LDVPMG] = D | A | B,
+
+        /* FIXME: MOVABSNEG */
+
+        [V3D_QPU_A_FCMP] = D | A | B,
+        [V3D_QPU_A_VFMAX] = D | A | B,
+
+        [V3D_QPU_A_FROUND] = D | A,
+        [V3D_QPU_A_FTOIN] = D | A,
+        [V3D_QPU_A_FTRUNC] = D | A,
+        [V3D_QPU_A_FTOIZ] = D | A,
+        [V3D_QPU_A_FFLOOR] = D | A,
+        [V3D_QPU_A_FTOUZ] = D | A,
+        [V3D_QPU_A_FCEIL] = D | A,
+        [V3D_QPU_A_FTOC] = D | A,
+
+        [V3D_QPU_A_FDX] = D | A,
+        [V3D_QPU_A_FDY] = D | A,
+
+        [V3D_QPU_A_STVPMV] = A | B,
+        [V3D_QPU_A_STVPMD] = A | B,
+        [V3D_QPU_A_STVPMP] = A | B,
+
+        [V3D_QPU_A_ITOF] = D | A,
+        [V3D_QPU_A_CLZ] = D | A,
+        [V3D_QPU_A_UTOF] = D | A,
+};
+
+static const uint8_t mul_op_args[] = {
+ [V3D_QPU_M_ADD] = D | A | B,
+ [V3D_QPU_M_SUB] = D | A | B,
+ [V3D_QPU_M_UMUL24] = D | A | B,
+ [V3D_QPU_M_VFMUL] = D | A | B,
+ [V3D_QPU_M_SMUL24] = D | A | B,
+ [V3D_QPU_M_MULTOP] = D | A | B,
+ [V3D_QPU_M_FMOV] = D | A,
+ [V3D_QPU_M_NOP] = 0,
+ [V3D_QPU_M_MOV] = D | A,
+ [V3D_QPU_M_FMUL] = D | A | B,
+};
+
+bool
+v3d_qpu_add_op_has_dst(enum v3d_qpu_add_op op)
+{
+ assert(op < ARRAY_SIZE(add_op_args));
+
+ return add_op_args[op] & D;
+}
+
+bool
+v3d_qpu_mul_op_has_dst(enum v3d_qpu_mul_op op)
+{
+ assert(op < ARRAY_SIZE(mul_op_args));
+
+ return mul_op_args[op] & D;
+}
+
+int
+v3d_qpu_add_op_num_src(enum v3d_qpu_add_op op)
+{
+        assert(op < ARRAY_SIZE(add_op_args));
+
+        /* B set means two sources; otherwise A set means one; else none. */
+        const uint8_t operands = add_op_args[op];
+        if (operands & B)
+                return 2;
+        if (operands & A)
+                return 1;
+        return 0;
+}
+
+int
+v3d_qpu_mul_op_num_src(enum v3d_qpu_mul_op op)
+{
+        assert(op < ARRAY_SIZE(mul_op_args));
+
+        /* B set means two sources; otherwise A set means one; else none. */
+        const uint8_t operands = mul_op_args[op];
+        if (operands & B)
+                return 2;
+        if (operands & A)
+                return 1;
+        return 0;
+}
+
+bool
+v3d_qpu_magic_waddr_is_sfu(enum v3d_qpu_waddr waddr)
+{
+ switch (waddr) {
+ case V3D_QPU_WADDR_RECIP:
+ case V3D_QPU_WADDR_RSQRT:
+ case V3D_QPU_WADDR_EXP:
+ case V3D_QPU_WADDR_LOG:
+ case V3D_QPU_WADDR_SIN:
+ case V3D_QPU_WADDR_RSQRT2:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool
+v3d_qpu_magic_waddr_is_tmu(enum v3d_qpu_waddr waddr)
+{
+ switch (waddr) {
+ case V3D_QPU_WADDR_TMU:
+ case V3D_QPU_WADDR_TMUL:
+ case V3D_QPU_WADDR_TMUD:
+ case V3D_QPU_WADDR_TMUA:
+ case V3D_QPU_WADDR_TMUAU:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool
+v3d_qpu_magic_waddr_is_tlb(enum v3d_qpu_waddr waddr)
+{
+ return (waddr == V3D_QPU_WADDR_TLB ||
+ waddr == V3D_QPU_WADDR_TLBU);
+}
+
+bool
+v3d_qpu_magic_waddr_is_vpm(enum v3d_qpu_waddr waddr)
+{
+ return (waddr == V3D_QPU_WADDR_VPM ||
+ waddr == V3D_QPU_WADDR_VPMU);
+}
+
+bool
+v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr)
+{
+ return (waddr == V3D_QPU_WADDR_SYNC ||
+ waddr == V3D_QPU_WADDR_SYNCU);
+}
+
+bool
+v3d_qpu_writes_r3(const struct v3d_qpu_instr *inst)
+{
+ return inst->sig.ldvary || inst->sig.ldvpm;
+}
+
+bool
+v3d_qpu_writes_r4(const struct v3d_qpu_instr *inst)
+{
+        /* True for an ldtmu signal, or for an ALU instruction whose add or
+         * mul half does a magic write to an SFU address.
+         */
+        if (inst->sig.ldtmu)
+                return true;
+
+        if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
+                return false;
+
+        const bool add_hits_sfu = inst->alu.add.magic_write &&
+                v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr);
+        if (add_hits_sfu)
+                return true;
+
+        return inst->alu.mul.magic_write &&
+                v3d_qpu_magic_waddr_is_sfu(inst->alu.mul.waddr);
+}
+
+bool
+v3d_qpu_writes_r5(const struct v3d_qpu_instr *inst)
+{
+ return inst->sig.ldvary || inst->sig.ldunif;
+}
+
+bool
+v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux)
+{
+ int add_nsrc = v3d_qpu_add_op_num_src(inst->alu.add.op);
+ int mul_nsrc = v3d_qpu_mul_op_num_src(inst->alu.mul.op);
+ /* NOTE(review): reads inst->alu without checking inst->type; confirm callers only pass ALU instrs. */
+ return ((add_nsrc > 0 && inst->alu.add.a == mux) ||
+ (add_nsrc > 1 && inst->alu.add.b == mux) ||
+ (mul_nsrc > 0 && inst->alu.mul.a == mux) ||
+ (mul_nsrc > 1 && inst->alu.mul.b == mux));
+}
diff -Nru mesa-17.2.4/src/broadcom/qpu/qpu_instr.h mesa-17.3.3/src/broadcom/qpu/qpu_instr.h
--- mesa-17.2.4/src/broadcom/qpu/qpu_instr.h 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/broadcom/qpu/qpu_instr.h 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,411 @@
+/*
+ * Copyright © 2016 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * @file qpu_instr.h
+ *
+ * Definitions of the unpacked form of QPU instructions. Assembly and
+ * disassembly will use this for talking about instructions, with qpu_pack.c
+ * handling the pack and unpack of the actual 64-bit QPU instruction (see
+ * v3d_qpu_instr_pack() and v3d_qpu_instr_unpack() below).
+ */
+
+#ifndef QPU_INSTR_H
+#define QPU_INSTR_H
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "util/macros.h"
+
+struct v3d_device_info;
+
+struct v3d_qpu_sig {
+ bool thrsw:1;
+ bool ldunif:1;
+ bool ldtmu:1;
+ bool ldvary:1;
+ bool ldvpm:1;
+ bool ldtlb:1;
+ bool ldtlbu:1;
+ bool small_imm:1;
+ bool ucb:1;
+ bool rotate:1;
+ bool wrtmuc:1;
+};
+
+enum v3d_qpu_cond {
+ V3D_QPU_COND_NONE,
+ V3D_QPU_COND_IFA,
+ V3D_QPU_COND_IFB,
+ V3D_QPU_COND_IFNA,
+ V3D_QPU_COND_IFNB,
+};
+
+enum v3d_qpu_pf {
+ V3D_QPU_PF_NONE,
+ V3D_QPU_PF_PUSHZ,
+ V3D_QPU_PF_PUSHN,
+ V3D_QPU_PF_PUSHC,
+};
+
+enum v3d_qpu_uf {
+ V3D_QPU_UF_NONE,
+ V3D_QPU_UF_ANDZ,
+ V3D_QPU_UF_ANDNZ,
+ V3D_QPU_UF_NORNZ,
+ V3D_QPU_UF_NORZ,
+ V3D_QPU_UF_ANDN,
+ V3D_QPU_UF_ANDNN,
+ V3D_QPU_UF_NORNN,
+ V3D_QPU_UF_NORN,
+ V3D_QPU_UF_ANDC,
+ V3D_QPU_UF_ANDNC,
+ V3D_QPU_UF_NORNC,
+ V3D_QPU_UF_NORC,
+};
+
+enum v3d_qpu_waddr {
+ V3D_QPU_WADDR_R0 = 0,
+ V3D_QPU_WADDR_R1 = 1,
+ V3D_QPU_WADDR_R2 = 2,
+ V3D_QPU_WADDR_R3 = 3,
+ V3D_QPU_WADDR_R4 = 4,
+ V3D_QPU_WADDR_R5 = 5,
+ /* 6 is reserved, but note 3.2.2.8: "Result Writes" */
+ V3D_QPU_WADDR_NOP = 6,
+ V3D_QPU_WADDR_TLB = 7,
+ V3D_QPU_WADDR_TLBU = 8,
+ V3D_QPU_WADDR_TMU = 9,
+ V3D_QPU_WADDR_TMUL = 10,
+ V3D_QPU_WADDR_TMUD = 11,
+ V3D_QPU_WADDR_TMUA = 12,
+ V3D_QPU_WADDR_TMUAU = 13,
+ V3D_QPU_WADDR_VPM = 14,
+ V3D_QPU_WADDR_VPMU = 15,
+ V3D_QPU_WADDR_SYNC = 16,
+ V3D_QPU_WADDR_SYNCU = 17,
+ /* reserved */
+ V3D_QPU_WADDR_RECIP = 19,
+ V3D_QPU_WADDR_RSQRT = 20,
+ V3D_QPU_WADDR_EXP = 21,
+ V3D_QPU_WADDR_LOG = 22,
+ V3D_QPU_WADDR_SIN = 23,
+ V3D_QPU_WADDR_RSQRT2 = 24,
+};
+
+struct v3d_qpu_flags {
+ enum v3d_qpu_cond ac, mc;
+ enum v3d_qpu_pf apf, mpf;
+ enum v3d_qpu_uf auf, muf;
+};
+
+enum v3d_qpu_add_op {
+ V3D_QPU_A_FADD,
+ V3D_QPU_A_FADDNF,
+ V3D_QPU_A_VFPACK,
+ V3D_QPU_A_ADD,
+ V3D_QPU_A_SUB,
+ V3D_QPU_A_FSUB,
+ V3D_QPU_A_MIN,
+ V3D_QPU_A_MAX,
+ V3D_QPU_A_UMIN,
+ V3D_QPU_A_UMAX,
+ V3D_QPU_A_SHL,
+ V3D_QPU_A_SHR,
+ V3D_QPU_A_ASR,
+ V3D_QPU_A_ROR,
+ V3D_QPU_A_FMIN,
+ V3D_QPU_A_FMAX,
+ V3D_QPU_A_VFMIN,
+ V3D_QPU_A_AND,
+ V3D_QPU_A_OR,
+ V3D_QPU_A_XOR,
+ V3D_QPU_A_VADD,
+ V3D_QPU_A_VSUB,
+ V3D_QPU_A_NOT,
+ V3D_QPU_A_NEG,
+ V3D_QPU_A_FLAPUSH,
+ V3D_QPU_A_FLBPUSH,
+ V3D_QPU_A_FLBPOP,
+ V3D_QPU_A_SETMSF,
+ V3D_QPU_A_SETREVF,
+ V3D_QPU_A_NOP,
+ V3D_QPU_A_TIDX,
+ V3D_QPU_A_EIDX,
+ V3D_QPU_A_LR,
+ V3D_QPU_A_VFLA,
+ V3D_QPU_A_VFLNA,
+ V3D_QPU_A_VFLB,
+ V3D_QPU_A_VFLNB,
+ V3D_QPU_A_FXCD,
+ V3D_QPU_A_XCD,
+ V3D_QPU_A_FYCD,
+ V3D_QPU_A_YCD,
+ V3D_QPU_A_MSF,
+ V3D_QPU_A_REVF,
+ V3D_QPU_A_VDWWT,
+ V3D_QPU_A_IID,
+ V3D_QPU_A_SAMPID,
+ V3D_QPU_A_PATCHID,
+ V3D_QPU_A_TMUWT,
+ V3D_QPU_A_VPMSETUP,
+ V3D_QPU_A_VPMWT,
+ V3D_QPU_A_LDVPMV,
+ V3D_QPU_A_LDVPMD,
+ V3D_QPU_A_LDVPMP,
+ V3D_QPU_A_LDVPMG,
+ V3D_QPU_A_FCMP,
+ V3D_QPU_A_VFMAX,
+ V3D_QPU_A_FROUND,
+ V3D_QPU_A_FTOIN,
+ V3D_QPU_A_FTRUNC,
+ V3D_QPU_A_FTOIZ,
+ V3D_QPU_A_FFLOOR,
+ V3D_QPU_A_FTOUZ,
+ V3D_QPU_A_FCEIL,
+ V3D_QPU_A_FTOC,
+ V3D_QPU_A_FDX,
+ V3D_QPU_A_FDY,
+ V3D_QPU_A_STVPMV,
+ V3D_QPU_A_STVPMD,
+ V3D_QPU_A_STVPMP,
+ V3D_QPU_A_ITOF,
+ V3D_QPU_A_CLZ,
+ V3D_QPU_A_UTOF,
+};
+
+enum v3d_qpu_mul_op {
+ V3D_QPU_M_ADD,
+ V3D_QPU_M_SUB,
+ V3D_QPU_M_UMUL24,
+ V3D_QPU_M_VFMUL,
+ V3D_QPU_M_SMUL24,
+ V3D_QPU_M_MULTOP,
+ V3D_QPU_M_FMOV,
+ V3D_QPU_M_MOV,
+ V3D_QPU_M_NOP,
+ V3D_QPU_M_FMUL,
+};
+
+enum v3d_qpu_output_pack {
+ V3D_QPU_PACK_NONE,
+ /**
+ * Convert to 16-bit float, put in low 16 bits of destination leaving
+ * high unmodified.
+ */
+ V3D_QPU_PACK_L,
+ /**
+ * Convert to 16-bit float, put in high 16 bits of destination leaving
+ * low unmodified.
+ */
+ V3D_QPU_PACK_H,
+};
+
+enum v3d_qpu_input_unpack {
+ /**
+ * No-op input unpacking. Note that this enum's value doesn't match
+ * the packed QPU instruction value of the field (we use 0 so that the
+ * default on new instruction creation is no-op).
+ */
+ V3D_QPU_UNPACK_NONE,
+ /** Absolute value. Only available for some operations. */
+ V3D_QPU_UNPACK_ABS,
+ /** Convert low 16 bits from 16-bit float to 32-bit float. */
+ V3D_QPU_UNPACK_L,
+ /** Convert high 16 bits from 16-bit float to 32-bit float. */
+ V3D_QPU_UNPACK_H,
+
+ /** Convert to 16f and replicate it to the high bits. */
+ V3D_QPU_UNPACK_REPLICATE_32F_16,
+
+ /** Replicate low 16 bits to high */
+ V3D_QPU_UNPACK_REPLICATE_L_16,
+
+ /** Replicate high 16 bits to low */
+ V3D_QPU_UNPACK_REPLICATE_H_16,
+
+ /** Swap high and low 16 bits */
+ V3D_QPU_UNPACK_SWAP_16,
+};
+
+enum v3d_qpu_mux {
+ V3D_QPU_MUX_R0,
+ V3D_QPU_MUX_R1,
+ V3D_QPU_MUX_R2,
+ V3D_QPU_MUX_R3,
+ V3D_QPU_MUX_R4,
+ V3D_QPU_MUX_R5,
+ V3D_QPU_MUX_A,
+ V3D_QPU_MUX_B,
+};
+
+struct v3d_qpu_alu_instr {
+ struct {
+ enum v3d_qpu_add_op op;
+ enum v3d_qpu_mux a, b;
+ uint8_t waddr;
+ bool magic_write;
+ enum v3d_qpu_output_pack output_pack;
+ enum v3d_qpu_input_unpack a_unpack;
+ enum v3d_qpu_input_unpack b_unpack;
+ } add;
+
+ struct {
+ enum v3d_qpu_mul_op op;
+ enum v3d_qpu_mux a, b;
+ uint8_t waddr;
+ bool magic_write;
+ enum v3d_qpu_output_pack output_pack;
+ enum v3d_qpu_input_unpack a_unpack;
+ enum v3d_qpu_input_unpack b_unpack;
+ } mul;
+};
+
+enum v3d_qpu_branch_cond {
+ V3D_QPU_BRANCH_COND_ALWAYS,
+ V3D_QPU_BRANCH_COND_A0,
+ V3D_QPU_BRANCH_COND_NA0,
+ V3D_QPU_BRANCH_COND_ALLA,
+ V3D_QPU_BRANCH_COND_ANYNA,
+ V3D_QPU_BRANCH_COND_ANYA,
+ V3D_QPU_BRANCH_COND_ALLNA,
+};
+
+enum v3d_qpu_msfign {
+ /** Ignore multisample flags when determining branch condition. */
+ V3D_QPU_MSFIGN_NONE,
+ /**
+ * If no multisample flags are set in the lane (a pixel in the FS, a
+ * vertex in the VS), ignore the lane's condition when computing the
+ * branch condition.
+ */
+ V3D_QPU_MSFIGN_P,
+ /**
+ * If no multisample flags are set in a 2x2 quad in the FS, ignore the
+ * quad's a/b conditions.
+ */
+ V3D_QPU_MSFIGN_Q,
+};
+
+enum v3d_qpu_branch_dest {
+ V3D_QPU_BRANCH_DEST_ABS,
+ V3D_QPU_BRANCH_DEST_REL,
+ V3D_QPU_BRANCH_DEST_LINK_REG,
+ V3D_QPU_BRANCH_DEST_REGFILE,
+};
+
+struct v3d_qpu_branch_instr {
+ enum v3d_qpu_branch_cond cond;
+ enum v3d_qpu_msfign msfign;
+
+ /** Selects how to compute the new IP if the branch is taken. */
+ enum v3d_qpu_branch_dest bdi;
+
+ /**
+ * Selects how to compute the new uniforms pointer if the branch is
+ * taken. (ABS/REL implicitly load a uniform and use that)
+ */
+ enum v3d_qpu_branch_dest bdu;
+
+ /**
+ * If set, then bdu determines how the uniform stream will branch,
+ * otherwise the uniform stream is left as is.
+ */
+ bool ub;
+
+ uint8_t raddr_a;
+
+ uint32_t offset;
+};
+
+enum v3d_qpu_instr_type {
+ V3D_QPU_INSTR_TYPE_ALU,
+ V3D_QPU_INSTR_TYPE_BRANCH,
+};
+
+struct v3d_qpu_instr {
+ enum v3d_qpu_instr_type type;
+
+ struct v3d_qpu_sig sig;
+ uint8_t raddr_a;
+ uint8_t raddr_b;
+ struct v3d_qpu_flags flags;
+
+ union {
+ struct v3d_qpu_alu_instr alu;
+ struct v3d_qpu_branch_instr branch;
+ };
+};
+
+const char *v3d_qpu_magic_waddr_name(enum v3d_qpu_waddr waddr);
+const char *v3d_qpu_add_op_name(enum v3d_qpu_add_op op);
+const char *v3d_qpu_mul_op_name(enum v3d_qpu_mul_op op);
+const char *v3d_qpu_cond_name(enum v3d_qpu_cond cond);
+const char *v3d_qpu_pf_name(enum v3d_qpu_pf pf);
+const char *v3d_qpu_uf_name(enum v3d_qpu_uf uf);
+const char *v3d_qpu_pack_name(enum v3d_qpu_output_pack pack);
+const char *v3d_qpu_unpack_name(enum v3d_qpu_input_unpack unpack);
+const char *v3d_qpu_branch_cond_name(enum v3d_qpu_branch_cond cond);
+const char *v3d_qpu_msfign_name(enum v3d_qpu_msfign msfign);
+
+bool v3d_qpu_add_op_has_dst(enum v3d_qpu_add_op op);
+bool v3d_qpu_mul_op_has_dst(enum v3d_qpu_mul_op op);
+int v3d_qpu_add_op_num_src(enum v3d_qpu_add_op op);
+int v3d_qpu_mul_op_num_src(enum v3d_qpu_mul_op op);
+
+bool v3d_qpu_sig_pack(const struct v3d_device_info *devinfo,
+ const struct v3d_qpu_sig *sig,
+ uint32_t *packed_sig);
+bool v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo,
+ uint32_t packed_sig,
+ struct v3d_qpu_sig *sig);
+
+bool
+v3d_qpu_flags_pack(const struct v3d_device_info *devinfo,
+ const struct v3d_qpu_flags *cond,
+ uint32_t *packed_cond);
+bool
+v3d_qpu_flags_unpack(const struct v3d_device_info *devinfo,
+ uint32_t packed_cond,
+ struct v3d_qpu_flags *cond);
+
+bool
+v3d_qpu_instr_pack(const struct v3d_device_info *devinfo,
+ const struct v3d_qpu_instr *instr,
+ uint64_t *packed_instr);
+bool
+v3d_qpu_instr_unpack(const struct v3d_device_info *devinfo,
+ uint64_t packed_instr,
+ struct v3d_qpu_instr *instr);
+
+bool v3d_qpu_magic_waddr_is_sfu(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
+bool v3d_qpu_magic_waddr_is_tmu(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
+bool v3d_qpu_magic_waddr_is_tlb(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
+bool v3d_qpu_magic_waddr_is_vpm(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
+bool v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
+/* Pure, not const: these dereference the instruction pointer. */
+bool v3d_qpu_writes_r3(const struct v3d_qpu_instr *instr) ATTRIBUTE_PURE;
+bool v3d_qpu_writes_r4(const struct v3d_qpu_instr *instr) ATTRIBUTE_PURE;
+bool v3d_qpu_writes_r5(const struct v3d_qpu_instr *instr) ATTRIBUTE_PURE;
+bool v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux);
+#endif
diff -Nru mesa-17.2.4/src/broadcom/qpu/qpu_pack.c mesa-17.3.3/src/broadcom/qpu/qpu_pack.c
--- mesa-17.2.4/src/broadcom/qpu/qpu_pack.c 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/broadcom/qpu/qpu_pack.c 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,1206 @@
+/*
+ * Copyright © 2016 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <string.h>
+#include "util/macros.h"
+
+#include "broadcom/common/v3d_device_info.h"
+#include "qpu_instr.h"
+
+#ifndef QPU_MASK
+#define QPU_MASK(high, low) ((((uint64_t)1<<((high)-(low)+1))-1)<<(low))
+/* QPU_SET_FIELD shifts value to field##_SHIFT, asserts it fits inside field##_MASK and yields the masked bits. Uses the GNU statement expression extension. */
+#define QPU_SET_FIELD(value, field) \
+ ({ \
+ uint64_t fieldval = (uint64_t)(value) << field ## _SHIFT; \
+ assert((fieldval & ~ field ## _MASK) == 0); \
+ fieldval & field ## _MASK; \
+ })
+/* QPU_GET_FIELD extracts a field from a word; the result is truncated to 32 bits. */
+#define QPU_GET_FIELD(word, field) ((uint32_t)(((word) & field ## _MASK) >> field ## _SHIFT))
+/* QPU_UPDATE_FIELD clears the field in inst and inserts value (same range assert as QPU_SET_FIELD). */
+#define QPU_UPDATE_FIELD(inst, value, field) \
+ (((inst) & ~(field ## _MASK)) | QPU_SET_FIELD(value, field))
+#endif /* QPU_MASK */
+/* Bit layout of the 64-bit instruction word, listed from the top bit down. Each field has a _SHIFT and a _MASK so it can be used with QPU_SET_FIELD/QPU_GET_FIELD. Branch fields reuse bit ranges of the ALU fields. */
+#define VC5_QPU_OP_MUL_SHIFT 58
+#define VC5_QPU_OP_MUL_MASK QPU_MASK(63, 58)
+/* Bits 57:53: signal field. The *_BIT values below are not referenced elsewhere in this file. */
+#define VC5_QPU_SIG_SHIFT 53
+#define VC5_QPU_SIG_MASK QPU_MASK(57, 53)
+# define VC5_QPU_SIG_THRSW_BIT 0x1
+# define VC5_QPU_SIG_LDUNIF_BIT 0x2
+# define VC5_QPU_SIG_LDTMU_BIT 0x4
+# define VC5_QPU_SIG_LDVARY_BIT 0x8
+/* Bits 52:46: condition/flags field, see v3d_qpu_flags_pack()/v3d_qpu_flags_unpack(). */
+#define VC5_QPU_COND_SHIFT 46
+#define VC5_QPU_COND_MASK QPU_MASK(52, 46)
+/* Raw 2-bit condition codes; not referenced elsewhere in this file. */
+#define VC5_QPU_COND_IFA 0
+#define VC5_QPU_COND_IFB 1
+#define VC5_QPU_COND_IFNA 2
+#define VC5_QPU_COND_IFNB 3
+/* Single-bit flags (mask only, no _SHIFT): magic write for the mul and add destinations. */
+#define VC5_QPU_MM QPU_MASK(45, 45)
+#define VC5_QPU_MA QPU_MASK(44, 44)
+/* Bits 43:38: mul write address. */
+#define V3D_QPU_WADDR_M_SHIFT 38
+#define V3D_QPU_WADDR_M_MASK QPU_MASK(43, 38)
+/* Branch only: bits 55:35 hold the low part of the branch offset (offset bits 23:3). */
+#define VC5_QPU_BRANCH_ADDR_LOW_SHIFT 35
+#define VC5_QPU_BRANCH_ADDR_LOW_MASK QPU_MASK(55, 35)
+/* Bits 37:32: add write address. */
+#define V3D_QPU_WADDR_A_SHIFT 32
+#define V3D_QPU_WADDR_A_MASK QPU_MASK(37, 32)
+/* Branch only: bits 34:32 hold the branch condition. */
+#define VC5_QPU_BRANCH_COND_SHIFT 32
+#define VC5_QPU_BRANCH_COND_MASK QPU_MASK(34, 32)
+/* Branch only: bits 31:24 hold the high part of the branch offset (offset bits 31:24). */
+#define VC5_QPU_BRANCH_ADDR_HIGH_SHIFT 24
+#define VC5_QPU_BRANCH_ADDR_HIGH_MASK QPU_MASK(31, 24)
+/* Bits 31:24: add opcode. */
+#define VC5_QPU_OP_ADD_SHIFT 24
+#define VC5_QPU_OP_ADD_MASK QPU_MASK(31, 24)
+/* Bits 23:21: mul mux b. */
+#define VC5_QPU_MUL_B_SHIFT 21
+#define VC5_QPU_MUL_B_MASK QPU_MASK(23, 21)
+/* Branch only: bits 22:21 hold msfign. */
+#define VC5_QPU_BRANCH_MSFIGN_SHIFT 21
+#define VC5_QPU_BRANCH_MSFIGN_MASK QPU_MASK(22, 21)
+/* Bits 20:18: mul mux a. */
+#define VC5_QPU_MUL_A_SHIFT 18
+#define VC5_QPU_MUL_A_MASK QPU_MASK(20, 18)
+/* Bits 17:15: add mux b. */
+#define VC5_QPU_ADD_B_SHIFT 15
+#define VC5_QPU_ADD_B_MASK QPU_MASK(17, 15)
+/* Branch only: bits 17:15 hold bdu. */
+#define VC5_QPU_BRANCH_BDU_SHIFT 15
+#define VC5_QPU_BRANCH_BDU_MASK QPU_MASK(17, 15)
+/* Branch only: single-bit ub flag (mask only). */
+#define VC5_QPU_BRANCH_UB QPU_MASK(14, 14)
+/* Bits 14:12: add mux a. */
+#define VC5_QPU_ADD_A_SHIFT 12
+#define VC5_QPU_ADD_A_MASK QPU_MASK(14, 12)
+/* Branch only: bits 13:12 hold bdi. */
+#define VC5_QPU_BRANCH_BDI_SHIFT 12
+#define VC5_QPU_BRANCH_BDI_MASK QPU_MASK(13, 12)
+/* Bits 11:6: raddr a (also read for branches). */
+#define VC5_QPU_RADDR_A_SHIFT 6
+#define VC5_QPU_RADDR_A_MASK QPU_MASK(11, 6)
+/* Bits 5:0: raddr b. */
+#define VC5_QPU_RADDR_B_SHIFT 0
+#define VC5_QPU_RADDR_B_MASK QPU_MASK(5, 0)
+/* Shorthand designated initializers for struct v3d_qpu_sig members, used only to keep the table below compact. */
+#define THRSW .thrsw = true
+#define LDUNIF .ldunif = true
+#define LDTMU .ldtmu = true
+#define LDVARY .ldvary = true
+#define LDVPM .ldvpm = true
+#define SMIMM .small_imm = true
+#define LDTLB .ldtlb = true
+#define LDTLBU .ldtlbu = true
+#define UCB .ucb = true
+#define ROT .rotate = true
+#define WRTMUC .wrtmuc = true
+/* Indexed by the packed signal field value. Slots without an initializer (18-21) are zero-filled and treated as reserved by v3d_qpu_sig_unpack(). Entries 15 and 31 are identical, so v3d_qpu_sig_pack() always yields 15 for them. */
+static const struct v3d_qpu_sig v33_sig_map[] = {
+ /* MISC R3 R4 R5 */
+ [0] = { },
+ [1] = { THRSW, },
+ [2] = { LDUNIF },
+ [3] = { THRSW, LDUNIF },
+ [4] = { LDTMU, },
+ [5] = { THRSW, LDTMU, },
+ [6] = { LDTMU, LDUNIF },
+ [7] = { THRSW, LDTMU, LDUNIF },
+ [8] = { LDVARY, },
+ [9] = { THRSW, LDVARY, },
+ [10] = { LDVARY, LDUNIF },
+ [11] = { THRSW, LDVARY, LDUNIF },
+ [12] = { LDVARY, LDTMU, },
+ [13] = { THRSW, LDVARY, LDTMU, },
+ [14] = { SMIMM, LDVARY, },
+ [15] = { SMIMM, },
+ [16] = { LDTLB, },
+ [17] = { LDTLBU, },
+ /* 18-21 reserved */
+ [22] = { UCB, },
+ [23] = { ROT, },
+ [24] = { LDVPM, },
+ [25] = { THRSW, LDVPM, },
+ [26] = { LDVPM, LDUNIF },
+ [27] = { THRSW, LDVPM, LDUNIF },
+ [28] = { LDVPM, LDTMU, },
+ [29] = { THRSW, LDVPM, LDTMU, },
+ [30] = { SMIMM, LDVPM, },
+ [31] = { SMIMM, },
+};
+/* Decode a packed signal field into *sig. Returns false when packed_sig is past the table or names a reserved (all-zero, non-index-0) slot; *sig is still written in the reserved case. devinfo is unused. */
+bool
+v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo,
+ uint32_t packed_sig,
+ struct v3d_qpu_sig *sig)
+{
+ if (packed_sig >= ARRAY_SIZE(v33_sig_map))
+ return false;
+
+ *sig = v33_sig_map[packed_sig];
+ /* Compare the whole struct: the "!= 0" belongs to memcmp's result, not to its length argument. */
+ /* Signals with zeroed unpacked contents after element 0 are reserved. */
+ return (packed_sig == 0 ||
+ memcmp(sig, &v33_sig_map[0], sizeof(*sig)) != 0);
+}
+/* Encode *sig as a signal field value: the index of the first v33_sig_map entry that is bytewise equal to *sig. Returns false if no entry matches. devinfo is unused. */
+bool
+v3d_qpu_sig_pack(const struct v3d_device_info *devinfo,
+ const struct v3d_qpu_sig *sig,
+ uint32_t *packed_sig)
+{
+ const struct v3d_qpu_sig *map;
+ /* Plain local: a function-static pointer reassigned on every call gains nothing and is shared mutable state. */
+ map = v33_sig_map;
+
+ for (int i = 0; i < ARRAY_SIZE(v33_sig_map); i++) {
+ if (memcmp(&map[i], sig, sizeof(*sig)) == 0) {
+ *packed_sig = i;
+ return true;
+ }
+ }
+
+ return false;
+}
+/* Decode the packed condition field into *cond. All six members are first reset to their NONE values, then the ones implied by the encoding are filled in. Returns false only for the value 0x10. devinfo is unused. */
+bool
+v3d_qpu_flags_unpack(const struct v3d_device_info *devinfo,
+ uint32_t packed_cond,
+ struct v3d_qpu_flags *cond)
+{
+ static const enum v3d_qpu_cond cond_map[4] = {
+ [0] = V3D_QPU_COND_IFA,
+ [1] = V3D_QPU_COND_IFB,
+ [2] = V3D_QPU_COND_IFNA,
+ [3] = V3D_QPU_COND_IFNB,
+ };
+
+ cond->ac = V3D_QPU_COND_NONE;
+ cond->mc = V3D_QPU_COND_NONE;
+ cond->apf = V3D_QPU_PF_NONE;
+ cond->mpf = V3D_QPU_PF_NONE;
+ cond->auf = V3D_QPU_UF_NONE;
+ cond->muf = V3D_QPU_UF_NONE;
+ /* The chain below is order-dependent: each test relies on the earlier, narrower ranges having been ruled out. */
+ if (packed_cond == 0) {
+ return true;
+ } else if (packed_cond >> 2 == 0) { /* 0x01-0x03: add push flag */
+ cond->apf = packed_cond & 0x3;
+ } else if (packed_cond >> 4 == 0) { /* 0x04-0x0f: add update flag */
+ cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
+ } else if (packed_cond == 0x10) { /* no decoding for this value */
+ return false;
+ } else if (packed_cond >> 2 == 0x4) { /* 0x11-0x13: mul push flag */
+ cond->mpf = packed_cond & 0x3;
+ } else if (packed_cond >> 4 == 0x1) { /* 0x14-0x1f: mul update flag */
+ cond->muf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
+ } else if (packed_cond >> 4 == 0x2) { /* 0x20-0x2f: add cond in bits 3:2, mul push flag in bits 1:0 */
+ cond->ac = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA;
+ cond->mpf = packed_cond & 0x3;
+ } else if (packed_cond >> 4 == 0x3) { /* 0x30-0x3f: mul cond in bits 3:2, add push flag in bits 1:0 */
+ cond->mc = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA;
+ cond->apf = packed_cond & 0x3;
+ } else if (packed_cond >> 6) { /* bit 6 set: mul cond in bits 5:4 plus either add cond or add update flag */
+ cond->mc = cond_map[(packed_cond >> 4) & 0x3];
+ if (((packed_cond >> 2) & 0x3) == 0) { /* bits 3:2 clear: add cond lives in bits 1:0 */
+ cond->ac = cond_map[packed_cond & 0x3];
+ } else {
+ cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
+ }
+ }
+
+ return true;
+}
+/* Encode *cond into the packed condition field. Returns false when the set of non-NONE members is not one of the combinations in flags_table. devinfo is unused. */
+bool
+v3d_qpu_flags_pack(const struct v3d_device_info *devinfo,
+ const struct v3d_qpu_flags *cond,
+ uint32_t *packed_cond)
+{
+#define AC (1 << 0)
+#define MC (1 << 1)
+#define APF (1 << 2)
+#define MPF (1 << 3)
+#define AUF (1 << 4)
+#define MUF (1 << 5)
+ static const struct {
+ uint8_t flags_present;
+ uint8_t bits;
+ } flags_table[] = {
+ { 0, 0 },
+ { APF, 0 },
+ { AUF, 0 },
+ { MPF, (1 << 4) },
+ { MUF, (1 << 4) },
+ { AC, (1 << 5) },
+ { AC | MPF, (1 << 5) },
+ { MC, (1 << 5) | (1 << 4) },
+ { MC | APF, (1 << 5) | (1 << 4) },
+ { MC | AC, (1 << 6) },
+ { MC | AUF, (1 << 6) },
+ };
+ /* Summarise which members are in use so the legal combination can be looked up. */
+ uint8_t flags_present = 0;
+ if (cond->ac != V3D_QPU_COND_NONE)
+ flags_present |= AC;
+ if (cond->mc != V3D_QPU_COND_NONE)
+ flags_present |= MC;
+ if (cond->apf != V3D_QPU_PF_NONE)
+ flags_present |= APF;
+ if (cond->mpf != V3D_QPU_PF_NONE)
+ flags_present |= MPF;
+ if (cond->auf != V3D_QPU_UF_NONE)
+ flags_present |= AUF;
+ if (cond->muf != V3D_QPU_UF_NONE)
+ flags_present |= MUF;
+
+ for (int i = 0; i < ARRAY_SIZE(flags_table); i++) {
+ if (flags_table[i].flags_present != flags_present)
+ continue;
+
+ *packed_cond = flags_table[i].bits;
+ /* ORed in unconditionally; this relies on the PF "none" value contributing no bits - presumably V3D_QPU_PF_NONE is 0, confirm. */
+ *packed_cond |= cond->apf;
+ *packed_cond |= cond->mpf;
+
+ if (flags_present & AUF)
+ *packed_cond |= cond->auf - V3D_QPU_UF_ANDZ + 4;
+ if (flags_present & MUF)
+ *packed_cond |= cond->muf - V3D_QPU_UF_ANDZ + 4;
+ /* In the bit-6 (MC | AC) form v3d_qpu_flags_unpack() reads the add condition from bits 1:0, so it must not be shifted there. */
+ if (flags_present & AC)
+ *packed_cond |= (cond->ac - V3D_QPU_COND_IFA) << ((*packed_cond & (1 << 6)) ? 0 : 2);
+
+ if (flags_present & MC) {
+ if (*packed_cond & (1 << 6))
+ *packed_cond |= (cond->mc -
+ V3D_QPU_COND_IFA) << 4;
+ else
+ *packed_cond |= (cond->mc -
+ V3D_QPU_COND_IFA) << 2;
+ }
+
+ return true;
+ }
+
+ return false;
+}
+/* MUX_MASK(bot, top) sets bits bot..top inclusive; ANYMUX is all eight bits, i.e. any mux value 0-7 matches. */
+/* Make a mapping of the table of opcodes in the spec. The opcode is
+ * determined by a combination of the opcode field, and in the case of 0 or
+ * 1-arg opcodes, the mux_b field as well.
+ */
+#define MUX_MASK(bot, top) (((1 << (top + 1)) - 1) - ((1 << (bot)) - 1))
+#define ANYMUX MUX_MASK(0, 7)
+/* One row of an opcode table: an inclusive opcode range plus bitmasks of the mux values (bit n = mux value n) for which the row applies. */
+struct opcode_desc {
+ uint8_t opcode_first;
+ uint8_t opcode_last;
+ uint8_t mux_b_mask;
+ uint8_t mux_a_mask;
+ uint8_t op;
+ /* 0 if it's the same across V3D versions, or a specific V3D version. */
+ uint8_t ver; /* not consulted by lookup_opcode() in this file */
+};
+/* Add-ALU opcode table. lookup_opcode() returns the first matching row, so where two rows share a range (FADD/FADDNF, FMIN/FMAX, STVPM*) the decoder refines the result afterwards. The packers use the first row whose op matches and start from its opcode_first. */
+static const struct opcode_desc add_ops[] = {
+ /* FADD is FADDNF depending on the order of the mux_a/mux_b. */
+ { 0, 47, ANYMUX, ANYMUX, V3D_QPU_A_FADD },
+ { 0, 47, ANYMUX, ANYMUX, V3D_QPU_A_FADDNF },
+ { 53, 55, ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
+ { 56, 56, ANYMUX, ANYMUX, V3D_QPU_A_ADD },
+ { 57, 59, ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
+ { 60, 60, ANYMUX, ANYMUX, V3D_QPU_A_SUB },
+ { 61, 63, ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
+ { 64, 111, ANYMUX, ANYMUX, V3D_QPU_A_FSUB },
+ { 120, 120, ANYMUX, ANYMUX, V3D_QPU_A_MIN },
+ { 121, 121, ANYMUX, ANYMUX, V3D_QPU_A_MAX },
+ { 122, 122, ANYMUX, ANYMUX, V3D_QPU_A_UMIN },
+ { 123, 123, ANYMUX, ANYMUX, V3D_QPU_A_UMAX },
+ { 124, 124, ANYMUX, ANYMUX, V3D_QPU_A_SHL },
+ { 125, 125, ANYMUX, ANYMUX, V3D_QPU_A_SHR },
+ { 126, 126, ANYMUX, ANYMUX, V3D_QPU_A_ASR },
+ { 127, 127, ANYMUX, ANYMUX, V3D_QPU_A_ROR },
+ /* FMIN is instead FMAX depending on the order of the mux_a/mux_b. */
+ { 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMIN },
+ { 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMAX },
+ { 176, 180, ANYMUX, ANYMUX, V3D_QPU_A_VFMIN },
+
+ { 181, 181, ANYMUX, ANYMUX, V3D_QPU_A_AND },
+ { 182, 182, ANYMUX, ANYMUX, V3D_QPU_A_OR },
+ { 183, 183, ANYMUX, ANYMUX, V3D_QPU_A_XOR },
+
+ { 184, 184, ANYMUX, ANYMUX, V3D_QPU_A_VADD },
+ { 185, 185, ANYMUX, ANYMUX, V3D_QPU_A_VSUB },
+ { 186, 186, 1 << 0, ANYMUX, V3D_QPU_A_NOT },
+ { 186, 186, 1 << 1, ANYMUX, V3D_QPU_A_NEG },
+ { 186, 186, 1 << 2, ANYMUX, V3D_QPU_A_FLAPUSH },
+ { 186, 186, 1 << 3, ANYMUX, V3D_QPU_A_FLBPUSH },
+ { 186, 186, 1 << 4, ANYMUX, V3D_QPU_A_FLBPOP },
+ { 186, 186, 1 << 6, ANYMUX, V3D_QPU_A_SETMSF },
+ { 186, 186, 1 << 7, ANYMUX, V3D_QPU_A_SETREVF },
+ { 187, 187, 1 << 0, 1 << 0, V3D_QPU_A_NOP, 0 },
+ { 187, 187, 1 << 0, 1 << 1, V3D_QPU_A_TIDX },
+ { 187, 187, 1 << 0, 1 << 2, V3D_QPU_A_EIDX },
+ { 187, 187, 1 << 0, 1 << 3, V3D_QPU_A_LR },
+ { 187, 187, 1 << 0, 1 << 4, V3D_QPU_A_VFLA },
+ { 187, 187, 1 << 0, 1 << 5, V3D_QPU_A_VFLNA },
+ { 187, 187, 1 << 0, 1 << 6, V3D_QPU_A_VFLB },
+ { 187, 187, 1 << 0, 1 << 7, V3D_QPU_A_VFLNB },
+
+ { 187, 187, 1 << 1, MUX_MASK(0, 2), V3D_QPU_A_FXCD },
+ { 187, 187, 1 << 1, 1 << 3, V3D_QPU_A_XCD },
+ { 187, 187, 1 << 1, MUX_MASK(4, 6), V3D_QPU_A_FYCD },
+ { 187, 187, 1 << 1, 1 << 7, V3D_QPU_A_YCD },
+
+ { 187, 187, 1 << 2, 1 << 0, V3D_QPU_A_MSF },
+ { 187, 187, 1 << 2, 1 << 1, V3D_QPU_A_REVF },
+ { 187, 187, 1 << 2, 1 << 2, V3D_QPU_A_VDWWT },
+ { 187, 187, 1 << 2, 1 << 5, V3D_QPU_A_TMUWT },
+ { 187, 187, 1 << 2, 1 << 6, V3D_QPU_A_VPMWT },
+
+ { 187, 187, 1 << 3, ANYMUX, V3D_QPU_A_VPMSETUP },
+
+ /* FIXME: MORE COMPLICATED */
+ /* { 190, 191, ANYMUX, ANYMUX, V3D_QPU_A_VFMOVABSNEGNAB }, */
+
+ { 192, 239, ANYMUX, ANYMUX, V3D_QPU_A_FCMP },
+ { 240, 244, ANYMUX, ANYMUX, V3D_QPU_A_VFMAX },
+ /* Opcodes 245-247 are also reachable as 249-251 and 253-255: v3d_qpu_add_unpack() folds those ranges onto 245-247 before the lookup. */
+ { 245, 245, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FROUND },
+ { 245, 245, 1 << 3, ANYMUX, V3D_QPU_A_FTOIN },
+ { 245, 245, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FTRUNC },
+ { 245, 245, 1 << 7, ANYMUX, V3D_QPU_A_FTOIZ },
+ { 246, 246, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FFLOOR },
+ { 246, 246, 1 << 3, ANYMUX, V3D_QPU_A_FTOUZ },
+ { 246, 246, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FCEIL },
+ { 246, 246, 1 << 7, ANYMUX, V3D_QPU_A_FTOC },
+
+ { 247, 247, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FDX },
+ { 247, 247, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FDY },
+
+ /* The stvpms are distinguished by the waddr field. */
+ { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMV },
+ { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMD },
+ { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMP },
+
+ { 252, 252, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_ITOF },
+ { 252, 252, 1 << 3, ANYMUX, V3D_QPU_A_CLZ },
+ { 252, 252, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_UTOF },
+};
+/* Mul-ALU opcode table, same row format as add_ops. There is no row for opcode 0: v3d_qpu_instr_unpack() treats a zero mul opcode as a non-ALU instruction. FMOV spans 14 and part of 15; the split encodes its output pack. */
+static const struct opcode_desc mul_ops[] = {
+ { 1, 1, ANYMUX, ANYMUX, V3D_QPU_M_ADD },
+ { 2, 2, ANYMUX, ANYMUX, V3D_QPU_M_SUB },
+ { 3, 3, ANYMUX, ANYMUX, V3D_QPU_M_UMUL24 },
+ { 4, 8, ANYMUX, ANYMUX, V3D_QPU_M_VFMUL },
+ { 9, 9, ANYMUX, ANYMUX, V3D_QPU_M_SMUL24 },
+ { 10, 10, ANYMUX, ANYMUX, V3D_QPU_M_MULTOP },
+ { 14, 14, ANYMUX, ANYMUX, V3D_QPU_M_FMOV },
+ { 15, 15, MUX_MASK(0, 3), ANYMUX, V3D_QPU_M_FMOV },
+ { 15, 15, 1 << 4, 1 << 0, V3D_QPU_M_NOP, 0 },
+ { 15, 15, 1 << 7, ANYMUX, V3D_QPU_M_MOV },
+ { 16, 63, ANYMUX, ANYMUX, V3D_QPU_M_FMUL },
+};
+/* Return the first row of opcodes[0..num_opcodes) whose opcode range contains opcode and whose mux masks include mux_a and mux_b, or NULL if none does. */
+static const struct opcode_desc *
+lookup_opcode(const struct opcode_desc *opcodes, size_t num_opcodes,
+ uint32_t opcode, uint32_t mux_a, uint32_t mux_b)
+{
+ for (size_t i = 0; i < num_opcodes; i++) {
+ const struct opcode_desc *op_desc = &opcodes[i];
+ /* Index is size_t to match num_opcodes and avoid a signed/unsigned comparison. */
+ if (opcode < op_desc->opcode_first ||
+ opcode > op_desc->opcode_last)
+ continue;
+ /* The masks hold one bit per mux value. */
+ if (!(op_desc->mux_b_mask & (1 << mux_b)))
+ continue;
+
+ if (!(op_desc->mux_a_mask & (1 << mux_a)))
+ continue;
+
+ return op_desc;
+ }
+
+ return NULL;
+}
+/* Decode a 2-bit float32 input-unpack field (0 = ABS, 1 = NONE, 2 = L, 3 = H) into *unpacked. Returns false, leaving *unpacked untouched, for any other value. */
+static bool
+v3d_qpu_float32_unpack_unpack(uint32_t packed,
+ enum v3d_qpu_input_unpack *unpacked)
+{
+ switch (packed) {
+ case 0:
+ *unpacked = V3D_QPU_UNPACK_ABS;
+ return true;
+ case 1:
+ *unpacked = V3D_QPU_UNPACK_NONE;
+ return true;
+ case 2:
+ *unpacked = V3D_QPU_UNPACK_L;
+ return true;
+ case 3:
+ *unpacked = V3D_QPU_UNPACK_H;
+ return true;
+ default:
+ return false;
+ }
+}
+/* Inverse of v3d_qpu_float32_unpack_unpack(): encode a float32 input-unpack mode as 0-3 in *packed. Returns false for modes that have no float32 encoding (e.g. the 16-bit replicate/swap modes). */
+static bool
+v3d_qpu_float32_unpack_pack(enum v3d_qpu_input_unpack unpacked,
+ uint32_t *packed)
+{
+ switch (unpacked) {
+ case V3D_QPU_UNPACK_ABS:
+ *packed = 0;
+ return true;
+ case V3D_QPU_UNPACK_NONE:
+ *packed = 1;
+ return true;
+ case V3D_QPU_UNPACK_L:
+ *packed = 2;
+ return true;
+ case V3D_QPU_UNPACK_H:
+ *packed = 3;
+ return true;
+ default:
+ return false;
+ }
+}
+/* Decode a float16 input-unpack field (0 = NONE, 1 = REPLICATE_32F_16, 2 = REPLICATE_L_16, 3 = REPLICATE_H_16, 4 = SWAP_16) into *unpacked. Returns false for values above 4. */
+static bool
+v3d_qpu_float16_unpack_unpack(uint32_t packed,
+ enum v3d_qpu_input_unpack *unpacked)
+{
+ switch (packed) {
+ case 0:
+ *unpacked = V3D_QPU_UNPACK_NONE;
+ return true;
+ case 1:
+ *unpacked = V3D_QPU_UNPACK_REPLICATE_32F_16;
+ return true;
+ case 2:
+ *unpacked = V3D_QPU_UNPACK_REPLICATE_L_16;
+ return true;
+ case 3:
+ *unpacked = V3D_QPU_UNPACK_REPLICATE_H_16;
+ return true;
+ case 4:
+ *unpacked = V3D_QPU_UNPACK_SWAP_16;
+ return true;
+ default:
+ return false;
+ }
+}
+/* Inverse of v3d_qpu_float16_unpack_unpack(): encode a float16 input-unpack mode as 0-4 in *packed. Returns false for modes without a float16 encoding (e.g. ABS, L, H). */
+static bool
+v3d_qpu_float16_unpack_pack(enum v3d_qpu_input_unpack unpacked,
+ uint32_t *packed)
+{
+ switch (unpacked) {
+ case V3D_QPU_UNPACK_NONE:
+ *packed = 0;
+ return true;
+ case V3D_QPU_UNPACK_REPLICATE_32F_16:
+ *packed = 1;
+ return true;
+ case V3D_QPU_UNPACK_REPLICATE_L_16:
+ *packed = 2;
+ return true;
+ case V3D_QPU_UNPACK_REPLICATE_H_16:
+ *packed = 3;
+ return true;
+ case V3D_QPU_UNPACK_SWAP_16:
+ *packed = 4;
+ return true;
+ default:
+ return false;
+ }
+}
+/* Encode an output-pack mode (NONE = 0, L = 1, H = 2) in *packed; returns false otherwise. NOTE(review): the parameter is typed as the input-unpack enum but is compared against V3D_QPU_PACK_* and fed output_pack values by every caller - the type looks like a copy/paste slip, confirm against qpu_instr.h. */
+static bool
+v3d_qpu_float32_pack_pack(enum v3d_qpu_input_unpack unpacked,
+ uint32_t *packed)
+{
+ switch (unpacked) {
+ case V3D_QPU_PACK_NONE:
+ *packed = 0;
+ return true;
+ case V3D_QPU_PACK_L:
+ *packed = 1;
+ return true;
+ case V3D_QPU_PACK_H:
+ *packed = 2;
+ return true;
+ default:
+ return false;
+ }
+}
+/* Decode the add-ALU half of packed_inst into instr->alu.add (op, pack/unpack modes, mux a/b, waddr, magic_write). Returns false if no add_ops row matches or a sub-field is invalid; instr may be partially written in that case. devinfo is unused. */
+static bool
+v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
+ struct v3d_qpu_instr *instr)
+{
+ uint32_t op = QPU_GET_FIELD(packed_inst, VC5_QPU_OP_ADD);
+ uint32_t mux_a = QPU_GET_FIELD(packed_inst, VC5_QPU_ADD_A);
+ uint32_t mux_b = QPU_GET_FIELD(packed_inst, VC5_QPU_ADD_B);
+ uint32_t waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);
+ /* map_op is only used for the table lookup; the raw op is kept because its low bits still carry the unpack mode. */
+ uint32_t map_op = op;
+ /* Some big clusters of opcodes are replicated with unpack
+ * flags
+ */
+ if (map_op >= 249 && map_op <= 251)
+ map_op = (map_op - 249 + 245);
+ if (map_op >= 253 && map_op <= 255)
+ map_op = (map_op - 253 + 245);
+
+ const struct opcode_desc *desc =
+ lookup_opcode(add_ops, ARRAY_SIZE(add_ops),
+ map_op, mux_a, mux_b);
+ if (!desc)
+ return false;
+
+ instr->alu.add.op = desc->op;
+
+ /* FADD/FADDNF and FMIN/FMAX are determined by the orders of the
+ * operands.
+ */
+ if (((op >> 2) & 3) * 8 + mux_a > (op & 3) * 8 + mux_b) { /* sort key per operand: unpack bits * 8 + mux */
+ if (instr->alu.add.op == V3D_QPU_A_FMIN)
+ instr->alu.add.op = V3D_QPU_A_FMAX;
+ if (instr->alu.add.op == V3D_QPU_A_FADD)
+ instr->alu.add.op = V3D_QPU_A_FADDNF;
+ }
+
+ /* Some QPU ops require a bit more than just basic opcode and mux a/b
+ * comparisons to distinguish them.
+ */
+ switch (instr->alu.add.op) {
+ case V3D_QPU_A_STVPMV:
+ case V3D_QPU_A_STVPMD:
+ case V3D_QPU_A_STVPMP:
+ switch (waddr) {
+ case 0:
+ instr->alu.add.op = V3D_QPU_A_STVPMV;
+ break;
+ case 1:
+ instr->alu.add.op = V3D_QPU_A_STVPMD;
+ break;
+ case 2:
+ instr->alu.add.op = V3D_QPU_A_STVPMP;
+ break;
+ default:
+ return false;
+ }
+ break;
+ default:
+ break;
+ }
+ /* Second pass: pull the output pack and input unpack modes out of the opcode/mux bits, per op family. */
+ switch (instr->alu.add.op) {
+ case V3D_QPU_A_FADD:
+ case V3D_QPU_A_FADDNF:
+ case V3D_QPU_A_FSUB:
+ case V3D_QPU_A_FMIN:
+ case V3D_QPU_A_FMAX:
+ case V3D_QPU_A_FCMP:
+ instr->alu.add.output_pack = (op >> 4) & 0x3;
+ /* op bits 3:2 = a unpack, bits 1:0 = b unpack. */
+ if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
+ &instr->alu.add.a_unpack)) {
+ return false;
+ }
+
+ if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
+ &instr->alu.add.b_unpack)) {
+ return false;
+ }
+ break;
+ /* One-source float ops: output pack comes from the low mux_b bits, a unpack from raw op bits 3:2. */
+ case V3D_QPU_A_FFLOOR:
+ case V3D_QPU_A_FROUND:
+ case V3D_QPU_A_FTRUNC:
+ case V3D_QPU_A_FCEIL:
+ case V3D_QPU_A_FDX:
+ case V3D_QPU_A_FDY:
+ instr->alu.add.output_pack = mux_b & 0x3;
+
+ if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
+ &instr->alu.add.a_unpack)) {
+ return false;
+ }
+ break;
+
+ case V3D_QPU_A_FTOIN:
+ case V3D_QPU_A_FTOIZ:
+ case V3D_QPU_A_FTOUZ:
+ case V3D_QPU_A_FTOC:
+ instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
+
+ if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
+ &instr->alu.add.a_unpack)) {
+ return false;
+ }
+ break;
+ /* 16-bit vector min/max: the low three op bits select the float16 unpack of operand a. */
+ case V3D_QPU_A_VFMIN:
+ case V3D_QPU_A_VFMAX:
+ if (!v3d_qpu_float16_unpack_unpack(op & 0x7,
+ &instr->alu.add.a_unpack)) {
+ return false;
+ }
+
+ instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
+ instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE;
+ break;
+
+ default:
+ instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
+ instr->alu.add.a_unpack = V3D_QPU_UNPACK_NONE;
+ instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE;
+ break;
+ }
+
+ instr->alu.add.a = mux_a;
+ instr->alu.add.b = mux_b;
+ instr->alu.add.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);
+ instr->alu.add.magic_write = packed_inst & VC5_QPU_MA; /* bit 44 of a 64-bit value; presumably magic_write is a bool so this does not truncate - confirm */
+
+ return true;
+}
+/* Decode the mul-ALU half of packed_inst into instr->alu.mul (op, pack/unpack modes, mux a/b, waddr, magic_write). Returns false if no mul_ops row matches or an unpack field is invalid. devinfo is unused. */
+static bool
+v3d_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
+ struct v3d_qpu_instr *instr)
+{
+ uint32_t op = QPU_GET_FIELD(packed_inst, VC5_QPU_OP_MUL);
+ uint32_t mux_a = QPU_GET_FIELD(packed_inst, VC5_QPU_MUL_A);
+ uint32_t mux_b = QPU_GET_FIELD(packed_inst, VC5_QPU_MUL_B);
+
+ {
+ const struct opcode_desc *desc =
+ lookup_opcode(mul_ops, ARRAY_SIZE(mul_ops),
+ op, mux_a, mux_b);
+ if (!desc)
+ return false;
+
+ instr->alu.mul.op = desc->op;
+ }
+
+ switch (instr->alu.mul.op) {
+ case V3D_QPU_M_FMUL:
+ instr->alu.mul.output_pack = ((op >> 4) & 0x3) - 1; /* FMUL starts at opcode 16, so this field is stored biased by one */
+
+ if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
+ &instr->alu.mul.a_unpack)) {
+ return false;
+ }
+
+ if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
+ &instr->alu.mul.b_unpack)) {
+ return false;
+ }
+
+ break;
+
+ case V3D_QPU_M_FMOV:
+ instr->alu.mul.output_pack = (((op & 1) << 1) + /* high bit from opcode 14 vs 15, low bit from mux_b bit 2 */
+ ((mux_b >> 2) & 1));
+
+ if (!v3d_qpu_float32_unpack_unpack(mux_b & 0x3,
+ &instr->alu.mul.a_unpack)) {
+ return false;
+ }
+ /* b_unpack is not written for FMOV. */
+ break;
+ default:
+ instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
+ instr->alu.mul.a_unpack = V3D_QPU_UNPACK_NONE;
+ instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE;
+ break;
+ }
+
+ instr->alu.mul.a = mux_a;
+ instr->alu.mul.b = mux_b;
+ instr->alu.mul.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_M);
+ instr->alu.mul.magic_write = packed_inst & VC5_QPU_MM;
+
+ return true;
+}
+/* Encode instr->alu.add into the add-ALU fields of *packed_instr (ORed into whatever is already there). Returns false if the op is not in add_ops or its pack/unpack modes cannot be encoded for that op. devinfo is unused. */
+static bool
+v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
+ const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
+{
+ uint32_t waddr = instr->alu.add.waddr;
+ uint32_t mux_a = instr->alu.add.a;
+ uint32_t mux_b = instr->alu.add.b;
+ int nsrc = v3d_qpu_add_op_num_src(instr->alu.add.op);
+ const struct opcode_desc *desc;
+ /* Find the first table row for this op; its opcode_first is the base the mode bits are merged into. */
+ int opcode;
+ for (desc = add_ops; desc != &add_ops[ARRAY_SIZE(add_ops)];
+ desc++) {
+ if (desc->op == instr->alu.add.op)
+ break;
+ }
+ if (desc == &add_ops[ARRAY_SIZE(add_ops)])
+ return false;
+
+ opcode = desc->opcode_first;
+
+ /* If an operation doesn't use an arg, its mux values may be used to
+ * identify the operation type.
+ */
+ if (nsrc < 2)
+ mux_b = ffs(desc->mux_b_mask) - 1;
+
+ if (nsrc < 1)
+ mux_a = ffs(desc->mux_a_mask) - 1;
+ /* The three STVPM variants share an opcode and are told apart by waddr, overriding the caller's value. */
+ switch (instr->alu.add.op) {
+ case V3D_QPU_A_STVPMV:
+ waddr = 0;
+ break;
+ case V3D_QPU_A_STVPMD:
+ waddr = 1;
+ break;
+ case V3D_QPU_A_STVPMP:
+ waddr = 2;
+ break;
+ default:
+ break;
+ }
+
+ switch (instr->alu.add.op) {
+ case V3D_QPU_A_FADD:
+ case V3D_QPU_A_FADDNF:
+ case V3D_QPU_A_FSUB:
+ case V3D_QPU_A_FMIN:
+ case V3D_QPU_A_FMAX:
+ case V3D_QPU_A_FCMP: {
+ uint32_t output_pack;
+ uint32_t a_unpack;
+ uint32_t b_unpack;
+
+ if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
+ &output_pack)) {
+ return false;
+ }
+ opcode |= output_pack << 4;
+
+ if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
+ &a_unpack)) {
+ return false;
+ }
+
+ if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b_unpack,
+ &b_unpack)) {
+ return false;
+ }
+
+ /* These operations with commutative operands are
+ * distinguished by which order their operands come in.
+ */
+ bool ordering = a_unpack * 8 + mux_a > b_unpack * 8 + mux_b;
+ if (((instr->alu.add.op == V3D_QPU_A_FMIN ||
+ instr->alu.add.op == V3D_QPU_A_FADD) && ordering) ||
+ ((instr->alu.add.op == V3D_QPU_A_FMAX ||
+ instr->alu.add.op == V3D_QPU_A_FADDNF) && !ordering)) {
+ uint32_t temp;
+
+ temp = a_unpack;
+ a_unpack = b_unpack;
+ b_unpack = temp;
+
+ temp = mux_a;
+ mux_a = mux_b;
+ mux_b = temp;
+ }
+
+ opcode |= a_unpack << 2;
+ opcode |= b_unpack << 0;
+ break;
+ }
+
+ case V3D_QPU_A_FFLOOR:
+ case V3D_QPU_A_FROUND:
+ case V3D_QPU_A_FTRUNC:
+ case V3D_QPU_A_FCEIL:
+ case V3D_QPU_A_FDX:
+ case V3D_QPU_A_FDY: {
+ uint32_t packed;
+
+ if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
+ &packed)) {
+ return false;
+ }
+ mux_b |= packed;
+
+ if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
+ &packed)) {
+ return false;
+ }
+ if (packed == 0) /* ABS has no encoding here */
+ return false;
+ opcode = (opcode & ~(1 << 2)) | (packed << 2); /* base opcodes 245-247 already have bit 2 set; clear it so L becomes 249-251 rather than the H opcodes */
+ break;
+ }
+
+ case V3D_QPU_A_FTOIN:
+ case V3D_QPU_A_FTOIZ:
+ case V3D_QPU_A_FTOUZ:
+ case V3D_QPU_A_FTOC:
+ if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE)
+ return false;
+
+ uint32_t packed;
+ if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
+ &packed)) {
+ return false;
+ }
+ if (packed == 0) /* ABS has no encoding here */
+ return false;
+ opcode = (opcode & ~(1 << 2)) | (packed << 2); /* same bit-2 clearing as the FFLOOR group, which shares these base opcodes */
+
+ break;
+
+ case V3D_QPU_A_VFMIN:
+ case V3D_QPU_A_VFMAX:
+ if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
+ instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE) {
+ return false;
+ }
+
+ if (!v3d_qpu_float16_unpack_pack(instr->alu.add.a_unpack,
+ &packed)) {
+ return false;
+ }
+ opcode |= packed;
+ break;
+
+ default:
+ if (instr->alu.add.op != V3D_QPU_A_NOP &&
+ (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
+ instr->alu.add.a_unpack != V3D_QPU_UNPACK_NONE ||
+ instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE)) {
+ return false;
+ }
+ break;
+ }
+
+ *packed_instr |= QPU_SET_FIELD(mux_a, VC5_QPU_ADD_A);
+ *packed_instr |= QPU_SET_FIELD(mux_b, VC5_QPU_ADD_B);
+ *packed_instr |= QPU_SET_FIELD(opcode, VC5_QPU_OP_ADD);
+ *packed_instr |= QPU_SET_FIELD(waddr, V3D_QPU_WADDR_A);
+ if (instr->alu.add.magic_write)
+ *packed_instr |= VC5_QPU_MA;
+
+ return true;
+}
+/* Encode instr->alu.mul into the mul-ALU fields of *packed_instr (ORed into the existing value). Returns false if the op is not in mul_ops or, for FMUL/FMOV, a pack/unpack mode has no encoding. Other ops' pack/unpack modes are not checked. devinfo is unused. */
+static bool
+v3d_qpu_mul_pack(const struct v3d_device_info *devinfo,
+ const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
+{
+ uint32_t mux_a = instr->alu.mul.a;
+ uint32_t mux_b = instr->alu.mul.b;
+ int nsrc = v3d_qpu_mul_op_num_src(instr->alu.mul.op);
+ const struct opcode_desc *desc;
+ /* First table row for this op; for FMOV that is the opcode-14 row. */
+ for (desc = mul_ops; desc != &mul_ops[ARRAY_SIZE(mul_ops)];
+ desc++) {
+ if (desc->op == instr->alu.mul.op)
+ break;
+ }
+ if (desc == &mul_ops[ARRAY_SIZE(mul_ops)])
+ return false;
+
+ uint32_t opcode = desc->opcode_first;
+
+ /* Some opcodes have a single valid value for their mux a/b, so set
+ * that here. If mux a/b determine packing, it will be set below.
+ */
+ if (nsrc < 2)
+ mux_b = ffs(desc->mux_b_mask) - 1;
+
+ if (nsrc < 1)
+ mux_a = ffs(desc->mux_a_mask) - 1;
+
+ switch (instr->alu.mul.op) {
+ case V3D_QPU_M_FMUL: {
+ uint32_t packed;
+
+ if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
+ &packed)) {
+ return false;
+ }
+ /* No need for a +1 because desc->opcode_first has a 1 in this
+ * field.
+ */
+ opcode += packed << 4;
+
+ if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack,
+ &packed)) {
+ return false;
+ }
+ opcode |= packed << 2;
+
+ if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.b_unpack,
+ &packed)) {
+ return false;
+ }
+ opcode |= packed << 0;
+ break;
+ }
+
+ case V3D_QPU_M_FMOV: {
+ uint32_t packed;
+
+ if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
+ &packed)) {
+ return false;
+ }
+ opcode |= (packed >> 1) & 1; /* high output-pack bit turns opcode 14 into 15 */
+ mux_b = (packed & 1) << 2; /* low output-pack bit goes in mux_b bit 2; replaces the value chosen above */
+
+ if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack,
+ &packed)) {
+ return false;
+ }
+ mux_b |= packed;
+ break;
+ }
+
+ default:
+ break;
+ }
+
+ *packed_instr |= QPU_SET_FIELD(mux_a, VC5_QPU_MUL_A);
+ *packed_instr |= QPU_SET_FIELD(mux_b, VC5_QPU_MUL_B);
+
+ *packed_instr |= QPU_SET_FIELD(opcode, VC5_QPU_OP_MUL);
+ *packed_instr |= QPU_SET_FIELD(instr->alu.mul.waddr, V3D_QPU_WADDR_M);
+ if (instr->alu.mul.magic_write)
+ *packed_instr |= VC5_QPU_MM;
+
+ return true;
+}
+/* Decode an ALU instruction word: signal, flags, raddr a/b, then the add and mul halves. Returns false as soon as any part fails, leaving *instr partially filled in. */
+static bool
+v3d_qpu_instr_unpack_alu(const struct v3d_device_info *devinfo,
+ uint64_t packed_instr,
+ struct v3d_qpu_instr *instr)
+{
+ instr->type = V3D_QPU_INSTR_TYPE_ALU;
+
+ if (!v3d_qpu_sig_unpack(devinfo,
+ QPU_GET_FIELD(packed_instr, VC5_QPU_SIG),
+ &instr->sig))
+ return false;
+
+ if (!v3d_qpu_flags_unpack(devinfo,
+ QPU_GET_FIELD(packed_instr, VC5_QPU_COND),
+ &instr->flags))
+ return false;
+
+ instr->raddr_a = QPU_GET_FIELD(packed_instr, VC5_QPU_RADDR_A);
+ instr->raddr_b = QPU_GET_FIELD(packed_instr, VC5_QPU_RADDR_B);
+
+ if (!v3d_qpu_add_unpack(devinfo, packed_instr, instr))
+ return false;
+
+ if (!v3d_qpu_mul_unpack(devinfo, packed_instr, instr))
+ return false;
+
+ return true;
+}
+/* Decode a branch instruction word into instr->branch. Returns false for an out-of-range condition or for msfign == 3. devinfo is unused. */
+static bool
+v3d_qpu_instr_unpack_branch(const struct v3d_device_info *devinfo,
+ uint64_t packed_instr,
+ struct v3d_qpu_instr *instr)
+{
+ instr->type = V3D_QPU_INSTR_TYPE_BRANCH;
+ /* Field value 0 means "always"; other values map to V3D_QPU_BRANCH_COND_A0 + (value - 2). NOTE(review): value 1 makes the unsigned (cond - 2) wrap, so whether it is rejected depends on the enum's numeric values - confirm. */
+ uint32_t cond = QPU_GET_FIELD(packed_instr, VC5_QPU_BRANCH_COND);
+ if (cond == 0)
+ instr->branch.cond = V3D_QPU_BRANCH_COND_ALWAYS;
+ else if (V3D_QPU_BRANCH_COND_A0 + (cond - 2) <=
+ V3D_QPU_BRANCH_COND_ALLNA)
+ instr->branch.cond = V3D_QPU_BRANCH_COND_A0 + (cond - 2);
+ else
+ return false;
+
+ uint32_t msfign = QPU_GET_FIELD(packed_instr, VC5_QPU_BRANCH_MSFIGN);
+ if (msfign == 3)
+ return false;
+ instr->branch.msfign = msfign;
+
+ instr->branch.bdi = QPU_GET_FIELD(packed_instr, VC5_QPU_BRANCH_BDI);
+ /* bdu is only decoded when ub is set; otherwise instr->branch.bdu keeps whatever the caller had there. */
+ instr->branch.ub = packed_instr & VC5_QPU_BRANCH_UB;
+ if (instr->branch.ub) {
+ instr->branch.bdu = QPU_GET_FIELD(packed_instr,
+ VC5_QPU_BRANCH_BDU);
+ }
+
+ instr->branch.raddr_a = QPU_GET_FIELD(packed_instr,
+ VC5_QPU_RADDR_A);
+ /* The offset is split in two: ADDR_LOW supplies bits 23:3, ADDR_HIGH bits 31:24; the low three bits are always zero. */
+ instr->branch.offset = 0;
+
+ instr->branch.offset +=
+ QPU_GET_FIELD(packed_instr,
+ VC5_QPU_BRANCH_ADDR_LOW) << 3;
+
+ instr->branch.offset +=
+ QPU_GET_FIELD(packed_instr,
+ VC5_QPU_BRANCH_ADDR_HIGH) << 24;
+
+ return true;
+}
+/* Decode a 64-bit instruction word into *instr. A nonzero mul opcode means an ALU instruction; a zero mul opcode with signal bits 4:3 == 0b10 means a branch; anything else is rejected with false. */
+bool
+v3d_qpu_instr_unpack(const struct v3d_device_info *devinfo,
+ uint64_t packed_instr,
+ struct v3d_qpu_instr *instr)
+{
+ if (QPU_GET_FIELD(packed_instr, VC5_QPU_OP_MUL) != 0) {
+ return v3d_qpu_instr_unpack_alu(devinfo, packed_instr, instr);
+ } else {
+ uint32_t sig = QPU_GET_FIELD(packed_instr, VC5_QPU_SIG);
+
+ if ((sig & 24) == 16) { /* matches the signal value 16 written by v3d_qpu_instr_pack_branch() */
+ return v3d_qpu_instr_unpack_branch(devinfo, packed_instr,
+ instr);
+ } else {
+ return false;
+ }
+ }
+}
+/* Encode an ALU instruction into *packed_instr by ORing in the signal, raddr a/b, add half, mul half and flags. Returns false if any part cannot be encoded; *packed_instr may then hold a partial encoding. */
+static bool
+v3d_qpu_instr_pack_alu(const struct v3d_device_info *devinfo,
+ const struct v3d_qpu_instr *instr,
+ uint64_t *packed_instr)
+{
+ uint32_t sig;
+ if (!v3d_qpu_sig_pack(devinfo, &instr->sig, &sig))
+ return false;
+ *packed_instr |= QPU_SET_FIELD(sig, VC5_QPU_SIG);
+
+ if (instr->type == V3D_QPU_INSTR_TYPE_ALU) { /* always true when reached through v3d_qpu_instr_pack() */
+ *packed_instr |= QPU_SET_FIELD(instr->raddr_a, VC5_QPU_RADDR_A);
+ *packed_instr |= QPU_SET_FIELD(instr->raddr_b, VC5_QPU_RADDR_B);
+
+ if (!v3d_qpu_add_pack(devinfo, instr, packed_instr))
+ return false;
+ if (!v3d_qpu_mul_pack(devinfo, instr, packed_instr))
+ return false;
+
+ uint32_t flags;
+ if (!v3d_qpu_flags_pack(devinfo, &instr->flags, &flags))
+ return false;
+ *packed_instr |= QPU_SET_FIELD(flags, VC5_QPU_COND);
+ }
+
+ return true;
+}
+/* Encode instr->branch into *packed_instr (ORed into the existing value). Always returns true; out-of-range field values are only caught by the assert inside QPU_SET_FIELD. devinfo is unused. */
+static bool
+v3d_qpu_instr_pack_branch(const struct v3d_device_info *devinfo,
+ const struct v3d_qpu_instr *instr,
+ uint64_t *packed_instr)
+{
+ *packed_instr |= QPU_SET_FIELD(16, VC5_QPU_SIG); /* signal value 16 is what v3d_qpu_instr_unpack() recognises as a branch */
+ /* "Always" is the zero encoding, so nothing is ORed in for it. */
+ if (instr->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS) {
+ *packed_instr |= QPU_SET_FIELD(2 + (instr->branch.cond -
+ V3D_QPU_BRANCH_COND_A0),
+ VC5_QPU_BRANCH_COND);
+ }
+
+ *packed_instr |= QPU_SET_FIELD(instr->branch.msfign,
+ VC5_QPU_BRANCH_MSFIGN);
+
+ *packed_instr |= QPU_SET_FIELD(instr->branch.bdi,
+ VC5_QPU_BRANCH_BDI);
+
+ if (instr->branch.ub) {
+ *packed_instr |= VC5_QPU_BRANCH_UB;
+ *packed_instr |= QPU_SET_FIELD(instr->branch.bdu,
+ VC5_QPU_BRANCH_BDU);
+ }
+
+ switch (instr->branch.bdi) {
+ case V3D_QPU_BRANCH_DEST_ABS:
+ case V3D_QPU_BRANCH_DEST_REL:
+ *packed_instr |= QPU_SET_FIELD(instr->branch.msfign, /* repeats the msfign write above; harmless because the bits are ORed */
+ VC5_QPU_BRANCH_MSFIGN);
+
+ *packed_instr |= QPU_SET_FIELD((instr->branch.offset &
+ ~0xff000000) >> 3,
+ VC5_QPU_BRANCH_ADDR_LOW);
+
+ *packed_instr |= QPU_SET_FIELD(instr->branch.offset >> 24,
+ VC5_QPU_BRANCH_ADDR_HIGH);
+ /* NOTE(review): there is no break here, so ABS/REL fall through and also pack raddr_a - confirm this is intended. */
+ case V3D_QPU_BRANCH_DEST_REGFILE:
+ *packed_instr |= QPU_SET_FIELD(instr->branch.raddr_a,
+ VC5_QPU_RADDR_A);
+ break;
+
+ default:
+ break;
+ }
+
+ return true;
+}
+/* Encode *instr into a 64-bit instruction word. *packed_instr is zeroed first, then filled by the ALU or branch packer; returns false for an unknown instr->type or when the chosen packer fails. */
+bool
+v3d_qpu_instr_pack(const struct v3d_device_info *devinfo,
+ const struct v3d_qpu_instr *instr,
+ uint64_t *packed_instr)
+{
+ *packed_instr = 0; /* the sub-packers only OR bits in, so start from a clean word */
+
+ switch (instr->type) {
+ case V3D_QPU_INSTR_TYPE_ALU:
+ return v3d_qpu_instr_pack_alu(devinfo, instr, packed_instr);
+ case V3D_QPU_INSTR_TYPE_BRANCH:
+ return v3d_qpu_instr_pack_branch(devinfo, instr, packed_instr);
+ default:
+ return false;
+ }
+}
diff -Nru mesa-17.2.4/src/broadcom/qpu/tests/qpu_disasm.c mesa-17.3.3/src/broadcom/qpu/tests/qpu_disasm.c
--- mesa-17.2.4/src/broadcom/qpu/tests/qpu_disasm.c 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/broadcom/qpu/tests/qpu_disasm.c 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,146 @@
+/*
+ * Copyright © 2016 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <stdio.h>  /* printf */
+#include <string.h> /* strcmp */
+#include "util/macros.h"
+#include "broadcom/common/v3d_device_info.h"
+#include "broadcom/qpu/qpu_disasm.h"
+#include "broadcom/qpu/qpu_instr.h"
+
+static const struct {
+ int ver; /* device version as major * 10 + minor (33 is printed as v3.3) */
+ uint64_t inst; /* packed 64-bit instruction handed to the disassembler and unpacker */
+ const char *expected; /* exact disassembly text the instruction must produce */
+} tests[] = {
+ { 33, 0x3d003186bb800000ull, "nop ; nop ; ldvary" },
+ { 33, 0x3c20318105829000ull, "fadd r1, r1, r5 ; nop ; thrsw" },
+ { 33, 0x3c403186bb81d000ull, "vpmsetup -, r5 ; nop ; ldunif" },
+ { 33, 0x3f003186bb800000ull, "nop ; nop ; ldvpm" },
+ { 33, 0x3c002380b6edb000ull, "or rf0, r3, r3 ; mov vpm, r3" },
+ { 33, 0x57403006bbb80000ull, "nop ; fmul r0, rf0, r5 ; ldvpm; ldunif" },
+
+ /* branch conditions */
+ { 33, 0x02000006002034c0ull, "b.anyap rf19" },
+ { 33, 0x02679356b4201000ull, "b.anyap -1268280496" },
+ { 33, 0x02b76a2dd0400000ull, "b.anynaq zero_addr+0xd0b76a28" },
+ { 33, 0x0200000500402000ull, "b.anynaq lri" },
+ { 33, 0x0216fe167301c8c0ull, "bu.anya zero_addr+0x7316fe10, rf35" },
+ { 33, 0x020000050040e000ull, "bu.anynaq lri, r:unif" },
+ { 33, 0x0200000300006000ull, "bu.na0 lri, a:unif" },
+
+ /* Special waddr names */
+ { 33, 0x3c00318735808000ull, "vfpack tlb, r0, r1 ; nop" },
+ { 33, 0xe0571c938e8d5000ull, "fmax.andc recip, r5.h, r2.l; fmul.ifb rf50.h, r3.l, r4.abs; ldunif" },
+ { 33, 0xc04098d4382c9000ull, "add.pushn rsqrt, r1, r1; fmul rf35.h, r3.abs, r1.abs; ldunif" },
+ { 33, 0x481edcd6b3184500ull, "vfmin.norn log, r4.hh, r0; fmul.ifnb rf51, rf20.abs, r0.l" },
+ { 33, 0x041618d57c453000ull, "shl.andn exp, r3, r2; add.ifb rf35, r1, r2" },
+ { 33, 0x7048e5da49272800ull, "fsub.ifa rf26, r2.l, rf32; fmul.pushc sin, r1.h, r1.abs; ldunif" },
+
+};
+
+static void
+swap_mux(enum v3d_qpu_mux *a, enum v3d_qpu_mux *b)
+{
+        const enum v3d_qpu_mux old_b = *b; /* exchange *a and *b through a temporary */
+        *b = *a;
+        *a = old_b;
+}
+
+static void
+swap_pack(enum v3d_qpu_input_unpack *a, enum v3d_qpu_input_unpack *b)
+{
+        const enum v3d_qpu_input_unpack old_b = *b; /* exchange *a and *b through a temporary */
+        *b = *a;
+        *a = old_b;
+}
+
+int
+main(int argc, char **argv)
+{
+        struct v3d_device_info devinfo = { };
+        int retval = 0;
+        /* Exit status: 1 if any table entry fails a check, 0 otherwise. */
+        for (int i = 0; i < ARRAY_SIZE(tests); i++) {
+                devinfo.ver = tests[i].ver;
+
+                printf("Testing v%d.%d 0x%016llx... ",
+                       devinfo.ver / 10, devinfo.ver % 10,
+                       (unsigned long long)tests[i].inst);
+
+                const char *disasm_output = v3d_qpu_disasm(&devinfo,
+                                                           tests[i].inst);
+
+                if (strcmp(disasm_output, tests[i].expected) != 0) {
+                        printf("FAIL\n");
+                        printf(" Expected: \"%s\"\n", tests[i].expected);
+                        printf(" Got: \"%s\"\n", disasm_output);
+                        retval = 1;
+                        continue;
+                }
+                /* Round trip: unpack the word, then check that packing reproduces it. */
+                struct v3d_qpu_instr instr;
+                if (!v3d_qpu_instr_unpack(&devinfo, tests[i].inst, &instr)) {
+                        printf("FAIL (unpack) %s\n", tests[i].expected);
+                        retval = 1;
+                        continue;
+                }
+
+                if (instr.type == V3D_QPU_INSTR_TYPE_ALU) {
+                        switch (instr.alu.add.op) {
+                        case V3D_QPU_A_FADD:
+                        case V3D_QPU_A_FADDNF:
+                        case V3D_QPU_A_FMIN:
+                        case V3D_QPU_A_FMAX:
+                                /* Swap the operands to be sure that we test
+                                 * how the QPUs distinguish between these ops.
+                                 */
+                                swap_mux(&instr.alu.add.a,
+                                         &instr.alu.add.b);
+                                swap_pack(&instr.alu.add.a_unpack,
+                                          &instr.alu.add.b_unpack);
+                        default:
+                                break;
+                        }
+                }
+
+                uint64_t repack;
+                if (!v3d_qpu_instr_pack(&devinfo, &instr, &repack)) {
+                        printf("FAIL (pack) %s\n", tests[i].expected);
+                        retval = 1;
+                        continue;
+                }
+
+                if (repack != tests[i].inst) {
+                        printf("FAIL (repack) 0x%016llx\n", (unsigned long long)repack);
+                        printf(" Expected: \"%s\"\n", tests[i].expected);
+                        const char *redisasm = v3d_qpu_disasm(&devinfo, repack);
+                        printf(" Got: \"%s\"\n", redisasm);
+                        retval = 1;
+                        continue; /* a repack mismatch must not also be reported as PASS */
+                }
+                printf("PASS\n");
+        }
+
+        return retval;
+}
diff -Nru mesa-17.2.4/src/compiler/blob.c mesa-17.3.3/src/compiler/blob.c
--- mesa-17.2.4/src/compiler/blob.c 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/compiler/blob.c 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,384 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <string.h>
+
+#include "main/macros.h"
+#include "blob.h"
+
+#ifdef HAVE_VALGRIND
+#include <valgrind.h>
+#include <memcheck.h>
+#define VG(x) x
+#else
+#define VG(x)
+#endif
+
+#define BLOB_INITIAL_SIZE 4096
+
+/* Ensure that \blob will be able to fit an additional object of size
+ * \additional. The growing (if any) will occur by doubling the existing
+ * allocation.
+ */
+static bool
+grow_to_fit(struct blob *blob, size_t additional)
+{
+   const size_t needed = blob->size + additional;
+   const bool wrapped = needed < blob->size; /* the size_t addition overflowed */
+   size_t to_allocate;
+   uint8_t *new_data;
+
+   if (blob->out_of_memory)
+      return false;
+
+   if (!wrapped && needed <= blob->allocated)
+      return true;
+
+   /* Fixed blobs cannot grow and a wrapped request can never be satisfied;
+    * both latch out_of_memory so that every later write fails as well. */
+   if (blob->fixed_allocation || wrapped) {
+      blob->out_of_memory = true;
+      return false;
+   }
+
+   /* Double the allocation (BLOB_INITIAL_SIZE the first time), but never
+    * request less than this write needs. */
+   to_allocate = blob->allocated ? blob->allocated * 2 : BLOB_INITIAL_SIZE;
+   to_allocate = MAX2(to_allocate, needed);
+
+   new_data = realloc(blob->data, to_allocate);
+   if (new_data == NULL) {
+      blob->out_of_memory = true;
+      return false;
+   }
+   blob->data = new_data;
+   blob->allocated = to_allocate;
+   return true;
+}
+
+/* Align the blob->size so that reading or writing a value at (blob->data +
+ * blob->size) will result in an access aligned to a granularity of \alignment
+ * bytes.
+ *
+ * \return True unless allocation fails
+ */
+static bool
+align_blob(struct blob *blob, size_t alignment)
+{
+   const size_t aligned_size = ALIGN(blob->size, alignment);
+   const size_t padding = aligned_size - blob->size;
+
+   if (aligned_size <= blob->size)
+      return true; /* nothing to pad */
+   if (!grow_to_fit(blob, padding))
+      return false;
+   /* Zero-fill the padding, when there is storage to write into. */
+   if (blob->data)
+      memset(blob->data + blob->size, 0, padding);
+   blob->size = aligned_size;
+   return true;
+}
+
+static void
+align_blob_reader(struct blob_reader *blob, size_t alignment)
+{ /* Moves the read cursor to the ALIGN()ed offset from blob->data; no bounds check is done here. */
+ blob->current = blob->data + ALIGN(blob->current - blob->data, alignment);
+}
+
+void
+blob_init(struct blob *blob)
+{
+   /* Empty, growable blob: no storage exists until grow_to_fit() allocates. */
+   blob->size = 0;
+   blob->allocated = 0;
+   blob->data = NULL;
+   blob->fixed_allocation = blob->out_of_memory = false;
+}
+
+void
+blob_init_fixed(struct blob *blob, void *data, size_t size)
+{
+   blob->size = 0; /* nothing written yet */
+   blob->allocated = size;
+   blob->data = data;
+   blob->out_of_memory = false;
+   blob->fixed_allocation = true; /* grow_to_fit() never reallocs this blob */
+}
+
+bool
+blob_overwrite_bytes(struct blob *blob,
+                     size_t offset,
+                     const void *bytes,
+                     size_t to_write)
+{
+   const size_t end = offset + to_write;
+
+   /* Reject ranges that wrap around or reach beyond the written size. */
+   if (end < offset || end > blob->size)
+      return false;
+
+   VG(VALGRIND_CHECK_MEM_IS_DEFINED(bytes, to_write));
+   if (blob->data != NULL) /* a blob without storage has nothing to update */
+      memcpy(blob->data + offset, bytes, to_write);
+   return true;
+}
+
+bool
+blob_write_bytes(struct blob *blob, const void *bytes, size_t to_write)
+{
+   if (!grow_to_fit(blob, to_write))
+      return false;
+
+   VG(VALGRIND_CHECK_MEM_IS_DEFINED(bytes, to_write));
+   /* Skip the copy for blobs without storage (NULL data) and for empty
+    * writes, where bytes may be NULL and memcpy() would be undefined. */
+   if (blob->data && to_write > 0)
+      memcpy(blob->data + blob->size, bytes, to_write);
+   blob->size += to_write;
+   return true;
+}
+
+intptr_t
+blob_reserve_bytes(struct blob *blob, size_t to_write)
+{
+   if (grow_to_fit(blob, to_write)) {
+      /* Hand back where the reserved region starts, then skip past it. */
+      const intptr_t offset = blob->size;
+
+      blob->size += to_write;
+      return offset;
+   }
+
+   return -1; /* grow_to_fit() could not make room */
+}
+
+intptr_t
+blob_reserve_uint32(struct blob *blob)
+{
+ align_blob(blob, sizeof(uint32_t)); /* result ignored: a failed pad latches out_of_memory, so the reserve below returns -1 */
+ return blob_reserve_bytes(blob, sizeof(uint32_t));
+}
+
+intptr_t
+blob_reserve_intptr(struct blob *blob)
+{
+ align_blob(blob, sizeof(intptr_t)); /* result ignored: a failed pad latches out_of_memory, so the reserve below returns -1 */
+ return blob_reserve_bytes(blob, sizeof(intptr_t));
+}
+
+bool
+blob_write_uint32(struct blob *blob, uint32_t value)
+{
+ align_blob(blob, sizeof(value)); /* result ignored: a failed pad latches out_of_memory, so the write below returns false */
+
+ return blob_write_bytes(blob, &value, sizeof(value));
+}
+
+#define ASSERT_ALIGNED(_offset, _align) \
+ assert(ALIGN((_offset), (_align)) == (_offset)) /* holds only when ALIGN() leaves _offset unchanged */
+
+bool
+blob_overwrite_uint32 (struct blob *blob,
+ size_t offset,
+ uint32_t value)
+{
+ ASSERT_ALIGNED(offset, sizeof(value)); /* offset is expected to come from blob_reserve_uint32(), which aligns it */
+ return blob_overwrite_bytes(blob, offset, &value, sizeof(value));
+}
+
+bool
+blob_write_uint64(struct blob *blob, uint64_t value)
+{
+ align_blob(blob, sizeof(value)); /* result ignored: a failed pad latches out_of_memory, so the write below returns false */
+
+ return blob_write_bytes(blob, &value, sizeof(value));
+}
+
+bool
+blob_write_intptr(struct blob *blob, intptr_t value)
+{
+ align_blob(blob, sizeof(value)); /* result ignored: a failed pad latches out_of_memory, so the write below returns false */
+
+ return blob_write_bytes(blob, &value, sizeof(value));
+}
+
+bool
+blob_overwrite_intptr (struct blob *blob,
+ size_t offset,
+ intptr_t value)
+{
+ ASSERT_ALIGNED(offset, sizeof(value)); /* offset is expected to come from blob_reserve_intptr(), which aligns it */
+ return blob_overwrite_bytes(blob, offset, &value, sizeof(value));
+}
+
+bool
+blob_write_string(struct blob *blob, const char *str)
+{
+ return blob_write_bytes(blob, str, strlen(str) + 1); /* + 1 stores the terminating NUL too; no alignment padding is added */
+}
+
+void
+blob_reader_init(struct blob_reader *blob, const void *data, size_t size)
+{
+   /* Reading starts at the first byte with no overrun recorded. */
+   blob->overrun = false;
+   blob->current = blob->data = data;
+   blob->end = blob->data + size;
+}
+
+/* Check that an object of size \size can be read from this blob.
+ *
+ * If not, set blob->overrun to indicate that we attempted to read too far.
+ */
+static bool
+ensure_can_read(struct blob_reader *blob, size_t size)
+{
+   if (blob->overrun) /* a recorded overrun is sticky */
+      return false;
+   /* "<=" rather than "<": a zero-byte read at the very end is valid. */
+   if (blob->current <= blob->end &&
+       (size_t)(blob->end - blob->current) >= size)
+      return true;
+
+   blob->overrun = true;
+   return false;
+}
+
+const void *
+blob_read_bytes(struct blob_reader *blob, size_t size)
+{
+   const uint8_t *start;
+
+   /* On overrun nothing is consumed and the caller gets NULL. */
+   if (!ensure_can_read(blob, size))
+      return NULL;
+
+   /* Return the bytes in place and advance the cursor beyond them. */
+   start = blob->current;
+   blob->current = start + size;
+   return start;
+}
+
+void
+blob_copy_bytes(struct blob_reader *blob, void *dest, size_t size)
+{
+   const void *bytes = blob_read_bytes(blob, size);
+
+   /* On overrun dest is left untouched (check blob->overrun).  A zero-size
+    * copy is skipped so memcpy() is never handed a possibly-NULL dest. */
+   if (bytes == NULL || size == 0)
+      return;
+   memcpy(dest, bytes, size);
+}
+
+/* These next three read functions have identical form. If we add any beyond
+ * these first three we should probably switch to generating these with a
+ * preprocessor macro.
+*/
+uint32_t
+blob_read_uint32(struct blob_reader *blob)
+{
+   uint32_t ret;
+   const size_t size = sizeof(ret);
+
+   /* Skip the padding blob_write_uint32() may have inserted. */
+   align_blob_reader(blob, size);
+   if (!ensure_can_read(blob, size))
+      return 0;
+
+   /* memcpy rather than a pointer cast: current is only aligned relative to
+    * blob->data, whose own alignment is up to the caller. */
+   memcpy(&ret, blob->current, size);
+   blob->current += size;
+   return ret;
+}
+
+uint64_t
+blob_read_uint64(struct blob_reader *blob)
+{
+   uint64_t ret;
+   const size_t size = sizeof(ret);
+
+   /* Skip the padding blob_write_uint64() may have inserted. */
+   align_blob_reader(blob, size);
+   if (!ensure_can_read(blob, size))
+      return 0;
+
+   /* memcpy rather than a pointer cast: current is only aligned relative to
+    * blob->data, whose own alignment is up to the caller. */
+   memcpy(&ret, blob->current, size);
+   blob->current += size;
+   return ret;
+}
+
+intptr_t
+blob_read_intptr(struct blob_reader *blob)
+{
+   intptr_t ret;
+   const size_t size = sizeof(ret);
+
+   /* Skip the padding blob_write_intptr() may have inserted. */
+   align_blob_reader(blob, size);
+   if (!ensure_can_read(blob, size))
+      return 0;
+
+   /* memcpy rather than a pointer cast: current is only aligned relative to
+    * blob->data, whose own alignment is up to the caller. */
+   memcpy(&ret, blob->current, size);
+   blob->current += size;
+   return ret;
+}
+
+char *
+blob_read_string(struct blob_reader *blob)
+{
+   size_t size;
+   char *ret;
+   const uint8_t *nul;
+
+   /* If we're already at the end, then this is an overrun. */
+   if (blob->current >= blob->end) {
+      blob->overrun = true;
+      return NULL;
+   }
+
+   /* Similarly, if there is no zero byte in the data remaining in this blob,
+    * we also consider that an overrun.
+    */
+   nul = memchr(blob->current, 0, blob->end - blob->current);
+
+   if (nul == NULL) {
+      blob->overrun = true;
+      return NULL;
+   }
+
+   size = nul - blob->current + 1;
+
+   /* Called outside assert() so the check also runs in NDEBUG builds; it can
+    * only fail here when an earlier read has already overrun. */
+   if (!ensure_can_read(blob, size))
+      return NULL;
+   ret = (char *) blob->current;
+   blob->current += size;
+   return ret;
+}
diff -Nru mesa-17.2.4/src/compiler/blob.h mesa-17.3.3/src/compiler/blob.h
--- mesa-17.2.4/src/compiler/blob.h 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/compiler/blob.h 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,352 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef BLOB_H
+#define BLOB_H
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* The blob functions implement a simple, low-level API for serializing and
+ * deserializing.
+ *
+ * All objects written to a blob will be serialized directly, (without any
+ * additional meta-data to describe the data written). Therefore, it is the
+ * caller's responsibility to ensure that any data can be read later, (either
+ * by knowing exactly what data is expected, or by writing to the blob
+ * sufficient meta-data to describe what has been written).
+ *
+ * A blob is efficient in that it dynamically grows by doubling in size, so
+ * allocation costs are logarithmic.
+ */
+
+struct blob {
+ /* The data actually written to the blob. */
+ uint8_t *data;
+
+ /** Number of bytes that have been allocated for \c data. */
+ size_t allocated;
+
+ /** The number of bytes that have actual data written to them. */
+ size_t size;
+
+ /** True if \c data is a fixed allocation that we cannot resize
+ *
+ * \see blob_init_fixed
+ */
+ bool fixed_allocation;
+
+ /**
+ * True if we've ever failed to realloc or if we go past the end of a fixed
+ * allocation blob.
+ */
+ bool out_of_memory;
+};
+
+/* When done reading, the caller can ensure that everything was consumed by
+ * checking the following:
+ *
+ * 1. blob->current should be equal to blob->end, (if not, too little was
+ * read).
+ *
+ * 2. blob->overrun should be false, (otherwise, too much was read).
+ */
+struct blob_reader {
+ const uint8_t *data; /* first byte of the buffer being read */
+ const uint8_t *end; /* one past the last byte (data + size) */
+ const uint8_t *current; /* read cursor; the next read starts here */
+ bool overrun; /* set once a read asks for more than remains */
+};
+
+/**
+ * Init a new, empty blob.
+ */
+void
+blob_init(struct blob *blob);
+
+/**
+ * Init a new, fixed-size blob.
+ *
+ * A fixed-size blob has a fixed block of data that will not be freed on
+ * blob_finish and will never be grown. If we hit the end, we simply start
+ * returning false from the write functions.
+ *
+ * If a fixed-size blob has a NULL data pointer then the data is not written
+ * but it otherwise operates normally. This can be used to determine the size
+ * that will be required to write a given data structure.
+ */
+void
+blob_init_fixed(struct blob *blob, void *data, size_t size);
+
+/**
+ * Finish a blob and free its memory.
+ *
+ * If \blob was initialized with blob_init_fixed, the data pointer is
+ * considered to be owned by the user and will not be freed.
+ */
+static inline void
+blob_finish(struct blob *blob)
+{
+   /* Fixed (caller-owned) storage is never freed; free(NULL) is a no-op. */
+   free(blob->fixed_allocation ? NULL : blob->data);
+}
+
+/**
+ * Add some unstructured, fixed-size data to a blob.
+ *
+ * \return True unless allocation failed.
+ */
+bool
+blob_write_bytes(struct blob *blob, const void *bytes, size_t to_write);
+
+/**
+ * Reserve space in \blob for a number of bytes.
+ *
+ * Space will be allocated within the blob for these bytes, but the bytes will
+ * be left uninitialized. The caller is expected to use \sa
+ * blob_overwrite_bytes to write to these bytes.
+ *
+ * \return An offset to space allocated within \blob to which \to_write bytes
+ * can be written, (or -1 in case of any allocation error).
+ */
+intptr_t
+blob_reserve_bytes(struct blob *blob, size_t to_write);
+
+/**
+ * Similar to \sa blob_reserve_bytes, but only reserves an uint32_t worth of
+ * space. Note that this must be used if later reading with \sa
+ * blob_read_uint32, since it aligns the offset correctly.
+ */
+intptr_t
+blob_reserve_uint32(struct blob *blob);
+
+/**
+ * Similar to \sa blob_reserve_bytes, but only reserves an intptr_t worth of
+ * space. Note that this must be used if later reading with \sa
+ * blob_read_intptr, since it aligns the offset correctly.
+ */
+intptr_t
+blob_reserve_intptr(struct blob *blob);
+
+/**
+ * Overwrite some data previously written to the blob.
+ *
+ * Writes data to an existing portion of the blob at an offset of \offset.
+ * This data range must have previously been written to the blob by one of the
+ * blob_write_* calls.
+ *
+ * For example usage, see blob_overwrite_uint32
+ *
+ * \return True unless the requested offset or offset+to_write lie outside
+ * the current blob's size.
+ */
+bool
+blob_overwrite_bytes(struct blob *blob,
+ size_t offset,
+ const void *bytes,
+ size_t to_write);
+
+/**
+ * Add a uint32_t to a blob.
+ *
+ * \note This function will only write to a uint32_t-aligned offset from the
+ * beginning of the blob's data, so some padding bytes may be added to the
+ * blob if this write follows some unaligned write (such as
+ * blob_write_string).
+ *
+ * \return True unless allocation failed.
+ */
+bool
+blob_write_uint32(struct blob *blob, uint32_t value);
+
+/**
+ * Overwrite a uint32_t previously written to the blob.
+ *
+ * Writes a uint32_t value to an existing portion of the blob at an offset of
+ * \offset. This data range must have previously been written to the blob by
+ * one of the blob_write_* calls.
+ *
+ *
+ * The expected usage is something like the following pattern:
+ *
+ * size_t offset;
+ *
+ * offset = blob_reserve_uint32(blob);
+ * ... various blob write calls, writing N items ...
+ * blob_overwrite_uint32 (blob, offset, N);
+ *
+ * \return True unless the requested position or position+to_write lie outside
+ * the current blob's size.
+ */
+bool
+blob_overwrite_uint32(struct blob *blob,
+ size_t offset,
+ uint32_t value);
+
+/**
+ * Add a uint64_t to a blob.
+ *
+ * \note This function will only write to a uint64_t-aligned offset from the
+ * beginning of the blob's data, so some padding bytes may be added to the
+ * blob if this write follows some unaligned write (such as
+ * blob_write_string).
+ *
+ * \return True unless allocation failed.
+ */
+bool
+blob_write_uint64(struct blob *blob, uint64_t value);
+
+/**
+ * Add an intptr_t to a blob.
+ *
+ * \note This function will only write to an intptr_t-aligned offset from the
+ * beginning of the blob's data, so some padding bytes may be added to the
+ * blob if this write follows some unaligned write (such as
+ * blob_write_string).
+ *
+ * \return True unless allocation failed.
+ */
+bool
+blob_write_intptr(struct blob *blob, intptr_t value);
+
+/**
+ * Overwrite an intptr_t previously written to the blob.
+ *
+ * Writes a intptr_t value to an existing portion of the blob at an offset of
+ * \offset. This data range must have previously been written to the blob by
+ * one of the blob_write_* calls.
+ *
+ * For example usage, see blob_overwrite_uint32
+ *
+ * \return True unless the requested position or position+to_write lie outside
+ * the current blob's size.
+ */
+bool
+blob_overwrite_intptr(struct blob *blob,
+ size_t offset,
+ intptr_t value);
+
+/**
+ * Add a NULL-terminated string to a blob, (including the NULL terminator).
+ *
+ * \return True unless allocation failed.
+ */
+bool
+blob_write_string(struct blob *blob, const char *str);
+
+/**
+ * Start reading a blob, (initializing the contents of \blob for reading).
+ *
+ * After this call, the caller can use the various blob_read_* functions to
+ * read elements from the data array.
+ *
+ * For all of the blob_read_* functions, if there is insufficient data
+ * remaining, the functions return a default value (such as 0 or NULL) and set
+ * the blob_reader's overrun flag, which is how the caller detects the failure;
+ * the aligned readers may still have advanced current past padding.
+ */
+void
+blob_reader_init(struct blob_reader *blob, const void *data, size_t size);
+
+/**
+ * Read some unstructured, fixed-size data from the current location, (and
+ * update the current location to just past this data).
+ *
+ * \note The memory returned belongs to the data underlying the blob reader. The
+ * caller must copy the data in order to use it after the lifetime of the data
+ * underlying the blob reader.
+ *
+ * \return The bytes read (see note above about memory lifetime).
+ */
+const void *
+blob_read_bytes(struct blob_reader *blob, size_t size);
+
+/**
+ * Read some unstructured, fixed-size data from the current location, copying
+ * it to \dest (and update the current location to just past this data)
+ */
+void
+blob_copy_bytes(struct blob_reader *blob, void *dest, size_t size);
+
+/**
+ * Read a uint32_t from the current location, (and update the current location
+ * to just past this uint32_t).
+ *
+ * \note This function will only read from a uint32_t-aligned offset from the
+ * beginning of the blob's data, so some padding bytes may be skipped.
+ *
+ * \return The uint32_t read
+ */
+uint32_t
+blob_read_uint32(struct blob_reader *blob);
+
+/**
+ * Read a uint64_t from the current location, (and update the current location
+ * to just past this uint64_t).
+ *
+ * \note This function will only read from a uint64_t-aligned offset from the
+ * beginning of the blob's data, so some padding bytes may be skipped.
+ *
+ * \return The uint64_t read
+ */
+uint64_t
+blob_read_uint64(struct blob_reader *blob);
+
+/**
+ * Read an intptr_t value from the current location, (and update the
+ * current location to just past this intptr_t).
+ *
+ * \note This function will only read from an intptr_t-aligned offset from the
+ * beginning of the blob's data, so some padding bytes may be skipped.
+ *
+ * \return The intptr_t read
+ */
+intptr_t
+blob_read_intptr(struct blob_reader *blob);
+
+/**
+ * Read a NULL-terminated string from the current location, (and update the
+ * current location to just past this string).
+ *
+ * \note The memory returned belongs to the data underlying the blob reader. The
+ * caller must copy the string in order to use the string after the lifetime
+ * of the data underlying the blob reader.
+ *
+ * \return The string read (see note above about memory lifetime). However, if
+ * there is no NULL byte remaining within the blob, this function returns
+ * NULL.
+ */
+char *
+blob_read_string(struct blob_reader *blob);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* BLOB_H */
diff -Nru mesa-17.2.4/src/compiler/glsl/ast_array_index.cpp mesa-17.3.3/src/compiler/glsl/ast_array_index.cpp
--- mesa-17.2.4/src/compiler/glsl/ast_array_index.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/ast_array_index.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -88,23 +88,23 @@
if (deref_var != NULL) {
if (deref_var->var->is_interface_instance()) {
- unsigned field_index =
- deref_record->record->type->field_index(deref_record->field);
- assert(field_index < deref_var->var->get_interface_type()->length);
+ unsigned field_idx = deref_record->field_idx;
+ assert(field_idx < deref_var->var->get_interface_type()->length);
int *const max_ifc_array_access =
deref_var->var->get_max_ifc_array_access();
assert(max_ifc_array_access != NULL);
- if (idx > max_ifc_array_access[field_index]) {
- max_ifc_array_access[field_index] = idx;
+ if (idx > max_ifc_array_access[field_idx]) {
+ max_ifc_array_access[field_idx] = idx;
/* Check whether this access will, as a side effect, implicitly
* cause the size of a built-in array to be too large.
*/
- check_builtin_array_max_size(deref_record->field, idx+1, *loc,
- state);
+ const char *field_name =
+ deref_record->record->type->fields.structure[field_idx].name;
+ check_builtin_array_max_size(field_name, idx+1, *loc, state);
}
}
}
@@ -167,7 +167,7 @@
* index is not a constant expression, ensure that the array has a
* declared size.
*/
- ir_constant *const const_index = idx->constant_expression_value();
+ ir_constant *const const_index = idx->constant_expression_value(mem_ctx);
if (const_index != NULL && idx->type->is_integer()) {
const int idx = const_index->value.i[0];
const char *type_name = "error";
diff -Nru mesa-17.2.4/src/compiler/glsl/ast_function.cpp mesa-17.3.3/src/compiler/glsl/ast_function.cpp
--- mesa-17.2.4/src/compiler/glsl/ast_function.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/ast_function.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -37,6 +37,7 @@
exec_list *parameters,
struct _mesa_glsl_parse_state *state)
{
+ void *mem_ctx = state;
unsigned count = 0;
foreach_list_typed(ast_node, ast, link, parameters) {
@@ -48,7 +49,9 @@
ast->set_is_lhs(true);
ir_rvalue *result = ast->hir(instructions, state);
- ir_constant *const constant = result->constant_expression_value();
+ ir_constant *const constant =
+ result->constant_expression_value(mem_ctx);
+
if (constant != NULL)
result = constant;
@@ -224,19 +227,28 @@
val = ((ir_swizzle *)val)->val;
}
- while (val->ir_type == ir_type_dereference_array) {
- val = ((ir_dereference_array *)val)->array;
+ for (;;) {
+ if (val->ir_type == ir_type_dereference_array) {
+ val = ((ir_dereference_array *)val)->array;
+ } else if (val->ir_type == ir_type_dereference_record &&
+ !state->es_shader) {
+ val = ((ir_dereference_record *)val)->record;
+ } else
+ break;
}
- if (!val->as_dereference_variable() ||
- val->variable_referenced()->data.mode != ir_var_shader_in) {
+ ir_variable *var = NULL;
+ if (const ir_dereference_variable *deref_var = val->as_dereference_variable())
+ var = deref_var->variable_referenced();
+
+ if (!var || var->data.mode != ir_var_shader_in) {
_mesa_glsl_error(&loc, state,
"parameter `%s` must be a shader input",
formal->name);
return false;
}
- val->variable_referenced()->data.must_be_shader_input = 1;
+ var->data.must_be_shader_input = 1;
}
/* Verify that 'out' and 'inout' actual parameters are lvalues. */
@@ -433,8 +445,7 @@
exec_list *actual_parameters,
ir_variable *sub_var,
ir_rvalue *array_idx,
- struct _mesa_glsl_parse_state *state,
- bool inline_immediately)
+ struct _mesa_glsl_parse_state *state)
{
void *ctx = state;
exec_list post_call_conversions;
@@ -519,7 +530,8 @@
* instructions; just generate an ir_constant.
*/
if (state->is_version(120, 100)) {
- ir_constant *value = sig->constant_expression_value(actual_parameters,
+ ir_constant *value = sig->constant_expression_value(ctx,
+ actual_parameters,
NULL);
if (value != NULL) {
return value;
@@ -546,7 +558,8 @@
ir_call *call = new(ctx) ir_call(sig, deref,
actual_parameters, sub_var, array_idx);
instructions->push_tail(call);
- if (inline_immediately) {
+ if (sig->is_builtin()) {
+ /* inline immediately */
call->generate_inline(call);
call->remove();
}
@@ -663,8 +676,13 @@
ir_variable *sub_var = NULL;
*function_name = array->primary_expression.identifier;
- match_subroutine_by_name(*function_name, actual_parameters,
- state, &sub_var);
+ if (!match_subroutine_by_name(*function_name, actual_parameters,
+ state, &sub_var)) {
+ _mesa_glsl_error(&loc, state, "Unknown subroutine `%s'",
+ *function_name);
+ *function_name = NULL; /* indicate error condition to caller */
+ return NULL;
+ }
ir_rvalue *outer_array_idx = idx->hir(instructions, state);
return new(mem_ctx) ir_dereference_array(sub_var, outer_array_idx);
@@ -939,7 +957,7 @@
assert(result->type == desired_type);
/* Try constant folding; it may fold in the conversion we just added. */
- ir_constant *const constant = result->constant_expression_value();
+ ir_constant *const constant = result->constant_expression_value(ctx);
return (constant != NULL) ? (ir_rvalue *) constant : (ir_rvalue *) result;
}
@@ -967,6 +985,7 @@
implicitly_convert_component(ir_rvalue * &from, const glsl_base_type to,
struct _mesa_glsl_parse_state *state)
{
+ void *mem_ctx = state;
ir_rvalue *result = from;
if (to != from->type->base_type) {
@@ -985,7 +1004,7 @@
}
}
- ir_rvalue *const constant = result->constant_expression_value();
+ ir_rvalue *const constant = result->constant_expression_value(mem_ctx);
if (constant != NULL)
result = constant;
@@ -1123,7 +1142,7 @@
if (var->type->is_matrix()) {
ir_rvalue *lhs =
new(ctx) ir_dereference_array(var, new(ctx) ir_constant(i));
- assignment = new(ctx) ir_assignment(lhs, rhs, NULL);
+ assignment = new(ctx) ir_assignment(lhs, rhs);
} else {
/* use writemask rather than index for vector */
assert(var->type->is_vector());
@@ -1259,7 +1278,7 @@
ir_rvalue *lhs = new(ctx) ir_dereference_array(var,
new(ctx) ir_constant(i));
- ir_instruction *assignment = new(ctx) ir_assignment(lhs, rhs, NULL);
+ ir_instruction *assignment = new(ctx) ir_assignment(lhs, rhs);
instructions->push_tail(assignment);
i++;
@@ -1272,7 +1291,7 @@
/**
* Determine if a list consists of a single scalar r-value
*/
-bool
+static bool
single_scalar_parameter(exec_list *parameters)
{
const ir_rvalue *const p = (ir_rvalue *) parameters->get_head_raw();
@@ -1293,7 +1312,7 @@
* An \c ir_dereference_variable of the temprorary generated in the constructor
* body.
*/
-ir_rvalue *
+static ir_rvalue *
emit_inline_vector_constructor(const glsl_type *type,
exec_list *instructions,
exec_list *parameters,
@@ -1457,7 +1476,7 @@
* \c src_base + \c count must be less than or equal to the number of
* components in the source vector.
*/
-ir_instruction *
+static ir_instruction *
assign_to_matrix_column(ir_variable *var, unsigned column, unsigned row_base,
ir_rvalue *src, unsigned src_base, unsigned count,
void *mem_ctx)
@@ -1497,7 +1516,7 @@
* An \c ir_dereference_variable of the temprorary generated in the constructor
* body.
*/
-ir_rvalue *
+static ir_rvalue *
emit_inline_matrix_constructor(const glsl_type *type,
exec_list *instructions,
exec_list *parameters,
@@ -1544,8 +1563,7 @@
ir_instruction *inst =
new(ctx) ir_assignment(new(ctx) ir_dereference_variable(rhs_var),
- new(ctx) ir_constant(rhs_var->type, &zero),
- NULL);
+ new(ctx) ir_constant(rhs_var->type, &zero));
instructions->push_tail(inst);
ir_dereference *const rhs_ref =
@@ -1578,7 +1596,7 @@
ir_rvalue *const rhs = new(ctx) ir_swizzle(rhs_ref, rhs_swiz[i],
type->vector_elements);
- inst = new(ctx) ir_assignment(col_ref, rhs, NULL);
+ inst = new(ctx) ir_assignment(col_ref, rhs);
instructions->push_tail(inst);
}
@@ -1591,7 +1609,7 @@
ir_rvalue *const rhs = new(ctx) ir_swizzle(rhs_ref, 1, 1, 1, 1,
type->vector_elements);
- inst = new(ctx) ir_assignment(col_ref, rhs, NULL);
+ inst = new(ctx) ir_assignment(col_ref, rhs);
instructions->push_tail(inst);
}
} else if (first_param->type->is_matrix()) {
@@ -1645,7 +1663,7 @@
ir_rvalue *const lhs =
new(ctx) ir_dereference_array(var, new(ctx) ir_constant(col));
- ir_instruction *inst = new(ctx) ir_assignment(lhs, rhs, NULL);
+ ir_instruction *inst = new(ctx) ir_assignment(lhs, rhs);
instructions->push_tail(inst);
}
}
@@ -1663,7 +1681,7 @@
ir_dereference *const rhs_var_ref =
new(ctx) ir_dereference_variable(rhs_var);
ir_instruction *const inst =
- new(ctx) ir_assignment(rhs_var_ref, first_param, NULL);
+ new(ctx) ir_assignment(rhs_var_ref, first_param);
instructions->push_tail(inst);
const unsigned last_row = MIN2(src_matrix->type->vector_elements,
@@ -1726,7 +1744,7 @@
ir_dereference *rhs_var_ref =
new(ctx) ir_dereference_variable(rhs_var);
- ir_instruction *inst = new(ctx) ir_assignment(rhs_var_ref, rhs, NULL);
+ ir_instruction *inst = new(ctx) ir_assignment(rhs_var_ref, rhs);
instructions->push_tail(inst);
do {
@@ -1766,7 +1784,7 @@
}
-ir_rvalue *
+static ir_rvalue *
emit_inline_record_constructor(const glsl_type *type,
exec_list *instructions,
exec_list *parameters,
@@ -1790,8 +1808,7 @@
ir_rvalue *const rhs = ((ir_instruction *) node)->as_rvalue();
assert(rhs != NULL);
- ir_instruction *const assign =
- new(mem_ctx) ir_assignment(lhs, rhs, NULL);
+ ir_instruction *const assign = new(mem_ctx) ir_assignment(lhs, rhs);
instructions->push_tail(assign);
node = node->next;
@@ -2153,8 +2170,8 @@
instructions->push_tail(var);
instructions->push_tail(
new(ctx) ir_assignment(new(ctx) ir_dereference_variable(var),
- matrix, NULL));
- var->constant_value = matrix->constant_expression_value();
+ matrix));
+ var->constant_value = matrix->constant_expression_value(ctx);
/* Replace the matrix with dereferences of its columns. */
for (int i = 0; i < matrix->type->matrix_columns; i++) {
@@ -2221,7 +2238,7 @@
* After doing so, track whether or not all the parameters to the
* constructor are trivially constant valued expressions.
*/
- ir_rvalue *const constant = result->constant_expression_value();
+ ir_rvalue *const constant = result->constant_expression_value(ctx);
if (constant != NULL)
result = constant;
@@ -2331,7 +2348,7 @@
}
value = generate_call(instructions, sig, &actual_parameters, sub_var,
- array_idx, state, sig->is_builtin());
+ array_idx, state);
if (!value) {
ir_variable *const tmp = new(ctx) ir_variable(glsl_type::void_type,
"void_var",
diff -Nru mesa-17.2.4/src/compiler/glsl/ast_to_hir.cpp mesa-17.3.3/src/compiler/glsl/ast_to_hir.cpp
--- mesa-17.2.4/src/compiler/glsl/ast_to_hir.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/ast_to_hir.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -1182,7 +1182,7 @@
* scalar booleans. If it isn't, emit an error and return a constant
* boolean to avoid triggering cascading error messages.
*/
-ir_rvalue *
+static ir_rvalue *
get_scalar_boolean_operand(exec_list *instructions,
struct _mesa_glsl_parse_state *state,
ast_expression *parent_expr,
@@ -1845,7 +1845,7 @@
error_emitted = true;
}
- ir_constant *cond_val = op[0]->constant_expression_value();
+ ir_constant *cond_val = op[0]->constant_expression_value(ctx);
if (then_instructions.is_empty()
&& else_instructions.is_empty()
@@ -2229,6 +2229,8 @@
process_array_size(exec_node *node,
struct _mesa_glsl_parse_state *state)
{
+ void *mem_ctx = state;
+
exec_list dummy_instructions;
ast_node *array_size = exec_node_data(ast_node, node, link);
@@ -2261,7 +2263,7 @@
return 0;
}
- ir_constant *const size = ir->constant_expression_value();
+ ir_constant *const size = ir->constant_expression_value(mem_ctx);
if (size == NULL ||
(state->is_version(120, 300) &&
array_size->has_sequence_subexpression())) {
@@ -3124,17 +3126,6 @@
interpolation = INTERP_MODE_NOPERSPECTIVE;
else if (qual->flags.q.smooth)
interpolation = INTERP_MODE_SMOOTH;
- else if (state->es_shader &&
- ((mode == ir_var_shader_in &&
- state->stage != MESA_SHADER_VERTEX) ||
- (mode == ir_var_shader_out &&
- state->stage != MESA_SHADER_FRAGMENT)))
- /* Section 4.3.9 (Interpolation) of the GLSL ES 3.00 spec says:
- *
- * "When no interpolation qualifier is present, smooth interpolation
- * is used."
- */
- interpolation = INTERP_MODE_SMOOTH;
else
interpolation = INTERP_MODE_NONE;
@@ -4164,11 +4155,13 @@
* otherwise.
*/
static ir_variable *
-get_variable_being_redeclared(ir_variable *var, YYLTYPE loc,
+get_variable_being_redeclared(ir_variable **var_ptr, YYLTYPE loc,
struct _mesa_glsl_parse_state *state,
bool allow_all_redeclarations,
bool *is_redeclaration)
{
+ ir_variable *var = *var_ptr;
+
/* Check if this declaration is actually a re-declaration, either to
* resize an array or add qualifiers to an existing variable.
*
@@ -4209,6 +4202,7 @@
earlier->type = var->type;
delete var;
var = NULL;
+ *var_ptr = NULL;
} else if ((state->ARB_fragment_coord_conventions_enable ||
state->is_version(150, 0))
&& strcmp(var->name, "gl_FragCoord") == 0
@@ -4320,12 +4314,13 @@
/**
* Generate the IR for an initializer in a variable declaration
*/
-ir_rvalue *
+static ir_rvalue *
process_initializer(ir_variable *var, ast_declaration *decl,
ast_fully_specified_type *type,
exec_list *initializer_instructions,
struct _mesa_glsl_parse_state *state)
{
+ void *mem_ctx = state;
ir_rvalue *result = NULL;
YYLTYPE initializer_loc = decl->initializer->get_location();
@@ -4460,7 +4455,9 @@
* GLSL ES 3.00.4 spec. This is a new limitation for these GLSL
* versions.
*/
- ir_constant *constant_value = rhs->constant_expression_value();
+ ir_constant *constant_value =
+ rhs->constant_expression_value(mem_ctx);
+
if (!constant_value ||
(state->is_version(430, 300) &&
decl->initializer->has_sequence_subexpression())) {
@@ -4520,7 +4517,7 @@
} else
initializer_type = rhs->type;
- var->constant_initializer = rhs->constant_expression_value();
+ var->constant_initializer = rhs->constant_expression_value(mem_ctx);
var->data.has_initializer = true;
/* If the declared variable is an unsized array, it must inherrit
@@ -4714,7 +4711,7 @@
"geometry shader input");
}
-void
+static void
validate_identifier(const char *identifier, YYLTYPE loc,
struct _mesa_glsl_parse_state *state)
{
@@ -5252,7 +5249,7 @@
if (var->type->fields.structure[i].type->is_array() ||
var->type->fields.structure[i].type->is_record())
_mesa_glsl_error(&loc, state,
- "fragement shader input cannot have "
+ "fragment shader input cannot have "
"a struct that contains an "
"array or struct");
}
@@ -5451,22 +5448,21 @@
bool var_is_gl_id = is_gl_identifier(var->name);
bool is_redeclaration;
- ir_variable *declared_var =
- get_variable_being_redeclared(var, decl->get_location(), state,
- false /* allow_all_redeclarations */,
- &is_redeclaration);
+ var = get_variable_being_redeclared(&var, decl->get_location(), state,
+ false /* allow_all_redeclarations */,
+ &is_redeclaration);
if (is_redeclaration) {
if (var_is_gl_id &&
- declared_var->data.how_declared == ir_var_declared_in_block) {
+ var->data.how_declared == ir_var_declared_in_block) {
_mesa_glsl_error(&loc, state,
"`%s' has already been redeclared using "
- "gl_PerVertex", declared_var->name);
+ "gl_PerVertex", var->name);
}
- declared_var->data.how_declared = ir_var_declared_normally;
+ var->data.how_declared = ir_var_declared_normally;
}
if (decl->initializer != NULL) {
- result = process_initializer(declared_var,
+ result = process_initializer(var,
decl, this->type,
&initializer_instructions, state);
} else {
@@ -5486,7 +5482,7 @@
}
if (state->es_shader) {
- const glsl_type *const t = declared_var->type;
+ const glsl_type *const t = var->type;
/* Skip the unsized array check for TCS/TES/GS inputs & TCS outputs.
*
@@ -5508,10 +5504,10 @@
* present, as per the following table."
*/
const bool implicitly_sized =
- (declared_var->data.mode == ir_var_shader_in &&
+ (var->data.mode == ir_var_shader_in &&
state->stage >= MESA_SHADER_TESS_CTRL &&
state->stage <= MESA_SHADER_GEOMETRY) ||
- (declared_var->data.mode == ir_var_shader_out &&
+ (var->data.mode == ir_var_shader_out &&
state->stage == MESA_SHADER_TESS_CTRL);
if (t->is_unsized_array() && !implicitly_sized)
@@ -5537,6 +5533,17 @@
"GLSL ES");
}
+ /* Section 4.4.6.1 Atomic Counter Layout Qualifiers of the GLSL 4.60 spec:
+ *
+ * "It is a compile-time error to declare an unsized array of
+ * atomic_uint"
+ */
+ if (var->type->is_unsized_array() &&
+ var->type->without_array()->base_type == GLSL_TYPE_ATOMIC_UINT) {
+ _mesa_glsl_error(& loc, state,
+ "Unsized array of atomic_uint is not allowed");
+ }
+
/* If the declaration is not a redeclaration, there are a few additional
* semantic checks that must be applied. In addition, variable that was
* created for the declaration should be added to the IR stream.
@@ -5556,7 +5563,7 @@
* after the initializer if present or immediately after the name
* being declared if not."
*/
- if (!state->symbols->add_variable(declared_var)) {
+ if (!state->symbols->add_variable(var)) {
YYLTYPE loc = this->get_location();
_mesa_glsl_error(&loc, state, "name `%s' already taken in the "
"current scope", decl->identifier);
@@ -5569,7 +5576,7 @@
* global var is decled, then the function is defined with usage of
* the global var. See glslparsertest's CorrectModule.frag.
*/
- instructions->push_head(declared_var);
+ instructions->push_head(var);
}
instructions->append_list(&initializer_instructions);
@@ -6358,13 +6365,28 @@
}
+struct case_label {
+ /** Value of the case label. */
+ unsigned value;
+
+ /** Does this label occur after the default? */
+ bool after_default;
+
+ /**
+ * AST for the case label.
+ *
+ * This is only used to generate error messages for duplicate labels.
+ */
+ ast_expression *ast;
+};
+
/* Used for detection of duplicate case values, compare
* given contents directly.
*/
static bool
compare_case_value(const void *a, const void *b)
{
- return *(unsigned *) a == *(unsigned *) b;
+ return ((struct case_label *) a)->value == ((struct case_label *) b)->value;
}
@@ -6374,7 +6396,7 @@
static unsigned
key_contents(const void *key)
{
- return *(unsigned *) key;
+ return ((struct case_label *) key)->value;
}
@@ -6400,6 +6422,7 @@
state,
"switch-statement expression must be scalar "
"integer");
+ return NULL;
}
/* Track the switch-statement nesting in a stack-like manner.
@@ -6556,44 +6579,35 @@
* if default should be chosen or not.
*/
if (!default_case.is_empty()) {
+ struct hash_entry *entry;
+ ir_factory body(instructions, state);
- ir_rvalue *const true_val = new (state) ir_constant(true);
- ir_dereference_variable *deref_run_default_var =
- new(state) ir_dereference_variable(state->switch_state.run_default);
-
- /* Choose to run default case initially, following conditional
- * assignments might change this.
- */
- ir_assignment *const init_var =
- new(state) ir_assignment(deref_run_default_var, true_val);
- instructions->push_tail(init_var);
-
- /* Default case was the last one, no checks required. */
- if (after_default.is_empty()) {
- instructions->append_list(&default_case);
- return NULL;
- }
-
- foreach_in_list(ir_instruction, ir, &after_default) {
- ir_assignment *assign = ir->as_assignment();
-
- if (!assign)
- continue;
+ ir_expression *cmp = NULL;
- /* Clone the check between case label and init expression. */
- ir_expression *exp = (ir_expression*) assign->condition;
- ir_expression *clone = exp->clone(state, NULL);
-
- ir_dereference_variable *deref_var =
- new(state) ir_dereference_variable(state->switch_state.run_default);
- ir_rvalue *const false_val = new (state) ir_constant(false);
+ hash_table_foreach(state->switch_state.labels_ht, entry) {
+ const struct case_label *const l = (struct case_label *) entry->data;
- ir_assignment *const set_false =
- new(state) ir_assignment(deref_var, false_val, clone);
+ /* If the switch init-value is the value of one of the labels that
+ * occurs after the default case, disable execution of the default
+ * case.
+ */
+ if (l->after_default) {
+ ir_constant *const cnst =
+ state->switch_state.test_var->type->base_type == GLSL_TYPE_UINT
+ ? body.constant(unsigned(l->value))
+ : body.constant(int(l->value));
- instructions->push_tail(set_false);
+ cmp = cmp == NULL
+ ? equal(cnst, state->switch_state.test_var)
+ : logic_or(cmp, equal(cnst, state->switch_state.test_var));
+ }
}
+ if (cmp != NULL)
+ body.emit(assign(state->switch_state.run_default, logic_not(cmp)));
+ else
+ body.emit(assign(state->switch_state.run_default, body.constant(true)));
+
/* Append default case and all cases after it. */
instructions->append_list(&default_case);
instructions->append_list(&after_default);
@@ -6639,12 +6653,9 @@
ast_case_label::hir(exec_list *instructions,
struct _mesa_glsl_parse_state *state)
{
- void *ctx = state;
+ ir_factory body(instructions, state);
- ir_dereference_variable *deref_fallthru_var =
- new(ctx) ir_dereference_variable(state->switch_state.is_fallthru_var);
-
- ir_rvalue *const true_val = new(ctx) ir_constant(true);
+ ir_variable *const fallthru_var = state->switch_state.is_fallthru_var;
/* If not default case, ... */
if (this->test_value != NULL) {
@@ -6652,7 +6663,8 @@
* comparison of cached test expression value to case label.
*/
ir_rvalue *const label_rval = this->test_value->hir(instructions, state);
- ir_constant *label_const = label_rval->constant_expression_value();
+ ir_constant *label_const =
+ label_rval->constant_expression_value(body.mem_ctx);
if (!label_const) {
YYLTYPE loc = this->test_value->get_location();
@@ -6662,32 +6674,44 @@
"constant expression");
/* Stuff a dummy value in to allow processing to continue. */
- label_const = new(ctx) ir_constant(0);
+ label_const = body.constant(0);
} else {
hash_entry *entry =
_mesa_hash_table_search(state->switch_state.labels_ht,
- (void *)(uintptr_t)&label_const->value.u[0]);
+ &label_const->value.u[0]);
if (entry) {
- ast_expression *previous_label = (ast_expression *) entry->data;
+ const struct case_label *const l =
+ (struct case_label *) entry->data;
+ const ast_expression *const previous_label = l->ast;
YYLTYPE loc = this->test_value->get_location();
+
_mesa_glsl_error(& loc, state, "duplicate case value");
loc = previous_label->get_location();
_mesa_glsl_error(& loc, state, "this is the previous case label");
} else {
+ struct case_label *l = ralloc(state->switch_state.labels_ht,
+ struct case_label);
+
+ l->value = label_const->value.u[0];
+ l->after_default = state->switch_state.previous_default != NULL;
+ l->ast = this->test_value;
+
_mesa_hash_table_insert(state->switch_state.labels_ht,
- (void *)(uintptr_t)&label_const->value.u[0],
- this->test_value);
+ &label_const->value.u[0],
+ l);
}
}
- ir_dereference_variable *deref_test_var =
- new(ctx) ir_dereference_variable(state->switch_state.test_var);
+ /* Create an r-value version of the ir_constant label here (after we may
+ * have created a fake one in error cases) that can be passed to
+ * apply_implicit_conversion below.
+ */
+ ir_rvalue *label = label_const;
- ir_expression *test_cond = new(ctx) ir_expression(ir_binop_all_equal,
- label_const,
- deref_test_var);
+ ir_rvalue *deref_test_var =
+ new(body.mem_ctx) ir_dereference_variable(state->switch_state.test_var);
/*
* From GLSL 4.40 specification section 6.2 ("Selection"):
@@ -6700,10 +6724,10 @@
* uint (see section 4.1.10 “Implicit Conversions”) before the compare
* is done."
*/
- if (label_const->type != state->switch_state.test_var->type) {
+ if (label->type != state->switch_state.test_var->type) {
YYLTYPE loc = this->test_value->get_location();
- const glsl_type *type_a = label_const->type;
+ const glsl_type *type_a = label->type;
const glsl_type *type_b = state->switch_state.test_var->type;
/* Check if int->uint implicit conversion is supported. */
@@ -6720,21 +6744,26 @@
/* Conversion of the case label. */
if (type_a->base_type == GLSL_TYPE_INT) {
if (!apply_implicit_conversion(glsl_type::uint_type,
- test_cond->operands[0], state))
+ label, state))
_mesa_glsl_error(&loc, state, "implicit type conversion error");
} else {
/* Conversion of the init-expression value. */
if (!apply_implicit_conversion(glsl_type::uint_type,
- test_cond->operands[1], state))
+ deref_test_var, state))
_mesa_glsl_error(&loc, state, "implicit type conversion error");
}
}
- }
- ir_assignment *set_fallthru_on_test =
- new(ctx) ir_assignment(deref_fallthru_var, true_val, test_cond);
+ /* If the implicit conversion was allowed, the types will already be
+ * the same. If the implicit conversion wasn't allowed, smash the
+ * type of the label anyway. This will prevent the expression
+ * constructor (below) from failing an assertion.
+ */
+ label->type = deref_test_var->type;
+ }
- instructions->push_tail(set_fallthru_on_test);
+ body.emit(assign(fallthru_var,
+ logic_or(fallthru_var, equal(label, deref_test_var))));
} else { /* default case */
if (state->switch_state.previous_default) {
YYLTYPE loc = this->get_location();
@@ -6747,18 +6776,9 @@
state->switch_state.previous_default = this;
/* Set fallthru condition on 'run_default' bool. */
- ir_dereference_variable *deref_run_default =
- new(ctx) ir_dereference_variable(state->switch_state.run_default);
- ir_rvalue *const cond_true = new(ctx) ir_constant(true);
- ir_expression *test_cond = new(ctx) ir_expression(ir_binop_all_equal,
- cond_true,
- deref_run_default);
-
- /* Set falltrhu state. */
- ir_assignment *set_fallthru =
- new(ctx) ir_assignment(deref_fallthru_var, true_val, test_cond);
-
- instructions->push_tail(set_fallthru);
+ body.emit(assign(fallthru_var,
+ logic_or(fallthru_var,
+ state->switch_state.run_default)));
}
/* Case statements do not have r-values. */
@@ -7236,11 +7256,6 @@
validate_matrix_layout_for_type(state, &loc, decl_type, NULL);
}
- if (qual->flags.q.read_only && qual->flags.q.write_only) {
- _mesa_glsl_error(&loc, state, "buffer variable can't be both "
- "readonly and writeonly.");
- }
-
foreach_list_typed (ast_declaration, decl, link,
&decl_list->declarations) {
YYLTYPE loc = decl->get_location();
@@ -7372,14 +7387,13 @@
qual->offset, &xfb_offset)) {
fields[i].offset = xfb_offset;
block_xfb_offset = fields[i].offset +
- MAX2(xfb_stride, (int) (4 * field_type->component_slots()));
+ 4 * field_type->component_slots();
}
} else {
if (layout && layout->flags.q.explicit_xfb_offset) {
unsigned align = field_type->is_64bit() ? 8 : 4;
fields[i].offset = glsl_align(block_xfb_offset, align);
- block_xfb_offset +=
- MAX2(xfb_stride, (int) (4 * field_type->component_slots()));
+ block_xfb_offset += 4 * field_type->component_slots();
}
}
@@ -7417,12 +7431,9 @@
/* For readonly and writeonly qualifiers the field definition,
* if set, overwrites the layout qualifier.
*/
- if (qual->flags.q.read_only) {
- fields[i].memory_read_only = true;
- fields[i].memory_write_only = false;
- } else if (qual->flags.q.write_only) {
- fields[i].memory_read_only = false;
- fields[i].memory_write_only = true;
+ if (qual->flags.q.read_only || qual->flags.q.write_only) {
+ fields[i].memory_read_only = qual->flags.q.read_only;
+ fields[i].memory_write_only = qual->flags.q.write_only;
} else {
fields[i].memory_read_only =
layout ? layout->flags.q.read_only : 0;
@@ -8199,21 +8210,21 @@
if (redeclaring_per_vertex) {
bool is_redeclaration;
- ir_variable *declared_var =
- get_variable_being_redeclared(var, loc, state,
+ var =
+ get_variable_being_redeclared(&var, loc, state,
true /* allow_all_redeclarations */,
&is_redeclaration);
if (!var_is_gl_id || !is_redeclaration) {
_mesa_glsl_error(&loc, state,
"redeclaration of gl_PerVertex can only "
"include built-in variables");
- } else if (declared_var->data.how_declared == ir_var_declared_normally) {
+ } else if (var->data.how_declared == ir_var_declared_normally) {
_mesa_glsl_error(&loc, state,
"`%s' has already been redeclared",
- declared_var->name);
+ var->name);
} else {
- declared_var->data.how_declared = ir_var_declared_in_block;
- declared_var->reinit_interface_type(block_type);
+ var->data.how_declared = ir_var_declared_in_block;
+ var->reinit_interface_type(block_type);
}
continue;
}
diff -Nru mesa-17.2.4/src/compiler/glsl/ast_type.cpp mesa-17.3.3/src/compiler/glsl/ast_type.cpp
--- mesa-17.2.4/src/compiler/glsl/ast_type.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/ast_type.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -186,10 +186,7 @@
}
static void
-merge_bindless_qualifier(YYLTYPE *loc,
- _mesa_glsl_parse_state *state,
- const ast_type_qualifier &qualifier,
- const ast_type_qualifier &new_qualifier)
+merge_bindless_qualifier(_mesa_glsl_parse_state *state)
{
if (state->default_uniform_qualifier->flags.q.bindless_sampler) {
state->bindless_sampler_specified = true;
@@ -484,7 +481,7 @@
q.flags.q.bindless_image ||
q.flags.q.bound_sampler ||
q.flags.q.bound_image)
- merge_bindless_qualifier(loc, state, *this, q);
+ merge_bindless_qualifier(state);
return r;
}
@@ -866,7 +863,9 @@
ir_rvalue *const ir = const_expression->hir(&dummy_instructions, state);
- ir_constant *const const_int = ir->constant_expression_value();
+ ir_constant *const const_int =
+ ir->constant_expression_value(ralloc_parent(ir));
+
if (const_int == NULL || !const_int->type->is_integer()) {
YYLTYPE loc = const_expression->get_location();
_mesa_glsl_error(&loc, state, "%s must be an integral constant "
@@ -921,7 +920,8 @@
ir_rvalue *const ir = const_expression->hir(&dummy_instructions, state);
- ir_constant *const const_int = ir->constant_expression_value();
+ ir_constant *const const_int =
+ ir->constant_expression_value(ralloc_parent(ir));
if (const_int == NULL || !const_int->type->is_integer()) {
_mesa_glsl_error(loc, state, "%s must be an integral constant "
"expression", qual_indentifier);
diff -Nru mesa-17.2.4/src/compiler/glsl/blob.c mesa-17.3.3/src/compiler/glsl/blob.c
--- mesa-17.2.4/src/compiler/glsl/blob.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/blob.c 1970-01-01 00:00:00.000000000 +0000
@@ -1,344 +0,0 @@
-/*
- * Copyright © 2014 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include
-
-#include "main/macros.h"
-#include "blob.h"
-
-#ifdef HAVE_VALGRIND
-#include
-#include
-#define VG(x) x
-#else
-#define VG(x)
-#endif
-
-#define BLOB_INITIAL_SIZE 4096
-
-/* Ensure that \blob will be able to fit an additional object of size
- * \additional. The growing (if any) will occur by doubling the existing
- * allocation.
- */
-static bool
-grow_to_fit(struct blob *blob, size_t additional)
-{
- size_t to_allocate;
- uint8_t *new_data;
-
- if (blob->out_of_memory)
- return false;
-
- if (blob->size + additional <= blob->allocated)
- return true;
-
- if (blob->allocated == 0)
- to_allocate = BLOB_INITIAL_SIZE;
- else
- to_allocate = blob->allocated * 2;
-
- to_allocate = MAX2(to_allocate, blob->allocated + additional);
-
- new_data = realloc(blob->data, to_allocate);
- if (new_data == NULL) {
- blob->out_of_memory = true;
- return false;
- }
-
- blob->data = new_data;
- blob->allocated = to_allocate;
-
- return true;
-}
-
-/* Align the blob->size so that reading or writing a value at (blob->data +
- * blob->size) will result in an access aligned to a granularity of \alignment
- * bytes.
- *
- * \return True unless allocation fails
- */
-static bool
-align_blob(struct blob *blob, size_t alignment)
-{
- const size_t new_size = ALIGN(blob->size, alignment);
-
- if (blob->size < new_size) {
- if (!grow_to_fit(blob, new_size - blob->size))
- return false;
-
- memset(blob->data + blob->size, 0, new_size - blob->size);
- blob->size = new_size;
- }
-
- return true;
-}
-
-static void
-align_blob_reader(struct blob_reader *blob, size_t alignment)
-{
- blob->current = blob->data + ALIGN(blob->current - blob->data, alignment);
-}
-
-struct blob *
-blob_create()
-{
- struct blob *blob = (struct blob *) malloc(sizeof(struct blob));
- if (blob == NULL)
- return NULL;
-
- blob->data = NULL;
- blob->allocated = 0;
- blob->size = 0;
- blob->out_of_memory = false;
-
- return blob;
-}
-
-bool
-blob_overwrite_bytes(struct blob *blob,
- size_t offset,
- const void *bytes,
- size_t to_write)
-{
- /* Detect an attempt to overwrite data out of bounds. */
- if (blob->size < offset + to_write)
- return false;
-
- VG(VALGRIND_CHECK_MEM_IS_DEFINED(bytes, to_write));
-
- memcpy(blob->data + offset, bytes, to_write);
-
- return true;
-}
-
-bool
-blob_write_bytes(struct blob *blob, const void *bytes, size_t to_write)
-{
- if (! grow_to_fit(blob, to_write))
- return false;
-
- VG(VALGRIND_CHECK_MEM_IS_DEFINED(bytes, to_write));
-
- memcpy(blob->data + blob->size, bytes, to_write);
- blob->size += to_write;
-
- return true;
-}
-
-uint8_t *
-blob_reserve_bytes(struct blob *blob, size_t to_write)
-{
- uint8_t *ret;
-
- if (! grow_to_fit (blob, to_write))
- return NULL;
-
- ret = blob->data + blob->size;
- blob->size += to_write;
-
- return ret;
-}
-
-bool
-blob_write_uint32(struct blob *blob, uint32_t value)
-{
- align_blob(blob, sizeof(value));
-
- return blob_write_bytes(blob, &value, sizeof(value));
-}
-
-bool
-blob_overwrite_uint32 (struct blob *blob,
- size_t offset,
- uint32_t value)
-{
- return blob_overwrite_bytes(blob, offset, &value, sizeof(value));
-}
-
-bool
-blob_write_uint64(struct blob *blob, uint64_t value)
-{
- align_blob(blob, sizeof(value));
-
- return blob_write_bytes(blob, &value, sizeof(value));
-}
-
-bool
-blob_write_intptr(struct blob *blob, intptr_t value)
-{
- align_blob(blob, sizeof(value));
-
- return blob_write_bytes(blob, &value, sizeof(value));
-}
-
-bool
-blob_write_string(struct blob *blob, const char *str)
-{
- return blob_write_bytes(blob, str, strlen(str) + 1);
-}
-
-void
-blob_reader_init(struct blob_reader *blob, uint8_t *data, size_t size)
-{
- blob->data = data;
- blob->end = data + size;
- blob->current = data;
- blob->overrun = false;
-}
-
-/* Check that an object of size \size can be read from this blob.
- *
- * If not, set blob->overrun to indicate that we attempted to read too far.
- */
-static bool
-ensure_can_read(struct blob_reader *blob, size_t size)
-{
- if (blob->overrun)
- return false;
-
- if (blob->current < blob->end && blob->end - blob->current >= size)
- return true;
-
- blob->overrun = true;
-
- return false;
-}
-
-void *
-blob_read_bytes(struct blob_reader *blob, size_t size)
-{
- void *ret;
-
- if (! ensure_can_read (blob, size))
- return NULL;
-
- ret = blob->current;
-
- blob->current += size;
-
- return ret;
-}
-
-void
-blob_copy_bytes(struct blob_reader *blob, uint8_t *dest, size_t size)
-{
- uint8_t *bytes;
-
- bytes = blob_read_bytes(blob, size);
- if (bytes == NULL)
- return;
-
- memcpy(dest, bytes, size);
-}
-
-/* These next three read functions have identical form. If we add any beyond
- * these first three we should probably switch to generating these with a
- * preprocessor macro.
-*/
-uint32_t
-blob_read_uint32(struct blob_reader *blob)
-{
- uint32_t ret;
- int size = sizeof(ret);
-
- align_blob_reader(blob, size);
-
- if (! ensure_can_read(blob, size))
- return 0;
-
- ret = *((uint32_t*) blob->current);
-
- blob->current += size;
-
- return ret;
-}
-
-uint64_t
-blob_read_uint64(struct blob_reader *blob)
-{
- uint64_t ret;
- int size = sizeof(ret);
-
- align_blob_reader(blob, size);
-
- if (! ensure_can_read(blob, size))
- return 0;
-
- ret = *((uint64_t*) blob->current);
-
- blob->current += size;
-
- return ret;
-}
-
-intptr_t
-blob_read_intptr(struct blob_reader *blob)
-{
- intptr_t ret;
- int size = sizeof(ret);
-
- align_blob_reader(blob, size);
-
- if (! ensure_can_read(blob, size))
- return 0;
-
- ret = *((intptr_t *) blob->current);
-
- blob->current += size;
-
- return ret;
-}
-
-char *
-blob_read_string(struct blob_reader *blob)
-{
- int size;
- char *ret;
- uint8_t *nul;
-
- /* If we're already at the end, then this is an overrun. */
- if (blob->current >= blob->end) {
- blob->overrun = true;
- return NULL;
- }
-
- /* Similarly, if there is no zero byte in the data remaining in this blob,
- * we also consider that an overrun.
- */
- nul = memchr(blob->current, 0, blob->end - blob->current);
-
- if (nul == NULL) {
- blob->overrun = true;
- return NULL;
- }
-
- size = nul - blob->current + 1;
-
- assert(ensure_can_read(blob, size));
-
- ret = (char *) blob->current;
-
- blob->current += size;
-
- return ret;
-}
diff -Nru mesa-17.2.4/src/compiler/glsl/blob.h mesa-17.3.3/src/compiler/glsl/blob.h
--- mesa-17.2.4/src/compiler/glsl/blob.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/blob.h 1970-01-01 00:00:00.000000000 +0000
@@ -1,307 +0,0 @@
-/*
- * Copyright © 2014 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#ifndef BLOB_H
-#define BLOB_H
-
-#include
-#include
-#include
-#include
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* The blob functions implement a simple, low-level API for serializing and
- * deserializing.
- *
- * All objects written to a blob will be serialized directly, (without any
- * additional meta-data to describe the data written). Therefore, it is the
- * caller's responsibility to ensure that any data can be read later, (either
- * by knowing exactly what data is expected, or by writing to the blob
- * sufficient meta-data to describe what has been written).
- *
- * A blob is efficient in that it dynamically grows by doubling in size, so
- * allocation costs are logarithmic.
- */
-
-struct blob {
- /* The data actually written to the blob. */
- uint8_t *data;
-
- /** Number of bytes that have been allocated for \c data. */
- size_t allocated;
-
- /** The number of bytes that have actual data written to them. */
- size_t size;
-
- /**
- * True if we've ever failed to realloc or if we go pas the end of a fixed
- * allocation blob.
- */
- bool out_of_memory;
-};
-
-/* When done reading, the caller can ensure that everything was consumed by
- * checking the following:
- *
- * 1. blob->current should be equal to blob->end, (if not, too little was
- * read).
- *
- * 2. blob->overrun should be false, (otherwise, too much was read).
- */
-struct blob_reader {
- uint8_t *data;
- uint8_t *end;
- uint8_t *current;
- bool overrun;
-};
-
-/**
- * Create a new, empty blob.
- *
- * \return The new blob, (or NULL in case of allocation failure).
- */
-struct blob *
-blob_create(void);
-
-/**
- * Destroy a blob and free its memory.
- */
-static inline void
-blob_destroy(struct blob *blob)
-{
- free(blob->data);
- free(blob);
-}
-
-/**
- * Add some unstructured, fixed-size data to a blob.
- *
- * \return True unless allocation failed.
- */
-bool
-blob_write_bytes(struct blob *blob, const void *bytes, size_t to_write);
-
-/**
- * Reserve space in \blob for a number of bytes.
- *
- * Space will be allocated within the blob for these byes, but the bytes will
- * be left uninitialized. The caller is expected to use the return value to
- * write directly (and immediately) to these bytes.
- *
- * \note The return value is valid immediately upon return, but can be
- * invalidated by any other call to a blob function. So the caller should call
- * blob_reserve_byes immediately before writing through the returned pointer.
- *
- * This function is intended to be used when interfacing with an existing API
- * that is not aware of the blob API, (so that blob_write_bytes cannot be
- * called).
- *
- * \return A pointer to space allocated within \blob to which \to_write bytes
- * can be written, (or NULL in case of any allocation error).
- */
-uint8_t *
-blob_reserve_bytes(struct blob *blob, size_t to_write);
-
-/**
- * Overwrite some data previously written to the blob.
- *
- * Writes data to an existing portion of the blob at an offset of \offset.
- * This data range must have previously been written to the blob by one of the
- * blob_write_* calls.
- *
- * For example usage, see blob_overwrite_uint32
- *
- * \return True unless the requested offset or offset+to_write lie outside
- * the current blob's size.
- */
-bool
-blob_overwrite_bytes(struct blob *blob,
- size_t offset,
- const void *bytes,
- size_t to_write);
-
-/**
- * Add a uint32_t to a blob.
- *
- * \note This function will only write to a uint32_t-aligned offset from the
- * beginning of the blob's data, so some padding bytes may be added to the
- * blob if this write follows some unaligned write (such as
- * blob_write_string).
- *
- * \return True unless allocation failed.
- */
-bool
-blob_write_uint32(struct blob *blob, uint32_t value);
-
-/**
- * Overwrite a uint32_t previously written to the blob.
- *
- * Writes a uint32_t value to an existing portion of the blob at an offset of
- * \offset. This data range must have previously been written to the blob by
- * one of the blob_write_* calls.
- *
- *
- * The expected usage is something like the following pattern:
- *
- * size_t offset;
- *
- * offset = blob->size;
- * blob_write_uint32 (blob, 0); // placeholder
- * ... various blob write calls, writing N items ...
- * blob_overwrite_uint32 (blob, offset, N);
- *
- * \return True unless the requested position or position+to_write lie outside
- * the current blob's size.
- */
-bool
-blob_overwrite_uint32(struct blob *blob,
- size_t offset,
- uint32_t value);
-
-/**
- * Add a uint64_t to a blob.
- *
- * \note This function will only write to a uint64_t-aligned offset from the
- * beginning of the blob's data, so some padding bytes may be added to the
- * blob if this write follows some unaligned write (such as
- * blob_write_string).
- *
- * \return True unless allocation failed.
- */
-bool
-blob_write_uint64(struct blob *blob, uint64_t value);
-
-/**
- * Add an intptr_t to a blob.
- *
- * \note This function will only write to an intptr_t-aligned offset from the
- * beginning of the blob's data, so some padding bytes may be added to the
- * blob if this write follows some unaligned write (such as
- * blob_write_string).
- *
- * \return True unless allocation failed.
- */
-bool
-blob_write_intptr(struct blob *blob, intptr_t value);
-
-/**
- * Add a NULL-terminated string to a blob, (including the NULL terminator).
- *
- * \return True unless allocation failed.
- */
-bool
-blob_write_string(struct blob *blob, const char *str);
-
-/**
- * Start reading a blob, (initializing the contents of \blob for reading).
- *
- * After this call, the caller can use the various blob_read_* functions to
- * read elements from the data array.
- *
- * For all of the blob_read_* functions, if there is insufficient data
- * remaining, the functions will do nothing, (perhaps returning default values
- * such as 0). The caller can detect this by noting that the blob_reader's
- * current value is unchanged before and after the call.
- */
-void
-blob_reader_init(struct blob_reader *blob, uint8_t *data, size_t size);
-
-/**
- * Read some unstructured, fixed-size data from the current location, (and
- * update the current location to just past this data).
- *
- * \note The memory returned belongs to the data underlying the blob reader. The
- * caller must copy the data in order to use it after the lifetime of the data
- * underlying the blob reader.
- *
- * \return The bytes read (see note above about memory lifetime).
- */
-void *
-blob_read_bytes(struct blob_reader *blob, size_t size);
-
-/**
- * Read some unstructured, fixed-size data from the current location, copying
- * it to \dest (and update the current location to just past this data)
- */
-void
-blob_copy_bytes(struct blob_reader *blob, uint8_t *dest, size_t size);
-
-/**
- * Read a uint32_t from the current location, (and update the current location
- * to just past this uint32_t).
- *
- * \note This function will only read from a uint32_t-aligned offset from the
- * beginning of the blob's data, so some padding bytes may be skipped.
- *
- * \return The uint32_t read
- */
-uint32_t
-blob_read_uint32(struct blob_reader *blob);
-
-/**
- * Read a uint64_t from the current location, (and update the current location
- * to just past this uint64_t).
- *
- * \note This function will only read from a uint64_t-aligned offset from the
- * beginning of the blob's data, so some padding bytes may be skipped.
- *
- * \return The uint64_t read
- */
-uint64_t
-blob_read_uint64(struct blob_reader *blob);
-
-/**
- * Read an intptr_t value from the current location, (and update the
- * current location to just past this intptr_t).
- *
- * \note This function will only read from an intptr_t-aligned offset from the
- * beginning of the blob's data, so some padding bytes may be skipped.
- *
- * \return The intptr_t read
- */
-intptr_t
-blob_read_intptr(struct blob_reader *blob);
-
-/**
- * Read a NULL-terminated string from the current location, (and update the
- * current location to just past this string).
- *
- * \note The memory returned belongs to the data underlying the blob reader. The
- * caller must copy the string in order to use the string after the lifetime
- * of the data underlying the blob reader.
- *
- * \return The string read (see note above about memory lifetime). However, if
- * there is no NULL byte remaining within the blob, this function returns
- * NULL.
- */
-char *
-blob_read_string(struct blob_reader *blob);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* BLOB_H */
diff -Nru mesa-17.2.4/src/compiler/glsl/builtin_functions.cpp mesa-17.3.3/src/compiler/glsl/builtin_functions.cpp
--- mesa-17.2.4/src/compiler/glsl/builtin_functions.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/builtin_functions.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -151,6 +151,12 @@
}
static bool
+v460_desktop(const _mesa_glsl_parse_state *state)
+{
+ return state->is_version(460, 0);
+}
+
+static bool
v130_fs_only(const _mesa_glsl_parse_state *state)
{
return state->is_version(130, 300) &&
@@ -487,6 +493,12 @@
}
static bool
+shader_atomic_counter_ops_or_v460_desktop(const _mesa_glsl_parse_state *state)
+{
+ return state->ARB_shader_atomic_counter_ops_enable || v460_desktop(state);
+}
+
+static bool
shader_ballot(const _mesa_glsl_parse_state *state)
{
return state->ARB_shader_ballot_enable;
@@ -610,6 +622,12 @@
}
static bool
+vote_or_v460_desktop(const _mesa_glsl_parse_state *state)
+{
+ return state->ARB_shader_group_vote_enable || v460_desktop(state);
+}
+
+static bool
integer_functions_supported(const _mesa_glsl_parse_state *state)
{
return state->extensions->MESA_shader_integer_functions;
@@ -962,7 +980,8 @@
ir_function_signature *_vote_intrinsic(builtin_available_predicate avail,
enum ir_intrinsic_id id);
- ir_function_signature *_vote(const char *intrinsic_name);
+ ir_function_signature *_vote(const char *intrinsic_name,
+ builtin_available_predicate avail);
#undef B0
#undef B1
@@ -1090,7 +1109,7 @@
_atomic_intrinsic2(buffer_atomics_supported,
glsl_type::int_type,
ir_intrinsic_generic_atomic_add),
- _atomic_counter_intrinsic1(shader_atomic_counter_ops,
+ _atomic_counter_intrinsic1(shader_atomic_counter_ops_or_v460_desktop,
ir_intrinsic_atomic_counter_add),
NULL);
add_function("__intrinsic_atomic_min",
@@ -1100,7 +1119,7 @@
_atomic_intrinsic2(buffer_atomics_supported,
glsl_type::int_type,
ir_intrinsic_generic_atomic_min),
- _atomic_counter_intrinsic1(shader_atomic_counter_ops,
+ _atomic_counter_intrinsic1(shader_atomic_counter_ops_or_v460_desktop,
ir_intrinsic_atomic_counter_min),
NULL);
add_function("__intrinsic_atomic_max",
@@ -1110,7 +1129,7 @@
_atomic_intrinsic2(buffer_atomics_supported,
glsl_type::int_type,
ir_intrinsic_generic_atomic_max),
- _atomic_counter_intrinsic1(shader_atomic_counter_ops,
+ _atomic_counter_intrinsic1(shader_atomic_counter_ops_or_v460_desktop,
ir_intrinsic_atomic_counter_max),
NULL);
add_function("__intrinsic_atomic_and",
@@ -1120,7 +1139,7 @@
_atomic_intrinsic2(buffer_atomics_supported,
glsl_type::int_type,
ir_intrinsic_generic_atomic_and),
- _atomic_counter_intrinsic1(shader_atomic_counter_ops,
+ _atomic_counter_intrinsic1(shader_atomic_counter_ops_or_v460_desktop,
ir_intrinsic_atomic_counter_and),
NULL);
add_function("__intrinsic_atomic_or",
@@ -1130,7 +1149,7 @@
_atomic_intrinsic2(buffer_atomics_supported,
glsl_type::int_type,
ir_intrinsic_generic_atomic_or),
- _atomic_counter_intrinsic1(shader_atomic_counter_ops,
+ _atomic_counter_intrinsic1(shader_atomic_counter_ops_or_v460_desktop,
ir_intrinsic_atomic_counter_or),
NULL);
add_function("__intrinsic_atomic_xor",
@@ -1140,7 +1159,7 @@
_atomic_intrinsic2(buffer_atomics_supported,
glsl_type::int_type,
ir_intrinsic_generic_atomic_xor),
- _atomic_counter_intrinsic1(shader_atomic_counter_ops,
+ _atomic_counter_intrinsic1(shader_atomic_counter_ops_or_v460_desktop,
ir_intrinsic_atomic_counter_xor),
NULL);
add_function("__intrinsic_atomic_exchange",
@@ -1150,7 +1169,7 @@
_atomic_intrinsic2(buffer_atomics_supported,
glsl_type::int_type,
ir_intrinsic_generic_atomic_exchange),
- _atomic_counter_intrinsic1(shader_atomic_counter_ops,
+ _atomic_counter_intrinsic1(shader_atomic_counter_ops_or_v460_desktop,
ir_intrinsic_atomic_counter_exchange),
NULL);
add_function("__intrinsic_atomic_comp_swap",
@@ -1160,7 +1179,7 @@
_atomic_intrinsic3(buffer_atomics_supported,
glsl_type::int_type,
ir_intrinsic_generic_atomic_comp_swap),
- _atomic_counter_intrinsic2(shader_atomic_counter_ops,
+ _atomic_counter_intrinsic2(shader_atomic_counter_ops_or_v460_desktop,
ir_intrinsic_atomic_counter_comp_swap),
NULL);
@@ -1197,13 +1216,13 @@
NULL);
add_function("__intrinsic_vote_all",
- _vote_intrinsic(vote, ir_intrinsic_vote_all),
+ _vote_intrinsic(vote_or_v460_desktop, ir_intrinsic_vote_all),
NULL);
add_function("__intrinsic_vote_any",
- _vote_intrinsic(vote, ir_intrinsic_vote_any),
+ _vote_intrinsic(vote_or_v460_desktop, ir_intrinsic_vote_any),
NULL);
add_function("__intrinsic_vote_eq",
- _vote_intrinsic(vote, ir_intrinsic_vote_eq),
+ _vote_intrinsic(vote_or_v460_desktop, ir_intrinsic_vote_eq),
NULL);
add_function("__intrinsic_ballot", _ballot_intrinsic(), NULL);
@@ -3031,6 +3050,43 @@
shader_atomic_counter_ops),
NULL);
+ add_function("atomicCounterAdd",
+ _atomic_counter_op1("__intrinsic_atomic_add",
+ v460_desktop),
+ NULL);
+ add_function("atomicCounterSubtract",
+ _atomic_counter_op1("__intrinsic_atomic_sub",
+ v460_desktop),
+ NULL);
+ add_function("atomicCounterMin",
+ _atomic_counter_op1("__intrinsic_atomic_min",
+ v460_desktop),
+ NULL);
+ add_function("atomicCounterMax",
+ _atomic_counter_op1("__intrinsic_atomic_max",
+ v460_desktop),
+ NULL);
+ add_function("atomicCounterAnd",
+ _atomic_counter_op1("__intrinsic_atomic_and",
+ v460_desktop),
+ NULL);
+ add_function("atomicCounterOr",
+ _atomic_counter_op1("__intrinsic_atomic_or",
+ v460_desktop),
+ NULL);
+ add_function("atomicCounterXor",
+ _atomic_counter_op1("__intrinsic_atomic_xor",
+ v460_desktop),
+ NULL);
+ add_function("atomicCounterExchange",
+ _atomic_counter_op1("__intrinsic_atomic_exchange",
+ v460_desktop),
+ NULL);
+ add_function("atomicCounterCompSwap",
+ _atomic_counter_op2("__intrinsic_atomic_comp_swap",
+ v460_desktop),
+ NULL);
+
add_function("atomicAdd",
_atomic_op2("__intrinsic_atomic_add",
buffer_atomics_supported,
@@ -3220,9 +3276,29 @@
glsl_type::uint64_t_type),
NULL);
- add_function("anyInvocationARB", _vote("__intrinsic_vote_any"), NULL);
- add_function("allInvocationsARB", _vote("__intrinsic_vote_all"), NULL);
- add_function("allInvocationsEqualARB", _vote("__intrinsic_vote_eq"), NULL);
+ add_function("anyInvocationARB",
+ _vote("__intrinsic_vote_any", vote),
+ NULL);
+
+ add_function("allInvocationsARB",
+ _vote("__intrinsic_vote_all", vote),
+ NULL);
+
+ add_function("allInvocationsEqualARB",
+ _vote("__intrinsic_vote_eq", vote),
+ NULL);
+
+ add_function("anyInvocation",
+ _vote("__intrinsic_vote_any", v460_desktop),
+ NULL);
+
+ add_function("allInvocations",
+ _vote("__intrinsic_vote_all", v460_desktop),
+ NULL);
+
+ add_function("allInvocationsEqual",
+ _vote("__intrinsic_vote_eq", v460_desktop),
+ NULL);
add_function("__builtin_idiv64",
generate_ir::idiv64(mem_ctx, integer_functions_supported),
@@ -6163,11 +6239,12 @@
}
ir_function_signature *
-builtin_builder::_vote(const char *intrinsic_name)
+builtin_builder::_vote(const char *intrinsic_name,
+ builtin_available_predicate avail)
{
ir_variable *value = in_var(glsl_type::bool_type, "value");
- MAKE_SIG(glsl_type::bool_type, vote, 1, value);
+ MAKE_SIG(glsl_type::bool_type, avail, 1, value);
ir_variable *retval = body.make_temp(glsl_type::bool_type, "retval");
@@ -6214,16 +6291,7 @@
s = builtins.find(state, name, actual_parameters);
mtx_unlock(&builtins_lock);
- if (s == NULL)
- return NULL;
-
- struct hash_table *ht =
- _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
- void *mem_ctx = state;
- ir_function *f = s->function()->clone(mem_ctx, ht);
- _mesa_hash_table_destroy(ht, NULL);
-
- return f->matching_signature(state, actual_parameters, true);
+ return s;
}
bool
diff -Nru mesa-17.2.4/src/compiler/glsl/builtin_functions.h mesa-17.3.3/src/compiler/glsl/builtin_functions.h
--- mesa-17.2.4/src/compiler/glsl/builtin_functions.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/builtin_functions.h 2018-01-18 21:30:28.000000000 +0000
@@ -64,6 +64,9 @@
ir_function_signature *
sign64(void *mem_ctx, builtin_available_predicate avail);
+ir_function_signature *
+udivmod64(void *mem_ctx, builtin_available_predicate avail);
+
}
#endif /* BULITIN_FUNCTIONS_H */
diff -Nru mesa-17.2.4/src/compiler/glsl/builtin_variables.cpp mesa-17.3.3/src/compiler/glsl/builtin_variables.cpp
--- mesa-17.2.4/src/compiler/glsl/builtin_variables.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/builtin_variables.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -90,9 +90,9 @@
SWIZZLE_Y,
SWIZZLE_Z,
SWIZZLE_Z)},
- {"spotCosCutoff", {STATE_LIGHT, 0, STATE_SPOT_DIRECTION}, SWIZZLE_WWWW},
- {"spotCutoff", {STATE_LIGHT, 0, STATE_SPOT_CUTOFF}, SWIZZLE_XXXX},
{"spotExponent", {STATE_LIGHT, 0, STATE_ATTENUATION}, SWIZZLE_WWWW},
+ {"spotCutoff", {STATE_LIGHT, 0, STATE_SPOT_CUTOFF}, SWIZZLE_XXXX},
+ {"spotCosCutoff", {STATE_LIGHT, 0, STATE_SPOT_DIRECTION}, SWIZZLE_WWWW},
{"constantAttenuation", {STATE_LIGHT, 0, STATE_ATTENUATION}, SWIZZLE_XXXX},
{"linearAttenuation", {STATE_LIGHT, 0, STATE_ATTENUATION}, SWIZZLE_YYYY},
{"quadraticAttenuation", {STATE_LIGHT, 0, STATE_ATTENUATION}, SWIZZLE_ZZZZ},
@@ -1017,6 +1017,11 @@
if (state->is_version(130, 300))
add_system_value(SYSTEM_VALUE_VERTEX_ID, int_t, "gl_VertexID");
+ if (state->is_version(460, 0)) {
+ add_system_value(SYSTEM_VALUE_BASE_VERTEX, int_t, "gl_BaseVertex");
+ add_system_value(SYSTEM_VALUE_BASE_INSTANCE, int_t, "gl_BaseInstance");
+ add_system_value(SYSTEM_VALUE_DRAW_ID, int_t, "gl_DrawID");
+ }
if (state->ARB_draw_instanced_enable)
add_system_value(SYSTEM_VALUE_INSTANCE_ID, int_t, "gl_InstanceIDARB");
if (state->ARB_draw_instanced_enable || state->is_version(140, 300))
@@ -1290,15 +1295,10 @@
uvec3_t, "gl_LocalGroupSizeARB");
}
- if (state->ctx->Const.LowerCsDerivedVariables) {
- add_variable("gl_GlobalInvocationID", uvec3_t, ir_var_auto, 0);
- add_variable("gl_LocalInvocationIndex", uint_t, ir_var_auto, 0);
- } else {
- add_system_value(SYSTEM_VALUE_GLOBAL_INVOCATION_ID,
- uvec3_t, "gl_GlobalInvocationID");
- add_system_value(SYSTEM_VALUE_LOCAL_INVOCATION_INDEX,
- uint_t, "gl_LocalInvocationIndex");
- }
+ add_system_value(SYSTEM_VALUE_GLOBAL_INVOCATION_ID,
+ uvec3_t, "gl_GlobalInvocationID");
+ add_system_value(SYSTEM_VALUE_LOCAL_INVOCATION_INDEX,
+ uint_t, "gl_LocalInvocationIndex");
}
@@ -1469,84 +1469,3 @@
break;
}
}
-
-
-/**
- * Initialize compute shader variables with values that are derived from other
- * compute shader variable.
- */
-static void
-initialize_cs_derived_variables(gl_shader *shader,
- ir_function_signature *const main_sig)
-{
- assert(shader->Stage == MESA_SHADER_COMPUTE);
-
- ir_variable *gl_GlobalInvocationID =
- shader->symbols->get_variable("gl_GlobalInvocationID");
- assert(gl_GlobalInvocationID);
- ir_variable *gl_WorkGroupID =
- shader->symbols->get_variable("gl_WorkGroupID");
- assert(gl_WorkGroupID);
- ir_variable *gl_WorkGroupSize =
- shader->symbols->get_variable("gl_WorkGroupSize");
- if (gl_WorkGroupSize == NULL) {
- void *const mem_ctx = ralloc_parent(shader->ir);
- gl_WorkGroupSize = new(mem_ctx) ir_variable(glsl_type::uvec3_type,
- "gl_WorkGroupSize",
- ir_var_auto);
- gl_WorkGroupSize->data.how_declared = ir_var_declared_implicitly;
- gl_WorkGroupSize->data.read_only = true;
- shader->ir->push_head(gl_WorkGroupSize);
- }
- ir_variable *gl_LocalInvocationID =
- shader->symbols->get_variable("gl_LocalInvocationID");
- assert(gl_LocalInvocationID);
-
- /* gl_GlobalInvocationID =
- * gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID
- */
- ir_instruction *inst =
- assign(gl_GlobalInvocationID,
- add(mul(gl_WorkGroupID, gl_WorkGroupSize),
- gl_LocalInvocationID));
- main_sig->body.push_head(inst);
-
- /* gl_LocalInvocationIndex =
- * gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y +
- * gl_LocalInvocationID.y * gl_WorkGroupSize.x +
- * gl_LocalInvocationID.x;
- */
- ir_expression *index_z =
- mul(mul(swizzle_z(gl_LocalInvocationID), swizzle_x(gl_WorkGroupSize)),
- swizzle_y(gl_WorkGroupSize));
- ir_expression *index_y =
- mul(swizzle_y(gl_LocalInvocationID), swizzle_x(gl_WorkGroupSize));
- ir_expression *index_y_plus_z = add(index_y, index_z);
- operand index_x(swizzle_x(gl_LocalInvocationID));
- ir_expression *index_x_plus_y_plus_z = add(index_y_plus_z, index_x);
- ir_variable *gl_LocalInvocationIndex =
- shader->symbols->get_variable("gl_LocalInvocationIndex");
- assert(gl_LocalInvocationIndex);
- inst = assign(gl_LocalInvocationIndex, index_x_plus_y_plus_z);
- main_sig->body.push_head(inst);
-}
-
-
-/**
- * Initialize builtin variables with values based on other builtin variables.
- * These are initialized in the main function.
- */
-void
-_mesa_glsl_initialize_derived_variables(struct gl_context *ctx,
- gl_shader *shader)
-{
- /* We only need to set CS variables currently. */
- if (shader->Stage == MESA_SHADER_COMPUTE &&
- ctx->Const.LowerCsDerivedVariables) {
- ir_function_signature *const main_sig =
- _mesa_get_main_function_signature(shader->symbols);
-
- if (main_sig != NULL)
- initialize_cs_derived_variables(shader, main_sig);
- }
-}
diff -Nru mesa-17.2.4/src/compiler/glsl/glcpp/glcpp.h mesa-17.3.3/src/compiler/glsl/glcpp/glcpp.h
--- mesa-17.2.4/src/compiler/glsl/glcpp/glcpp.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/glcpp/glcpp.h 2018-01-18 21:30:28.000000000 +0000
@@ -33,6 +33,8 @@
#include "util/hash_table.h"
+#include "util/string_buffer.h"
+
#define yyscan_t void*
/* Some data types used for parser values. */
@@ -199,10 +201,8 @@
int skipping;
token_list_t *lex_from_list;
token_node_t *lex_from_node;
- char *output;
- char *info_log;
- size_t output_length;
- size_t info_log_length;
+ struct _mesa_string_buffer *output;
+ struct _mesa_string_buffer *info_log;
int error;
glcpp_extension_iterator extensions;
const struct gl_extensions *extension_list;
diff -Nru mesa-17.2.4/src/compiler/glsl/glcpp/glcpp-lex.c mesa-17.3.3/src/compiler/glsl/glcpp/glcpp-lex.c
--- mesa-17.2.4/src/compiler/glsl/glcpp/glcpp-lex.c 2017-10-30 14:50:46.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/glcpp/glcpp-lex.c 2018-01-18 21:31:04.000000000 +0000
@@ -691,7 +691,14 @@
#define RETURN_STRING_TOKEN(token) \
do { \
if (! parser->skipping) { \
- yylval->str = linear_strdup(yyextra->linalloc, yytext); \
+ /* We're not doing linear_strdup here, to avoid \
+ * an implicit call on strlen() for the length \
+ * of the string, as this is already found by \
+ * flex and stored in yyleng */ \
+ void *mem_ctx = yyextra->linalloc; \
+ yylval->str = linear_alloc_child(mem_ctx, \
+ yyleng + 1); \
+ memcpy(yylval->str, yytext, yyleng + 1); \
RETURN_TOKEN_NEVER_SKIP (token); \
} \
} while(0)
@@ -758,7 +765,7 @@
strings, we have to be careful to avoid OTHER matching and hiding
something that CPP does care about. So we simply exclude all
characters that appear in any other expressions. */
-#line 762 "glsl/glcpp/glcpp-lex.c"
+#line 769 "glsl/glcpp/glcpp-lex.c"
#define INITIAL 0
#define COMMENT 1
@@ -1053,7 +1060,7 @@
}
{
-#line 199 "./glsl/glcpp/glcpp-lex.l"
+#line 206 "./glsl/glcpp/glcpp-lex.l"
glcpp_parser_t *parser = yyextra;
@@ -1114,7 +1121,7 @@
}
/* Single-line comments */
-#line 1118 "glsl/glcpp/glcpp-lex.c"
+#line 1125 "glsl/glcpp/glcpp-lex.c"
while ( /*CONSTCOND*/1 ) /* loops until end-of-file is reached */
{
@@ -1169,41 +1176,41 @@
case 1:
YY_RULE_SETUP
-#line 259 "./glsl/glcpp/glcpp-lex.l"
+#line 266 "./glsl/glcpp/glcpp-lex.l"
{
}
YY_BREAK
/* Multi-line comments */
case 2:
YY_RULE_SETUP
-#line 263 "./glsl/glcpp/glcpp-lex.l"
+#line 270 "./glsl/glcpp/glcpp-lex.l"
{ yy_push_state(COMMENT, yyscanner); }
YY_BREAK
case 3:
YY_RULE_SETUP
-#line 264 "./glsl/glcpp/glcpp-lex.l"
+#line 271 "./glsl/glcpp/glcpp-lex.l"
YY_BREAK
case 4:
/* rule 4 can match eol */
YY_RULE_SETUP
-#line 265 "./glsl/glcpp/glcpp-lex.l"
+#line 272 "./glsl/glcpp/glcpp-lex.l"
{ yylineno++; yycolumn = 0; parser->commented_newlines++; }
YY_BREAK
case 5:
YY_RULE_SETUP
-#line 266 "./glsl/glcpp/glcpp-lex.l"
+#line 273 "./glsl/glcpp/glcpp-lex.l"
YY_BREAK
case 6:
/* rule 6 can match eol */
YY_RULE_SETUP
-#line 267 "./glsl/glcpp/glcpp-lex.l"
+#line 274 "./glsl/glcpp/glcpp-lex.l"
{ yylineno++; yycolumn = 0; parser->commented_newlines++; }
YY_BREAK
case 7:
YY_RULE_SETUP
-#line 268 "./glsl/glcpp/glcpp-lex.l"
+#line 275 "./glsl/glcpp/glcpp-lex.l"
{
yy_pop_state(yyscanner);
/* In the start condition, we don't want any SPACE token. */
@@ -1213,7 +1220,7 @@
YY_BREAK
case 8:
YY_RULE_SETUP
-#line 275 "./glsl/glcpp/glcpp-lex.l"
+#line 282 "./glsl/glcpp/glcpp-lex.l"
{
/* If the '#' is the first non-whitespace, non-comment token on this
@@ -1231,7 +1238,7 @@
YY_BREAK
case 9:
YY_RULE_SETUP
-#line 290 "./glsl/glcpp/glcpp-lex.l"
+#line 297 "./glsl/glcpp/glcpp-lex.l"
{
BEGIN INITIAL;
yyextra->space_tokens = 0;
@@ -1254,7 +1261,7 @@
yyg->yy_c_buf_p = yy_cp -= 1;
YY_DO_BEFORE_ACTION; /* set up yytext again */
YY_RULE_SETUP
-#line 305 "./glsl/glcpp/glcpp-lex.l"
+#line 312 "./glsl/glcpp/glcpp-lex.l"
{
BEGIN INITIAL;
}
@@ -1263,7 +1270,7 @@
* Simply pass them through to the main compiler's lexer/parser. */
case 11:
YY_RULE_SETUP
-#line 311 "./glsl/glcpp/glcpp-lex.l"
+#line 318 "./glsl/glcpp/glcpp-lex.l"
{
BEGIN INITIAL;
RETURN_STRING_TOKEN (PRAGMA);
@@ -1271,7 +1278,7 @@
YY_BREAK
case 12:
YY_RULE_SETUP
-#line 316 "./glsl/glcpp/glcpp-lex.l"
+#line 323 "./glsl/glcpp/glcpp-lex.l"
{
BEGIN INITIAL;
RETURN_TOKEN (LINE);
@@ -1280,7 +1287,7 @@
case 13:
/* rule 13 can match eol */
YY_RULE_SETUP
-#line 321 "./glsl/glcpp/glcpp-lex.l"
+#line 328 "./glsl/glcpp/glcpp-lex.l"
{
BEGIN INITIAL;
yyextra->space_tokens = 0;
@@ -1293,7 +1300,7 @@
* even when we are otherwise skipping. */
case 14:
YY_RULE_SETUP
-#line 331 "./glsl/glcpp/glcpp-lex.l"
+#line 338 "./glsl/glcpp/glcpp-lex.l"
{
BEGIN INITIAL;
yyextra->lexing_directive = 1;
@@ -1303,7 +1310,7 @@
YY_BREAK
case 15:
YY_RULE_SETUP
-#line 338 "./glsl/glcpp/glcpp-lex.l"
+#line 345 "./glsl/glcpp/glcpp-lex.l"
{
BEGIN INITIAL;
yyextra->lexing_directive = 1;
@@ -1318,7 +1325,7 @@
yyg->yy_c_buf_p = yy_cp = yy_bp + 2;
YY_DO_BEFORE_ACTION; /* set up yytext again */
YY_RULE_SETUP
-#line 345 "./glsl/glcpp/glcpp-lex.l"
+#line 352 "./glsl/glcpp/glcpp-lex.l"
{
BEGIN INITIAL;
yyextra->lexing_directive = 1;
@@ -1333,7 +1340,7 @@
yyg->yy_c_buf_p = yy_cp = yy_bp + 4;
YY_DO_BEFORE_ACTION; /* set up yytext again */
YY_RULE_SETUP
-#line 352 "./glsl/glcpp/glcpp-lex.l"
+#line 359 "./glsl/glcpp/glcpp-lex.l"
{
BEGIN INITIAL;
yyextra->lexing_directive = 1;
@@ -1343,7 +1350,7 @@
YY_BREAK
case 18:
YY_RULE_SETUP
-#line 359 "./glsl/glcpp/glcpp-lex.l"
+#line 366 "./glsl/glcpp/glcpp-lex.l"
{
BEGIN INITIAL;
yyextra->space_tokens = 0;
@@ -1352,7 +1359,7 @@
YY_BREAK
case 19:
YY_RULE_SETUP
-#line 365 "./glsl/glcpp/glcpp-lex.l"
+#line 372 "./glsl/glcpp/glcpp-lex.l"
{
BEGIN INITIAL;
yyextra->space_tokens = 0;
@@ -1361,7 +1368,7 @@
YY_BREAK
case 20:
YY_RULE_SETUP
-#line 371 "./glsl/glcpp/glcpp-lex.l"
+#line 378 "./glsl/glcpp/glcpp-lex.l"
{
BEGIN INITIAL;
RETURN_STRING_TOKEN (ERROR_TOKEN);
@@ -1387,7 +1394,7 @@
*/
case 21:
YY_RULE_SETUP
-#line 394 "./glsl/glcpp/glcpp-lex.l"
+#line 401 "./glsl/glcpp/glcpp-lex.l"
{
if (! parser->skipping) {
BEGIN DEFINE;
@@ -1398,7 +1405,7 @@
YY_BREAK
case 22:
YY_RULE_SETUP
-#line 402 "./glsl/glcpp/glcpp-lex.l"
+#line 409 "./glsl/glcpp/glcpp-lex.l"
{
BEGIN INITIAL;
yyextra->space_tokens = 0;
@@ -1407,7 +1414,7 @@
YY_BREAK
case 23:
YY_RULE_SETUP
-#line 408 "./glsl/glcpp/glcpp-lex.l"
+#line 415 "./glsl/glcpp/glcpp-lex.l"
{
/* Nothing to do here. Importantly, don't leave the
* start condition, since it's legal to have space between the
@@ -1417,7 +1424,7 @@
/* This will catch any non-directive garbage after a HASH */
case 24:
YY_RULE_SETUP
-#line 415 "./glsl/glcpp/glcpp-lex.l"
+#line 422 "./glsl/glcpp/glcpp-lex.l"
{
BEGIN INITIAL;
RETURN_TOKEN (GARBAGE);
@@ -1429,7 +1436,7 @@
yyg->yy_c_buf_p = yy_cp -= 1;
YY_DO_BEFORE_ACTION; /* set up yytext again */
YY_RULE_SETUP
-#line 421 "./glsl/glcpp/glcpp-lex.l"
+#line 428 "./glsl/glcpp/glcpp-lex.l"
{
BEGIN INITIAL;
RETURN_STRING_TOKEN (FUNC_IDENTIFIER);
@@ -1438,7 +1445,7 @@
/* An identifier not immediately followed by '(' */
case 26:
YY_RULE_SETUP
-#line 427 "./glsl/glcpp/glcpp-lex.l"
+#line 434 "./glsl/glcpp/glcpp-lex.l"
{
BEGIN INITIAL;
RETURN_STRING_TOKEN (OBJ_IDENTIFIER);
@@ -1447,7 +1454,7 @@
/* Whitespace */
case 27:
YY_RULE_SETUP
-#line 433 "./glsl/glcpp/glcpp-lex.l"
+#line 440 "./glsl/glcpp/glcpp-lex.l"
{
/* Just ignore it. Nothing to do here. */
}
@@ -1456,7 +1463,7 @@
case 28:
/* rule 28 can match eol */
YY_RULE_SETUP
-#line 438 "./glsl/glcpp/glcpp-lex.l"
+#line 445 "./glsl/glcpp/glcpp-lex.l"
{
BEGIN INITIAL;
glcpp_error(yylloc, yyextra, "#define followed by a non-identifier: %s", yytext);
@@ -1467,7 +1474,7 @@
* space. This is an error. */
case 29:
YY_RULE_SETUP
-#line 446 "./glsl/glcpp/glcpp-lex.l"
+#line 453 "./glsl/glcpp/glcpp-lex.l"
{
BEGIN INITIAL;
glcpp_error(yylloc, yyextra, "#define followed by a non-identifier: %s", yytext);
@@ -1476,98 +1483,98 @@
YY_BREAK
case 30:
YY_RULE_SETUP
-#line 452 "./glsl/glcpp/glcpp-lex.l"
+#line 459 "./glsl/glcpp/glcpp-lex.l"
{
RETURN_STRING_TOKEN (INTEGER_STRING);
}
YY_BREAK
case 31:
YY_RULE_SETUP
-#line 456 "./glsl/glcpp/glcpp-lex.l"
+#line 463 "./glsl/glcpp/glcpp-lex.l"
{
RETURN_STRING_TOKEN (INTEGER_STRING);
}
YY_BREAK
case 32:
YY_RULE_SETUP
-#line 460 "./glsl/glcpp/glcpp-lex.l"
+#line 467 "./glsl/glcpp/glcpp-lex.l"
{
RETURN_STRING_TOKEN (INTEGER_STRING);
}
YY_BREAK
case 33:
YY_RULE_SETUP
-#line 464 "./glsl/glcpp/glcpp-lex.l"
+#line 471 "./glsl/glcpp/glcpp-lex.l"
{
RETURN_TOKEN (LEFT_SHIFT);
}
YY_BREAK
case 34:
YY_RULE_SETUP
-#line 468 "./glsl/glcpp/glcpp-lex.l"
+#line 475 "./glsl/glcpp/glcpp-lex.l"
{
RETURN_TOKEN (RIGHT_SHIFT);
}
YY_BREAK
case 35:
YY_RULE_SETUP
-#line 472 "./glsl/glcpp/glcpp-lex.l"
+#line 479 "./glsl/glcpp/glcpp-lex.l"
{
RETURN_TOKEN (LESS_OR_EQUAL);
}
YY_BREAK
case 36:
YY_RULE_SETUP
-#line 476 "./glsl/glcpp/glcpp-lex.l"
+#line 483 "./glsl/glcpp/glcpp-lex.l"
{
RETURN_TOKEN (GREATER_OR_EQUAL);
}
YY_BREAK
case 37:
YY_RULE_SETUP
-#line 480 "./glsl/glcpp/glcpp-lex.l"
+#line 487 "./glsl/glcpp/glcpp-lex.l"
{
RETURN_TOKEN (EQUAL);
}
YY_BREAK
case 38:
YY_RULE_SETUP
-#line 484 "./glsl/glcpp/glcpp-lex.l"
+#line 491 "./glsl/glcpp/glcpp-lex.l"
{
RETURN_TOKEN (NOT_EQUAL);
}
YY_BREAK
case 39:
YY_RULE_SETUP
-#line 488 "./glsl/glcpp/glcpp-lex.l"
+#line 495 "./glsl/glcpp/glcpp-lex.l"
{
RETURN_TOKEN (AND);
}
YY_BREAK
case 40:
YY_RULE_SETUP
-#line 492 "./glsl/glcpp/glcpp-lex.l"
+#line 499 "./glsl/glcpp/glcpp-lex.l"
{
RETURN_TOKEN (OR);
}
YY_BREAK
case 41:
YY_RULE_SETUP
-#line 496 "./glsl/glcpp/glcpp-lex.l"
+#line 503 "./glsl/glcpp/glcpp-lex.l"
{
RETURN_TOKEN (PLUS_PLUS);
}
YY_BREAK
case 42:
YY_RULE_SETUP
-#line 500 "./glsl/glcpp/glcpp-lex.l"
+#line 507 "./glsl/glcpp/glcpp-lex.l"
{
RETURN_TOKEN (MINUS_MINUS);
}
YY_BREAK
case 43:
YY_RULE_SETUP
-#line 504 "./glsl/glcpp/glcpp-lex.l"
+#line 511 "./glsl/glcpp/glcpp-lex.l"
{
if (! parser->skipping) {
if (parser->is_gles)
@@ -1578,42 +1585,42 @@
YY_BREAK
case 44:
YY_RULE_SETUP
-#line 512 "./glsl/glcpp/glcpp-lex.l"
+#line 519 "./glsl/glcpp/glcpp-lex.l"
{
RETURN_TOKEN (DEFINED);
}
YY_BREAK
case 45:
YY_RULE_SETUP
-#line 516 "./glsl/glcpp/glcpp-lex.l"
+#line 523 "./glsl/glcpp/glcpp-lex.l"
{
RETURN_STRING_TOKEN (IDENTIFIER);
}
YY_BREAK
case 46:
YY_RULE_SETUP
-#line 520 "./glsl/glcpp/glcpp-lex.l"
+#line 527 "./glsl/glcpp/glcpp-lex.l"
{
RETURN_STRING_TOKEN (OTHER);
}
YY_BREAK
case 47:
YY_RULE_SETUP
-#line 524 "./glsl/glcpp/glcpp-lex.l"
+#line 531 "./glsl/glcpp/glcpp-lex.l"
{
RETURN_TOKEN (yytext[0]);
}
YY_BREAK
case 48:
YY_RULE_SETUP
-#line 528 "./glsl/glcpp/glcpp-lex.l"
+#line 535 "./glsl/glcpp/glcpp-lex.l"
{
RETURN_STRING_TOKEN (OTHER);
}
YY_BREAK
case 49:
YY_RULE_SETUP
-#line 532 "./glsl/glcpp/glcpp-lex.l"
+#line 539 "./glsl/glcpp/glcpp-lex.l"
{
if (yyextra->space_tokens) {
RETURN_TOKEN (SPACE);
@@ -1625,7 +1632,7 @@
case 50:
/* rule 50 can match eol */
YY_RULE_SETUP
-#line 540 "./glsl/glcpp/glcpp-lex.l"
+#line 547 "./glsl/glcpp/glcpp-lex.l"
{
if (parser->commented_newlines) {
BEGIN NEWLINE_CATCHUP;
@@ -1644,7 +1651,7 @@
case YY_STATE_EOF(COMMENT):
case YY_STATE_EOF(DEFINE):
case YY_STATE_EOF(HASH):
-#line 554 "./glsl/glcpp/glcpp-lex.l"
+#line 561 "./glsl/glcpp/glcpp-lex.l"
{
if (YY_START == COMMENT)
glcpp_error(yylloc, yyextra, "Unterminated comment");
@@ -1661,7 +1668,7 @@
* of the preceding patterns to match that input. */
case 51:
YY_RULE_SETUP
-#line 569 "./glsl/glcpp/glcpp-lex.l"
+#line 576 "./glsl/glcpp/glcpp-lex.l"
{
glcpp_error(yylloc, yyextra, "Internal compiler error: Unexpected character: %s", yytext);
@@ -1677,10 +1684,10 @@
YY_BREAK
case 52:
YY_RULE_SETUP
-#line 582 "./glsl/glcpp/glcpp-lex.l"
+#line 589 "./glsl/glcpp/glcpp-lex.l"
YY_FATAL_ERROR( "flex scanner jammed" );
YY_BREAK
-#line 1684 "glsl/glcpp/glcpp-lex.c"
+#line 1691 "glsl/glcpp/glcpp-lex.c"
case YY_STATE_EOF(DONE):
case YY_STATE_EOF(NEWLINE_CATCHUP):
case YY_STATE_EOF(UNREACHABLE):
@@ -2916,7 +2923,7 @@
#define YYTABLES_NAME "yytables"
-#line 582 "./glsl/glcpp/glcpp-lex.l"
+#line 589 "./glsl/glcpp/glcpp-lex.l"
diff -Nru mesa-17.2.4/src/compiler/glsl/glcpp/glcpp-lex.l mesa-17.3.3/src/compiler/glsl/glcpp/glcpp-lex.l
--- mesa-17.2.4/src/compiler/glsl/glcpp/glcpp-lex.l 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/glcpp/glcpp-lex.l 2018-01-18 21:30:28.000000000 +0000
@@ -101,7 +101,14 @@
#define RETURN_STRING_TOKEN(token) \
do { \
if (! parser->skipping) { \
- yylval->str = linear_strdup(yyextra->linalloc, yytext); \
+ /* We're not doing linear_strdup here, to avoid \
+ * an implicit call on strlen() for the length \
+ * of the string, as this is already found by \
+ * flex and stored in yyleng */ \
+ void *mem_ctx = yyextra->linalloc; \
+ yylval->str = linear_alloc_child(mem_ctx, \
+ yyleng + 1); \
+ memcpy(yylval->str, yytext, yyleng + 1); \
RETURN_TOKEN_NEVER_SKIP (token); \
} \
} while(0)
diff -Nru mesa-17.2.4/src/compiler/glsl/glcpp/glcpp-parse.c mesa-17.3.3/src/compiler/glsl/glcpp/glcpp-parse.c
--- mesa-17.2.4/src/compiler/glsl/glcpp/glcpp-parse.c 2017-10-30 14:50:47.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/glcpp/glcpp-parse.c 2018-01-18 21:31:04.000000000 +0000
@@ -630,17 +630,17 @@
static const yytype_uint16 yyrline[] =
{
0, 202, 202, 204, 208, 209, 210, 214, 218, 223,
- 228, 236, 249, 252, 255, 261, 264, 265, 278, 279,
- 331, 352, 362, 368, 374, 400, 420, 420, 433, 433,
- 436, 442, 448, 451, 457, 460, 463, 469, 478, 483,
- 494, 498, 505, 516, 527, 534, 541, 548, 555, 562,
- 569, 576, 583, 590, 597, 604, 611, 618, 630, 642,
- 649, 653, 657, 661, 665, 671, 675, 682, 683, 687,
- 688, 691, 693, 699, 704, 711, 715, 719, 723, 727,
- 731, 738, 739, 740, 741, 742, 743, 744, 745, 746,
- 747, 748, 749, 750, 751, 752, 753, 754, 755, 756,
- 757, 758, 759, 760, 761, 762, 763, 764, 765, 766,
- 767, 768, 769, 770
+ 228, 233, 245, 248, 251, 257, 260, 261, 274, 275,
+ 327, 348, 358, 364, 370, 396, 416, 416, 429, 429,
+ 432, 438, 444, 447, 453, 456, 459, 465, 474, 479,
+ 490, 494, 501, 512, 523, 530, 537, 544, 551, 558,
+ 565, 572, 579, 586, 593, 600, 607, 614, 626, 638,
+ 645, 649, 653, 657, 661, 667, 671, 678, 679, 683,
+ 684, 687, 689, 695, 700, 707, 711, 715, 719, 723,
+ 727, 734, 735, 736, 737, 738, 739, 740, 741, 742,
+ 743, 744, 745, 746, 747, 748, 749, 750, 751, 752,
+ 753, 754, 755, 756, 757, 758, 759, 760, 761, 762,
+ 763, 764, 765, 766
};
#endif
@@ -1755,7 +1755,7 @@
#line 210 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
_glcpp_parser_print_expanded_token_list (parser, (yyvsp[0].token_list));
- ralloc_asprintf_rewrite_tail (&parser->output, &parser->output_length, "\n");
+ _mesa_string_buffer_append_char(parser->output, '\n');
}
#line 1761 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
@@ -1785,63 +1785,59 @@
{
parser->has_new_line_number = 1;
parser->new_line_number = (yyvsp[-1].ival);
- ralloc_asprintf_rewrite_tail (&parser->output,
- &parser->output_length,
- "#line %" PRIiMAX "\n",
- (yyvsp[-1].ival));
+ _mesa_string_buffer_printf(parser->output, "#line %" PRIiMAX "\n", (yyvsp[-1].ival));
}
-#line 1794 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 1791 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 11:
-#line 236 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 233 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
parser->has_new_line_number = 1;
parser->new_line_number = (yyvsp[-2].ival);
parser->has_new_source_number = 1;
parser->new_source_number = (yyvsp[-1].ival);
- ralloc_asprintf_rewrite_tail (&parser->output,
- &parser->output_length,
- "#line %" PRIiMAX " %" PRIiMAX "\n",
- (yyvsp[-2].ival), (yyvsp[-1].ival));
+ _mesa_string_buffer_printf(parser->output,
+ "#line %" PRIiMAX " %" PRIiMAX "\n",
+ (yyvsp[-2].ival), (yyvsp[-1].ival));
}
-#line 1809 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 1805 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 12:
-#line 249 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 245 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
_define_object_macro (parser, & (yylsp[-2]), (yyvsp[-2].str), (yyvsp[-1].token_list));
}
-#line 1817 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 1813 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 13:
-#line 252 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 248 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
_define_function_macro (parser, & (yylsp[-4]), (yyvsp[-4].str), NULL, (yyvsp[-1].token_list));
}
-#line 1825 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 1821 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 14:
-#line 255 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 251 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
_define_function_macro (parser, & (yylsp[-5]), (yyvsp[-5].str), (yyvsp[-3].string_list), (yyvsp[-1].token_list));
}
-#line 1833 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 1829 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 15:
-#line 261 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 257 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
- ralloc_asprintf_rewrite_tail (&parser->output, &parser->output_length, "\n");
+ _mesa_string_buffer_append_char(parser->output, '\n');
}
-#line 1841 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 1837 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 17:
-#line 265 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 261 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
if (parser->skip_stack == NULL ||
@@ -1852,11 +1848,11 @@
EXPANSION_MODE_IGNORE_DEFINED);
}
}
-#line 1856 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 1852 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 19:
-#line 279 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 275 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
struct hash_entry *entry;
@@ -1909,11 +1905,11 @@
_mesa_hash_table_remove (parser->defines, entry);
}
}
-#line 1913 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 1909 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 20:
-#line 331 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 327 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
/* Be careful to only evaluate the 'if' expression if
* we are not skipping. When we are skipping, we
@@ -1935,11 +1931,11 @@
parser->skip_stack->type = SKIP_TO_ENDIF;
}
}
-#line 1939 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 1935 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 21:
-#line 352 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 348 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
/* #if without an expression is only an error if we
* are not skipping */
@@ -1950,33 +1946,33 @@
}
_glcpp_parser_skip_stack_push_if (parser, & (yylsp[-2]), 0);
}
-#line 1954 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 1950 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 22:
-#line 362 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 358 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
struct hash_entry *entry =
_mesa_hash_table_search(parser->defines, (yyvsp[-2].str));
macro_t *macro = entry ? entry->data : NULL;
_glcpp_parser_skip_stack_push_if (parser, & (yylsp[-4]), macro != NULL);
}
-#line 1965 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 1961 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 23:
-#line 368 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 364 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
struct hash_entry *entry =
_mesa_hash_table_search(parser->defines, (yyvsp[-2].str));
macro_t *macro = entry ? entry->data : NULL;
_glcpp_parser_skip_stack_push_if (parser, & (yylsp[-2]), macro == NULL);
}
-#line 1976 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 1972 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 24:
-#line 374 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 370 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
/* Be careful to only evaluate the 'elif' expression
* if we are not skipping. When we are skipping, we
@@ -2003,11 +1999,11 @@
"elif", 0);
}
}
-#line 2007 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2003 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 25:
-#line 400 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 396 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
/* #elif without an expression is an error unless we
* are skipping. */
@@ -2028,17 +2024,17 @@
glcpp_warning(& (yylsp[-2]), parser, "ignoring illegal #elif without expression");
}
}
-#line 2032 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2028 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 26:
-#line 420 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 416 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{ parser->lexing_directive = 1; }
-#line 2038 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2034 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 27:
-#line 420 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 416 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
if (parser->skip_stack &&
parser->skip_stack->has_else)
@@ -2052,81 +2048,81 @@
parser->skip_stack->has_else = true;
}
}
-#line 2056 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2052 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 28:
-#line 433 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 429 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
_glcpp_parser_skip_stack_pop (parser, & (yylsp[-1]));
}
-#line 2064 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2060 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 30:
-#line 436 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 432 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
if (parser->version_set) {
glcpp_error(& (yylsp[-3]), parser, "#version must appear on the first line");
}
_glcpp_parser_handle_version_declaration(parser, (yyvsp[-1].ival), NULL, true);
}
-#line 2075 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2071 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 31:
-#line 442 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 438 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
if (parser->version_set) {
glcpp_error(& (yylsp[-4]), parser, "#version must appear on the first line");
}
_glcpp_parser_handle_version_declaration(parser, (yyvsp[-2].ival), (yyvsp[-1].str), true);
}
-#line 2086 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2082 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 32:
-#line 448 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 444 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
glcpp_parser_resolve_implicit_version(parser);
}
-#line 2094 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2090 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 33:
-#line 451 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 447 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
- ralloc_asprintf_rewrite_tail (&parser->output, &parser->output_length, "#%s", (yyvsp[-1].str));
+ _mesa_string_buffer_printf(parser->output, "#%s", (yyvsp[-1].str));
}
-#line 2102 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2098 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 34:
-#line 457 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 453 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
glcpp_error(& (yylsp[-2]), parser, "#%s", (yyvsp[-1].str));
}
-#line 2110 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2106 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 35:
-#line 460 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 456 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
glcpp_error (& (yylsp[-2]), parser, "#define without macro name");
}
-#line 2118 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2114 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 36:
-#line 463 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 459 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
glcpp_error (& (yylsp[-3]), parser, "Illegal non-directive after #");
}
-#line 2126 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2122 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 37:
-#line 469 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 465 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
if (strlen ((yyvsp[0].str)) >= 3 && strncmp ((yyvsp[0].str), "0x", 2) == 0) {
(yyval.ival) = strtoll ((yyvsp[0].str) + 2, NULL, 16);
@@ -2136,19 +2132,19 @@
(yyval.ival) = strtoll ((yyvsp[0].str), NULL, 10);
}
}
-#line 2140 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2136 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 38:
-#line 478 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 474 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
(yyval.ival) = (yyvsp[0].ival);
}
-#line 2148 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2144 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 39:
-#line 483 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 479 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
/* Both octal and hexadecimal constants begin with 0. */
if ((yyvsp[0].str)[0] == '0' && (yyvsp[0].str)[1] != '\0') {
@@ -2158,20 +2154,20 @@
(yyval.ival) = strtoll((yyvsp[0].str), NULL, 10);
}
}
-#line 2162 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2158 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 40:
-#line 494 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 490 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
(yyval.expression_value).value = (yyvsp[0].ival);
(yyval.expression_value).undefined_macro = NULL;
}
-#line 2171 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2167 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 41:
-#line 498 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 494 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
(yyval.expression_value).value = 0;
if (parser->is_gles)
@@ -2179,11 +2175,11 @@
else
(yyval.expression_value).undefined_macro = NULL;
}
-#line 2183 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2179 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 42:
-#line 505 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 501 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
(yyval.expression_value).value = (yyvsp[-2].expression_value).value || (yyvsp[0].expression_value).value;
@@ -2195,11 +2191,11 @@
else if (! (yyvsp[-2].expression_value).value)
(yyval.expression_value).undefined_macro = (yyvsp[0].expression_value).undefined_macro;
}
-#line 2199 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2195 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 43:
-#line 516 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 512 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
(yyval.expression_value).value = (yyvsp[-2].expression_value).value && (yyvsp[0].expression_value).value;
@@ -2211,11 +2207,11 @@
else if ((yyvsp[-2].expression_value).value)
(yyval.expression_value).undefined_macro = (yyvsp[0].expression_value).undefined_macro;
}
-#line 2215 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2211 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 44:
-#line 527 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 523 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
(yyval.expression_value).value = (yyvsp[-2].expression_value).value | (yyvsp[0].expression_value).value;
if ((yyvsp[-2].expression_value).undefined_macro)
@@ -2223,11 +2219,11 @@
else
(yyval.expression_value).undefined_macro = (yyvsp[0].expression_value).undefined_macro;
}
-#line 2227 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2223 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 45:
-#line 534 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 530 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
(yyval.expression_value).value = (yyvsp[-2].expression_value).value ^ (yyvsp[0].expression_value).value;
if ((yyvsp[-2].expression_value).undefined_macro)
@@ -2235,11 +2231,11 @@
else
(yyval.expression_value).undefined_macro = (yyvsp[0].expression_value).undefined_macro;
}
-#line 2239 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2235 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 46:
-#line 541 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 537 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
(yyval.expression_value).value = (yyvsp[-2].expression_value).value & (yyvsp[0].expression_value).value;
if ((yyvsp[-2].expression_value).undefined_macro)
@@ -2247,11 +2243,11 @@
else
(yyval.expression_value).undefined_macro = (yyvsp[0].expression_value).undefined_macro;
}
-#line 2251 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2247 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 47:
-#line 548 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 544 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
(yyval.expression_value).value = (yyvsp[-2].expression_value).value != (yyvsp[0].expression_value).value;
if ((yyvsp[-2].expression_value).undefined_macro)
@@ -2259,11 +2255,11 @@
else
(yyval.expression_value).undefined_macro = (yyvsp[0].expression_value).undefined_macro;
}
-#line 2263 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2259 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 48:
-#line 555 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 551 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
(yyval.expression_value).value = (yyvsp[-2].expression_value).value == (yyvsp[0].expression_value).value;
if ((yyvsp[-2].expression_value).undefined_macro)
@@ -2271,11 +2267,11 @@
else
(yyval.expression_value).undefined_macro = (yyvsp[0].expression_value).undefined_macro;
}
-#line 2275 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2271 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 49:
-#line 562 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 558 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
(yyval.expression_value).value = (yyvsp[-2].expression_value).value >= (yyvsp[0].expression_value).value;
if ((yyvsp[-2].expression_value).undefined_macro)
@@ -2283,11 +2279,11 @@
else
(yyval.expression_value).undefined_macro = (yyvsp[0].expression_value).undefined_macro;
}
-#line 2287 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2283 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 50:
-#line 569 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 565 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
(yyval.expression_value).value = (yyvsp[-2].expression_value).value <= (yyvsp[0].expression_value).value;
if ((yyvsp[-2].expression_value).undefined_macro)
@@ -2295,11 +2291,11 @@
else
(yyval.expression_value).undefined_macro = (yyvsp[0].expression_value).undefined_macro;
}
-#line 2299 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2295 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 51:
-#line 576 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 572 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
(yyval.expression_value).value = (yyvsp[-2].expression_value).value > (yyvsp[0].expression_value).value;
if ((yyvsp[-2].expression_value).undefined_macro)
@@ -2307,11 +2303,11 @@
else
(yyval.expression_value).undefined_macro = (yyvsp[0].expression_value).undefined_macro;
}
-#line 2311 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2307 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 52:
-#line 583 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 579 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
(yyval.expression_value).value = (yyvsp[-2].expression_value).value < (yyvsp[0].expression_value).value;
if ((yyvsp[-2].expression_value).undefined_macro)
@@ -2319,11 +2315,11 @@
else
(yyval.expression_value).undefined_macro = (yyvsp[0].expression_value).undefined_macro;
}
-#line 2323 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2319 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 53:
-#line 590 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 586 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
(yyval.expression_value).value = (yyvsp[-2].expression_value).value >> (yyvsp[0].expression_value).value;
if ((yyvsp[-2].expression_value).undefined_macro)
@@ -2331,11 +2327,11 @@
else
(yyval.expression_value).undefined_macro = (yyvsp[0].expression_value).undefined_macro;
}
-#line 2335 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2331 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 54:
-#line 597 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 593 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
(yyval.expression_value).value = (yyvsp[-2].expression_value).value << (yyvsp[0].expression_value).value;
if ((yyvsp[-2].expression_value).undefined_macro)
@@ -2343,11 +2339,11 @@
else
(yyval.expression_value).undefined_macro = (yyvsp[0].expression_value).undefined_macro;
}
-#line 2347 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2343 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 55:
-#line 604 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 600 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
(yyval.expression_value).value = (yyvsp[-2].expression_value).value - (yyvsp[0].expression_value).value;
if ((yyvsp[-2].expression_value).undefined_macro)
@@ -2355,11 +2351,11 @@
else
(yyval.expression_value).undefined_macro = (yyvsp[0].expression_value).undefined_macro;
}
-#line 2359 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2355 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 56:
-#line 611 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 607 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
(yyval.expression_value).value = (yyvsp[-2].expression_value).value + (yyvsp[0].expression_value).value;
if ((yyvsp[-2].expression_value).undefined_macro)
@@ -2367,11 +2363,11 @@
else
(yyval.expression_value).undefined_macro = (yyvsp[0].expression_value).undefined_macro;
}
-#line 2371 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2367 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 57:
-#line 618 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 614 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
if ((yyvsp[0].expression_value).value == 0) {
yyerror (& (yylsp[-2]), parser,
@@ -2384,11 +2380,11 @@
else
(yyval.expression_value).undefined_macro = (yyvsp[0].expression_value).undefined_macro;
}
-#line 2388 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2384 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 58:
-#line 630 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 626 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
if ((yyvsp[0].expression_value).value == 0) {
yyerror (& (yylsp[-2]), parser,
@@ -2401,11 +2397,11 @@
else
(yyval.expression_value).undefined_macro = (yyvsp[0].expression_value).undefined_macro;
}
-#line 2405 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2401 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 59:
-#line 642 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 638 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
(yyval.expression_value).value = (yyvsp[-2].expression_value).value * (yyvsp[0].expression_value).value;
if ((yyvsp[-2].expression_value).undefined_macro)
@@ -2413,364 +2409,364 @@
else
(yyval.expression_value).undefined_macro = (yyvsp[0].expression_value).undefined_macro;
}
-#line 2417 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2413 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 60:
-#line 649 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 645 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
(yyval.expression_value).value = ! (yyvsp[0].expression_value).value;
(yyval.expression_value).undefined_macro = (yyvsp[0].expression_value).undefined_macro;
}
-#line 2426 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2422 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 61:
-#line 653 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 649 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
(yyval.expression_value).value = ~ (yyvsp[0].expression_value).value;
(yyval.expression_value).undefined_macro = (yyvsp[0].expression_value).undefined_macro;
}
-#line 2435 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2431 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 62:
-#line 657 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 653 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
(yyval.expression_value).value = - (yyvsp[0].expression_value).value;
(yyval.expression_value).undefined_macro = (yyvsp[0].expression_value).undefined_macro;
}
-#line 2444 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2440 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 63:
-#line 661 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 657 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
(yyval.expression_value).value = + (yyvsp[0].expression_value).value;
(yyval.expression_value).undefined_macro = (yyvsp[0].expression_value).undefined_macro;
}
-#line 2453 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2449 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 64:
-#line 665 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 661 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
(yyval.expression_value) = (yyvsp[-1].expression_value);
}
-#line 2461 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2457 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 65:
-#line 671 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 667 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
(yyval.string_list) = _string_list_create (parser);
_string_list_append_item (parser, (yyval.string_list), (yyvsp[0].str));
}
-#line 2470 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2466 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 66:
-#line 675 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 671 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
(yyval.string_list) = (yyvsp[-2].string_list);
_string_list_append_item (parser, (yyval.string_list), (yyvsp[0].str));
}
-#line 2479 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2475 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 67:
-#line 682 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 678 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{ (yyval.token_list) = NULL; }
-#line 2485 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2481 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 69:
-#line 687 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 683 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{ (yyval.token_list) = NULL; }
-#line 2491 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2487 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 72:
-#line 693 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 689 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
glcpp_error(&(yylsp[0]), parser, "extra tokens at end of directive");
}
-#line 2499 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2495 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 73:
-#line 699 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 695 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
parser->space_tokens = 1;
(yyval.token_list) = _token_list_create (parser);
_token_list_append (parser, (yyval.token_list), (yyvsp[0].token));
}
-#line 2509 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2505 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 74:
-#line 704 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 700 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
(yyval.token_list) = (yyvsp[-1].token_list);
_token_list_append (parser, (yyval.token_list), (yyvsp[0].token));
}
-#line 2518 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2514 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 75:
-#line 711 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 707 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
(yyval.token) = _token_create_str (parser, IDENTIFIER, (yyvsp[0].str));
(yyval.token)->location = yylloc;
}
-#line 2527 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2523 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 76:
-#line 715 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 711 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
(yyval.token) = _token_create_str (parser, INTEGER_STRING, (yyvsp[0].str));
(yyval.token)->location = yylloc;
}
-#line 2536 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2532 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 77:
-#line 719 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 715 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
(yyval.token) = _token_create_ival (parser, (yyvsp[0].ival), (yyvsp[0].ival));
(yyval.token)->location = yylloc;
}
-#line 2545 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2541 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 78:
-#line 723 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 719 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
(yyval.token) = _token_create_ival (parser, DEFINED, DEFINED);
(yyval.token)->location = yylloc;
}
-#line 2554 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2550 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 79:
-#line 727 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 723 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
(yyval.token) = _token_create_str (parser, OTHER, (yyvsp[0].str));
(yyval.token)->location = yylloc;
}
-#line 2563 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2559 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 80:
-#line 731 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 727 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{
(yyval.token) = _token_create_ival (parser, SPACE, SPACE);
(yyval.token)->location = yylloc;
}
-#line 2572 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2568 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 81:
-#line 738 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 734 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{ (yyval.ival) = '['; }
-#line 2578 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2574 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 82:
-#line 739 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 735 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{ (yyval.ival) = ']'; }
-#line 2584 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2580 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 83:
-#line 740 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 736 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{ (yyval.ival) = '('; }
-#line 2590 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2586 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 84:
-#line 741 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 737 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{ (yyval.ival) = ')'; }
-#line 2596 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2592 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 85:
-#line 742 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 738 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{ (yyval.ival) = '{'; }
-#line 2602 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2598 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 86:
-#line 743 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 739 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{ (yyval.ival) = '}'; }
-#line 2608 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2604 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 87:
-#line 744 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 740 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{ (yyval.ival) = '.'; }
-#line 2614 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2610 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 88:
-#line 745 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 741 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{ (yyval.ival) = '&'; }
-#line 2620 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2616 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 89:
-#line 746 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 742 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{ (yyval.ival) = '*'; }
-#line 2626 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2622 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 90:
-#line 747 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 743 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{ (yyval.ival) = '+'; }
-#line 2632 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2628 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 91:
-#line 748 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 744 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{ (yyval.ival) = '-'; }
-#line 2638 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2634 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 92:
-#line 749 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 745 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{ (yyval.ival) = '~'; }
-#line 2644 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2640 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 93:
-#line 750 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 746 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{ (yyval.ival) = '!'; }
-#line 2650 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2646 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 94:
-#line 751 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 747 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{ (yyval.ival) = '/'; }
-#line 2656 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2652 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 95:
-#line 752 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 748 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{ (yyval.ival) = '%'; }
-#line 2662 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2658 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 96:
-#line 753 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 749 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{ (yyval.ival) = LEFT_SHIFT; }
-#line 2668 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2664 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 97:
-#line 754 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 750 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{ (yyval.ival) = RIGHT_SHIFT; }
-#line 2674 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2670 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 98:
-#line 755 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 751 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{ (yyval.ival) = '<'; }
-#line 2680 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2676 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 99:
-#line 756 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 752 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{ (yyval.ival) = '>'; }
-#line 2686 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2682 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 100:
-#line 757 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 753 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{ (yyval.ival) = LESS_OR_EQUAL; }
-#line 2692 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2688 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 101:
-#line 758 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 754 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{ (yyval.ival) = GREATER_OR_EQUAL; }
-#line 2698 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2694 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 102:
-#line 759 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 755 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{ (yyval.ival) = EQUAL; }
-#line 2704 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2700 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 103:
-#line 760 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 756 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{ (yyval.ival) = NOT_EQUAL; }
-#line 2710 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2706 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 104:
-#line 761 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 757 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{ (yyval.ival) = '^'; }
-#line 2716 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2712 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 105:
-#line 762 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 758 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{ (yyval.ival) = '|'; }
-#line 2722 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2718 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 106:
-#line 763 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 759 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{ (yyval.ival) = AND; }
-#line 2728 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2724 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 107:
-#line 764 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 760 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{ (yyval.ival) = OR; }
-#line 2734 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2730 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 108:
-#line 765 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 761 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{ (yyval.ival) = ';'; }
-#line 2740 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2736 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 109:
-#line 766 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 762 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{ (yyval.ival) = ','; }
-#line 2746 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2742 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 110:
-#line 767 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 763 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{ (yyval.ival) = '='; }
-#line 2752 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2748 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 111:
-#line 768 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 764 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{ (yyval.ival) = PASTE; }
-#line 2758 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2754 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 112:
-#line 769 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 765 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{ (yyval.ival) = PLUS_PLUS; }
-#line 2764 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2760 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
case 113:
-#line 770 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
+#line 766 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1646 */
{ (yyval.ival) = MINUS_MINUS; }
-#line 2770 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2766 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
break;
-#line 2774 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
+#line 2770 "glsl/glcpp/glcpp-parse.c" /* yacc.c:1646 */
default: break;
}
/* User semantic actions sometimes alter yychar, and that requires
@@ -3005,7 +3001,7 @@
#endif
return yyresult;
}
-#line 773 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1906 */
+#line 769 "./glsl/glcpp/glcpp-parse.y" /* yacc.c:1906 */
string_list_t *
@@ -3363,60 +3359,60 @@
}
static void
-_token_print(char **out, size_t *len, token_t *token)
+_token_print(struct _mesa_string_buffer *out, token_t *token)
{
if (token->type < 256) {
- ralloc_asprintf_rewrite_tail (out, len, "%c", token->type);
+ _mesa_string_buffer_append_char(out, token->type);
return;
}
switch (token->type) {
case INTEGER:
- ralloc_asprintf_rewrite_tail (out, len, "%" PRIiMAX, token->value.ival);
+ _mesa_string_buffer_printf(out, "%" PRIiMAX, token->value.ival);
break;
case IDENTIFIER:
case INTEGER_STRING:
case OTHER:
- ralloc_asprintf_rewrite_tail (out, len, "%s", token->value.str);
+ _mesa_string_buffer_append(out, token->value.str);
break;
case SPACE:
- ralloc_asprintf_rewrite_tail (out, len, " ");
+ _mesa_string_buffer_append_char(out, ' ');
break;
case LEFT_SHIFT:
- ralloc_asprintf_rewrite_tail (out, len, "<<");
+ _mesa_string_buffer_append(out, "<<");
break;
case RIGHT_SHIFT:
- ralloc_asprintf_rewrite_tail (out, len, ">>");
+ _mesa_string_buffer_append(out, ">>");
break;
case LESS_OR_EQUAL:
- ralloc_asprintf_rewrite_tail (out, len, "<=");
+ _mesa_string_buffer_append(out, "<=");
break;
case GREATER_OR_EQUAL:
- ralloc_asprintf_rewrite_tail (out, len, ">=");
+ _mesa_string_buffer_append(out, ">=");
break;
case EQUAL:
- ralloc_asprintf_rewrite_tail (out, len, "==");
+ _mesa_string_buffer_append(out, "==");
break;
case NOT_EQUAL:
- ralloc_asprintf_rewrite_tail (out, len, "!=");
+ _mesa_string_buffer_append(out, "!=");
break;
case AND:
- ralloc_asprintf_rewrite_tail (out, len, "&&");
+ _mesa_string_buffer_append(out, "&&");
break;
case OR:
- ralloc_asprintf_rewrite_tail (out, len, "||");
+ _mesa_string_buffer_append(out, "||");
break;
case PASTE:
- ralloc_asprintf_rewrite_tail (out, len, "##");
+ _mesa_string_buffer_append(out, "##");
break;
case PLUS_PLUS:
- ralloc_asprintf_rewrite_tail (out, len, "++");
+ _mesa_string_buffer_append(out, "++");
break;
case MINUS_MINUS:
- ralloc_asprintf_rewrite_tail (out, len, "--");
+ _mesa_string_buffer_append(out, "--");
break;
case DEFINED:
- ralloc_asprintf_rewrite_tail (out, len, "defined");
+ _mesa_string_buffer_append(out, "defined");
break;
case PLACEHOLDER:
/* Nothing to print. */
@@ -3543,11 +3539,11 @@
FAIL:
glcpp_error (&token->location, parser, "");
- ralloc_asprintf_rewrite_tail (&parser->info_log, &parser->info_log_length, "Pasting \"");
- _token_print (&parser->info_log, &parser->info_log_length, token);
- ralloc_asprintf_rewrite_tail (&parser->info_log, &parser->info_log_length, "\" and \"");
- _token_print (&parser->info_log, &parser->info_log_length, other);
- ralloc_asprintf_rewrite_tail (&parser->info_log, &parser->info_log_length, "\" does not give a valid preprocessing token.\n");
+ _mesa_string_buffer_append(parser->info_log, "Pasting \"");
+ _token_print(parser->info_log, token);
+ _mesa_string_buffer_append(parser->info_log, "\" and \"");
+ _token_print(parser->info_log, other);
+ _mesa_string_buffer_append(parser->info_log, "\" does not give a valid preprocessing token.\n");
return token;
}
@@ -3561,7 +3557,7 @@
return;
for (node = list->head; node; node = node->next)
- _token_print (&parser->output, &parser->output_length, node->token);
+ _token_print(parser->output, node->token);
}
void
@@ -3583,6 +3579,11 @@
_define_object_macro(parser, NULL, name, list);
}
+/* Initial output buffer size, 4096 minus ralloc() overhead. It was selected
+ * to minimize total amount of allocated memory during shader-db run.
+ */
+#define INITIAL_PP_OUTPUT_BUF_SIZE 4048
+
glcpp_parser_t *
glcpp_parser_create(const struct gl_extensions *extension_list,
glcpp_extension_iterator extensions, void *state, gl_api api)
@@ -3613,10 +3614,10 @@
parser->lex_from_list = NULL;
parser->lex_from_node = NULL;
- parser->output = ralloc_strdup(parser, "");
- parser->output_length = 0;
- parser->info_log = ralloc_strdup(parser, "");
- parser->info_log_length = 0;
+ parser->output = _mesa_string_buffer_create(parser,
+ INITIAL_PP_OUTPUT_BUF_SIZE);
+ parser->info_log = _mesa_string_buffer_create(parser,
+ INITIAL_PP_OUTPUT_BUF_SIZE);
parser->error = 0;
parser->extensions = extensions;
@@ -4607,10 +4608,10 @@
}
if (explicitly_set) {
- ralloc_asprintf_rewrite_tail(&parser->output, &parser->output_length,
- "#version %" PRIiMAX "%s%s", version,
- es_identifier ? " " : "",
- es_identifier ? es_identifier : "");
+ _mesa_string_buffer_printf(parser->output,
+ "#version %" PRIiMAX "%s%s", version,
+ es_identifier ? " " : "",
+ es_identifier ? es_identifier : "");
}
}
diff -Nru mesa-17.2.4/src/compiler/glsl/glcpp/glcpp-parse.y mesa-17.3.3/src/compiler/glsl/glcpp/glcpp-parse.y
--- mesa-17.2.4/src/compiler/glsl/glcpp/glcpp-parse.y 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/glcpp/glcpp-parse.y 2018-01-18 21:30:28.000000000 +0000
@@ -209,7 +209,7 @@
| SPACE control_line
| text_line {
_glcpp_parser_print_expanded_token_list (parser, $1);
- ralloc_asprintf_rewrite_tail (&parser->output, &parser->output_length, "\n");
+ _mesa_string_buffer_append_char(parser->output, '\n');
}
| expanded_line
;
@@ -228,20 +228,16 @@
| LINE_EXPANDED integer_constant NEWLINE {
parser->has_new_line_number = 1;
parser->new_line_number = $2;
- ralloc_asprintf_rewrite_tail (&parser->output,
- &parser->output_length,
- "#line %" PRIiMAX "\n",
- $2);
+ _mesa_string_buffer_printf(parser->output, "#line %" PRIiMAX "\n", $2);
}
| LINE_EXPANDED integer_constant integer_constant NEWLINE {
parser->has_new_line_number = 1;
parser->new_line_number = $2;
parser->has_new_source_number = 1;
parser->new_source_number = $3;
- ralloc_asprintf_rewrite_tail (&parser->output,
- &parser->output_length,
- "#line %" PRIiMAX " %" PRIiMAX "\n",
- $2, $3);
+ _mesa_string_buffer_printf(parser->output,
+ "#line %" PRIiMAX " %" PRIiMAX "\n",
+ $2, $3);
}
;
@@ -259,7 +255,7 @@
control_line:
control_line_success {
- ralloc_asprintf_rewrite_tail (&parser->output, &parser->output_length, "\n");
+ _mesa_string_buffer_append_char(parser->output, '\n');
}
| control_line_error
| HASH_TOKEN LINE pp_tokens NEWLINE {
@@ -449,7 +445,7 @@
glcpp_parser_resolve_implicit_version(parser);
}
| HASH_TOKEN PRAGMA NEWLINE {
- ralloc_asprintf_rewrite_tail (&parser->output, &parser->output_length, "#%s", $2);
+ _mesa_string_buffer_printf(parser->output, "#%s", $2);
}
;
@@ -1127,60 +1123,60 @@
}
static void
-_token_print(char **out, size_t *len, token_t *token)
+_token_print(struct _mesa_string_buffer *out, token_t *token)
{
if (token->type < 256) {
- ralloc_asprintf_rewrite_tail (out, len, "%c", token->type);
+ _mesa_string_buffer_append_char(out, token->type);
return;
}
switch (token->type) {
case INTEGER:
- ralloc_asprintf_rewrite_tail (out, len, "%" PRIiMAX, token->value.ival);
+ _mesa_string_buffer_printf(out, "%" PRIiMAX, token->value.ival);
break;
case IDENTIFIER:
case INTEGER_STRING:
case OTHER:
- ralloc_asprintf_rewrite_tail (out, len, "%s", token->value.str);
+ _mesa_string_buffer_append(out, token->value.str);
break;
case SPACE:
- ralloc_asprintf_rewrite_tail (out, len, " ");
+ _mesa_string_buffer_append_char(out, ' ');
break;
case LEFT_SHIFT:
- ralloc_asprintf_rewrite_tail (out, len, "<<");
+ _mesa_string_buffer_append(out, "<<");
break;
case RIGHT_SHIFT:
- ralloc_asprintf_rewrite_tail (out, len, ">>");
+ _mesa_string_buffer_append(out, ">>");
break;
case LESS_OR_EQUAL:
- ralloc_asprintf_rewrite_tail (out, len, "<=");
+ _mesa_string_buffer_append(out, "<=");
break;
case GREATER_OR_EQUAL:
- ralloc_asprintf_rewrite_tail (out, len, ">=");
+ _mesa_string_buffer_append(out, ">=");
break;
case EQUAL:
- ralloc_asprintf_rewrite_tail (out, len, "==");
+ _mesa_string_buffer_append(out, "==");
break;
case NOT_EQUAL:
- ralloc_asprintf_rewrite_tail (out, len, "!=");
+ _mesa_string_buffer_append(out, "!=");
break;
case AND:
- ralloc_asprintf_rewrite_tail (out, len, "&&");
+ _mesa_string_buffer_append(out, "&&");
break;
case OR:
- ralloc_asprintf_rewrite_tail (out, len, "||");
+ _mesa_string_buffer_append(out, "||");
break;
case PASTE:
- ralloc_asprintf_rewrite_tail (out, len, "##");
+ _mesa_string_buffer_append(out, "##");
break;
case PLUS_PLUS:
- ralloc_asprintf_rewrite_tail (out, len, "++");
+ _mesa_string_buffer_append(out, "++");
break;
case MINUS_MINUS:
- ralloc_asprintf_rewrite_tail (out, len, "--");
+ _mesa_string_buffer_append(out, "--");
break;
case DEFINED:
- ralloc_asprintf_rewrite_tail (out, len, "defined");
+ _mesa_string_buffer_append(out, "defined");
break;
case PLACEHOLDER:
/* Nothing to print. */
@@ -1307,11 +1303,11 @@
FAIL:
glcpp_error (&token->location, parser, "");
- ralloc_asprintf_rewrite_tail (&parser->info_log, &parser->info_log_length, "Pasting \"");
- _token_print (&parser->info_log, &parser->info_log_length, token);
- ralloc_asprintf_rewrite_tail (&parser->info_log, &parser->info_log_length, "\" and \"");
- _token_print (&parser->info_log, &parser->info_log_length, other);
- ralloc_asprintf_rewrite_tail (&parser->info_log, &parser->info_log_length, "\" does not give a valid preprocessing token.\n");
+ _mesa_string_buffer_append(parser->info_log, "Pasting \"");
+ _token_print(parser->info_log, token);
+ _mesa_string_buffer_append(parser->info_log, "\" and \"");
+ _token_print(parser->info_log, other);
+ _mesa_string_buffer_append(parser->info_log, "\" does not give a valid preprocessing token.\n");
return token;
}
@@ -1325,7 +1321,7 @@
return;
for (node = list->head; node; node = node->next)
- _token_print (&parser->output, &parser->output_length, node->token);
+ _token_print(parser->output, node->token);
}
void
@@ -1347,6 +1343,11 @@
_define_object_macro(parser, NULL, name, list);
}
+/* Initial output buffer size, 4096 minus ralloc() overhead. It was selected
+ * to minimize total amount of allocated memory during shader-db run.
+ */
+#define INITIAL_PP_OUTPUT_BUF_SIZE 4048
+
glcpp_parser_t *
glcpp_parser_create(const struct gl_extensions *extension_list,
glcpp_extension_iterator extensions, void *state, gl_api api)
@@ -1377,10 +1378,10 @@
parser->lex_from_list = NULL;
parser->lex_from_node = NULL;
- parser->output = ralloc_strdup(parser, "");
- parser->output_length = 0;
- parser->info_log = ralloc_strdup(parser, "");
- parser->info_log_length = 0;
+ parser->output = _mesa_string_buffer_create(parser,
+ INITIAL_PP_OUTPUT_BUF_SIZE);
+ parser->info_log = _mesa_string_buffer_create(parser,
+ INITIAL_PP_OUTPUT_BUF_SIZE);
parser->error = 0;
parser->extensions = extensions;
@@ -2371,10 +2372,10 @@
}
if (explicitly_set) {
- ralloc_asprintf_rewrite_tail(&parser->output, &parser->output_length,
- "#version %" PRIiMAX "%s%s", version,
- es_identifier ? " " : "",
- es_identifier ? es_identifier : "");
+ _mesa_string_buffer_printf(parser->output,
+ "#version %" PRIiMAX "%s%s", version,
+ es_identifier ? " " : "",
+ es_identifier ? es_identifier : "");
}
}
diff -Nru mesa-17.2.4/src/compiler/glsl/glcpp/meson.build mesa-17.3.3/src/compiler/glsl/glcpp/meson.build
--- mesa-17.2.4/src/compiler/glsl/glcpp/meson.build 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/glcpp/meson.build 2018-02-01 16:17:32.000000000 +0000
@@ -0,0 +1,56 @@
+# Copyright © 2017 Intel Corporation
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+glcpp_parse = custom_target(
+ 'glcpp-parse.[ch]',
+ input : 'glcpp-parse.y',
+ output : ['glcpp-parse.c', 'glcpp-parse.h'],
+ command : [prog_bison, '-o', '@OUTPUT0@', '-p', 'glcpp_parser_',
+ '--defines=@OUTPUT1@', '@INPUT@'],
+)
+
+glcpp_lex = custom_target(
+ 'glcpp-lex.c',
+ input : 'glcpp-lex.l',
+ output : 'glcpp-lex.c',
+ command : [prog_flex, '-o', '@OUTPUT@', '@INPUT@'],
+)
+
+libglcpp = static_library(
+ 'glcpp',
+ [glcpp_lex, glcpp_parse, files('glcpp.h', 'pp.c')],
+ link_with : libmesa_util,
+ include_directories : [inc_common],
+ c_args : [c_vis_args, no_override_init_args, c_msvc_compat_args],
+ cpp_args : [cpp_vis_args, cpp_msvc_compat_args],
+ build_by_default : false,
+)
+
+glcpp = executable(
+ 'glcpp',
+ 'glcpp.c',
+ dependencies : [dep_m],
+ include_directories : [inc_common],
+ link_with : [libglcpp, libglsl_util],
+ c_args : [c_vis_args, no_override_init_args, c_msvc_compat_args],
+ build_by_default : false,
+)
+
+# TODO: figure out how to make all of these tests work.
diff -Nru mesa-17.2.4/src/compiler/glsl/glcpp/pp.c mesa-17.3.3/src/compiler/glsl/glcpp/pp.c
--- mesa-17.2.4/src/compiler/glsl/glcpp/pp.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/glcpp/pp.c 2018-01-18 21:30:28.000000000 +0000
@@ -32,20 +32,16 @@
va_list ap;
parser->error = 1;
- ralloc_asprintf_rewrite_tail(&parser->info_log,
- &parser->info_log_length,
- "%u:%u(%u): "
- "preprocessor error: ",
- locp->source,
- locp->first_line,
- locp->first_column);
+ _mesa_string_buffer_printf(parser->info_log,
+ "%u:%u(%u): "
+ "preprocessor error: ",
+ locp->source,
+ locp->first_line,
+ locp->first_column);
va_start(ap, fmt);
- ralloc_vasprintf_rewrite_tail(&parser->info_log,
- &parser->info_log_length,
- fmt, ap);
+ _mesa_string_buffer_vprintf(parser->info_log, fmt, ap);
va_end(ap);
- ralloc_asprintf_rewrite_tail(&parser->info_log,
- &parser->info_log_length, "\n");
+ _mesa_string_buffer_append_char(parser->info_log, '\n');
}
void
@@ -53,20 +49,16 @@
{
va_list ap;
- ralloc_asprintf_rewrite_tail(&parser->info_log,
- &parser->info_log_length,
+ _mesa_string_buffer_printf(parser->info_log,
"%u:%u(%u): "
"preprocessor warning: ",
locp->source,
locp->first_line,
locp->first_column);
va_start(ap, fmt);
- ralloc_vasprintf_rewrite_tail(&parser->info_log,
- &parser->info_log_length,
- fmt, ap);
+ _mesa_string_buffer_vprintf(parser->info_log, fmt, ap);
va_end(ap);
- ralloc_asprintf_rewrite_tail(&parser->info_log,
- &parser->info_log_length, "\n");
+ _mesa_string_buffer_append_char(parser->info_log, '\n');
}
/* Given str, (that's expected to start with a newline terminator of some
@@ -105,17 +97,25 @@
return ret;
}
+/* Initial output buffer size, 4096 minus ralloc() overhead. It was selected
+ * to minimize total amount of allocated memory during shader-db run.
+ */
+#define INITIAL_PP_OUTPUT_BUF_SIZE 4048
+
/* Remove any line continuation characters in the shader, (whether in
* preprocessing directives or in GLSL code).
*/
static char *
remove_line_continuations(glcpp_parser_t *ctx, const char *shader)
{
- char *clean = ralloc_strdup(ctx, "");
+ struct _mesa_string_buffer *sb =
+ _mesa_string_buffer_create(ctx, INITIAL_PP_OUTPUT_BUF_SIZE);
+
const char *backslash, *newline, *search_start;
const char *cr, *lf;
char newline_separator[3];
int collapsed_newlines = 0;
+ int separator_len;
backslash = strchr(shader, '\\');
@@ -161,6 +161,7 @@
newline_separator[0] = '\n';
newline_separator[1] = '\r';
}
+ separator_len = strlen(newline_separator);
while (true) {
/* If we have previously collapsed any line-continuations,
@@ -180,10 +181,12 @@
if (newline &&
(backslash == NULL || newline < backslash))
{
- ralloc_strncat(&clean, shader,
- newline - shader + 1);
+ _mesa_string_buffer_append_len(sb, shader,
+ newline - shader + 1);
while (collapsed_newlines) {
- ralloc_strcat(&clean, newline_separator);
+ _mesa_string_buffer_append_len(sb,
+ newline_separator,
+ separator_len);
collapsed_newlines--;
}
shader = skip_newline (newline);
@@ -204,7 +207,7 @@
if (backslash[1] == '\r' || backslash[1] == '\n')
{
collapsed_newlines++;
- ralloc_strncat(&clean, shader, backslash - shader);
+ _mesa_string_buffer_append_len(sb, shader, backslash - shader);
shader = skip_newline (backslash + 1);
search_start = shader;
}
@@ -212,9 +215,9 @@
backslash = strchr(search_start, '\\');
}
- ralloc_strcat(&clean, shader);
+ _mesa_string_buffer_append(sb, shader);
- return clean;
+ return sb->buf;
}
int
@@ -238,10 +241,13 @@
glcpp_parser_resolve_implicit_version(parser);
- ralloc_strcat(info_log, parser->info_log);
+ ralloc_strcat(info_log, parser->info_log->buf);
+
+ /* Crimp the buffer first, to conserve memory */
+ _mesa_string_buffer_crimp_to_fit(parser->output);
- ralloc_steal(ralloc_ctx, parser->output);
- *shader = parser->output;
+ ralloc_steal(ralloc_ctx, parser->output->buf);
+ *shader = parser->output->buf;
errors = parser->error;
glcpp_parser_destroy (parser);
diff -Nru mesa-17.2.4/src/compiler/glsl/glsl_lexer.cpp mesa-17.3.3/src/compiler/glsl/glsl_lexer.cpp
--- mesa-17.2.4/src/compiler/glsl/glsl_lexer.cpp 2017-10-30 14:50:46.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/glsl_lexer.cpp 2018-01-18 21:31:04.000000000 +0000
@@ -1176,6 +1176,7 @@
#define YY_NO_UNISTD_H
#endif
+#define YY_NO_INPUT
#define YY_USER_ACTION \
do { \
yylloc->first_column = yycolumn + 1; \
@@ -1222,8 +1223,13 @@
"illegal use of reserved word `%s'", yytext); \
return ERROR_TOK; \
} else { \
- void *mem_ctx = yyextra->linalloc; \
- yylval->identifier = linear_strdup(mem_ctx, yytext); \
+ /* We're not doing linear_strdup here, to avoid an implicit \
+ * call on strlen() for the length of the string, as this is \
+ * already found by flex and stored in yyleng */ \
+ void *mem_ctx = yyextra->linalloc; \
+ char *id = (char *) linear_alloc_child(mem_ctx, yyleng + 1); \
+ memcpy(id, yytext, yyleng + 1); \
+ yylval->identifier = id; \
return classify_identifier(yyextra, yytext); \
} \
} while (0)
@@ -1302,7 +1308,7 @@
* update the "Internal compiler error" catch-all rule near the end of
* this file. */
-#line 1306 "glsl/glsl_lexer.cpp"
+#line 1312 "glsl/glsl_lexer.cpp"
#define INITIAL 0
#define PP 1
@@ -1588,10 +1594,10 @@
}
{
-#line 180 "./glsl/glsl_lexer.ll"
+#line 186 "./glsl/glsl_lexer.ll"
-#line 1595 "glsl/glsl_lexer.cpp"
+#line 1601 "glsl/glsl_lexer.cpp"
while ( /*CONSTCOND*/1 ) /* loops until end-of-file is reached */
{
@@ -1647,7 +1653,7 @@
case 1:
YY_RULE_SETUP
-#line 182 "./glsl/glsl_lexer.ll"
+#line 188 "./glsl/glsl_lexer.ll"
;
YY_BREAK
/* Preprocessor tokens. */
@@ -1656,17 +1662,17 @@
yyg->yy_c_buf_p = yy_cp -= 1;
YY_DO_BEFORE_ACTION; /* set up yytext again */
YY_RULE_SETUP
-#line 185 "./glsl/glsl_lexer.ll"
+#line 191 "./glsl/glsl_lexer.ll"
;
YY_BREAK
case 3:
YY_RULE_SETUP
-#line 186 "./glsl/glsl_lexer.ll"
+#line 192 "./glsl/glsl_lexer.ll"
{ BEGIN PP; return VERSION_TOK; }
YY_BREAK
case 4:
YY_RULE_SETUP
-#line 187 "./glsl/glsl_lexer.ll"
+#line 193 "./glsl/glsl_lexer.ll"
{ BEGIN PP; return EXTENSION; }
YY_BREAK
case 5:
@@ -1674,7 +1680,7 @@
yyg->yy_c_buf_p = yy_cp -= 1;
YY_DO_BEFORE_ACTION; /* set up yytext again */
YY_RULE_SETUP
-#line 188 "./glsl/glsl_lexer.ll"
+#line 194 "./glsl/glsl_lexer.ll"
{
/* Eat characters until the first digit is
* encountered
@@ -1705,7 +1711,7 @@
yyg->yy_c_buf_p = yy_cp -= 1;
YY_DO_BEFORE_ACTION; /* set up yytext again */
YY_RULE_SETUP
-#line 212 "./glsl/glsl_lexer.ll"
+#line 218 "./glsl/glsl_lexer.ll"
{
/* Eat characters until the first digit is
* encountered
@@ -1731,7 +1737,7 @@
YY_BREAK
case 7:
YY_RULE_SETUP
-#line 234 "./glsl/glsl_lexer.ll"
+#line 240 "./glsl/glsl_lexer.ll"
{
BEGIN PP;
return PRAGMA_DEBUG_ON;
@@ -1739,7 +1745,7 @@
YY_BREAK
case 8:
YY_RULE_SETUP
-#line 238 "./glsl/glsl_lexer.ll"
+#line 244 "./glsl/glsl_lexer.ll"
{
BEGIN PP;
return PRAGMA_DEBUG_OFF;
@@ -1747,7 +1753,7 @@
YY_BREAK
case 9:
YY_RULE_SETUP
-#line 242 "./glsl/glsl_lexer.ll"
+#line 248 "./glsl/glsl_lexer.ll"
{
BEGIN PP;
return PRAGMA_OPTIMIZE_ON;
@@ -1755,7 +1761,7 @@
YY_BREAK
case 10:
YY_RULE_SETUP
-#line 246 "./glsl/glsl_lexer.ll"
+#line 252 "./glsl/glsl_lexer.ll"
{
BEGIN PP;
return PRAGMA_OPTIMIZE_OFF;
@@ -1763,7 +1769,7 @@
YY_BREAK
case 11:
YY_RULE_SETUP
-#line 250 "./glsl/glsl_lexer.ll"
+#line 256 "./glsl/glsl_lexer.ll"
{
BEGIN PP;
return PRAGMA_INVARIANT_ALL;
@@ -1771,47 +1777,53 @@
YY_BREAK
case 12:
YY_RULE_SETUP
-#line 254 "./glsl/glsl_lexer.ll"
+#line 260 "./glsl/glsl_lexer.ll"
{ BEGIN PRAGMA; }
YY_BREAK
case 13:
/* rule 13 can match eol */
YY_RULE_SETUP
-#line 256 "./glsl/glsl_lexer.ll"
+#line 262 "./glsl/glsl_lexer.ll"
{ BEGIN 0; yylineno++; yycolumn = 0; }
YY_BREAK
case 14:
YY_RULE_SETUP
-#line 257 "./glsl/glsl_lexer.ll"
+#line 263 "./glsl/glsl_lexer.ll"
{ }
YY_BREAK
case 15:
YY_RULE_SETUP
-#line 259 "./glsl/glsl_lexer.ll"
+#line 265 "./glsl/glsl_lexer.ll"
{ }
YY_BREAK
case 16:
YY_RULE_SETUP
-#line 260 "./glsl/glsl_lexer.ll"
+#line 266 "./glsl/glsl_lexer.ll"
{ }
YY_BREAK
case 17:
YY_RULE_SETUP
-#line 261 "./glsl/glsl_lexer.ll"
+#line 267 "./glsl/glsl_lexer.ll"
return COLON;
YY_BREAK
case 18:
YY_RULE_SETUP
-#line 262 "./glsl/glsl_lexer.ll"
+#line 268 "./glsl/glsl_lexer.ll"
{
- void *mem_ctx = yyextra->linalloc;
- yylval->identifier = linear_strdup(mem_ctx, yytext);
+ /* We're not doing linear_strdup here, to avoid an implicit call
+ * on strlen() for the length of the string, as this is already
+ * found by flex and stored in yyleng
+ */
+ void *mem_ctx = yyextra->linalloc;
+ char *id = (char *) linear_alloc_child(mem_ctx, yyleng + 1);
+ memcpy(id, yytext, yyleng + 1);
+ yylval->identifier = id;
return IDENTIFIER;
}
YY_BREAK
case 19:
YY_RULE_SETUP
-#line 267 "./glsl/glsl_lexer.ll"
+#line 279 "./glsl/glsl_lexer.ll"
{
yylval->n = strtol(yytext, NULL, 10);
return INTCONSTANT;
@@ -1819,7 +1831,7 @@
YY_BREAK
case 20:
YY_RULE_SETUP
-#line 271 "./glsl/glsl_lexer.ll"
+#line 283 "./glsl/glsl_lexer.ll"
{
yylval->n = 0;
return INTCONSTANT;
@@ -1828,388 +1840,388 @@
case 21:
/* rule 21 can match eol */
YY_RULE_SETUP
-#line 275 "./glsl/glsl_lexer.ll"
+#line 287 "./glsl/glsl_lexer.ll"
{ BEGIN 0; yylineno++; yycolumn = 0; return EOL; }
YY_BREAK
case 22:
YY_RULE_SETUP
-#line 276 "./glsl/glsl_lexer.ll"
+#line 288 "./glsl/glsl_lexer.ll"
{ return yytext[0]; }
YY_BREAK
case 23:
/* rule 23 can match eol */
YY_RULE_SETUP
-#line 278 "./glsl/glsl_lexer.ll"
+#line 290 "./glsl/glsl_lexer.ll"
{ yylineno++; yycolumn = 0; }
YY_BREAK
case 24:
YY_RULE_SETUP
-#line 280 "./glsl/glsl_lexer.ll"
+#line 292 "./glsl/glsl_lexer.ll"
DEPRECATED_ES_KEYWORD(ATTRIBUTE);
YY_BREAK
case 25:
YY_RULE_SETUP
-#line 281 "./glsl/glsl_lexer.ll"
+#line 293 "./glsl/glsl_lexer.ll"
return CONST_TOK;
YY_BREAK
case 26:
YY_RULE_SETUP
-#line 282 "./glsl/glsl_lexer.ll"
+#line 294 "./glsl/glsl_lexer.ll"
return BOOL_TOK;
YY_BREAK
case 27:
YY_RULE_SETUP
-#line 283 "./glsl/glsl_lexer.ll"
+#line 295 "./glsl/glsl_lexer.ll"
return FLOAT_TOK;
YY_BREAK
case 28:
YY_RULE_SETUP
-#line 284 "./glsl/glsl_lexer.ll"
+#line 296 "./glsl/glsl_lexer.ll"
return INT_TOK;
YY_BREAK
case 29:
YY_RULE_SETUP
-#line 285 "./glsl/glsl_lexer.ll"
+#line 297 "./glsl/glsl_lexer.ll"
KEYWORD(130, 300, 130, 300, UINT_TOK);
YY_BREAK
case 30:
YY_RULE_SETUP
-#line 287 "./glsl/glsl_lexer.ll"
+#line 299 "./glsl/glsl_lexer.ll"
return BREAK;
YY_BREAK
case 31:
YY_RULE_SETUP
-#line 288 "./glsl/glsl_lexer.ll"
+#line 300 "./glsl/glsl_lexer.ll"
return CONTINUE;
YY_BREAK
case 32:
YY_RULE_SETUP
-#line 289 "./glsl/glsl_lexer.ll"
+#line 301 "./glsl/glsl_lexer.ll"
return DO;
YY_BREAK
case 33:
YY_RULE_SETUP
-#line 290 "./glsl/glsl_lexer.ll"
+#line 302 "./glsl/glsl_lexer.ll"
return WHILE;
YY_BREAK
case 34:
YY_RULE_SETUP
-#line 291 "./glsl/glsl_lexer.ll"
+#line 303 "./glsl/glsl_lexer.ll"
return ELSE;
YY_BREAK
case 35:
YY_RULE_SETUP
-#line 292 "./glsl/glsl_lexer.ll"
+#line 304 "./glsl/glsl_lexer.ll"
return FOR;
YY_BREAK
case 36:
YY_RULE_SETUP
-#line 293 "./glsl/glsl_lexer.ll"
+#line 305 "./glsl/glsl_lexer.ll"
return IF;
YY_BREAK
case 37:
YY_RULE_SETUP
-#line 294 "./glsl/glsl_lexer.ll"
+#line 306 "./glsl/glsl_lexer.ll"
return DISCARD;
YY_BREAK
case 38:
YY_RULE_SETUP
-#line 295 "./glsl/glsl_lexer.ll"
+#line 307 "./glsl/glsl_lexer.ll"
return RETURN;
YY_BREAK
case 39:
YY_RULE_SETUP
-#line 297 "./glsl/glsl_lexer.ll"
+#line 309 "./glsl/glsl_lexer.ll"
return BVEC2;
YY_BREAK
case 40:
YY_RULE_SETUP
-#line 298 "./glsl/glsl_lexer.ll"
+#line 310 "./glsl/glsl_lexer.ll"
return BVEC3;
YY_BREAK
case 41:
YY_RULE_SETUP
-#line 299 "./glsl/glsl_lexer.ll"
+#line 311 "./glsl/glsl_lexer.ll"
return BVEC4;
YY_BREAK
case 42:
YY_RULE_SETUP
-#line 300 "./glsl/glsl_lexer.ll"
+#line 312 "./glsl/glsl_lexer.ll"
return IVEC2;
YY_BREAK
case 43:
YY_RULE_SETUP
-#line 301 "./glsl/glsl_lexer.ll"
+#line 313 "./glsl/glsl_lexer.ll"
return IVEC3;
YY_BREAK
case 44:
YY_RULE_SETUP
-#line 302 "./glsl/glsl_lexer.ll"
+#line 314 "./glsl/glsl_lexer.ll"
return IVEC4;
YY_BREAK
case 45:
YY_RULE_SETUP
-#line 303 "./glsl/glsl_lexer.ll"
+#line 315 "./glsl/glsl_lexer.ll"
KEYWORD(130, 300, 130, 300, UVEC2);
YY_BREAK
case 46:
YY_RULE_SETUP
-#line 304 "./glsl/glsl_lexer.ll"
+#line 316 "./glsl/glsl_lexer.ll"
KEYWORD(130, 300, 130, 300, UVEC3);
YY_BREAK
case 47:
YY_RULE_SETUP
-#line 305 "./glsl/glsl_lexer.ll"
+#line 317 "./glsl/glsl_lexer.ll"
KEYWORD(130, 300, 130, 300, UVEC4);
YY_BREAK
case 48:
YY_RULE_SETUP
-#line 306 "./glsl/glsl_lexer.ll"
+#line 318 "./glsl/glsl_lexer.ll"
return VEC2;
YY_BREAK
case 49:
YY_RULE_SETUP
-#line 307 "./glsl/glsl_lexer.ll"
+#line 319 "./glsl/glsl_lexer.ll"
return VEC3;
YY_BREAK
case 50:
YY_RULE_SETUP
-#line 308 "./glsl/glsl_lexer.ll"
+#line 320 "./glsl/glsl_lexer.ll"
return VEC4;
YY_BREAK
case 51:
YY_RULE_SETUP
-#line 309 "./glsl/glsl_lexer.ll"
+#line 321 "./glsl/glsl_lexer.ll"
return MAT2X2;
YY_BREAK
case 52:
YY_RULE_SETUP
-#line 310 "./glsl/glsl_lexer.ll"
+#line 322 "./glsl/glsl_lexer.ll"
return MAT3X3;
YY_BREAK
case 53:
YY_RULE_SETUP
-#line 311 "./glsl/glsl_lexer.ll"
+#line 323 "./glsl/glsl_lexer.ll"
return MAT4X4;
YY_BREAK
case 54:
YY_RULE_SETUP
-#line 312 "./glsl/glsl_lexer.ll"
+#line 324 "./glsl/glsl_lexer.ll"
KEYWORD(120, 300, 120, 300, MAT2X2);
YY_BREAK
case 55:
YY_RULE_SETUP
-#line 313 "./glsl/glsl_lexer.ll"
+#line 325 "./glsl/glsl_lexer.ll"
KEYWORD(120, 300, 120, 300, MAT2X3);
YY_BREAK
case 56:
YY_RULE_SETUP
-#line 314 "./glsl/glsl_lexer.ll"
+#line 326 "./glsl/glsl_lexer.ll"
KEYWORD(120, 300, 120, 300, MAT2X4);
YY_BREAK
case 57:
YY_RULE_SETUP
-#line 315 "./glsl/glsl_lexer.ll"
+#line 327 "./glsl/glsl_lexer.ll"
KEYWORD(120, 300, 120, 300, MAT3X2);
YY_BREAK
case 58:
YY_RULE_SETUP
-#line 316 "./glsl/glsl_lexer.ll"
+#line 328 "./glsl/glsl_lexer.ll"
KEYWORD(120, 300, 120, 300, MAT3X3);
YY_BREAK
case 59:
YY_RULE_SETUP
-#line 317 "./glsl/glsl_lexer.ll"
+#line 329 "./glsl/glsl_lexer.ll"
KEYWORD(120, 300, 120, 300, MAT3X4);
YY_BREAK
case 60:
YY_RULE_SETUP
-#line 318 "./glsl/glsl_lexer.ll"
+#line 330 "./glsl/glsl_lexer.ll"
KEYWORD(120, 300, 120, 300, MAT4X2);
YY_BREAK
case 61:
YY_RULE_SETUP
-#line 319 "./glsl/glsl_lexer.ll"
+#line 331 "./glsl/glsl_lexer.ll"
KEYWORD(120, 300, 120, 300, MAT4X3);
YY_BREAK
case 62:
YY_RULE_SETUP
-#line 320 "./glsl/glsl_lexer.ll"
+#line 332 "./glsl/glsl_lexer.ll"
KEYWORD(120, 300, 120, 300, MAT4X4);
YY_BREAK
case 63:
YY_RULE_SETUP
-#line 322 "./glsl/glsl_lexer.ll"
+#line 334 "./glsl/glsl_lexer.ll"
return IN_TOK;
YY_BREAK
case 64:
YY_RULE_SETUP
-#line 323 "./glsl/glsl_lexer.ll"
+#line 335 "./glsl/glsl_lexer.ll"
return OUT_TOK;
YY_BREAK
case 65:
YY_RULE_SETUP
-#line 324 "./glsl/glsl_lexer.ll"
+#line 336 "./glsl/glsl_lexer.ll"
return INOUT_TOK;
YY_BREAK
case 66:
YY_RULE_SETUP
-#line 325 "./glsl/glsl_lexer.ll"
+#line 337 "./glsl/glsl_lexer.ll"
return UNIFORM;
YY_BREAK
case 67:
YY_RULE_SETUP
-#line 326 "./glsl/glsl_lexer.ll"
+#line 338 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(0, 0, 430, 310, yyextra->ARB_shader_storage_buffer_object_enable, BUFFER);
YY_BREAK
case 68:
YY_RULE_SETUP
-#line 327 "./glsl/glsl_lexer.ll"
+#line 339 "./glsl/glsl_lexer.ll"
DEPRECATED_ES_KEYWORD(VARYING);
YY_BREAK
case 69:
YY_RULE_SETUP
-#line 328 "./glsl/glsl_lexer.ll"
+#line 340 "./glsl/glsl_lexer.ll"
KEYWORD(120, 300, 120, 300, CENTROID);
YY_BREAK
case 70:
YY_RULE_SETUP
-#line 329 "./glsl/glsl_lexer.ll"
+#line 341 "./glsl/glsl_lexer.ll"
KEYWORD(120, 100, 120, 100, INVARIANT);
YY_BREAK
case 71:
YY_RULE_SETUP
-#line 330 "./glsl/glsl_lexer.ll"
+#line 342 "./glsl/glsl_lexer.ll"
KEYWORD(130, 100, 130, 300, FLAT);
YY_BREAK
case 72:
YY_RULE_SETUP
-#line 331 "./glsl/glsl_lexer.ll"
+#line 343 "./glsl/glsl_lexer.ll"
KEYWORD(130, 300, 130, 300, SMOOTH);
YY_BREAK
case 73:
YY_RULE_SETUP
-#line 332 "./glsl/glsl_lexer.ll"
+#line 344 "./glsl/glsl_lexer.ll"
KEYWORD(130, 300, 130, 0, NOPERSPECTIVE);
YY_BREAK
case 74:
YY_RULE_SETUP
-#line 333 "./glsl/glsl_lexer.ll"
+#line 345 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(0, 300, 400, 320, yyextra->has_tessellation_shader(), PATCH);
YY_BREAK
case 75:
YY_RULE_SETUP
-#line 335 "./glsl/glsl_lexer.ll"
+#line 347 "./glsl/glsl_lexer.ll"
DEPRECATED_ES_KEYWORD(SAMPLER1D);
YY_BREAK
case 76:
YY_RULE_SETUP
-#line 336 "./glsl/glsl_lexer.ll"
+#line 348 "./glsl/glsl_lexer.ll"
return SAMPLER2D;
YY_BREAK
case 77:
YY_RULE_SETUP
-#line 337 "./glsl/glsl_lexer.ll"
+#line 349 "./glsl/glsl_lexer.ll"
return SAMPLER3D;
YY_BREAK
case 78:
YY_RULE_SETUP
-#line 338 "./glsl/glsl_lexer.ll"
+#line 350 "./glsl/glsl_lexer.ll"
return SAMPLERCUBE;
YY_BREAK
case 79:
YY_RULE_SETUP
-#line 339 "./glsl/glsl_lexer.ll"
+#line 351 "./glsl/glsl_lexer.ll"
KEYWORD(130, 300, 130, 0, SAMPLER1DARRAY);
YY_BREAK
case 80:
YY_RULE_SETUP
-#line 340 "./glsl/glsl_lexer.ll"
+#line 352 "./glsl/glsl_lexer.ll"
KEYWORD(130, 300, 130, 300, SAMPLER2DARRAY);
YY_BREAK
case 81:
YY_RULE_SETUP
-#line 341 "./glsl/glsl_lexer.ll"
+#line 353 "./glsl/glsl_lexer.ll"
DEPRECATED_ES_KEYWORD(SAMPLER1DSHADOW);
YY_BREAK
case 82:
YY_RULE_SETUP
-#line 342 "./glsl/glsl_lexer.ll"
+#line 354 "./glsl/glsl_lexer.ll"
return SAMPLER2DSHADOW;
YY_BREAK
case 83:
YY_RULE_SETUP
-#line 343 "./glsl/glsl_lexer.ll"
+#line 355 "./glsl/glsl_lexer.ll"
KEYWORD(130, 300, 130, 300, SAMPLERCUBESHADOW);
YY_BREAK
case 84:
YY_RULE_SETUP
-#line 344 "./glsl/glsl_lexer.ll"
+#line 356 "./glsl/glsl_lexer.ll"
KEYWORD(130, 300, 130, 0, SAMPLER1DARRAYSHADOW);
YY_BREAK
case 85:
YY_RULE_SETUP
-#line 345 "./glsl/glsl_lexer.ll"
+#line 357 "./glsl/glsl_lexer.ll"
KEYWORD(130, 300, 130, 300, SAMPLER2DARRAYSHADOW);
YY_BREAK
case 86:
YY_RULE_SETUP
-#line 346 "./glsl/glsl_lexer.ll"
+#line 358 "./glsl/glsl_lexer.ll"
KEYWORD(130, 300, 130, 0, ISAMPLER1D);
YY_BREAK
case 87:
YY_RULE_SETUP
-#line 347 "./glsl/glsl_lexer.ll"
+#line 359 "./glsl/glsl_lexer.ll"
KEYWORD(130, 300, 130, 300, ISAMPLER2D);
YY_BREAK
case 88:
YY_RULE_SETUP
-#line 348 "./glsl/glsl_lexer.ll"
+#line 360 "./glsl/glsl_lexer.ll"
KEYWORD(130, 300, 130, 300, ISAMPLER3D);
YY_BREAK
case 89:
YY_RULE_SETUP
-#line 349 "./glsl/glsl_lexer.ll"
+#line 361 "./glsl/glsl_lexer.ll"
KEYWORD(130, 300, 130, 300, ISAMPLERCUBE);
YY_BREAK
case 90:
YY_RULE_SETUP
-#line 350 "./glsl/glsl_lexer.ll"
+#line 362 "./glsl/glsl_lexer.ll"
KEYWORD(130, 300, 130, 0, ISAMPLER1DARRAY);
YY_BREAK
case 91:
YY_RULE_SETUP
-#line 351 "./glsl/glsl_lexer.ll"
+#line 363 "./glsl/glsl_lexer.ll"
KEYWORD(130, 300, 130, 300, ISAMPLER2DARRAY);
YY_BREAK
case 92:
YY_RULE_SETUP
-#line 352 "./glsl/glsl_lexer.ll"
+#line 364 "./glsl/glsl_lexer.ll"
KEYWORD(130, 300, 130, 0, USAMPLER1D);
YY_BREAK
case 93:
YY_RULE_SETUP
-#line 353 "./glsl/glsl_lexer.ll"
+#line 365 "./glsl/glsl_lexer.ll"
KEYWORD(130, 300, 130, 300, USAMPLER2D);
YY_BREAK
case 94:
YY_RULE_SETUP
-#line 354 "./glsl/glsl_lexer.ll"
+#line 366 "./glsl/glsl_lexer.ll"
KEYWORD(130, 300, 130, 300, USAMPLER3D);
YY_BREAK
case 95:
YY_RULE_SETUP
-#line 355 "./glsl/glsl_lexer.ll"
+#line 367 "./glsl/glsl_lexer.ll"
KEYWORD(130, 300, 130, 300, USAMPLERCUBE);
YY_BREAK
case 96:
YY_RULE_SETUP
-#line 356 "./glsl/glsl_lexer.ll"
+#line 368 "./glsl/glsl_lexer.ll"
KEYWORD(130, 300, 130, 0, USAMPLER1DARRAY);
YY_BREAK
case 97:
YY_RULE_SETUP
-#line 357 "./glsl/glsl_lexer.ll"
+#line 369 "./glsl/glsl_lexer.ll"
KEYWORD(130, 300, 130, 300, USAMPLER2DARRAY);
YY_BREAK
/* additional keywords in ARB_texture_multisample, included in GLSL 1.50 */
@@ -2217,58 +2229,58 @@
/* [iu]sampler2DMS are defined in GLSL ES 3.10 */
case 98:
YY_RULE_SETUP
-#line 362 "./glsl/glsl_lexer.ll"
+#line 374 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(150, 300, 150, 310, yyextra->ARB_texture_multisample_enable, SAMPLER2DMS);
YY_BREAK
case 99:
YY_RULE_SETUP
-#line 363 "./glsl/glsl_lexer.ll"
+#line 375 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(150, 300, 150, 310, yyextra->ARB_texture_multisample_enable, ISAMPLER2DMS);
YY_BREAK
case 100:
YY_RULE_SETUP
-#line 364 "./glsl/glsl_lexer.ll"
+#line 376 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(150, 300, 150, 310, yyextra->ARB_texture_multisample_enable, USAMPLER2DMS);
YY_BREAK
case 101:
YY_RULE_SETUP
-#line 365 "./glsl/glsl_lexer.ll"
+#line 377 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(150, 300, 150, 320, yyextra->ARB_texture_multisample_enable || yyextra->OES_texture_storage_multisample_2d_array_enable, SAMPLER2DMSARRAY);
YY_BREAK
case 102:
YY_RULE_SETUP
-#line 366 "./glsl/glsl_lexer.ll"
+#line 378 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(150, 300, 150, 320, yyextra->ARB_texture_multisample_enable || yyextra->OES_texture_storage_multisample_2d_array_enable, ISAMPLER2DMSARRAY);
YY_BREAK
case 103:
YY_RULE_SETUP
-#line 367 "./glsl/glsl_lexer.ll"
+#line 379 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(150, 300, 150, 320, yyextra->ARB_texture_multisample_enable || yyextra->OES_texture_storage_multisample_2d_array_enable, USAMPLER2DMSARRAY);
YY_BREAK
/* keywords available with ARB_texture_cube_map_array_enable extension on desktop GLSL */
case 104:
YY_RULE_SETUP
-#line 370 "./glsl/glsl_lexer.ll"
+#line 382 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(400, 310, 400, 320, yyextra->ARB_texture_cube_map_array_enable || yyextra->OES_texture_cube_map_array_enable || yyextra->EXT_texture_cube_map_array_enable, SAMPLERCUBEARRAY);
YY_BREAK
case 105:
YY_RULE_SETUP
-#line 371 "./glsl/glsl_lexer.ll"
+#line 383 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(400, 310, 400, 320, yyextra->ARB_texture_cube_map_array_enable || yyextra->OES_texture_cube_map_array_enable || yyextra->EXT_texture_cube_map_array_enable, ISAMPLERCUBEARRAY);
YY_BREAK
case 106:
YY_RULE_SETUP
-#line 372 "./glsl/glsl_lexer.ll"
+#line 384 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(400, 310, 400, 320, yyextra->ARB_texture_cube_map_array_enable || yyextra->OES_texture_cube_map_array_enable || yyextra->EXT_texture_cube_map_array_enable, USAMPLERCUBEARRAY);
YY_BREAK
case 107:
YY_RULE_SETUP
-#line 373 "./glsl/glsl_lexer.ll"
+#line 385 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(400, 310, 400, 320, yyextra->ARB_texture_cube_map_array_enable || yyextra->OES_texture_cube_map_array_enable || yyextra->EXT_texture_cube_map_array_enable, SAMPLERCUBEARRAYSHADOW);
YY_BREAK
case 108:
YY_RULE_SETUP
-#line 375 "./glsl/glsl_lexer.ll"
+#line 387 "./glsl/glsl_lexer.ll"
{
if (yyextra->OES_EGL_image_external_enable)
return SAMPLEREXTERNALOES;
@@ -2279,243 +2291,243 @@
/* keywords available with ARB_gpu_shader5 */
case 109:
YY_RULE_SETUP
-#line 383 "./glsl/glsl_lexer.ll"
+#line 395 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(400, 310, 400, 320, yyextra->ARB_gpu_shader5_enable || yyextra->EXT_gpu_shader5_enable || yyextra->OES_gpu_shader5_enable, PRECISE);
YY_BREAK
/* keywords available with ARB_shader_image_load_store */
case 110:
YY_RULE_SETUP
-#line 386 "./glsl/glsl_lexer.ll"
+#line 398 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IMAGE1D);
YY_BREAK
case 111:
YY_RULE_SETUP
-#line 387 "./glsl/glsl_lexer.ll"
+#line 399 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, IMAGE2D);
YY_BREAK
case 112:
YY_RULE_SETUP
-#line 388 "./glsl/glsl_lexer.ll"
+#line 400 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, IMAGE3D);
YY_BREAK
case 113:
YY_RULE_SETUP
-#line 389 "./glsl/glsl_lexer.ll"
+#line 401 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IMAGE2DRECT);
YY_BREAK
case 114:
YY_RULE_SETUP
-#line 390 "./glsl/glsl_lexer.ll"
+#line 402 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, IMAGECUBE);
YY_BREAK
case 115:
YY_RULE_SETUP
-#line 391 "./glsl/glsl_lexer.ll"
+#line 403 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(130, 300, 420, 320, yyextra->ARB_shader_image_load_store_enable || yyextra->EXT_texture_buffer_enable || yyextra->OES_texture_buffer_enable, IMAGEBUFFER);
YY_BREAK
case 116:
YY_RULE_SETUP
-#line 392 "./glsl/glsl_lexer.ll"
+#line 404 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IMAGE1DARRAY);
YY_BREAK
case 117:
YY_RULE_SETUP
-#line 393 "./glsl/glsl_lexer.ll"
+#line 405 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, IMAGE2DARRAY);
YY_BREAK
case 118:
YY_RULE_SETUP
-#line 394 "./glsl/glsl_lexer.ll"
+#line 406 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(130, 300, 420, 320, yyextra->ARB_shader_image_load_store_enable || yyextra->OES_texture_cube_map_array_enable || yyextra->EXT_texture_cube_map_array_enable, IMAGECUBEARRAY);
YY_BREAK
case 119:
YY_RULE_SETUP
-#line 395 "./glsl/glsl_lexer.ll"
+#line 407 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IMAGE2DMS);
YY_BREAK
case 120:
YY_RULE_SETUP
-#line 396 "./glsl/glsl_lexer.ll"
+#line 408 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IMAGE2DMSARRAY);
YY_BREAK
case 121:
YY_RULE_SETUP
-#line 397 "./glsl/glsl_lexer.ll"
+#line 409 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IIMAGE1D);
YY_BREAK
case 122:
YY_RULE_SETUP
-#line 398 "./glsl/glsl_lexer.ll"
+#line 410 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, IIMAGE2D);
YY_BREAK
case 123:
YY_RULE_SETUP
-#line 399 "./glsl/glsl_lexer.ll"
+#line 411 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, IIMAGE3D);
YY_BREAK
case 124:
YY_RULE_SETUP
-#line 400 "./glsl/glsl_lexer.ll"
+#line 412 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IIMAGE2DRECT);
YY_BREAK
case 125:
YY_RULE_SETUP
-#line 401 "./glsl/glsl_lexer.ll"
+#line 413 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, IIMAGECUBE);
YY_BREAK
case 126:
YY_RULE_SETUP
-#line 402 "./glsl/glsl_lexer.ll"
+#line 414 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(130, 300, 420, 320, yyextra->ARB_shader_image_load_store_enable || yyextra->EXT_texture_buffer_enable || yyextra->OES_texture_buffer_enable, IIMAGEBUFFER);
YY_BREAK
case 127:
YY_RULE_SETUP
-#line 403 "./glsl/glsl_lexer.ll"
+#line 415 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IIMAGE1DARRAY);
YY_BREAK
case 128:
YY_RULE_SETUP
-#line 404 "./glsl/glsl_lexer.ll"
+#line 416 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, IIMAGE2DARRAY);
YY_BREAK
case 129:
YY_RULE_SETUP
-#line 405 "./glsl/glsl_lexer.ll"
+#line 417 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(130, 300, 420, 320, yyextra->ARB_shader_image_load_store_enable || yyextra->OES_texture_cube_map_array_enable || yyextra->EXT_texture_cube_map_array_enable, IIMAGECUBEARRAY);
YY_BREAK
case 130:
YY_RULE_SETUP
-#line 406 "./glsl/glsl_lexer.ll"
+#line 418 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IIMAGE2DMS);
YY_BREAK
case 131:
YY_RULE_SETUP
-#line 407 "./glsl/glsl_lexer.ll"
+#line 419 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IIMAGE2DMSARRAY);
YY_BREAK
case 132:
YY_RULE_SETUP
-#line 408 "./glsl/glsl_lexer.ll"
+#line 420 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, UIMAGE1D);
YY_BREAK
case 133:
YY_RULE_SETUP
-#line 409 "./glsl/glsl_lexer.ll"
+#line 421 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, UIMAGE2D);
YY_BREAK
case 134:
YY_RULE_SETUP
-#line 410 "./glsl/glsl_lexer.ll"
+#line 422 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, UIMAGE3D);
YY_BREAK
case 135:
YY_RULE_SETUP
-#line 411 "./glsl/glsl_lexer.ll"
+#line 423 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, UIMAGE2DRECT);
YY_BREAK
case 136:
YY_RULE_SETUP
-#line 412 "./glsl/glsl_lexer.ll"
+#line 424 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, UIMAGECUBE);
YY_BREAK
case 137:
YY_RULE_SETUP
-#line 413 "./glsl/glsl_lexer.ll"
+#line 425 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(130, 300, 420, 320, yyextra->ARB_shader_image_load_store_enable || yyextra->EXT_texture_buffer_enable || yyextra->OES_texture_buffer_enable, UIMAGEBUFFER);
YY_BREAK
case 138:
YY_RULE_SETUP
-#line 414 "./glsl/glsl_lexer.ll"
+#line 426 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, UIMAGE1DARRAY);
YY_BREAK
case 139:
YY_RULE_SETUP
-#line 415 "./glsl/glsl_lexer.ll"
+#line 427 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, UIMAGE2DARRAY);
YY_BREAK
case 140:
YY_RULE_SETUP
-#line 416 "./glsl/glsl_lexer.ll"
+#line 428 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(130, 300, 420, 320, yyextra->ARB_shader_image_load_store_enable || yyextra->OES_texture_cube_map_array_enable || yyextra->EXT_texture_cube_map_array_enable, UIMAGECUBEARRAY);
YY_BREAK
case 141:
YY_RULE_SETUP
-#line 417 "./glsl/glsl_lexer.ll"
+#line 429 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, UIMAGE2DMS);
YY_BREAK
case 142:
YY_RULE_SETUP
-#line 418 "./glsl/glsl_lexer.ll"
+#line 430 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, UIMAGE2DMSARRAY);
YY_BREAK
case 143:
YY_RULE_SETUP
-#line 419 "./glsl/glsl_lexer.ll"
+#line 431 "./glsl/glsl_lexer.ll"
KEYWORD(130, 300, 0, 0, IMAGE1DSHADOW);
YY_BREAK
case 144:
YY_RULE_SETUP
-#line 420 "./glsl/glsl_lexer.ll"
+#line 432 "./glsl/glsl_lexer.ll"
KEYWORD(130, 300, 0, 0, IMAGE2DSHADOW);
YY_BREAK
case 145:
YY_RULE_SETUP
-#line 421 "./glsl/glsl_lexer.ll"
+#line 433 "./glsl/glsl_lexer.ll"
KEYWORD(130, 300, 0, 0, IMAGE1DARRAYSHADOW);
YY_BREAK
case 146:
YY_RULE_SETUP
-#line 422 "./glsl/glsl_lexer.ll"
+#line 434 "./glsl/glsl_lexer.ll"
KEYWORD(130, 300, 0, 0, IMAGE2DARRAYSHADOW);
YY_BREAK
case 147:
YY_RULE_SETUP
-#line 424 "./glsl/glsl_lexer.ll"
+#line 436 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable || yyextra->ARB_shader_storage_buffer_object_enable, COHERENT);
YY_BREAK
case 148:
YY_RULE_SETUP
-#line 425 "./glsl/glsl_lexer.ll"
+#line 437 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(110, 100, 420, 310, yyextra->ARB_shader_image_load_store_enable || yyextra->ARB_shader_storage_buffer_object_enable, VOLATILE);
YY_BREAK
case 149:
YY_RULE_SETUP
-#line 426 "./glsl/glsl_lexer.ll"
+#line 438 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable || yyextra->ARB_shader_storage_buffer_object_enable, RESTRICT);
YY_BREAK
case 150:
YY_RULE_SETUP
-#line 427 "./glsl/glsl_lexer.ll"
+#line 439 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable || yyextra->ARB_shader_storage_buffer_object_enable, READONLY);
YY_BREAK
case 151:
YY_RULE_SETUP
-#line 428 "./glsl/glsl_lexer.ll"
+#line 440 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable || yyextra->ARB_shader_storage_buffer_object_enable, WRITEONLY);
YY_BREAK
case 152:
YY_RULE_SETUP
-#line 430 "./glsl/glsl_lexer.ll"
+#line 442 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_atomic_counters_enable, ATOMIC_UINT);
YY_BREAK
case 153:
YY_RULE_SETUP
-#line 432 "./glsl/glsl_lexer.ll"
+#line 444 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(430, 310, 430, 310, yyextra->ARB_compute_shader_enable, SHARED);
YY_BREAK
case 154:
YY_RULE_SETUP
-#line 434 "./glsl/glsl_lexer.ll"
+#line 446 "./glsl/glsl_lexer.ll"
return STRUCT;
YY_BREAK
case 155:
YY_RULE_SETUP
-#line 435 "./glsl/glsl_lexer.ll"
+#line 447 "./glsl/glsl_lexer.ll"
return VOID_TOK;
YY_BREAK
case 156:
YY_RULE_SETUP
-#line 437 "./glsl/glsl_lexer.ll"
+#line 449 "./glsl/glsl_lexer.ll"
{
if ((yyextra->is_version(140, 300))
|| yyextra->AMD_conservative_depth_enable
@@ -2530,147 +2542,153 @@
|| yyextra->ARB_tessellation_shader_enable) {
return LAYOUT_TOK;
} else {
- void *mem_ctx = yyextra->linalloc;
- yylval->identifier = linear_strdup(mem_ctx, yytext);
+ /* We're not doing linear_strdup here, to avoid an implicit call
+ * on strlen() for the length of the string, as this is already
+ * found by flex and stored in yyleng
+ */
+ void *mem_ctx = yyextra->linalloc;
+ char *id = (char *) linear_alloc_child(mem_ctx, yyleng + 1);
+ memcpy(id, yytext, yyleng + 1);
+ yylval->identifier = id;
return classify_identifier(yyextra, yytext);
}
}
YY_BREAK
case 157:
YY_RULE_SETUP
-#line 457 "./glsl/glsl_lexer.ll"
+#line 475 "./glsl/glsl_lexer.ll"
return INC_OP;
YY_BREAK
case 158:
YY_RULE_SETUP
-#line 458 "./glsl/glsl_lexer.ll"
+#line 476 "./glsl/glsl_lexer.ll"
return DEC_OP;
YY_BREAK
case 159:
YY_RULE_SETUP
-#line 459 "./glsl/glsl_lexer.ll"
+#line 477 "./glsl/glsl_lexer.ll"
return LE_OP;
YY_BREAK
case 160:
YY_RULE_SETUP
-#line 460 "./glsl/glsl_lexer.ll"
+#line 478 "./glsl/glsl_lexer.ll"
return GE_OP;
YY_BREAK
case 161:
YY_RULE_SETUP
-#line 461 "./glsl/glsl_lexer.ll"
+#line 479 "./glsl/glsl_lexer.ll"
return EQ_OP;
YY_BREAK
case 162:
YY_RULE_SETUP
-#line 462 "./glsl/glsl_lexer.ll"
+#line 480 "./glsl/glsl_lexer.ll"
return NE_OP;
YY_BREAK
case 163:
YY_RULE_SETUP
-#line 463 "./glsl/glsl_lexer.ll"
+#line 481 "./glsl/glsl_lexer.ll"
return AND_OP;
YY_BREAK
case 164:
YY_RULE_SETUP
-#line 464 "./glsl/glsl_lexer.ll"
+#line 482 "./glsl/glsl_lexer.ll"
return OR_OP;
YY_BREAK
case 165:
YY_RULE_SETUP
-#line 465 "./glsl/glsl_lexer.ll"
+#line 483 "./glsl/glsl_lexer.ll"
return XOR_OP;
YY_BREAK
case 166:
YY_RULE_SETUP
-#line 466 "./glsl/glsl_lexer.ll"
+#line 484 "./glsl/glsl_lexer.ll"
return LEFT_OP;
YY_BREAK
case 167:
YY_RULE_SETUP
-#line 467 "./glsl/glsl_lexer.ll"
+#line 485 "./glsl/glsl_lexer.ll"
return RIGHT_OP;
YY_BREAK
case 168:
YY_RULE_SETUP
-#line 469 "./glsl/glsl_lexer.ll"
+#line 487 "./glsl/glsl_lexer.ll"
return MUL_ASSIGN;
YY_BREAK
case 169:
YY_RULE_SETUP
-#line 470 "./glsl/glsl_lexer.ll"
+#line 488 "./glsl/glsl_lexer.ll"
return DIV_ASSIGN;
YY_BREAK
case 170:
YY_RULE_SETUP
-#line 471 "./glsl/glsl_lexer.ll"
+#line 489 "./glsl/glsl_lexer.ll"
return ADD_ASSIGN;
YY_BREAK
case 171:
YY_RULE_SETUP
-#line 472 "./glsl/glsl_lexer.ll"
+#line 490 "./glsl/glsl_lexer.ll"
return MOD_ASSIGN;
YY_BREAK
case 172:
YY_RULE_SETUP
-#line 473 "./glsl/glsl_lexer.ll"
+#line 491 "./glsl/glsl_lexer.ll"
return LEFT_ASSIGN;
YY_BREAK
case 173:
YY_RULE_SETUP
-#line 474 "./glsl/glsl_lexer.ll"
+#line 492 "./glsl/glsl_lexer.ll"
return RIGHT_ASSIGN;
YY_BREAK
case 174:
YY_RULE_SETUP
-#line 475 "./glsl/glsl_lexer.ll"
+#line 493 "./glsl/glsl_lexer.ll"
return AND_ASSIGN;
YY_BREAK
case 175:
YY_RULE_SETUP
-#line 476 "./glsl/glsl_lexer.ll"
+#line 494 "./glsl/glsl_lexer.ll"
return XOR_ASSIGN;
YY_BREAK
case 176:
YY_RULE_SETUP
-#line 477 "./glsl/glsl_lexer.ll"
+#line 495 "./glsl/glsl_lexer.ll"
return OR_ASSIGN;
YY_BREAK
case 177:
YY_RULE_SETUP
-#line 478 "./glsl/glsl_lexer.ll"
+#line 496 "./glsl/glsl_lexer.ll"
return SUB_ASSIGN;
YY_BREAK
case 178:
YY_RULE_SETUP
-#line 480 "./glsl/glsl_lexer.ll"
+#line 498 "./glsl/glsl_lexer.ll"
{
return LITERAL_INTEGER(10);
}
YY_BREAK
case 179:
YY_RULE_SETUP
-#line 483 "./glsl/glsl_lexer.ll"
+#line 501 "./glsl/glsl_lexer.ll"
{
return LITERAL_INTEGER(16);
}
YY_BREAK
case 180:
YY_RULE_SETUP
-#line 486 "./glsl/glsl_lexer.ll"
+#line 504 "./glsl/glsl_lexer.ll"
{
return LITERAL_INTEGER(8);
}
YY_BREAK
case 181:
-#line 491 "./glsl/glsl_lexer.ll"
+#line 509 "./glsl/glsl_lexer.ll"
case 182:
-#line 492 "./glsl/glsl_lexer.ll"
+#line 510 "./glsl/glsl_lexer.ll"
case 183:
-#line 493 "./glsl/glsl_lexer.ll"
+#line 511 "./glsl/glsl_lexer.ll"
case 184:
YY_RULE_SETUP
-#line 493 "./glsl/glsl_lexer.ll"
+#line 511 "./glsl/glsl_lexer.ll"
{
struct _mesa_glsl_parse_state *state = yyextra;
char suffix = yytext[strlen(yytext) - 1];
@@ -2684,14 +2702,14 @@
}
YY_BREAK
case 185:
-#line 506 "./glsl/glsl_lexer.ll"
+#line 524 "./glsl/glsl_lexer.ll"
case 186:
-#line 507 "./glsl/glsl_lexer.ll"
+#line 525 "./glsl/glsl_lexer.ll"
case 187:
-#line 508 "./glsl/glsl_lexer.ll"
+#line 526 "./glsl/glsl_lexer.ll"
case 188:
YY_RULE_SETUP
-#line 508 "./glsl/glsl_lexer.ll"
+#line 526 "./glsl/glsl_lexer.ll"
{
if (!yyextra->is_version(400, 0) &&
!yyextra->ARB_gpu_shader_fp64_enable)
@@ -2702,7 +2720,7 @@
YY_BREAK
case 189:
YY_RULE_SETUP
-#line 516 "./glsl/glsl_lexer.ll"
+#line 534 "./glsl/glsl_lexer.ll"
{
yylval->n = 1;
return BOOLCONSTANT;
@@ -2710,7 +2728,7 @@
YY_BREAK
case 190:
YY_RULE_SETUP
-#line 520 "./glsl/glsl_lexer.ll"
+#line 538 "./glsl/glsl_lexer.ll"
{
yylval->n = 0;
return BOOLCONSTANT;
@@ -2719,448 +2737,454 @@
/* Reserved words in GLSL 1.10. */
case 191:
YY_RULE_SETUP
-#line 527 "./glsl/glsl_lexer.ll"
+#line 545 "./glsl/glsl_lexer.ll"
KEYWORD(110, 100, 0, 0, ASM);
YY_BREAK
case 192:
YY_RULE_SETUP
-#line 528 "./glsl/glsl_lexer.ll"
+#line 546 "./glsl/glsl_lexer.ll"
KEYWORD(110, 100, 0, 0, CLASS);
YY_BREAK
case 193:
YY_RULE_SETUP
-#line 529 "./glsl/glsl_lexer.ll"
+#line 547 "./glsl/glsl_lexer.ll"
KEYWORD(110, 100, 0, 0, UNION);
YY_BREAK
case 194:
YY_RULE_SETUP
-#line 530 "./glsl/glsl_lexer.ll"
+#line 548 "./glsl/glsl_lexer.ll"
KEYWORD(110, 100, 0, 0, ENUM);
YY_BREAK
case 195:
YY_RULE_SETUP
-#line 531 "./glsl/glsl_lexer.ll"
+#line 549 "./glsl/glsl_lexer.ll"
KEYWORD(110, 100, 0, 0, TYPEDEF);
YY_BREAK
case 196:
YY_RULE_SETUP
-#line 532 "./glsl/glsl_lexer.ll"
+#line 550 "./glsl/glsl_lexer.ll"
KEYWORD(110, 100, 0, 0, TEMPLATE);
YY_BREAK
case 197:
YY_RULE_SETUP
-#line 533 "./glsl/glsl_lexer.ll"
+#line 551 "./glsl/glsl_lexer.ll"
KEYWORD(110, 100, 0, 0, THIS);
YY_BREAK
case 198:
YY_RULE_SETUP
-#line 534 "./glsl/glsl_lexer.ll"
+#line 552 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(110, 100, 140, 300, yyextra->ARB_uniform_buffer_object_enable, PACKED_TOK);
YY_BREAK
case 199:
YY_RULE_SETUP
-#line 535 "./glsl/glsl_lexer.ll"
+#line 553 "./glsl/glsl_lexer.ll"
KEYWORD(110, 100, 0, 0, GOTO);
YY_BREAK
case 200:
YY_RULE_SETUP
-#line 536 "./glsl/glsl_lexer.ll"
+#line 554 "./glsl/glsl_lexer.ll"
KEYWORD(110, 100, 130, 300, SWITCH);
YY_BREAK
case 201:
YY_RULE_SETUP
-#line 537 "./glsl/glsl_lexer.ll"
+#line 555 "./glsl/glsl_lexer.ll"
KEYWORD(110, 100, 130, 300, DEFAULT);
YY_BREAK
case 202:
YY_RULE_SETUP
-#line 538 "./glsl/glsl_lexer.ll"
+#line 556 "./glsl/glsl_lexer.ll"
KEYWORD(110, 100, 0, 0, INLINE_TOK);
YY_BREAK
case 203:
YY_RULE_SETUP
-#line 539 "./glsl/glsl_lexer.ll"
+#line 557 "./glsl/glsl_lexer.ll"
KEYWORD(110, 100, 0, 0, NOINLINE);
YY_BREAK
case 204:
YY_RULE_SETUP
-#line 540 "./glsl/glsl_lexer.ll"
+#line 558 "./glsl/glsl_lexer.ll"
KEYWORD(110, 100, 0, 0, PUBLIC_TOK);
YY_BREAK
case 205:
YY_RULE_SETUP
-#line 541 "./glsl/glsl_lexer.ll"
+#line 559 "./glsl/glsl_lexer.ll"
KEYWORD(110, 100, 0, 0, STATIC);
YY_BREAK
case 206:
YY_RULE_SETUP
-#line 542 "./glsl/glsl_lexer.ll"
+#line 560 "./glsl/glsl_lexer.ll"
KEYWORD(110, 100, 0, 0, EXTERN);
YY_BREAK
case 207:
YY_RULE_SETUP
-#line 543 "./glsl/glsl_lexer.ll"
+#line 561 "./glsl/glsl_lexer.ll"
KEYWORD(110, 100, 0, 0, EXTERNAL);
YY_BREAK
case 208:
YY_RULE_SETUP
-#line 544 "./glsl/glsl_lexer.ll"
+#line 562 "./glsl/glsl_lexer.ll"
KEYWORD(110, 100, 0, 0, INTERFACE);
YY_BREAK
case 209:
YY_RULE_SETUP
-#line 545 "./glsl/glsl_lexer.ll"
+#line 563 "./glsl/glsl_lexer.ll"
KEYWORD(110, 100, 0, 0, LONG_TOK);
YY_BREAK
case 210:
YY_RULE_SETUP
-#line 546 "./glsl/glsl_lexer.ll"
+#line 564 "./glsl/glsl_lexer.ll"
KEYWORD(110, 100, 0, 0, SHORT_TOK);
YY_BREAK
case 211:
YY_RULE_SETUP
-#line 547 "./glsl/glsl_lexer.ll"
+#line 565 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DOUBLE_TOK);
YY_BREAK
case 212:
YY_RULE_SETUP
-#line 548 "./glsl/glsl_lexer.ll"
+#line 566 "./glsl/glsl_lexer.ll"
KEYWORD(110, 100, 0, 0, HALF);
YY_BREAK
case 213:
YY_RULE_SETUP
-#line 549 "./glsl/glsl_lexer.ll"
+#line 567 "./glsl/glsl_lexer.ll"
KEYWORD(110, 100, 0, 0, FIXED_TOK);
YY_BREAK
case 214:
YY_RULE_SETUP
-#line 550 "./glsl/glsl_lexer.ll"
+#line 568 "./glsl/glsl_lexer.ll"
KEYWORD(110, 100, 0, 0, UNSIGNED);
YY_BREAK
case 215:
YY_RULE_SETUP
-#line 551 "./glsl/glsl_lexer.ll"
+#line 569 "./glsl/glsl_lexer.ll"
KEYWORD(110, 100, 0, 0, INPUT_TOK);
YY_BREAK
case 216:
YY_RULE_SETUP
-#line 552 "./glsl/glsl_lexer.ll"
+#line 570 "./glsl/glsl_lexer.ll"
KEYWORD(110, 100, 0, 0, OUTPUT);
YY_BREAK
case 217:
YY_RULE_SETUP
-#line 553 "./glsl/glsl_lexer.ll"
+#line 571 "./glsl/glsl_lexer.ll"
KEYWORD(110, 100, 0, 0, HVEC2);
YY_BREAK
case 218:
YY_RULE_SETUP
-#line 554 "./glsl/glsl_lexer.ll"
+#line 572 "./glsl/glsl_lexer.ll"
KEYWORD(110, 100, 0, 0, HVEC3);
YY_BREAK
case 219:
YY_RULE_SETUP
-#line 555 "./glsl/glsl_lexer.ll"
+#line 573 "./glsl/glsl_lexer.ll"
KEYWORD(110, 100, 0, 0, HVEC4);
YY_BREAK
case 220:
YY_RULE_SETUP
-#line 556 "./glsl/glsl_lexer.ll"
+#line 574 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DVEC2);
YY_BREAK
case 221:
YY_RULE_SETUP
-#line 557 "./glsl/glsl_lexer.ll"
+#line 575 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DVEC3);
YY_BREAK
case 222:
YY_RULE_SETUP
-#line 558 "./glsl/glsl_lexer.ll"
+#line 576 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DVEC4);
YY_BREAK
case 223:
YY_RULE_SETUP
-#line 559 "./glsl/glsl_lexer.ll"
+#line 577 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT2X2);
YY_BREAK
case 224:
YY_RULE_SETUP
-#line 560 "./glsl/glsl_lexer.ll"
+#line 578 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT3X3);
YY_BREAK
case 225:
YY_RULE_SETUP
-#line 561 "./glsl/glsl_lexer.ll"
+#line 579 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT4X4);
YY_BREAK
case 226:
YY_RULE_SETUP
-#line 562 "./glsl/glsl_lexer.ll"
+#line 580 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT2X2);
YY_BREAK
case 227:
YY_RULE_SETUP
-#line 563 "./glsl/glsl_lexer.ll"
+#line 581 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT2X3);
YY_BREAK
case 228:
YY_RULE_SETUP
-#line 564 "./glsl/glsl_lexer.ll"
+#line 582 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT2X4);
YY_BREAK
case 229:
YY_RULE_SETUP
-#line 565 "./glsl/glsl_lexer.ll"
+#line 583 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT3X2);
YY_BREAK
case 230:
YY_RULE_SETUP
-#line 566 "./glsl/glsl_lexer.ll"
+#line 584 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT3X3);
YY_BREAK
case 231:
YY_RULE_SETUP
-#line 567 "./glsl/glsl_lexer.ll"
+#line 585 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT3X4);
YY_BREAK
case 232:
YY_RULE_SETUP
-#line 568 "./glsl/glsl_lexer.ll"
+#line 586 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT4X2);
YY_BREAK
case 233:
YY_RULE_SETUP
-#line 569 "./glsl/glsl_lexer.ll"
+#line 587 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT4X3);
YY_BREAK
case 234:
YY_RULE_SETUP
-#line 570 "./glsl/glsl_lexer.ll"
+#line 588 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT4X4);
YY_BREAK
case 235:
YY_RULE_SETUP
-#line 571 "./glsl/glsl_lexer.ll"
+#line 589 "./glsl/glsl_lexer.ll"
KEYWORD(110, 100, 0, 0, FVEC2);
YY_BREAK
case 236:
YY_RULE_SETUP
-#line 572 "./glsl/glsl_lexer.ll"
+#line 590 "./glsl/glsl_lexer.ll"
KEYWORD(110, 100, 0, 0, FVEC3);
YY_BREAK
case 237:
YY_RULE_SETUP
-#line 573 "./glsl/glsl_lexer.ll"
+#line 591 "./glsl/glsl_lexer.ll"
KEYWORD(110, 100, 0, 0, FVEC4);
YY_BREAK
case 238:
YY_RULE_SETUP
-#line 574 "./glsl/glsl_lexer.ll"
+#line 592 "./glsl/glsl_lexer.ll"
DEPRECATED_ES_KEYWORD(SAMPLER2DRECT);
YY_BREAK
case 239:
YY_RULE_SETUP
-#line 575 "./glsl/glsl_lexer.ll"
+#line 593 "./glsl/glsl_lexer.ll"
KEYWORD(110, 100, 0, 0, SAMPLER3DRECT);
YY_BREAK
case 240:
YY_RULE_SETUP
-#line 576 "./glsl/glsl_lexer.ll"
+#line 594 "./glsl/glsl_lexer.ll"
DEPRECATED_ES_KEYWORD(SAMPLER2DRECTSHADOW);
YY_BREAK
case 241:
YY_RULE_SETUP
-#line 577 "./glsl/glsl_lexer.ll"
+#line 595 "./glsl/glsl_lexer.ll"
KEYWORD(110, 100, 0, 0, SIZEOF);
YY_BREAK
case 242:
YY_RULE_SETUP
-#line 578 "./glsl/glsl_lexer.ll"
+#line 596 "./glsl/glsl_lexer.ll"
KEYWORD(110, 100, 0, 0, CAST);
YY_BREAK
case 243:
YY_RULE_SETUP
-#line 579 "./glsl/glsl_lexer.ll"
+#line 597 "./glsl/glsl_lexer.ll"
KEYWORD(110, 100, 0, 0, NAMESPACE);
YY_BREAK
case 244:
YY_RULE_SETUP
-#line 580 "./glsl/glsl_lexer.ll"
+#line 598 "./glsl/glsl_lexer.ll"
KEYWORD(110, 100, 0, 0, USING);
YY_BREAK
/* Additional reserved words in GLSL 1.20. */
case 245:
YY_RULE_SETUP
-#line 583 "./glsl/glsl_lexer.ll"
+#line 601 "./glsl/glsl_lexer.ll"
KEYWORD(120, 100, 130, 100, LOWP);
YY_BREAK
case 246:
YY_RULE_SETUP
-#line 584 "./glsl/glsl_lexer.ll"
+#line 602 "./glsl/glsl_lexer.ll"
KEYWORD(120, 100, 130, 100, MEDIUMP);
YY_BREAK
case 247:
YY_RULE_SETUP
-#line 585 "./glsl/glsl_lexer.ll"
+#line 603 "./glsl/glsl_lexer.ll"
KEYWORD(120, 100, 130, 100, HIGHP);
YY_BREAK
case 248:
YY_RULE_SETUP
-#line 586 "./glsl/glsl_lexer.ll"
+#line 604 "./glsl/glsl_lexer.ll"
KEYWORD(120, 100, 130, 100, PRECISION);
YY_BREAK
/* Additional reserved words in GLSL 1.30. */
case 249:
YY_RULE_SETUP
-#line 589 "./glsl/glsl_lexer.ll"
+#line 607 "./glsl/glsl_lexer.ll"
KEYWORD(130, 300, 130, 300, CASE);
YY_BREAK
case 250:
YY_RULE_SETUP
-#line 590 "./glsl/glsl_lexer.ll"
+#line 608 "./glsl/glsl_lexer.ll"
KEYWORD(130, 300, 0, 0, COMMON);
YY_BREAK
case 251:
YY_RULE_SETUP
-#line 591 "./glsl/glsl_lexer.ll"
+#line 609 "./glsl/glsl_lexer.ll"
KEYWORD(130, 300, 0, 0, PARTITION);
YY_BREAK
case 252:
YY_RULE_SETUP
-#line 592 "./glsl/glsl_lexer.ll"
+#line 610 "./glsl/glsl_lexer.ll"
KEYWORD(130, 300, 0, 0, ACTIVE);
YY_BREAK
case 253:
YY_RULE_SETUP
-#line 593 "./glsl/glsl_lexer.ll"
+#line 611 "./glsl/glsl_lexer.ll"
KEYWORD(130, 100, 0, 0, SUPERP);
YY_BREAK
case 254:
YY_RULE_SETUP
-#line 594 "./glsl/glsl_lexer.ll"
+#line 612 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(130, 300, 140, 320, yyextra->EXT_texture_buffer_enable || yyextra->OES_texture_buffer_enable, SAMPLERBUFFER);
YY_BREAK
case 255:
YY_RULE_SETUP
-#line 595 "./glsl/glsl_lexer.ll"
+#line 613 "./glsl/glsl_lexer.ll"
KEYWORD(130, 300, 0, 0, FILTER);
YY_BREAK
case 256:
YY_RULE_SETUP
-#line 596 "./glsl/glsl_lexer.ll"
+#line 614 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(130, 0, 140, 0, yyextra->ARB_uniform_buffer_object_enable && !yyextra->es_shader, ROW_MAJOR);
YY_BREAK
/* Additional reserved words in GLSL 1.40 */
case 257:
YY_RULE_SETUP
-#line 599 "./glsl/glsl_lexer.ll"
+#line 617 "./glsl/glsl_lexer.ll"
KEYWORD(140, 300, 140, 0, ISAMPLER2DRECT);
YY_BREAK
case 258:
YY_RULE_SETUP
-#line 600 "./glsl/glsl_lexer.ll"
+#line 618 "./glsl/glsl_lexer.ll"
KEYWORD(140, 300, 140, 0, USAMPLER2DRECT);
YY_BREAK
case 259:
YY_RULE_SETUP
-#line 601 "./glsl/glsl_lexer.ll"
+#line 619 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(140, 300, 140, 320, yyextra->EXT_texture_buffer_enable || yyextra->OES_texture_buffer_enable, ISAMPLERBUFFER);
YY_BREAK
case 260:
YY_RULE_SETUP
-#line 602 "./glsl/glsl_lexer.ll"
+#line 620 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(140, 300, 140, 320, yyextra->EXT_texture_buffer_enable || yyextra->OES_texture_buffer_enable, USAMPLERBUFFER);
YY_BREAK
/* Additional reserved words in GLSL ES 3.00 */
case 261:
YY_RULE_SETUP
-#line 605 "./glsl/glsl_lexer.ll"
+#line 623 "./glsl/glsl_lexer.ll"
KEYWORD(420, 300, 0, 0, RESOURCE);
YY_BREAK
case 262:
YY_RULE_SETUP
-#line 606 "./glsl/glsl_lexer.ll"
+#line 624 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(400, 300, 400, 320, yyextra->ARB_gpu_shader5_enable || yyextra->OES_shader_multisample_interpolation_enable, SAMPLE);
YY_BREAK
case 263:
YY_RULE_SETUP
-#line 607 "./glsl/glsl_lexer.ll"
+#line 625 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(400, 300, 400, 0, yyextra->ARB_shader_subroutine_enable, SUBROUTINE);
YY_BREAK
/* Additional words for ARB_gpu_shader_int64 */
case 264:
YY_RULE_SETUP
-#line 610 "./glsl/glsl_lexer.ll"
+#line 628 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(0, 0, 0, 0, yyextra->ARB_gpu_shader_int64_enable, INT64_TOK);
YY_BREAK
case 265:
YY_RULE_SETUP
-#line 611 "./glsl/glsl_lexer.ll"
+#line 629 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(0, 0, 0, 0, yyextra->ARB_gpu_shader_int64_enable, I64VEC2);
YY_BREAK
case 266:
YY_RULE_SETUP
-#line 612 "./glsl/glsl_lexer.ll"
+#line 630 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(0, 0, 0, 0, yyextra->ARB_gpu_shader_int64_enable, I64VEC3);
YY_BREAK
case 267:
YY_RULE_SETUP
-#line 613 "./glsl/glsl_lexer.ll"
+#line 631 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(0, 0, 0, 0, yyextra->ARB_gpu_shader_int64_enable, I64VEC4);
YY_BREAK
case 268:
YY_RULE_SETUP
-#line 615 "./glsl/glsl_lexer.ll"
+#line 633 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(0, 0, 0, 0, yyextra->ARB_gpu_shader_int64_enable, UINT64_TOK);
YY_BREAK
case 269:
YY_RULE_SETUP
-#line 616 "./glsl/glsl_lexer.ll"
+#line 634 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(0, 0, 0, 0, yyextra->ARB_gpu_shader_int64_enable, U64VEC2);
YY_BREAK
case 270:
YY_RULE_SETUP
-#line 617 "./glsl/glsl_lexer.ll"
+#line 635 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(0, 0, 0, 0, yyextra->ARB_gpu_shader_int64_enable, U64VEC3);
YY_BREAK
case 271:
YY_RULE_SETUP
-#line 618 "./glsl/glsl_lexer.ll"
+#line 636 "./glsl/glsl_lexer.ll"
KEYWORD_WITH_ALT(0, 0, 0, 0, yyextra->ARB_gpu_shader_int64_enable, U64VEC4);
YY_BREAK
case 272:
YY_RULE_SETUP
-#line 620 "./glsl/glsl_lexer.ll"
+#line 638 "./glsl/glsl_lexer.ll"
{
struct _mesa_glsl_parse_state *state = yyextra;
void *ctx = state->linalloc;
- if (state->es_shader && strlen(yytext) > 1024) {
+ if (state->es_shader && yyleng > 1024) {
_mesa_glsl_error(yylloc, state,
"Identifier `%s' exceeds 1024 characters",
yytext);
} else {
- yylval->identifier = linear_strdup(ctx, yytext);
+ /* We're not doing linear_strdup here, to avoid an implicit call
+ * on strlen() for the length of the string, as this is already
+ * found by flex and stored in yyleng
+ */
+ char *id = (char *) linear_alloc_child(ctx, yyleng + 1);
+ memcpy(id, yytext, yyleng + 1);
+ yylval->identifier = id;
}
return classify_identifier(state, yytext);
}
YY_BREAK
case 273:
YY_RULE_SETUP
-#line 633 "./glsl/glsl_lexer.ll"
+#line 657 "./glsl/glsl_lexer.ll"
{ struct _mesa_glsl_parse_state *state = yyextra;
state->is_field = true;
return DOT_TOK; }
YY_BREAK
case 274:
YY_RULE_SETUP
-#line 637 "./glsl/glsl_lexer.ll"
+#line 661 "./glsl/glsl_lexer.ll"
{ return yytext[0]; }
YY_BREAK
case 275:
YY_RULE_SETUP
-#line 639 "./glsl/glsl_lexer.ll"
+#line 663 "./glsl/glsl_lexer.ll"
YY_FATAL_ERROR( "flex scanner jammed" );
YY_BREAK
-#line 3164 "glsl/glsl_lexer.cpp"
+#line 3188 "glsl/glsl_lexer.cpp"
case YY_STATE_EOF(INITIAL):
case YY_STATE_EOF(PP):
case YY_STATE_EOF(PRAGMA):
@@ -4321,7 +4345,7 @@
#define YYTABLES_NAME "yytables"
-#line 639 "./glsl/glsl_lexer.ll"
+#line 663 "./glsl/glsl_lexer.ll"
diff -Nru mesa-17.2.4/src/compiler/glsl/glsl_lexer.ll mesa-17.3.3/src/compiler/glsl/glsl_lexer.ll
--- mesa-17.2.4/src/compiler/glsl/glsl_lexer.ll 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/glsl_lexer.ll 2018-01-18 21:30:28.000000000 +0000
@@ -34,6 +34,7 @@
#define YY_NO_UNISTD_H
#endif
+#define YY_NO_INPUT
#define YY_USER_ACTION \
do { \
yylloc->first_column = yycolumn + 1; \
@@ -80,8 +81,13 @@
"illegal use of reserved word `%s'", yytext); \
return ERROR_TOK; \
} else { \
- void *mem_ctx = yyextra->linalloc; \
- yylval->identifier = linear_strdup(mem_ctx, yytext); \
+ /* We're not doing linear_strdup here, to avoid an implicit \
+ * call on strlen() for the length of the string, as this is \
+ * already found by flex and stored in yyleng */ \
+ void *mem_ctx = yyextra->linalloc; \
+ char *id = (char *) linear_alloc_child(mem_ctx, yyleng + 1); \
+ memcpy(id, yytext, yyleng + 1); \
+ yylval->identifier = id; \
return classify_identifier(yyextra, yytext); \
} \
} while (0)
@@ -260,8 +266,14 @@
[ \t\r]* { }
: return COLON;
[_a-zA-Z][_a-zA-Z0-9]* {
- void *mem_ctx = yyextra->linalloc;
- yylval->identifier = linear_strdup(mem_ctx, yytext);
+ /* We're not doing linear_strdup here, to avoid an implicit call
+ * on strlen() for the length of the string, as this is already
+ * found by flex and stored in yyleng
+ */
+ void *mem_ctx = yyextra->linalloc;
+ char *id = (char *) linear_alloc_child(mem_ctx, yyleng + 1);
+ memcpy(id, yytext, yyleng + 1);
+ yylval->identifier = id;
return IDENTIFIER;
}
[1-9][0-9]* {
@@ -448,8 +460,14 @@
|| yyextra->ARB_tessellation_shader_enable) {
return LAYOUT_TOK;
} else {
- void *mem_ctx = yyextra->linalloc;
- yylval->identifier = linear_strdup(mem_ctx, yytext);
+ /* We're not doing linear_strdup here, to avoid an implicit call
+ * on strlen() for the length of the string, as this is already
+ * found by flex and stored in yyleng
+ */
+ void *mem_ctx = yyextra->linalloc;
+ char *id = (char *) linear_alloc_child(mem_ctx, yyleng + 1);
+ memcpy(id, yytext, yyleng + 1);
+ yylval->identifier = id;
return classify_identifier(yyextra, yytext);
}
}
@@ -620,12 +638,18 @@
[_a-zA-Z][_a-zA-Z0-9]* {
struct _mesa_glsl_parse_state *state = yyextra;
void *ctx = state->linalloc;
- if (state->es_shader && strlen(yytext) > 1024) {
+ if (state->es_shader && yyleng > 1024) {
_mesa_glsl_error(yylloc, state,
"Identifier `%s' exceeds 1024 characters",
yytext);
} else {
- yylval->identifier = linear_strdup(ctx, yytext);
+ /* We're not doing linear_strdup here, to avoid an implicit call
+ * on strlen() for the length of the string, as this is already
+ * found by flex and stored in yyleng
+ */
+ char *id = (char *) linear_alloc_child(ctx, yyleng + 1);
+ memcpy(id, yytext, yyleng + 1);
+ yylval->identifier = id;
}
return classify_identifier(state, yytext);
}
diff -Nru mesa-17.2.4/src/compiler/glsl/glsl_parser_extras.cpp mesa-17.3.3/src/compiler/glsl/glsl_parser_extras.cpp
--- mesa-17.2.4/src/compiler/glsl/glsl_parser_extras.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/glsl_parser_extras.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -54,9 +54,9 @@
static const unsigned known_desktop_glsl_versions[] =
- { 110, 120, 130, 140, 150, 330, 400, 410, 420, 430, 440, 450 };
+ { 110, 120, 130, 140, 150, 330, 400, 410, 420, 430, 440, 450, 460 };
static const unsigned known_desktop_gl_versions[] =
- { 20, 21, 30, 31, 32, 33, 40, 41, 42, 43, 44, 45 };
+ { 20, 21, 30, 31, 32, 33, 40, 41, 42, 43, 44, 45, 46 };
_mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx,
@@ -1079,7 +1079,7 @@
}
}
-void
+static void
_mesa_ast_type_qualifier_print(const struct ast_type_qualifier *q)
{
if (q->is_subroutine_decl())
@@ -1863,11 +1863,53 @@
shader->bound_image = state->bound_image_specified;
}
+/* Add the functions and non-temporary variables in shader_ir to dest.  When
+ * src is non-NULL, also copy its gl_PerVertex in/out interfaces; pass NULL to
+ * copy only the symbols found in the exec_list. */
+void
+_mesa_glsl_copy_symbols_from_table(struct exec_list *shader_ir,
+ struct glsl_symbol_table *src,
+ struct glsl_symbol_table *dest)
+{
+ foreach_in_list (ir_instruction, ir, shader_ir) {
+ switch (ir->ir_type) {
+ case ir_type_function:
+ dest->add_function((ir_function *) ir);
+ break;
+ case ir_type_variable: {
+ ir_variable *const var = (ir_variable *) ir;
+
+ if (var->data.mode != ir_var_temporary)
+ dest->add_variable(var);
+ break;
+ }
+ default:
+ break;
+ }
+ }
+
+ if (src != NULL) {
+ /* Explicitly copy the gl_PerVertex interface definitions because these
+ * are needed to check they are the same during the interstage link.
+ * They can’t necessarily be found via the exec_list because the members
+ * might not be referenced. The GL spec still requires that they match
+ * in that case.
+ */
+ const glsl_type *iface =
+ src->get_interface("gl_PerVertex", ir_var_shader_in);
+ if (iface)
+ dest->add_interface(iface->name, iface, ir_var_shader_in);
+
+ iface = src->get_interface("gl_PerVertex", ir_var_shader_out);
+ if (iface)
+ dest->add_interface(iface->name, iface, ir_var_shader_out);
+ }
+}
+
extern "C" {
static void
-assign_subroutine_indexes(struct gl_shader *sh,
- struct _mesa_glsl_parse_state *state)
+assign_subroutine_indexes(struct _mesa_glsl_parse_state *state)
{
int j, k;
int index = 0;
@@ -1937,6 +1979,7 @@
static void
opt_shader_and_create_symbol_table(struct gl_context *ctx,
+ struct glsl_symbol_table *source_symbols,
struct gl_shader *shader)
{
assert(shader->CompileStatus != compile_failure &&
@@ -1994,24 +2037,8 @@
* We don't have to worry about types or interface-types here because those
* are fly-weights that are looked up by glsl_type.
*/
- foreach_in_list (ir_instruction, ir, shader->ir) {
- switch (ir->ir_type) {
- case ir_type_function:
- shader->symbols->add_function((ir_function *) ir);
- break;
- case ir_type_variable: {
- ir_variable *const var = (ir_variable *) ir;
-
- if (var->data.mode != ir_var_temporary)
- shader->symbols->add_variable(var);
- break;
- }
- default:
- break;
- }
- }
-
- _mesa_glsl_initialize_derived_variables(ctx, shader);
+ _mesa_glsl_copy_symbols_from_table(shader->ir, source_symbols,
+ shader->symbols);
}
void
@@ -2048,7 +2075,9 @@
return;
if (shader->CompileStatus == compiled_no_opts) {
- opt_shader_and_create_symbol_table(ctx, shader);
+ opt_shader_and_create_symbol_table(ctx,
+ NULL, /* source_symbols */
+ shader);
shader->CompileStatus = compile_success;
return;
}
@@ -2105,11 +2134,11 @@
shader->IsES = state->es_shader;
if (!state->error && !shader->ir->is_empty()) {
- assign_subroutine_indexes(shader, state);
+ assign_subroutine_indexes(state);
lower_subroutine(shader->ir, state);
if (!ctx->Cache || force_recompile)
- opt_shader_and_create_symbol_table(ctx, shader);
+ opt_shader_and_create_symbol_table(ctx, state->symbols, shader);
else {
reparent_ir(shader->ir, shader->ir);
shader->CompileStatus = compiled_no_opts;
@@ -2217,8 +2246,13 @@
if (options->MaxUnrollIterations) {
loop_state *ls = analyze_loop_variables(ir);
if (ls->loop_found) {
- OPT(set_loop_controls, ir, ls);
- OPT(unroll_loops, ir, ls, options);
+ bool loop_progress = unroll_loops(ir, ls, options);
+ while (loop_progress) {
+ loop_progress = false;
+ loop_progress |= do_constant_propagation(ir);
+ loop_progress |= do_if_simplification(ir);
+ }
+ progress |= loop_progress;
}
delete ls;
}
diff -Nru mesa-17.2.4/src/compiler/glsl/glsl_parser_extras.h mesa-17.3.3/src/compiler/glsl/glsl_parser_extras.h
--- mesa-17.2.4/src/compiler/glsl/glsl_parser_extras.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/glsl_parser_extras.h 2018-01-18 21:30:28.000000000 +0000
@@ -354,7 +354,7 @@
unsigned ver;
uint8_t gl_ver;
bool es;
- } supported_versions[16];
+ } supported_versions[17];
bool es_shader;
bool compat_shader;
@@ -948,6 +948,11 @@
extern void _mesa_destroy_shader_compiler(void);
extern void _mesa_destroy_shader_compiler_caches(void);
+extern void
+_mesa_glsl_copy_symbols_from_table(struct exec_list *shader_ir,
+ struct glsl_symbol_table *src,
+ struct glsl_symbol_table *dest);
+
#ifdef __cplusplus
}
#endif
diff -Nru mesa-17.2.4/src/compiler/glsl/glsl_to_nir.cpp mesa-17.3.3/src/compiler/glsl/glsl_to_nir.cpp
--- mesa-17.2.4/src/compiler/glsl/glsl_to_nir.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/glsl_to_nir.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -163,15 +163,22 @@
* two locations. For instance, if we have in the IR code a dvec3 attr0 in
* location 0 and vec4 attr1 in location 1, in NIR attr0 will use
* locations/slots 0 and 1, and attr1 will use location/slot 2 */
- if (shader->stage == MESA_SHADER_VERTEX)
+ if (shader->info.stage == MESA_SHADER_VERTEX)
nir_remap_attributes(shader);
shader->info.name = ralloc_asprintf(shader, "GLSL%d", shader_prog->Name);
if (shader_prog->Label)
shader->info.label = ralloc_strdup(shader, shader_prog->Label);
+
+ /* Check for transform feedback varyings specified via the API */
shader->info.has_transform_feedback_varyings =
shader_prog->TransformFeedback.NumVarying > 0;
+ /* Check for transform feedback varyings specified in the Shader */
+ if (shader_prog->last_vert_prog)
+ shader->info.has_transform_feedback_varyings |=
+ shader_prog->last_vert_prog->sh.LinkedTransformFeedback->NumVarying > 0;
+
return shader;
}
@@ -278,24 +285,13 @@
break;
case GLSL_TYPE_STRUCT:
- ret->elements = ralloc_array(mem_ctx, nir_constant *,
- ir->type->length);
- ret->num_elements = ir->type->length;
-
- i = 0;
- foreach_in_list(ir_constant, field, &ir->components) {
- ret->elements[i] = constant_copy(field, mem_ctx);
- i++;
- }
- break;
-
case GLSL_TYPE_ARRAY:
ret->elements = ralloc_array(mem_ctx, nir_constant *,
ir->type->length);
ret->num_elements = ir->type->length;
for (i = 0; i < ir->type->length; i++)
- ret->elements[i] = constant_copy(ir->array_elements[i], mem_ctx);
+ ret->elements[i] = constant_copy(ir->const_elements[i], mem_ctx);
break;
default:
@@ -319,6 +315,7 @@
var->type = ir->type;
var->name = ralloc_strdup(var, ir->name);
+ var->data.always_active_io = ir->data.always_active_io;
var->data.read_only = ir->data.read_only;
var->data.centroid = ir->data.centroid;
var->data.sample = ir->data.sample;
@@ -344,12 +341,12 @@
break;
case ir_var_shader_in:
- if (shader->stage == MESA_SHADER_FRAGMENT &&
+ if (shader->info.stage == MESA_SHADER_FRAGMENT &&
ir->data.location == VARYING_SLOT_FACE) {
/* For whatever reason, GLSL IR makes gl_FrontFacing an input */
var->data.location = SYSTEM_VALUE_FRONT_FACE;
var->data.mode = nir_var_system_value;
- } else if (shader->stage == MESA_SHADER_GEOMETRY &&
+ } else if (shader->info.stage == MESA_SHADER_GEOMETRY &&
ir->data.location == VARYING_SLOT_PRIMITIVE_ID) {
/* For whatever reason, GLSL IR makes gl_PrimitiveIDIn an input */
var->data.location = SYSTEM_VALUE_PRIMITIVE_ID;
@@ -357,7 +354,7 @@
} else {
var->data.mode = nir_var_shader_in;
- if (shader->stage == MESA_SHADER_TESS_EVAL &&
+ if (shader->info.stage == MESA_SHADER_TESS_EVAL &&
(ir->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
ir->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)) {
var->data.compact = ir->type->without_array()->is_scalar();
@@ -375,7 +372,7 @@
case ir_var_shader_out:
var->data.mode = nir_var_shader_out;
- if (shader->stage == MESA_SHADER_TESS_CTRL &&
+ if (shader->info.stage == MESA_SHADER_TESS_CTRL &&
(ir->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
ir->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)) {
var->data.compact = ir->type->without_array()->is_scalar();
@@ -1485,11 +1482,11 @@
}
nir_ssa_def *srcs[4];
- for (unsigned i = 0; i < ir->get_num_operands(); i++)
+ for (unsigned i = 0; i < ir->num_operands; i++)
srcs[i] = evaluate_rvalue(ir->operands[i]);
glsl_base_type types[4];
- for (unsigned i = 0; i < ir->get_num_operands(); i++)
+ for (unsigned i = 0; i < ir->num_operands; i++)
if (supports_ints)
types[i] = ir->operands[i]->type->base_type;
else
@@ -2196,7 +2193,7 @@
{
ir->record->accept(this);
- int field_index = this->deref_tail->type->field_index(ir->field);
+ int field_index = ir->field_idx;
assert(field_index >= 0);
nir_deref_struct *deref = nir_deref_struct_create(this->deref_tail, field_index);
diff -Nru mesa-17.2.4/src/compiler/glsl/ir_builder_print_visitor.cpp mesa-17.3.3/src/compiler/glsl/ir_builder_print_visitor.cpp
--- mesa-17.2.4/src/compiler/glsl/ir_builder_print_visitor.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/ir_builder_print_visitor.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -117,7 +117,7 @@
case ir_type_expression: {
const ir_expression *expr = (ir_expression *) ir;
- for (unsigned i = 0; i < expr->get_num_operands(); i++) {
+ for (unsigned i = 0; i < expr->num_operands; i++) {
if (!is_simple_operand(expr->operands[i], depth - 1))
return false;
}
@@ -485,7 +485,7 @@
return visit_continue;
if (rhs_expr != NULL) {
- const unsigned num_op = rhs_expr->get_num_operands();
+ const unsigned num_op = rhs_expr->num_operands;
for (unsigned i = 0; i < num_op; i++) {
if (is_simple_operand(rhs_expr->operands[i]))
@@ -538,7 +538,7 @@
void
ir_builder_print_visitor::print_without_declaration(const ir_expression *ir)
{
- const unsigned num_op = ir->get_num_operands();
+ const unsigned num_op = ir->num_operands;
static const char *const arity[] = {
"", "unop", "binop", "triop", "quadop"
@@ -594,7 +594,7 @@
ir_visitor_status
ir_builder_print_visitor::visit_enter(ir_expression *ir)
{
- const unsigned num_op = ir->get_num_operands();
+ const unsigned num_op = ir->num_operands;
for (unsigned i = 0; i < num_op; i++) {
if (is_simple_operand(ir->operands[i]))
diff -Nru mesa-17.2.4/src/compiler/glsl/ir_clone.cpp mesa-17.3.3/src/compiler/glsl/ir_clone.cpp
--- mesa-17.2.4/src/compiler/glsl/ir_clone.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/ir_clone.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -160,7 +160,7 @@
ir_rvalue *op[ARRAY_SIZE(this->operands)] = { NULL, };
unsigned int i;
- for (i = 0; i < get_num_operands(); i++) {
+ for (i = 0; i < num_operands; i++) {
op[i] = this->operands[i]->clone(mem_ctx, ht);
}
@@ -194,8 +194,10 @@
ir_dereference_record *
ir_dereference_record::clone(void *mem_ctx, struct hash_table *ht) const
{
+ const char *field_name =
+ this->record->type->fields.structure[this->field_idx].name;
return new(mem_ctx) ir_dereference_record(this->record->clone(mem_ctx, ht),
- this->field);
+ field_name);
}
ir_texture *
@@ -343,28 +345,14 @@
case GLSL_TYPE_IMAGE:
return new(mem_ctx) ir_constant(this->type, &this->value);
- case GLSL_TYPE_STRUCT: {
- ir_constant *c = new(mem_ctx) ir_constant;
-
- c->type = this->type;
- for (const exec_node *node = this->components.get_head_raw()
- ; !node->is_tail_sentinel()
- ; node = node->next) {
- ir_constant *const orig = (ir_constant *) node;
-
- c->components.push_tail(orig->clone(mem_ctx, NULL));
- }
-
- return c;
- }
-
+ case GLSL_TYPE_STRUCT:
case GLSL_TYPE_ARRAY: {
ir_constant *c = new(mem_ctx) ir_constant;
c->type = this->type;
- c->array_elements = ralloc_array(c, ir_constant *, this->type->length);
+ c->const_elements = ralloc_array(c, ir_constant *, this->type->length);
for (unsigned i = 0; i < this->type->length; i++) {
- c->array_elements[i] = this->array_elements[i]->clone(mem_ctx, NULL);
+ c->const_elements[i] = this->const_elements[i]->clone(mem_ctx, NULL);
}
return c;
}
diff -Nru mesa-17.2.4/src/compiler/glsl/ir_constant_expression.cpp mesa-17.3.3/src/compiler/glsl/ir_constant_expression.cpp
--- mesa-17.2.4/src/compiler/glsl/ir_constant_expression.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/ir_constant_expression.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -485,7 +485,7 @@
*/
assert(suboffset == 0);
- store = substore->get_record_field(dr->field);
+ store = substore->get_record_field(dr->field_idx);
break;
}
@@ -509,7 +509,7 @@
ir_constant *
-ir_rvalue::constant_expression_value(struct hash_table *)
+ir_rvalue::constant_expression_value(void *, struct hash_table *)
{
assert(this->type->is_error());
return NULL;
@@ -628,8 +628,11 @@
}
ir_constant *
-ir_expression::constant_expression_value(struct hash_table *variable_context)
+ir_expression::constant_expression_value(void *mem_ctx,
+ struct hash_table *variable_context)
{
+ assert(mem_ctx);
+
if (this->type->is_error())
return NULL;
@@ -638,8 +641,10 @@
memset(&data, 0, sizeof(data));
- for (unsigned operand = 0; operand < this->get_num_operands(); operand++) {
- op[operand] = this->operands[operand]->constant_expression_value(variable_context);
+ for (unsigned operand = 0; operand < this->num_operands; operand++) {
+ op[operand] =
+ this->operands[operand]->constant_expression_value(mem_ctx,
+ variable_context);
if (!op[operand])
return NULL;
}
@@ -676,16 +681,14 @@
components = op[1]->type->components();
}
- void *ctx = ralloc_parent(this);
-
/* Handle array operations here, rather than below. */
if (op[0]->type->is_array()) {
assert(op[1] != NULL && op[1]->type->is_array());
switch (this->operation) {
case ir_binop_all_equal:
- return new(ctx) ir_constant(op[0]->has_value(op[1]));
+ return new(mem_ctx) ir_constant(op[0]->has_value(op[1]));
case ir_binop_any_nequal:
- return new(ctx) ir_constant(!op[0]->has_value(op[1]));
+ return new(mem_ctx) ir_constant(!op[0]->has_value(op[1]));
default:
break;
}
@@ -694,12 +697,12 @@
#include "ir_expression_operation_constant.h"
- return new(ctx) ir_constant(this->type, &data);
+ return new(mem_ctx) ir_constant(this->type, &data);
}
ir_constant *
-ir_texture::constant_expression_value(struct hash_table *)
+ir_texture::constant_expression_value(void *, struct hash_table *)
{
/* texture lookups aren't constant expressions */
return NULL;
@@ -707,9 +710,13 @@
ir_constant *
-ir_swizzle::constant_expression_value(struct hash_table *variable_context)
+ir_swizzle::constant_expression_value(void *mem_ctx,
+ struct hash_table *variable_context)
{
- ir_constant *v = this->val->constant_expression_value(variable_context);
+ assert(mem_ctx);
+
+ ir_constant *v = this->val->constant_expression_value(mem_ctx,
+ variable_context);
if (v != NULL) {
ir_constant_data data = { { 0 } };
@@ -731,17 +738,18 @@
}
}
- void *ctx = ralloc_parent(this);
- return new(ctx) ir_constant(this->type, &data);
+ return new(mem_ctx) ir_constant(this->type, &data);
}
return NULL;
}
ir_constant *
-ir_dereference_variable::constant_expression_value(struct hash_table *variable_context)
+ir_dereference_variable::constant_expression_value(void *mem_ctx,
+ struct hash_table *variable_context)
{
assert(var);
+ assert(mem_ctx);
/* Give priority to the context hashtable, if it exists */
if (variable_context) {
@@ -760,18 +768,20 @@
if (!var->constant_value)
return NULL;
- return var->constant_value->clone(ralloc_parent(var), NULL);
+ return var->constant_value->clone(mem_ctx, NULL);
}
ir_constant *
-ir_dereference_array::constant_expression_value(struct hash_table *variable_context)
+ir_dereference_array::constant_expression_value(void *mem_ctx,
+ struct hash_table *variable_context)
{
- ir_constant *array = this->array->constant_expression_value(variable_context);
- ir_constant *idx = this->array_index->constant_expression_value(variable_context);
+ assert(mem_ctx);
+
+ ir_constant *array = this->array->constant_expression_value(mem_ctx, variable_context);
+ ir_constant *idx = this->array_index->constant_expression_value(mem_ctx, variable_context);
if ((array != NULL) && (idx != NULL)) {
- void *ctx = ralloc_parent(this);
if (array->type->is_matrix()) {
/* Array access of a matrix results in a vector.
*/
@@ -811,14 +821,14 @@
break;
}
- return new(ctx) ir_constant(column_type, &data);
+ return new(mem_ctx) ir_constant(column_type, &data);
} else if (array->type->is_vector()) {
const unsigned component = idx->value.u[0];
- return new(ctx) ir_constant(array, component);
+ return new(mem_ctx) ir_constant(array, component);
} else {
const unsigned index = idx->value.u[0];
- return array->get_array_element(index)->clone(ctx, NULL);
+ return array->get_array_element(index)->clone(mem_ctx, NULL);
}
}
return NULL;
@@ -826,16 +836,19 @@
ir_constant *
-ir_dereference_record::constant_expression_value(struct hash_table *)
+ir_dereference_record::constant_expression_value(void *mem_ctx,
+ struct hash_table *)
{
- ir_constant *v = this->record->constant_expression_value();
+ assert(mem_ctx);
- return (v != NULL) ? v->get_record_field(this->field) : NULL;
+ ir_constant *v = this->record->constant_expression_value(mem_ctx);
+
+ return (v != NULL) ? v->get_record_field(this->field_idx) : NULL;
}
ir_constant *
-ir_assignment::constant_expression_value(struct hash_table *)
+ir_assignment::constant_expression_value(void *, struct hash_table *)
{
/* FINISHME: Handle CEs involving assignment (return RHS) */
return NULL;
@@ -843,23 +856,30 @@
ir_constant *
-ir_constant::constant_expression_value(struct hash_table *)
+ir_constant::constant_expression_value(void *, struct hash_table *)
{
return this;
}
ir_constant *
-ir_call::constant_expression_value(struct hash_table *variable_context)
+ir_call::constant_expression_value(void *mem_ctx, struct hash_table *variable_context)
{
- return this->callee->constant_expression_value(&this->actual_parameters, variable_context);
+ assert(mem_ctx);
+
+ return this->callee->constant_expression_value(mem_ctx,
+ &this->actual_parameters,
+ variable_context);
}
-bool ir_function_signature::constant_expression_evaluate_expression_list(const struct exec_list &body,
+bool ir_function_signature::constant_expression_evaluate_expression_list(void *mem_ctx,
+ const struct exec_list &body,
struct hash_table *variable_context,
ir_constant **result)
{
+ assert(mem_ctx);
+
foreach_in_list(ir_instruction, inst, &body) {
switch(inst->ir_type) {
@@ -874,7 +894,9 @@
case ir_type_assignment: {
ir_assignment *asg = inst->as_assignment();
if (asg->condition) {
- ir_constant *cond = asg->condition->constant_expression_value(variable_context);
+ ir_constant *cond =
+ asg->condition->constant_expression_value(mem_ctx,
+ variable_context);
if (!cond)
return false;
if (!cond->get_bool_component(0))
@@ -887,7 +909,8 @@
if (!constant_referenced(asg->lhs, variable_context, store, offset))
return false;
- ir_constant *value = asg->rhs->constant_expression_value(variable_context);
+ ir_constant *value =
+ asg->rhs->constant_expression_value(mem_ctx, variable_context);
if (!value)
return false;
@@ -899,7 +922,9 @@
/* (return (expression)) */
case ir_type_return:
assert (result);
- *result = inst->as_return()->value->constant_expression_value(variable_context);
+ *result =
+ inst->as_return()->value->constant_expression_value(mem_ctx,
+ variable_context);
return *result != NULL;
/* (call name (ref) (params))*/
@@ -920,7 +945,8 @@
store, offset))
return false;
- ir_constant *value = call->constant_expression_value(variable_context);
+ ir_constant *value =
+ call->constant_expression_value(mem_ctx, variable_context);
if(!value)
return false;
@@ -933,14 +959,18 @@
case ir_type_if: {
ir_if *iif = inst->as_if();
- ir_constant *cond = iif->condition->constant_expression_value(variable_context);
+ ir_constant *cond =
+ iif->condition->constant_expression_value(mem_ctx,
+ variable_context);
if (!cond || !cond->type->is_boolean())
return false;
exec_list &branch = cond->get_bool_component(0) ? iif->then_instructions : iif->else_instructions;
*result = NULL;
- if (!constant_expression_evaluate_expression_list(branch, variable_context, result))
+ if (!constant_expression_evaluate_expression_list(mem_ctx, branch,
+ variable_context,
+ result))
return false;
/* If there was a return in the branch chosen, drop out now. */
@@ -964,8 +994,12 @@
}
ir_constant *
-ir_function_signature::constant_expression_value(exec_list *actual_parameters, struct hash_table *variable_context)
+ir_function_signature::constant_expression_value(void *mem_ctx,
+ exec_list *actual_parameters,
+ struct hash_table *variable_context)
{
+ assert(mem_ctx);
+
const glsl_type *type = this->return_type;
if (type == glsl_type::void_type)
return NULL;
@@ -1000,7 +1034,8 @@
const exec_node *parameter_info = origin ? origin->parameters.get_head_raw() : parameters.get_head_raw();
foreach_in_list(ir_rvalue, n, actual_parameters) {
- ir_constant *constant = n->constant_expression_value(variable_context);
+ ir_constant *constant =
+ n->constant_expression_value(mem_ctx, variable_context);
if (constant == NULL) {
_mesa_hash_table_destroy(deref_hash, NULL);
return NULL;
@@ -1018,8 +1053,9 @@
/* Now run the builtin function until something non-constant
* happens or we get the result.
*/
- if (constant_expression_evaluate_expression_list(origin ? origin->body : body, deref_hash, &result) && result)
- result = result->clone(ralloc_parent(this), NULL);
+ if (constant_expression_evaluate_expression_list(mem_ctx, origin ? origin->body : body, deref_hash, &result) &&
+ result)
+ result = result->clone(mem_ctx, NULL);
_mesa_hash_table_destroy(deref_hash, NULL);
diff -Nru mesa-17.2.4/src/compiler/glsl/ir.cpp mesa-17.3.3/src/compiler/glsl/ir.cpp
--- mesa-17.2.4/src/compiler/glsl/ir.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/ir.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -203,11 +203,16 @@
this->operands[1] = op1;
this->operands[2] = op2;
this->operands[3] = op3;
+ init_num_operands();
+
#ifndef NDEBUG
- int num_operands = get_num_operands(this->operation);
- for (int i = num_operands; i < 4; i++) {
+ for (unsigned i = num_operands; i < 4; i++) {
assert(this->operands[i] == NULL);
}
+
+ for (unsigned i = 0; i < num_operands; i++) {
+ assert(this->operands[i] != NULL);
+ }
#endif
}
@@ -221,6 +226,9 @@
this->operands[3] = NULL;
assert(op <= ir_last_unop);
+ init_num_operands();
+ assert(num_operands == 1);
+ assert(this->operands[0]);
switch (this->operation) {
case ir_unop_bit_not:
@@ -425,6 +433,11 @@
this->operands[3] = NULL;
assert(op > ir_last_unop);
+ init_num_operands();
+ assert(num_operands == 2);
+ for (unsigned i = 0; i < num_operands; i++) {
+ assert(this->operands[i] != NULL);
+ }
switch (this->operation) {
case ir_binop_all_equal:
@@ -519,6 +532,11 @@
this->operands[3] = NULL;
assert(op > ir_last_binop && op <= ir_last_triop);
+ init_num_operands();
+ assert(num_operands == 3);
+ for (unsigned i = 0; i < num_operands; i++) {
+ assert(this->operands[i] != NULL);
+ }
switch (this->operation) {
case ir_triop_fma:
@@ -538,7 +556,11 @@
}
}
-unsigned int
+/**
+ * This is only here for ir_reader to use for testing purposes. Please use
+ * the precomputed num_operands field if you need the number of operands.
+ */
+unsigned
ir_expression::get_num_operands(ir_expression_operation op)
{
assert(op <= ir_last_opcode);
@@ -555,8 +577,7 @@
if (op <= ir_last_quadop)
return 4;
- assert(false);
- return 0;
+ unreachable("Could not calculate number of operands");
}
#include "ir_expression_operation_strings.h"
@@ -606,14 +627,14 @@
ir_constant::ir_constant()
: ir_rvalue(ir_type_constant)
{
- this->array_elements = NULL;
+ this->const_elements = NULL;
}
ir_constant::ir_constant(const struct glsl_type *type,
const ir_constant_data *data)
: ir_rvalue(ir_type_constant)
{
- this->array_elements = NULL;
+ this->const_elements = NULL;
assert((type->base_type >= GLSL_TYPE_UINT)
&& (type->base_type <= GLSL_TYPE_IMAGE));
@@ -716,7 +737,7 @@
ir_constant::ir_constant(const ir_constant *c, unsigned i)
: ir_rvalue(ir_type_constant)
{
- this->array_elements = NULL;
+ this->const_elements = NULL;
this->type = c->type->get_base_type();
switch (this->type->base_type) {
@@ -732,34 +753,25 @@
ir_constant::ir_constant(const struct glsl_type *type, exec_list *value_list)
: ir_rvalue(ir_type_constant)
{
- this->array_elements = NULL;
+ this->const_elements = NULL;
this->type = type;
assert(type->is_scalar() || type->is_vector() || type->is_matrix()
|| type->is_record() || type->is_array());
- if (type->is_array()) {
- this->array_elements = ralloc_array(this, ir_constant *, type->length);
- unsigned i = 0;
- foreach_in_list(ir_constant, value, value_list) {
- assert(value->as_constant() != NULL);
-
- this->array_elements[i++] = value;
- }
- return;
- }
-
/* If the constant is a record, the types of each of the entries in
* value_list must be a 1-for-1 match with the structure components. Each
* entry must also be a constant. Just move the nodes from the value_list
* to the list in the ir_constant.
*/
- /* FINISHME: Should there be some type checking and / or assertions here? */
- /* FINISHME: Should the new constant take ownership of the nodes from
- * FINISHME: value_list, or should it make copies?
- */
- if (type->is_record()) {
- value_list->move_nodes_to(& this->components);
+ if (type->is_array() || type->is_record()) {
+ this->const_elements = ralloc_array(this, ir_constant *, type->length);
+ unsigned i = 0;
+ foreach_in_list(ir_constant, value, value_list) {
+ assert(value->as_constant() != NULL);
+
+ this->const_elements[i++] = value;
+ }
return;
}
@@ -903,16 +915,18 @@
memset(&c->value, 0, sizeof(c->value));
if (type->is_array()) {
- c->array_elements = ralloc_array(c, ir_constant *, type->length);
+ c->const_elements = ralloc_array(c, ir_constant *, type->length);
for (unsigned i = 0; i < type->length; i++)
- c->array_elements[i] = ir_constant::zero(c, type->fields.array);
+ c->const_elements[i] = ir_constant::zero(c, type->fields.array);
}
if (type->is_record()) {
+ c->const_elements = ralloc_array(c, ir_constant *, type->length);
+
for (unsigned i = 0; i < type->length; i++) {
- ir_constant *comp = ir_constant::zero(mem_ctx, type->fields.structure[i].type);
- c->components.push_tail(comp);
+ c->const_elements[i] =
+ ir_constant::zero(mem_ctx, type->fields.structure[i].type);
}
}
@@ -1079,32 +1093,16 @@
else if (i >= this->type->length)
i = this->type->length - 1;
- return array_elements[i];
+ return const_elements[i];
}
ir_constant *
-ir_constant::get_record_field(const char *name)
+ir_constant::get_record_field(int idx)
{
- int idx = this->type->field_index(name);
-
- if (idx < 0)
- return NULL;
-
- if (this->components.is_empty())
- return NULL;
-
- exec_node *node = this->components.get_head_raw();
- for (int i = 0; i < idx; i++) {
- node = node->next;
+ assert(this->type->is_record());
+ assert(idx >= 0 && (unsigned) idx < this->type->length);
- /* If the end of the list is encountered before the element matching the
- * requested field is found, return NULL.
- */
- if (node->is_tail_sentinel())
- return NULL;
- }
-
- return (ir_constant *) node;
+ return const_elements[idx];
}
void
@@ -1150,19 +1148,11 @@
break;
}
- case GLSL_TYPE_STRUCT: {
- assert (src->type == this->type);
- this->components.make_empty();
- foreach_in_list(ir_constant, orig, &src->components) {
- this->components.push_tail(orig->clone(this, NULL));
- }
- break;
- }
-
+ case GLSL_TYPE_STRUCT:
case GLSL_TYPE_ARRAY: {
assert (src->type == this->type);
for (unsigned i = 0; i < this->type->length; i++) {
- this->array_elements[i] = src->array_elements[i]->clone(this, NULL);
+ this->const_elements[i] = src->const_elements[i]->clone(this, NULL);
}
break;
}
@@ -1222,34 +1212,14 @@
if (this->type != c->type)
return false;
- if (this->type->is_array()) {
+ if (this->type->is_array() || this->type->is_record()) {
for (unsigned i = 0; i < this->type->length; i++) {
- if (!this->array_elements[i]->has_value(c->array_elements[i]))
+ if (!this->const_elements[i]->has_value(c->const_elements[i]))
return false;
}
return true;
}
- if (this->type->is_record()) {
- const exec_node *a_node = this->components.get_head_raw();
- const exec_node *b_node = c->components.get_head_raw();
-
- while (!a_node->is_tail_sentinel()) {
- assert(!b_node->is_tail_sentinel());
-
- const ir_constant *const a_field = (ir_constant *) a_node;
- const ir_constant *const b_field = (ir_constant *) b_node;
-
- if (!a_field->has_value(b_field))
- return false;
-
- a_node = a_node->next;
- b_node = b_node->next;
- }
-
- return true;
- }
-
for (unsigned i = 0; i < this->type->components(); i++) {
switch (this->type->base_type) {
case GLSL_TYPE_UINT:
@@ -1431,8 +1401,8 @@
assert(value != NULL);
this->record = value;
- this->field = ralloc_strdup(this, field);
this->type = this->record->type->field_type(field);
+ this->field_idx = this->record->type->field_index(field);
}
@@ -1443,8 +1413,8 @@
void *ctx = ralloc_parent(var);
this->record = new(ctx) ir_dereference_variable(var);
- this->field = ralloc_strdup(this, field);
this->type = this->record->type->field_type(field);
+ this->field_idx = this->record->type->field_index(field);
}
bool
@@ -1950,15 +1920,10 @@
/* The components of aggregate constants are not visited by the normal
* visitor, so steal their values by hand.
*/
- if (constant != NULL) {
- if (constant->type->is_record()) {
- foreach_in_list(ir_constant, field, &constant->components) {
- steal_memory(field, ir);
- }
- } else if (constant->type->is_array()) {
- for (unsigned int i = 0; i < constant->type->length; i++) {
- steal_memory(constant->array_elements[i], ir);
- }
+ if (constant != NULL &&
+ (constant->type->is_array() || constant->type->is_record())) {
+ for (unsigned int i = 0; i < constant->type->length; i++) {
+ steal_memory(constant->const_elements[i], ir);
}
}
diff -Nru mesa-17.2.4/src/compiler/glsl/ir_equals.cpp mesa-17.3.3/src/compiler/glsl/ir_equals.cpp
--- mesa-17.2.4/src/compiler/glsl/ir_equals.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/ir_equals.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -202,7 +202,7 @@
if (operation != other->operation)
return false;
- for (unsigned i = 0; i < get_num_operands(); i++) {
+ for (unsigned i = 0; i < num_operands; i++) {
if (!operands[i]->equals(other->operands[i], ignore))
return false;
}
diff -Nru mesa-17.2.4/src/compiler/glsl/ir_expression_flattening.cpp mesa-17.3.3/src/compiler/glsl/ir_expression_flattening.cpp
--- mesa-17.2.4/src/compiler/glsl/ir_expression_flattening.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/ir_expression_flattening.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -77,9 +77,7 @@
var = new(ctx) ir_variable(ir->type, "flattening_tmp", ir_var_temporary);
base_ir->insert_before(var);
- assign = new(ctx) ir_assignment(new(ctx) ir_dereference_variable(var),
- ir,
- NULL);
+ assign = new(ctx) ir_assignment(new(ctx) ir_dereference_variable(var), ir);
base_ir->insert_before(assign);
*rvalue = new(ctx) ir_dereference_variable(var);
diff -Nru mesa-17.2.4/src/compiler/glsl/ir.h mesa-17.3.3/src/compiler/glsl/ir.h
--- mesa-17.2.4/src/compiler/glsl/ir.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/ir.h 2018-01-18 21:30:28.000000000 +0000
@@ -229,7 +229,8 @@
virtual ir_visitor_status accept(ir_hierarchical_visitor *);
- virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
+ virtual ir_constant *constant_expression_value(void *mem_ctx,
+ struct hash_table *variable_context = NULL);
ir_rvalue *as_rvalue_to_saturate();
@@ -1170,7 +1171,9 @@
* given a list of the actual parameters and the variable context.
* Returns NULL for non-built-ins.
*/
- ir_constant *constant_expression_value(exec_list *actual_parameters, struct hash_table *variable_context);
+ ir_constant *constant_expression_value(void *mem_ctx,
+ exec_list *actual_parameters,
+ struct hash_table *variable_context);
/**
* Get the name of the function for which this is a signature
@@ -1273,7 +1276,8 @@
* Returns false if the expression is not constant, true otherwise,
* and the value in *result if result is non-NULL.
*/
- bool constant_expression_evaluate_expression_list(const struct exec_list &body,
+ bool constant_expression_evaluate_expression_list(void *mem_ctx,
+ const struct exec_list &body,
struct hash_table *variable_context,
ir_constant **result);
};
@@ -1429,7 +1433,8 @@
virtual ir_assignment *clone(void *mem_ctx, struct hash_table *ht) const;
- virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
+ virtual ir_constant *constant_expression_value(void *mem_ctx,
+ struct hash_table *variable_context = NULL);
virtual void accept(ir_visitor *v)
{
@@ -1535,21 +1540,14 @@
* If the expression cannot be constant folded, this method will return
* \c NULL.
*/
- virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
+ virtual ir_constant *constant_expression_value(void *mem_ctx,
+ struct hash_table *variable_context = NULL);
/**
- * Determine the number of operands used by an expression
+ * This is only here for ir_reader to use for testing purposes. Please use
+ * the precomputed num_operands field if you need the number of operands.
*/
- static unsigned int get_num_operands(ir_expression_operation);
-
- /**
- * Determine the number of operands used by an expression
- */
- unsigned int get_num_operands() const
- {
- return (this->operation == ir_quadop_vector)
- ? this->type->vector_elements : get_num_operands(operation);
- }
+ static unsigned get_num_operands(ir_expression_operation);
/**
* Return whether the expression operates on vectors horizontally.
@@ -1579,8 +1577,21 @@
virtual ir_variable *variable_referenced() const;
+ /**
+ * Determine the number of operands used by an expression
+ */
+ void init_num_operands()
+ {
+ if (operation == ir_quadop_vector) {
+ num_operands = this->type->vector_elements;
+ } else {
+ num_operands = get_num_operands(operation);
+ }
+ }
+
ir_expression_operation operation;
ir_rvalue *operands[4];
+ uint8_t num_operands;
};
@@ -1597,7 +1608,6 @@
{
assert(callee->return_type != NULL);
actual_parameters->move_nodes_to(& this->actual_parameters);
- this->use_builtin = callee->is_builtin();
}
ir_call(ir_function_signature *callee,
@@ -1608,12 +1618,12 @@
{
assert(callee->return_type != NULL);
actual_parameters->move_nodes_to(& this->actual_parameters);
- this->use_builtin = callee->is_builtin();
}
virtual ir_call *clone(void *mem_ctx, struct hash_table *ht) const;
- virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
+ virtual ir_constant *constant_expression_value(void *mem_ctx,
+ struct hash_table *variable_context = NULL);
virtual void accept(ir_visitor *v)
{
@@ -1650,9 +1660,6 @@
/* List of ir_rvalue of paramaters passed in this call. */
exec_list actual_parameters;
- /** Should this call only bind to a built-in function? */
- bool use_builtin;
-
/*
* ARB_shader_subroutine support -
* the subroutine uniform variable and array index
@@ -1838,7 +1845,8 @@
virtual ir_texture *clone(void *mem_ctx, struct hash_table *) const;
- virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
+ virtual ir_constant *constant_expression_value(void *mem_ctx,
+ struct hash_table *variable_context = NULL);
virtual void accept(ir_visitor *v)
{
@@ -1935,7 +1943,8 @@
virtual ir_swizzle *clone(void *mem_ctx, struct hash_table *) const;
- virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
+ virtual ir_constant *constant_expression_value(void *mem_ctx,
+ struct hash_table *variable_context = NULL);
/**
* Construct an ir_swizzle from the textual representation. Can fail.
@@ -2001,7 +2010,8 @@
virtual ir_dereference_variable *clone(void *mem_ctx,
struct hash_table *) const;
- virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
+ virtual ir_constant *constant_expression_value(void *mem_ctx,
+ struct hash_table *variable_context = NULL);
virtual bool equals(const ir_instruction *ir,
enum ir_node_type ignore = ir_type_unset) const;
@@ -2048,7 +2058,8 @@
virtual ir_dereference_array *clone(void *mem_ctx,
struct hash_table *) const;
- virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
+ virtual ir_constant *constant_expression_value(void *mem_ctx,
+ struct hash_table *variable_context = NULL);
virtual bool equals(const ir_instruction *ir,
enum ir_node_type ignore = ir_type_unset) const;
@@ -2085,7 +2096,8 @@
virtual ir_dereference_record *clone(void *mem_ctx,
struct hash_table *) const;
- virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
+ virtual ir_constant *constant_expression_value(void *mem_ctx,
+ struct hash_table *variable_context = NULL);
/**
* Get the variable that is ultimately referenced by an r-value
@@ -2103,7 +2115,7 @@
virtual ir_visitor_status accept(ir_hierarchical_visitor *);
ir_rvalue *record;
- const char *field;
+ int field_idx;
};
@@ -2156,7 +2168,8 @@
virtual ir_constant *clone(void *mem_ctx, struct hash_table *) const;
- virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
+ virtual ir_constant *constant_expression_value(void *mem_ctx,
+ struct hash_table *variable_context = NULL);
virtual void accept(ir_visitor *v)
{
@@ -2187,7 +2200,7 @@
ir_constant *get_array_element(unsigned i) const;
- ir_constant *get_record_field(const char *name);
+ ir_constant *get_record_field(int idx);
/**
* Copy the values on another constant at a given offset.
@@ -2249,11 +2262,8 @@
*/
union ir_constant_data value;
- /* Array elements */
- ir_constant **array_elements;
-
- /* Structure fields */
- exec_list components;
+ /* Array elements and structure fields */
+ ir_constant **const_elements;
private:
/**
@@ -2403,10 +2413,6 @@
struct _mesa_glsl_parse_state *state);
extern void
-_mesa_glsl_initialize_derived_variables(struct gl_context *ctx,
- gl_shader *shader);
-
-extern void
reparent_ir(exec_list *list, void *mem_ctx);
extern void
diff -Nru mesa-17.2.4/src/compiler/glsl/ir_hv_accept.cpp mesa-17.3.3/src/compiler/glsl/ir_hv_accept.cpp
--- mesa-17.2.4/src/compiler/glsl/ir_hv_accept.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/ir_hv_accept.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -137,7 +137,7 @@
if (s != visit_continue)
return (s == visit_continue_with_parent) ? visit_continue : s;
- for (unsigned i = 0; i < this->get_num_operands(); i++) {
+ for (unsigned i = 0; i < this->num_operands; i++) {
switch (this->operands[i]->accept(v)) {
case visit_continue:
break;
diff -Nru mesa-17.2.4/src/compiler/glsl/ir_optimization.h mesa-17.3.3/src/compiler/glsl/ir_optimization.h
--- mesa-17.2.4/src/compiler/glsl/ir_optimization.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/ir_optimization.h 2018-01-18 21:30:28.000000000 +0000
@@ -143,10 +143,11 @@
gl_linked_shader *shader);
void lower_output_reads(unsigned stage, exec_list *instructions);
bool lower_packing_builtins(exec_list *instructions, int op_mask);
-void lower_shared_reference(struct gl_linked_shader *shader,
- unsigned *shared_size);
+void lower_shared_reference(struct gl_context *ctx,
+ struct gl_shader_program *prog,
+ struct gl_linked_shader *shader);
void lower_ubo_reference(struct gl_linked_shader *shader,
- bool clamp_block_indices);
+ bool clamp_block_indices, bool use_std430_as_default);
void lower_packed_varyings(void *mem_ctx,
unsigned locations_used,
const uint8_t *components,
@@ -165,14 +166,17 @@
bool lower_tess_level(gl_linked_shader *shader);
bool lower_vertex_id(gl_linked_shader *shader);
+bool lower_cs_derived(gl_linked_shader *shader);
bool lower_blend_equation_advanced(gl_linked_shader *shader);
bool lower_subroutine(exec_list *instructions, struct _mesa_glsl_parse_state *state);
void propagate_invariance(exec_list *instructions);
-ir_rvalue *
-compare_index_block(exec_list *instructions, ir_variable *index,
- unsigned base, unsigned components, void *mem_ctx);
+namespace ir_builder { class ir_factory; };
+
+ir_variable *compare_index_block(ir_builder::ir_factory &body,
+ ir_variable *index,
+ unsigned base, unsigned components);
bool lower_64bit_integer_instructions(exec_list *instructions,
unsigned what_to_lower);
diff -Nru mesa-17.2.4/src/compiler/glsl/ir_print_visitor.cpp mesa-17.3.3/src/compiler/glsl/ir_print_visitor.cpp
--- mesa-17.2.4/src/compiler/glsl/ir_print_visitor.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/ir_print_visitor.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -291,7 +291,7 @@
fprintf(f, " %s ", ir_expression_operation_strings[ir->operation]);
- for (unsigned i = 0; i < ir->get_num_operands(); i++) {
+ for (unsigned i = 0; i < ir->num_operands; i++) {
ir->operands[i]->accept(this);
}
@@ -423,7 +423,10 @@
{
fprintf(f, "(record_ref ");
ir->record->accept(this);
- fprintf(f, " %s) ", ir->field);
+
+ const char *field_name =
+ ir->record->type->fields.structure[ir->field_idx].name;
+ fprintf(f, " %s) ", field_name);
}
@@ -466,13 +469,10 @@
for (unsigned i = 0; i < ir->type->length; i++)
ir->get_array_element(i)->accept(this);
} else if (ir->type->is_record()) {
- ir_constant *value = (ir_constant *) ir->components.get_head();
for (unsigned i = 0; i < ir->type->length; i++) {
fprintf(f, "(%s ", ir->type->fields.structure[i].name);
- value->accept(this);
+ ir->get_record_field(i)->accept(this);
fprintf(f, ")");
-
- value = (ir_constant *) value->next;
}
} else {
for (unsigned i = 0; i < ir->type->components(); i++) {
diff -Nru mesa-17.2.4/src/compiler/glsl/ir_rvalue_visitor.cpp mesa-17.3.3/src/compiler/glsl/ir_rvalue_visitor.cpp
--- mesa-17.2.4/src/compiler/glsl/ir_rvalue_visitor.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/ir_rvalue_visitor.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -39,7 +39,7 @@
{
unsigned int operand;
- for (operand = 0; operand < ir->get_num_operands(); operand++) {
+ for (operand = 0; operand < ir->num_operands; operand++) {
handle_rvalue(&ir->operands[operand]);
}
diff -Nru mesa-17.2.4/src/compiler/glsl/ir_validate.cpp mesa-17.3.3/src/compiler/glsl/ir_validate.cpp
--- mesa-17.2.4/src/compiler/glsl/ir_validate.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/ir_validate.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -36,6 +36,7 @@
#include "ir.h"
#include "ir_hierarchical_visitor.h"
#include "util/hash_table.h"
+#include "util/macros.h"
#include "util/set.h"
#include "compiler/glsl_types.h"
@@ -236,6 +237,14 @@
ir_visitor_status
ir_validate::visit_leave(ir_expression *ir)
{
+ for (unsigned i = ir->num_operands; i < 4; i++) {
+ assert(ir->operands[i] == NULL);
+ }
+
+ for (unsigned i = 0; i < ir->num_operands; i++) {
+ assert(ir->operands[i] != NULL);
+ }
+
switch (ir->operation) {
case ir_unop_bit_not:
assert(ir->operands[0]->type == ir->type);
@@ -1035,7 +1044,7 @@
_mesa_set_add(ir_set, ir);
}
-void
+MAYBE_UNUSED static void
check_node_type(ir_instruction *ir, void *data)
{
(void) data;
diff -Nru mesa-17.2.4/src/compiler/glsl/link_atomics.cpp mesa-17.3.3/src/compiler/glsl/link_atomics.cpp
--- mesa-17.2.4/src/compiler/glsl/link_atomics.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/link_atomics.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -207,7 +207,7 @@
active_atomic_buffer *abs =
find_active_atomic_counters(ctx, prog, &num_buffers);
- prog->data->AtomicBuffers = rzalloc_array(prog, gl_active_atomic_buffer,
+ prog->data->AtomicBuffers = rzalloc_array(prog->data, gl_active_atomic_buffer,
num_buffers);
prog->data->NumAtomicBuffers = num_buffers;
@@ -270,7 +270,7 @@
struct gl_program *gl_prog = prog->_LinkedShaders[j]->Program;
gl_prog->info.num_abos = num_atomic_buffers[j];
gl_prog->sh.AtomicBuffers =
- rzalloc_array(prog, gl_active_atomic_buffer *,
+ rzalloc_array(gl_prog, gl_active_atomic_buffer *,
num_atomic_buffers[j]);
unsigned intra_stage_idx = 0;
diff -Nru mesa-17.2.4/src/compiler/glsl/linker.cpp mesa-17.3.3/src/compiler/glsl/linker.cpp
--- mesa-17.2.4/src/compiler/glsl/linker.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/linker.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -248,14 +248,7 @@
virtual ir_visitor_status visit_leave(ir_dereference_record *ir)
{
- for (unsigned i = 0; i < ir->record->type->length; i++) {
- const struct glsl_struct_field *field =
- &ir->record->type->fields.structure[i];
- if (strcmp(field->name, ir->field) == 0) {
- ir->type = field->type;
- break;
- }
- }
+ ir->type = ir->record->type->fields.structure[ir->field_idx].type;
return visit_continue;
}
};
@@ -435,7 +428,7 @@
if (!ir->variable_referenced()->type->contains_sampler())
return visit_continue;
- if (!ir->array_index->constant_expression_value()) {
+ if (!ir->array_index->constant_expression_value(ralloc_parent(ir))) {
dynamic_sampler_array_indexing = true;
return visit_stop;
}
@@ -682,7 +675,7 @@
*
* \param shader Vertex shader executable to be verified
*/
-void
+static void
validate_vertex_shader_executable(struct gl_shader_program *prog,
struct gl_linked_shader *shader,
struct gl_context *ctx)
@@ -737,7 +730,7 @@
&shader->Program->info.cull_distance_array_size);
}
-void
+static void
validate_tess_eval_shader_executable(struct gl_shader_program *prog,
struct gl_linked_shader *shader,
struct gl_context *ctx)
@@ -756,7 +749,7 @@
*
* \param shader Fragment shader executable to be verified
*/
-void
+static void
validate_fragment_shader_executable(struct gl_shader_program *prog,
struct gl_linked_shader *shader)
{
@@ -782,7 +775,7 @@
*
* \param shader Geometry shader executable to be verified
*/
-void
+static void
validate_geometry_shader_executable(struct gl_shader_program *prog,
struct gl_linked_shader *shader,
struct gl_context *ctx)
@@ -899,7 +892,7 @@
/**
* Perform validation of global variables used across multiple shaders
*/
-void
+static void
cross_validate_globals(struct gl_shader_program *prog,
struct exec_list *ir, glsl_symbol_table *variables,
bool uniforms_only)
@@ -1128,10 +1121,16 @@
if (prog->IsES && (prog->data->Version != 310 ||
!var->get_interface_type()) &&
existing->data.precision != var->data.precision) {
- linker_error(prog, "declarations for %s `%s` have "
- "mismatching precision qualifiers\n",
- mode_string(var), var->name);
- return;
+ if ((existing->data.used && var->data.used) || prog->data->Version >= 300) {
+ linker_error(prog, "declarations for %s `%s` have "
+ "mismatching precision qualifiers\n",
+ mode_string(var), var->name);
+ return;
+ } else {
+ linker_warning(prog, "declarations for %s `%s` have "
+ "mismatching precision qualifiers\n",
+ mode_string(var), var->name);
+ }
}
} else
variables->add_variable(var);
@@ -1142,7 +1141,7 @@
/**
* Perform validation of uniforms used across multiple shader stages
*/
-void
+static void
cross_validate_uniforms(struct gl_shader_program *prog)
{
glsl_symbol_table variables;
@@ -1202,8 +1201,8 @@
}
for (unsigned int j = 0; j < sh_num_blocks; j++) {
- int index = link_cross_validate_uniform_block(prog, &blks, num_blks,
- sh_blks[j]);
+ int index = link_cross_validate_uniform_block(prog->data, &blks,
+ num_blks, sh_blks[j]);
if (index == -1) {
linker_error(prog, "buffer block `%s' has mismatching "
@@ -1262,21 +1261,11 @@
* Populates a shaders symbol table with all global declarations
*/
static void
-populate_symbol_table(gl_linked_shader *sh)
+populate_symbol_table(gl_linked_shader *sh, glsl_symbol_table *symbols)
{
sh->symbols = new(sh) glsl_symbol_table;
- foreach_in_list(ir_instruction, inst, sh->ir) {
- ir_variable *var;
- ir_function *func;
-
- if ((func = inst->as_function()) != NULL) {
- sh->symbols->add_function(func);
- } else if ((var = inst->as_variable()) != NULL) {
- if (var->data.mode != ir_var_temporary)
- sh->symbols->add_variable(var);
- }
- }
+ _mesa_glsl_copy_symbols_from_table(sh->ir, symbols, sh->symbols);
}
@@ -1299,7 +1288,7 @@
* \param instructions Instruction stream where new variable declarations
* should be added.
*/
-void
+static void
remap_variables(ir_instruction *inst, struct gl_linked_shader *target,
hash_table *temps)
{
@@ -1373,7 +1362,7 @@
* is suitable for use as the \c last parameter of a later call to this
* function.
*/
-exec_node *
+static exec_node *
move_non_declarations(exec_list *instructions, exec_node *last,
bool make_copies, gl_linked_shader *target)
{
@@ -1659,7 +1648,6 @@
static void
link_xfb_stride_layout_qualifiers(struct gl_context *ctx,
struct gl_shader_program *prog,
- struct gl_linked_shader *linked_shader,
struct gl_shader **shader_list,
unsigned num_shaders)
{
@@ -1697,7 +1685,6 @@
*/
static void
link_bindless_layout_qualifiers(struct gl_shader_program *prog,
- struct gl_program *gl_prog,
struct gl_shader **shader_list,
unsigned num_shaders)
{
@@ -2277,8 +2264,7 @@
return NULL;
}
- if (!prog->data->cache_fallback)
- _mesa_reference_shader_program_data(ctx, &gl_prog->sh.data, prog->data);
+ _mesa_reference_shader_program_data(ctx, &gl_prog->sh.data, prog->data);
/* Don't use _mesa_reference_program() just take ownership */
linked->Program = gl_prog;
@@ -2293,12 +2279,11 @@
link_cs_input_layout_qualifiers(prog, gl_prog, shader_list, num_shaders);
if (linked->Stage != MESA_SHADER_FRAGMENT)
- link_xfb_stride_layout_qualifiers(ctx, prog, linked, shader_list,
- num_shaders);
+ link_xfb_stride_layout_qualifiers(ctx, prog, shader_list, num_shaders);
- link_bindless_layout_qualifiers(prog, gl_prog, shader_list, num_shaders);
+ link_bindless_layout_qualifiers(prog, shader_list, num_shaders);
- populate_symbol_table(linked);
+ populate_symbol_table(linked, shader_list[0]->symbols);
/* The pointer to the main function in the final linked shader (i.e., the
* copy of the original shader that contained the main function).
@@ -2336,34 +2321,32 @@
v.run(linked->ir);
v.fixup_unnamed_interface_types();
- if (!prog->data->cache_fallback) {
- /* Link up uniform blocks defined within this stage. */
- link_uniform_blocks(mem_ctx, ctx, prog, linked, &ubo_blocks,
- &num_ubo_blocks, &ssbo_blocks, &num_ssbo_blocks);
-
- if (!prog->data->LinkStatus) {
- _mesa_delete_linked_shader(ctx, linked);
- return NULL;
- }
-
- /* Copy ubo blocks to linked shader list */
- linked->Program->sh.UniformBlocks =
- ralloc_array(linked, gl_uniform_block *, num_ubo_blocks);
- ralloc_steal(linked, ubo_blocks);
- for (unsigned i = 0; i < num_ubo_blocks; i++) {
- linked->Program->sh.UniformBlocks[i] = &ubo_blocks[i];
- }
- linked->Program->info.num_ubos = num_ubo_blocks;
-
- /* Copy ssbo blocks to linked shader list */
- linked->Program->sh.ShaderStorageBlocks =
- ralloc_array(linked, gl_uniform_block *, num_ssbo_blocks);
- ralloc_steal(linked, ssbo_blocks);
- for (unsigned i = 0; i < num_ssbo_blocks; i++) {
- linked->Program->sh.ShaderStorageBlocks[i] = &ssbo_blocks[i];
- }
- linked->Program->info.num_ssbos = num_ssbo_blocks;
+ /* Link up uniform blocks defined within this stage. */
+ link_uniform_blocks(mem_ctx, ctx, prog, linked, &ubo_blocks,
+ &num_ubo_blocks, &ssbo_blocks, &num_ssbo_blocks);
+
+ if (!prog->data->LinkStatus) {
+ _mesa_delete_linked_shader(ctx, linked);
+ return NULL;
+ }
+
+ /* Copy ubo blocks to linked shader list */
+ linked->Program->sh.UniformBlocks =
+ ralloc_array(linked, gl_uniform_block *, num_ubo_blocks);
+ ralloc_steal(linked, ubo_blocks);
+ for (unsigned i = 0; i < num_ubo_blocks; i++) {
+ linked->Program->sh.UniformBlocks[i] = &ubo_blocks[i];
+ }
+ linked->Program->info.num_ubos = num_ubo_blocks;
+
+ /* Copy ssbo blocks to linked shader list */
+ linked->Program->sh.ShaderStorageBlocks =
+ ralloc_array(linked, gl_uniform_block *, num_ssbo_blocks);
+ ralloc_steal(linked, ssbo_blocks);
+ for (unsigned i = 0; i < num_ssbo_blocks; i++) {
+ linked->Program->sh.ShaderStorageBlocks[i] = &ssbo_blocks[i];
}
+ linked->Program->info.num_ssbos = num_ssbo_blocks;
/* At this point linked should contain all of the linked IR, so
* validate it to make sure nothing went wrong.
@@ -2384,6 +2367,9 @@
if (ctx->Const.VertexID_is_zero_based)
lower_vertex_id(linked);
+ if (ctx->Const.LowerCsDerivedVariables)
+ lower_cs_derived(linked);
+
#ifdef DEBUG
/* Compute the source checksum. */
linked->SourceChecksum = 0;
@@ -2555,7 +2541,7 @@
* \return
* Base location of the available bits on success or -1 on failure.
*/
-int
+static int
find_available_slots(unsigned used_mask, unsigned needed_count)
{
unsigned needed_mask = (1 << needed_count) - 1;
@@ -2592,7 +2578,7 @@
* If locations are successfully assigned, true is returned. Otherwise an
* error is emitted to the shader link log and false is returned.
*/
-bool
+static bool
assign_attribute_or_color_locations(void *mem_ctx,
gl_shader_program *prog,
struct gl_constants *constants,
@@ -3624,7 +3610,7 @@
return true;
prog->data->ProgramResourceList =
- reralloc(prog,
+ reralloc(prog->data,
prog->data->ProgramResourceList,
gl_program_resource,
prog->data->NumProgramResourceList + 1);
@@ -3819,6 +3805,7 @@
GLenum programInterface, ir_variable *var,
const char *name, const glsl_type *type,
bool use_implicit_location, int location,
+ bool inouts_share_location,
const glsl_type *outermost_struct_type = NULL)
{
const glsl_type *interface_type = var->get_interface_type();
@@ -3881,7 +3868,7 @@
stage_mask, programInterface,
var, field_name, field->type,
use_implicit_location, field_location,
- outermost_struct_type))
+ false, outermost_struct_type))
return false;
field_location += field->type->count_attribute_slots(false);
@@ -3889,6 +3876,43 @@
return true;
}
+ case GLSL_TYPE_ARRAY: {
+ /* The ARB_program_interface_query spec says:
+ *
+ * "For an active variable declared as an array of basic types, a
+ * single entry will be generated, with its name string formed by
+ * concatenating the name of the array and the string "[0]"."
+ *
+ * "For an active variable declared as an array of an aggregate data
+ * type (structures or arrays), a separate entry will be generated
+ * for each active array element, unless noted immediately below.
+ * The name of each entry is formed by concatenating the name of
+ * the array, the "[" character, an integer identifying the element
+ * number, and the "]" character. These enumeration rules are
+ * applied recursively, treating each enumerated array element as a
+ * separate active variable."
+ */
+ const struct glsl_type *array_type = type->fields.array;
+ if (array_type->base_type == GLSL_TYPE_STRUCT ||
+ array_type->base_type == GLSL_TYPE_ARRAY) {
+ unsigned elem_location = location;
+ unsigned stride = inouts_share_location ? 0 :
+ array_type->count_attribute_slots(false);
+ for (unsigned i = 0; i < type->length; i++) {
+ char *elem = ralloc_asprintf(shProg, "%s[%d]", name, i);
+ if (!add_shader_variable(ctx, shProg, resource_set,
+ stage_mask, programInterface,
+ var, elem, array_type,
+ use_implicit_location, elem_location,
+ false, outermost_struct_type))
+ return false;
+ elem_location += stride;
+ }
+ return true;
+ }
+ /* fallthrough */
+ }
+
default: {
/* The ARB_program_interface_query spec says:
*
@@ -3910,6 +3934,20 @@
}
static bool
+inout_has_same_location(const ir_variable *var, unsigned stage)
+{
+ if (!var->data.patch &&
+ ((var->data.mode == ir_var_shader_out &&
+ stage == MESA_SHADER_TESS_CTRL) ||
+ (var->data.mode == ir_var_shader_in &&
+ (stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_TESS_EVAL ||
+ stage == MESA_SHADER_GEOMETRY))))
+ return true;
+ else
+ return false;
+}
+
+static bool
add_interface_variables(const struct gl_context *ctx,
struct gl_shader_program *shProg,
struct set *resource_set,
@@ -3965,7 +4003,8 @@
if (!add_shader_variable(ctx, shProg, resource_set,
1 << stage, programInterface,
var, var->name, var->type, vs_input_or_fs_output,
- var->data.location - loc_bias))
+ var->data.location - loc_bias,
+ inout_has_same_location(var, stage)))
return false;
}
return true;
@@ -4003,7 +4042,8 @@
if (!add_shader_variable(ctx, shProg, resource_set,
stage_mask,
iface, var, var->name, var->type, false,
- var->data.location - VARYING_SLOT_VAR0))
+ var->data.location - VARYING_SLOT_VAR0,
+ inout_has_same_location(var, stage)))
return false;
}
}
@@ -4029,7 +4069,8 @@
if (!add_shader_variable(ctx, shProg, resource_set,
1 << MESA_SHADER_FRAGMENT,
GL_PROGRAM_OUTPUT, var, var->name, var->type,
- true, var->data.location - FRAG_RESULT_DATA0))
+ true, var->data.location - FRAG_RESULT_DATA0,
+ false))
return false;
}
}
@@ -4144,9 +4185,9 @@
}
static int
-get_array_stride(struct gl_uniform_storage *uni, const glsl_type *interface,
- const glsl_struct_field *field, char *interface_name,
- char *var_name)
+get_array_stride(struct gl_context *ctx, struct gl_uniform_storage *uni,
+ const glsl_type *interface, const glsl_struct_field *field,
+ char *interface_name, char *var_name)
{
/* The ARB_program_interface_query spec says:
*
@@ -4170,7 +4211,9 @@
var_name))
return 0;
- if (interface->interface_packing != GLSL_INTERFACE_PACKING_STD430) {
+ if (GLSL_INTERFACE_PACKING_STD140 ==
+ interface->
+ get_internal_ifc_packing(ctx->Const.UseSTD430AsDefaultPacking)) {
if (array_type->is_record() || array_type->is_array())
return glsl_align(array_type->std140_size(row_major), 16);
else
@@ -4183,7 +4226,8 @@
}
static void
-calculate_array_size_and_stride(struct gl_shader_program *shProg,
+calculate_array_size_and_stride(struct gl_context *ctx,
+ struct gl_shader_program *shProg,
struct gl_uniform_storage *uni)
{
int block_index = uni->block_index;
@@ -4232,7 +4276,7 @@
if (strcmp(field->name, var_name) != 0)
continue;
- array_stride = get_array_stride(uni, interface, field,
+ array_stride = get_array_stride(ctx, uni, interface, field,
interface_name, var_name);
array_size = get_array_size(uni, field, interface_name, var_name);
goto write_top_level_array_size_and_stride;
@@ -4358,7 +4402,7 @@
continue;
if (is_shader_storage) {
- calculate_array_size_and_stride(shProg,
+ calculate_array_size_and_stride(ctx, shProg,
&shProg->data->UniformStorage[i]);
}
@@ -4592,14 +4636,12 @@
update_array_sizes(prog);
link_assign_uniform_locations(prog, ctx);
- if (!prog->data->cache_fallback) {
- link_assign_atomic_counter_resources(ctx, prog);
- link_calculate_subroutine_compat(prog);
- check_resources(ctx, prog);
- check_subroutine_resources(prog);
- check_image_resources(ctx, prog);
- link_check_atomic_counter_resources(ctx, prog);
- }
+ link_assign_atomic_counter_resources(ctx, prog);
+ link_calculate_subroutine_compat(prog);
+ check_resources(ctx, prog);
+ check_subroutine_resources(prog);
+ check_image_resources(ctx, prog);
+ link_check_atomic_counter_resources(ctx, prog);
}
static bool
@@ -4660,11 +4702,11 @@
if (options->LowerBufferInterfaceBlocks)
lower_ubo_reference(prog->_LinkedShaders[i],
- options->ClampBlockIndicesToArrayBounds);
+ options->ClampBlockIndicesToArrayBounds,
+ ctx->Const.UseSTD430AsDefaultPacking);
if (i == MESA_SHADER_COMPUTE)
- lower_shared_reference(prog->_LinkedShaders[i],
- &prog->Comp.SharedSize);
+ lower_shared_reference(ctx, prog, prog->_LinkedShaders[i]);
lower_vector_derefs(prog->_LinkedShaders[i]);
do_vec_index_to_swizzle(prog->_LinkedShaders[i]->ir);
@@ -4913,10 +4955,8 @@
last = i;
}
- if (!prog->data->cache_fallback) {
- check_explicit_uniform_locations(ctx, prog);
- link_assign_subroutine_types(prog);
- }
+ check_explicit_uniform_locations(ctx, prog);
+ link_assign_subroutine_types(prog);
if (!prog->data->LinkStatus)
goto done;
@@ -4936,7 +4976,7 @@
if (!prog->data->LinkStatus)
goto done;
- cross_validate_outputs_to_inputs(prog,
+ cross_validate_outputs_to_inputs(ctx, prog,
prog->_LinkedShaders[prev],
prog->_LinkedShaders[i]);
if (!prog->data->LinkStatus)
@@ -4971,15 +5011,13 @@
if (prog->SeparateShader)
disable_varying_optimizations_for_sso(prog);
- if (!prog->data->cache_fallback) {
- /* Process UBOs */
- if (!interstage_cross_validate_uniform_blocks(prog, false))
- goto done;
+ /* Process UBOs */
+ if (!interstage_cross_validate_uniform_blocks(prog, false))
+ goto done;
- /* Process SSBOs */
- if (!interstage_cross_validate_uniform_blocks(prog, true))
- goto done;
- }
+ /* Process SSBOs */
+ if (!interstage_cross_validate_uniform_blocks(prog, true))
+ goto done;
/* Do common optimization before assigning storage for attributes,
* uniforms, and varyings. Later optimization could possibly make
diff -Nru mesa-17.2.4/src/compiler/glsl/linker.h mesa-17.3.3/src/compiler/glsl/linker.h
--- mesa-17.2.4/src/compiler/glsl/linker.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/linker.h 2018-01-18 21:30:28.000000000 +0000
@@ -122,7 +122,7 @@
* matter. For example, enumerating the names of members of the block, but
* not for determining the offsets of members.
*/
- void process(ir_variable *var);
+ void process(ir_variable *var, bool use_std430_as_default);
/**
* Begin processing a variable of a structured type.
@@ -139,7 +139,8 @@
* \c type must be \c GLSL_TYPE_RECORD, \c GLSL_TYPE_INTERFACE, or an array
* there of.
*/
- void process(const glsl_type *type, const char *name);
+ void process(const glsl_type *type, const char *name,
+ bool use_std430_as_default);
protected:
/**
diff -Nru mesa-17.2.4/src/compiler/glsl/link_interface_blocks.cpp mesa-17.3.3/src/compiler/glsl/link_interface_blocks.cpp
--- mesa-17.2.4/src/compiler/glsl/link_interface_blocks.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/link_interface_blocks.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -364,6 +364,35 @@
consumer->Stage != MESA_SHADER_FRAGMENT) ||
consumer->Stage == MESA_SHADER_GEOMETRY;
+ /* Check that block re-declarations of gl_PerVertex are compatible
+ * across shaders: From OpenGL Shading Language 4.5, section
+ * "7.1 Built-In Language Variables", page 130 of the PDF:
+ *
+ * "If multiple shaders using members of a built-in block belonging
+ * to the same interface are linked together in the same program,
+ * they must all redeclare the built-in block in the same way, as
+ * described in section 4.3.9 “Interface Blocks” for interface-block
+ * matching, or a link-time error will result."
+ *
+ * This is done explicitly outside of iterating the member variable
+ * declarations because it is possible that the variables are not used and
+ * so they would have been optimised out.
+ */
+ const glsl_type *consumer_iface =
+ consumer->symbols->get_interface("gl_PerVertex",
+ ir_var_shader_in);
+
+ const glsl_type *producer_iface =
+ producer->symbols->get_interface("gl_PerVertex",
+ ir_var_shader_out);
+
+ if (producer_iface && consumer_iface &&
+ interstage_member_mismatch(prog, consumer_iface, producer_iface)) {
+ linker_error(prog, "Incompatible or missing gl_PerVertex re-declaration "
+ "in consecutive shaders");
+ return;
+ }
+
/* Add output interfaces from the producer to the symbol table. */
foreach_in_list(ir_instruction, node, producer->ir) {
ir_variable *var = node->as_variable();
diff -Nru mesa-17.2.4/src/compiler/glsl/link_uniform_blocks.cpp mesa-17.3.3/src/compiler/glsl/link_uniform_blocks.cpp
--- mesa-17.2.4/src/compiler/glsl/link_uniform_blocks.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/link_uniform_blocks.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -34,10 +34,12 @@
class ubo_visitor : public program_resource_visitor {
public:
ubo_visitor(void *mem_ctx, gl_uniform_buffer_variable *variables,
- unsigned num_variables, struct gl_shader_program *prog)
+ unsigned num_variables, struct gl_shader_program *prog,
+ bool use_std430_as_default)
: index(0), offset(0), buffer_size(0), variables(variables),
num_variables(num_variables), mem_ctx(mem_ctx),
- is_array_instance(false), prog(prog)
+ is_array_instance(false), prog(prog),
+ use_std430_as_default(use_std430_as_default)
{
/* empty */
}
@@ -47,7 +49,8 @@
this->offset = 0;
this->buffer_size = 0;
this->is_array_instance = strchr(name, ']') != NULL;
- this->program_resource_visitor::process(type, name);
+ this->program_resource_visitor::process(type, name,
+ use_std430_as_default);
}
unsigned index;
@@ -181,6 +184,8 @@
*/
this->buffer_size = glsl_align(this->offset, 16);
}
+
+ bool use_std430_as_default;
};
class count_block_size : public program_resource_visitor {
@@ -280,7 +285,7 @@
blocks[i].Binding = (b->has_binding) ? b->binding + *binding_offset : 0;
blocks[i].UniformBufferSize = 0;
- blocks[i]._Packing = gl_uniform_block_packing(type->interface_packing);
+ blocks[i]._Packing = glsl_interface_packing(type->interface_packing);
blocks[i]._RowMajor = type->get_interface_row_major();
blocks[i].linearized_array_index = linearized_index;
@@ -352,16 +357,8 @@
/* Add each variable from each uniform block to the API tracking
* structures.
*/
- ubo_visitor parcel(blocks, variables, num_variables, prog);
-
- STATIC_ASSERT(unsigned(GLSL_INTERFACE_PACKING_STD140)
- == unsigned(ubo_packing_std140));
- STATIC_ASSERT(unsigned(GLSL_INTERFACE_PACKING_SHARED)
- == unsigned(ubo_packing_shared));
- STATIC_ASSERT(unsigned(GLSL_INTERFACE_PACKING_PACKED)
- == unsigned(ubo_packing_packed));
- STATIC_ASSERT(unsigned(GLSL_INTERFACE_PACKING_STD430)
- == unsigned(ubo_packing_std430));
+ ubo_visitor parcel(blocks, variables, num_variables, prog,
+ ctx->Const.UseSTD430AsDefaultPacking);
unsigned i = 0;
struct hash_entry *entry;
@@ -447,7 +444,8 @@
}
block_size.num_active_uniforms = 0;
- block_size.process(b->type->without_array(), "");
+ block_size.process(b->type->without_array(), "",
+ ctx->Const.UseSTD430AsDefaultPacking);
if (b->array != NULL) {
unsigned aoa_size = b->type->arrays_of_arrays_size();
diff -Nru mesa-17.2.4/src/compiler/glsl/link_uniform_initializers.cpp mesa-17.3.3/src/compiler/glsl/link_uniform_initializers.cpp
--- mesa-17.2.4/src/compiler/glsl/link_uniform_initializers.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/link_uniform_initializers.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -33,7 +33,7 @@
*/
namespace linker {
-gl_uniform_storage *
+static gl_uniform_storage *
get_storage(struct gl_shader_program *prog, const char *name)
{
unsigned id;
@@ -95,7 +95,7 @@
* qualifier specified in the shader. Atomic counters are different because
* they have no storage and should be handled elsewhere.
*/
-void
+static void
set_opaque_binding(void *mem_ctx, gl_shader_program *prog,
const ir_variable *var, const glsl_type *type,
const char *name, int *binding)
@@ -179,7 +179,7 @@
}
}
-void
+static void
set_block_binding(gl_shader_program *prog, const char *block_name,
unsigned mode, int binding)
{
@@ -206,17 +206,13 @@
{
const glsl_type *t_without_array = type->without_array();
if (type->is_record()) {
- ir_constant *field_constant;
-
- field_constant = (ir_constant *)val->components.get_head();
-
for (unsigned int i = 0; i < type->length; i++) {
const glsl_type *field_type = type->fields.structure[i].type;
const char *field_name = ralloc_asprintf(mem_ctx, "%s.%s", name,
type->fields.structure[i].name);
set_uniform_initializer(mem_ctx, prog, field_name,
- field_type, field_constant, boolean_true);
- field_constant = (ir_constant *)field_constant->next;
+ field_type, val->get_record_field(i),
+ boolean_true);
}
return;
} else if (t_without_array->is_record() ||
@@ -227,7 +223,7 @@
const char *element_name = ralloc_asprintf(mem_ctx, "%s[%d]", name, i);
set_uniform_initializer(mem_ctx, prog, element_name,
- element_type, val->array_elements[i],
+ element_type, val->const_elements[i],
boolean_true);
}
return;
@@ -240,15 +236,15 @@
if (val->type->is_array()) {
const enum glsl_base_type base_type =
- val->array_elements[0]->type->base_type;
- const unsigned int elements = val->array_elements[0]->type->components();
+ val->const_elements[0]->type->base_type;
+ const unsigned int elements = val->const_elements[0]->type->components();
unsigned int idx = 0;
unsigned dmul = glsl_base_type_is_64bit(base_type) ? 2 : 1;
assert(val->type->length >= storage->array_elements);
for (unsigned int i = 0; i < storage->array_elements; i++) {
copy_constant_to_storage(& storage->storage[idx],
- val->array_elements[i],
+ val->const_elements[i],
base_type,
elements,
boolean_true);
diff -Nru mesa-17.2.4/src/compiler/glsl/link_uniforms.cpp mesa-17.3.3/src/compiler/glsl/link_uniforms.cpp
--- mesa-17.2.4/src/compiler/glsl/link_uniforms.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/link_uniforms.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -43,14 +43,17 @@
#define UNMAPPED_UNIFORM_LOC ~0u
void
-program_resource_visitor::process(const glsl_type *type, const char *name)
+program_resource_visitor::process(const glsl_type *type, const char *name,
+ bool use_std430_as_default)
{
assert(type->without_array()->is_record()
|| type->without_array()->is_interface());
unsigned record_array_count = 1;
char *name_copy = ralloc_strdup(NULL, name);
- enum glsl_interface_packing packing = type->get_interface_packing();
+
+ enum glsl_interface_packing packing =
+ type->get_internal_ifc_packing(use_std430_as_default);
recursion(type, &name_copy, strlen(name), false, NULL, packing, false,
record_array_count, NULL);
@@ -58,15 +61,16 @@
}
void
-program_resource_visitor::process(ir_variable *var)
+program_resource_visitor::process(ir_variable *var, bool use_std430_as_default)
{
unsigned record_array_count = 1;
const bool row_major =
var->data.matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR;
- const enum glsl_interface_packing packing = var->get_interface_type() ?
- var->get_interface_type_packing() :
- var->type->get_interface_packing();
+ enum glsl_interface_packing packing = var->get_interface_type() ?
+ var->get_interface_type()->
+ get_internal_ifc_packing(use_std430_as_default) :
+ var->type->get_internal_ifc_packing(use_std430_as_default);
const glsl_type *t =
var->data.from_named_ifc_block ? var->get_interface_type() : var->type;
@@ -253,12 +257,14 @@
class count_uniform_size : public program_resource_visitor {
public:
count_uniform_size(struct string_to_uint_map *map,
- struct string_to_uint_map *hidden_map)
+ struct string_to_uint_map *hidden_map,
+ bool use_std430_as_default)
: num_active_uniforms(0), num_hidden_uniforms(0), num_values(0),
num_shader_samplers(0), num_shader_images(0),
num_shader_uniform_components(0), num_shader_subroutines(0),
is_buffer_block(false), is_shader_storage(false), map(map),
- hidden_map(hidden_map), current_var(NULL)
+ hidden_map(hidden_map), current_var(NULL),
+ use_std430_as_default(use_std430_as_default)
{
/* empty */
}
@@ -278,9 +284,10 @@
this->is_shader_storage = var->is_in_shader_storage_block();
if (var->is_interface_instance())
program_resource_visitor::process(var->get_interface_type(),
- var->get_interface_type()->name);
+ var->get_interface_type()->name,
+ use_std430_as_default);
else
- program_resource_visitor::process(var);
+ program_resource_visitor::process(var, use_std430_as_default);
}
/**
@@ -393,6 +400,8 @@
* Current variable being processed.
*/
ir_variable *current_var;
+
+ bool use_std430_as_default;
};
} /* anonymous namespace */
@@ -417,8 +426,10 @@
parcel_out_uniform_storage(struct gl_shader_program *prog,
struct string_to_uint_map *map,
struct gl_uniform_storage *uniforms,
- union gl_constant_value *values)
- : prog(prog), map(map), uniforms(uniforms), values(values),
+ union gl_constant_value *values,
+ bool use_std430_as_default)
+ : prog(prog), map(map), uniforms(uniforms),
+ use_std430_as_default(use_std430_as_default), values(values),
bindless_targets(NULL), bindless_access(NULL)
{
}
@@ -498,7 +509,8 @@
if (var->is_interface_instance()) {
ubo_byte_offset = 0;
process(var->get_interface_type(),
- var->get_interface_type()->name);
+ var->get_interface_type()->name,
+ use_std430_as_default);
} else {
const struct gl_uniform_block *const block =
&blks[buffer_block_index];
@@ -509,7 +521,7 @@
&block->Uniforms[var->data.location];
ubo_byte_offset = ubo_var->Offset;
- process(var);
+ process(var, use_std430_as_default);
}
} else {
/* Store any explicit location and reset data location so we can
@@ -518,7 +530,7 @@
this->explicit_location = current_var->data.location;
current_var->data.location = -1;
- process(var);
+ process(var, use_std430_as_default);
}
delete this->record_next_sampler;
delete this->record_next_bindless_sampler;
@@ -896,6 +908,8 @@
unsigned next_bindless_image;
unsigned next_subroutine;
+ bool use_std430_as_default;
+
/**
* Field counter is used to take care that uniform structures
* with explicit locations get sequential locations.
@@ -1319,7 +1333,7 @@
union gl_constant_value *data;
if (prog->data->UniformStorage == NULL) {
- prog->data->UniformStorage = rzalloc_array(prog,
+ prog->data->UniformStorage = rzalloc_array(prog->data,
struct gl_uniform_storage,
prog->data->NumUniformStorage);
data = rzalloc_array(prog->data->UniformStorage,
@@ -1333,7 +1347,8 @@
#endif
parcel_out_uniform_storage parcel(prog, prog->UniformHash,
- prog->data->UniformStorage, data);
+ prog->data->UniformStorage, data,
+ ctx->Const.UseSTD430AsDefaultPacking);
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
struct gl_linked_shader *shader = prog->_LinkedShaders[i];
@@ -1385,13 +1400,6 @@
sizeof(shader->Program->sh.SamplerTargets));
}
- /* If this is a fallback compile for a cache miss we already have the
- * correct uniform mappings and we don't want to reinitialise uniforms so
- * just return now.
- */
- if (prog->data->cache_fallback)
- return;
-
#ifndef NDEBUG
for (unsigned i = 0; i < prog->data->NumUniformStorage; i++) {
assert(prog->data->UniformStorage[i].storage != NULL ||
@@ -1416,11 +1424,9 @@
link_assign_uniform_locations(struct gl_shader_program *prog,
struct gl_context *ctx)
{
- if (!prog->data->cache_fallback) {
- ralloc_free(prog->data->UniformStorage);
- prog->data->UniformStorage = NULL;
- prog->data->NumUniformStorage = 0;
- }
+ ralloc_free(prog->data->UniformStorage);
+ prog->data->UniformStorage = NULL;
+ prog->data->NumUniformStorage = 0;
if (prog->UniformHash != NULL) {
prog->UniformHash->clear();
@@ -1436,7 +1442,8 @@
* glGetUniformLocation.
*/
struct string_to_uint_map *hiddenUniforms = new string_to_uint_map;
- count_uniform_size uniform_size(prog->UniformHash, hiddenUniforms);
+ count_uniform_size uniform_size(prog->UniformHash, hiddenUniforms,
+ ctx->Const.UseSTD430AsDefaultPacking);
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
struct gl_linked_shader *sh = prog->_LinkedShaders[i];
diff -Nru mesa-17.2.4/src/compiler/glsl/link_varyings.cpp mesa-17.3.3/src/compiler/glsl/link_varyings.cpp
--- mesa-17.2.4/src/compiler/glsl/link_varyings.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/link_varyings.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -165,10 +165,12 @@
if (var->data.from_named_ifc_block) {
type = var->get_interface_type();
+
/* Find the member type before it was altered by lowering */
+ const glsl_type *type_wa = type->without_array();
member_type =
- type->fields.structure[type->field_index(var->name)].type;
- name = ralloc_strdup(NULL, type->without_array()->name);
+ type_wa->fields.structure[type_wa->field_index(var->name)].type;
+ name = ralloc_strdup(NULL, type_wa->name);
} else {
type = var->type;
member_type = NULL;
@@ -189,7 +191,8 @@
* matching input to another stage.
*/
static void
-cross_validate_types_and_qualifiers(struct gl_shader_program *prog,
+cross_validate_types_and_qualifiers(struct gl_context *ctx,
+ struct gl_shader_program *prog,
const ir_variable *input,
const ir_variable *output,
gl_shader_stage consumer_stage,
@@ -325,20 +328,48 @@
* "It is a link-time error if, within the same stage, the interpolation
* qualifiers of variables of the same name do not match.
*
+ * Section 4.3.9 (Interpolation) of the GLSL ES 3.00 spec says:
+ *
+ * "When no interpolation qualifier is present, smooth interpolation
+ * is used."
+ *
+ * So we match variables where one is smooth and the other has no explicit
+ * qualifier.
*/
- if (input->data.interpolation != output->data.interpolation &&
+ unsigned input_interpolation = input->data.interpolation;
+ unsigned output_interpolation = output->data.interpolation;
+ if (prog->IsES) {
+ if (input_interpolation == INTERP_MODE_NONE)
+ input_interpolation = INTERP_MODE_SMOOTH;
+ if (output_interpolation == INTERP_MODE_NONE)
+ output_interpolation = INTERP_MODE_SMOOTH;
+ }
+ if (input_interpolation != output_interpolation &&
prog->data->Version < 440) {
- linker_error(prog,
- "%s shader output `%s' specifies %s "
- "interpolation qualifier, "
- "but %s shader input specifies %s "
- "interpolation qualifier\n",
- _mesa_shader_stage_to_string(producer_stage),
- output->name,
- interpolation_string(output->data.interpolation),
- _mesa_shader_stage_to_string(consumer_stage),
- interpolation_string(input->data.interpolation));
- return;
+ if (!ctx->Const.AllowGLSLCrossStageInterpolationMismatch) {
+ linker_error(prog,
+ "%s shader output `%s' specifies %s "
+ "interpolation qualifier, "
+ "but %s shader input specifies %s "
+ "interpolation qualifier\n",
+ _mesa_shader_stage_to_string(producer_stage),
+ output->name,
+ interpolation_string(output->data.interpolation),
+ _mesa_shader_stage_to_string(consumer_stage),
+ interpolation_string(input->data.interpolation));
+ return;
+ } else {
+ linker_warning(prog,
+ "%s shader output `%s' specifies %s "
+ "interpolation qualifier, "
+ "but %s shader input specifies %s "
+ "interpolation qualifier\n",
+ _mesa_shader_stage_to_string(producer_stage),
+ output->name,
+ interpolation_string(output->data.interpolation),
+ _mesa_shader_stage_to_string(consumer_stage),
+ interpolation_string(input->data.interpolation));
+ }
}
}
@@ -346,7 +377,8 @@
* Validate front and back color outputs against single color input
*/
static void
-cross_validate_front_and_back_color(struct gl_shader_program *prog,
+cross_validate_front_and_back_color(struct gl_context *ctx,
+ struct gl_shader_program *prog,
const ir_variable *input,
const ir_variable *front_color,
const ir_variable *back_color,
@@ -354,19 +386,46 @@
gl_shader_stage producer_stage)
{
if (front_color != NULL && front_color->data.assigned)
- cross_validate_types_and_qualifiers(prog, input, front_color,
+ cross_validate_types_and_qualifiers(ctx, prog, input, front_color,
consumer_stage, producer_stage);
if (back_color != NULL && back_color->data.assigned)
- cross_validate_types_and_qualifiers(prog, input, back_color,
+ cross_validate_types_and_qualifiers(ctx, prog, input, back_color,
consumer_stage, producer_stage);
}
+static unsigned
+compute_variable_location_slot(ir_variable *var, gl_shader_stage stage)
+{
+ unsigned location_start = VARYING_SLOT_VAR0;
+
+ switch (stage) {
+ case MESA_SHADER_VERTEX:
+ if (var->data.mode == ir_var_shader_in)
+ location_start = VERT_ATTRIB_GENERIC0;
+ break;
+ case MESA_SHADER_TESS_CTRL:
+ case MESA_SHADER_TESS_EVAL:
+ if (var->data.patch)
+ location_start = VARYING_SLOT_PATCH0;
+ break;
+ case MESA_SHADER_FRAGMENT:
+ if (var->data.mode == ir_var_shader_out)
+ location_start = FRAG_RESULT_DATA0;
+ break;
+ default:
+ break;
+ }
+
+ return var->data.location - location_start;
+}
+
/**
* Validate that outputs from one stage match inputs of another
*/
void
-cross_validate_outputs_to_inputs(struct gl_shader_program *prog,
+cross_validate_outputs_to_inputs(struct gl_context *ctx,
+ struct gl_shader_program *prog,
gl_linked_shader *producer,
gl_linked_shader *consumer)
{
@@ -391,10 +450,19 @@
*/
const glsl_type *type = get_varying_type(var, producer->Stage);
unsigned num_elements = type->count_attribute_slots(false);
- unsigned idx = var->data.location - VARYING_SLOT_VAR0;
+ unsigned idx = compute_variable_location_slot(var, producer->Stage);
unsigned slot_limit = idx + num_elements;
unsigned last_comp;
+ unsigned slot_max =
+ ctx->Const.Program[producer->Stage].MaxOutputComponents / 4;
+ if (slot_limit > slot_max) {
+ linker_error(prog,
+ "Invalid location %u in %s shader\n",
+ idx, _mesa_shader_stage_to_string(producer->Stage));
+ return;
+ }
+
if (type->without_array()->is_record()) {
/* The component qualifier can't be used on structs so just treat
* all component slots as used.
@@ -475,7 +543,7 @@
const ir_variable *const back_color =
parameters.get_variable("gl_BackColor");
- cross_validate_front_and_back_color(prog, input,
+ cross_validate_front_and_back_color(ctx, prog, input,
front_color, back_color,
consumer->Stage, producer->Stage);
} else if (strcmp(input->name, "gl_SecondaryColor") == 0 && input->data.used) {
@@ -485,7 +553,7 @@
const ir_variable *const back_color =
parameters.get_variable("gl_BackSecondaryColor");
- cross_validate_front_and_back_color(prog, input,
+ cross_validate_front_and_back_color(ctx, prog, input,
front_color, back_color,
consumer->Stage, producer->Stage);
} else {
@@ -500,7 +568,8 @@
const glsl_type *type = get_varying_type(input, consumer->Stage);
unsigned num_elements = type->count_attribute_slots(false);
- unsigned idx = input->data.location - VARYING_SLOT_VAR0;
+ unsigned idx =
+ compute_variable_location_slot(input, consumer->Stage);
unsigned slot_limit = idx + num_elements;
while (idx < slot_limit) {
@@ -527,7 +596,7 @@
*/
if (!(input->get_interface_type() &&
output->get_interface_type()))
- cross_validate_types_and_qualifiers(prog, input, output,
+ cross_validate_types_and_qualifiers(ctx, prog, input, output,
consumer->Stage,
producer->Stage);
} else {
@@ -1119,7 +1188,6 @@
if (has_xfb_qualifiers) {
for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS; j++) {
if (prog->TransformFeedback.BufferStride[j]) {
- buffers |= 1 << j;
explicit_stride[j] = true;
xfb_prog->sh.LinkedTransformFeedback->Buffers[j].Stride =
prog->TransformFeedback.BufferStride[j] / 4;
@@ -1144,10 +1212,24 @@
num_buffers++;
buffer_stream_id = -1;
continue;
- } else if (tfeedback_decls[i].is_varying()) {
+ }
+
+ if (has_xfb_qualifiers) {
+ buffer = tfeedback_decls[i].get_buffer();
+ } else {
+ buffer = num_buffers;
+ }
+
+ if (tfeedback_decls[i].is_varying()) {
if (buffer_stream_id == -1) {
/* First varying writing to this buffer: remember its stream */
buffer_stream_id = (int) tfeedback_decls[i].get_stream_id();
+
+ /* Only mark a buffer as active when there is a varying
+ * attached to it. This behaviour is based on a revised version
+ * of section 13.2.2 of the GL 4.6 spec.
+ */
+ buffers |= 1 << buffer;
} else if (buffer_stream_id !=
(int) tfeedback_decls[i].get_stream_id()) {
/* Varying writes to the same buffer from a different stream */
@@ -1163,13 +1245,6 @@
}
}
- if (has_xfb_qualifiers) {
- buffer = tfeedback_decls[i].get_buffer();
- } else {
- buffer = num_buffers;
- }
- buffers |= 1 << buffer;
-
if (!tfeedback_decls[i].store(ctx, prog,
xfb_prog->sh.LinkedTransformFeedback,
buffer, num_buffers, num_outputs,
@@ -1843,7 +1918,7 @@
this->toplevel_var = var;
this->varying_floats = 0;
- program_resource_visitor::process(var);
+ program_resource_visitor::process(var, false);
}
private:
@@ -2253,9 +2328,6 @@
}
}
- _mesa_hash_table_destroy(consumer_inputs, NULL);
- _mesa_hash_table_destroy(consumer_interface_inputs, NULL);
-
for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
if (!tfeedback_decls[i].is_varying())
continue;
@@ -2268,12 +2340,34 @@
return false;
}
+ /* Mark xfb varyings as always active */
+ matched_candidate->toplevel_var->data.always_active_io = 1;
+
+ /* Mark any corresponding inputs as always active also. We must do this
+ * because we have a NIR pass that lowers vectors to scalars and another
+ * that removes unused varyings.
+ * We don't split varyings marked as always active because there is no
+ * point in doing so. This means we need to mark both sides of the
+ * interface as always active otherwise we will have a mismatch and
+ * start removing things we shouldn't.
+ */
+ ir_variable *const input_var =
+ linker::get_matching_input(mem_ctx, matched_candidate->toplevel_var,
+ consumer_inputs,
+ consumer_interface_inputs,
+ consumer_inputs_with_locations);
+ if (input_var)
+ input_var->data.always_active_io = 1;
+
if (matched_candidate->toplevel_var->data.is_unmatched_generic_inout) {
matched_candidate->toplevel_var->data.is_xfb_only = 1;
matches.record(matched_candidate->toplevel_var, NULL);
}
}
+ _mesa_hash_table_destroy(consumer_inputs, NULL);
+ _mesa_hash_table_destroy(consumer_interface_inputs, NULL);
+
uint8_t components[MAX_VARYINGS_INCL_PATCH] = {0};
const unsigned slots_used = matches.assign_locations(
prog, components, reserved_slots);
@@ -2350,7 +2444,7 @@
return true;
}
-bool
+static bool
check_against_output_limit(struct gl_context *ctx,
struct gl_shader_program *prog,
gl_linked_shader *producer,
@@ -2394,7 +2488,7 @@
return true;
}
-bool
+static bool
check_against_input_limit(struct gl_context *ctx,
struct gl_shader_program *prog,
gl_linked_shader *consumer,
diff -Nru mesa-17.2.4/src/compiler/glsl/link_varyings.h mesa-17.3.3/src/compiler/glsl/link_varyings.h
--- mesa-17.2.4/src/compiler/glsl/link_varyings.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/link_varyings.h 2018-01-18 21:30:28.000000000 +0000
@@ -300,7 +300,8 @@
struct gl_context *ctx, void *mem_ctx);
void
-cross_validate_outputs_to_inputs(struct gl_shader_program *prog,
+cross_validate_outputs_to_inputs(struct gl_context *ctx,
+ struct gl_shader_program *prog,
gl_linked_shader *producer,
gl_linked_shader *consumer);
diff -Nru mesa-17.2.4/src/compiler/glsl/loop_analysis.cpp mesa-17.3.3/src/compiler/glsl/loop_analysis.cpp
--- mesa-17.2.4/src/compiler/glsl/loop_analysis.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/loop_analysis.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -25,13 +25,188 @@
#include "loop_analysis.h"
#include "ir_hierarchical_visitor.h"
-static bool is_loop_terminator(ir_if *ir);
+static void try_add_loop_terminator(loop_variable_state *ls, ir_if *ir);
static bool all_expression_operands_are_loop_constant(ir_rvalue *,
hash_table *);
static ir_rvalue *get_basic_induction_increment(ir_assignment *, hash_table *);
+/**
+ * Find an initializer of a variable outside a loop
+ *
+ * Works backwards from the loop to find the pre-loop value of the variable.
+ * This is used, for example, to find the initial value of loop induction
+ * variables.
+ *
+ * \param loop Loop where \c var is an induction variable
+ * \param var Variable whose initializer is to be found
+ *
+ * \return
+ * The \c ir_rvalue assigned to the variable outside the loop. May return
+ * \c NULL if no initializer can be found.
+ */
+static ir_rvalue *
+find_initial_value(ir_loop *loop, ir_variable *var)
+{
+ for (exec_node *node = loop->prev; !node->is_head_sentinel();
+ node = node->prev) {
+ ir_instruction *ir = (ir_instruction *) node;
+
+ switch (ir->ir_type) {
+ case ir_type_call:
+ case ir_type_loop:
+ case ir_type_loop_jump:
+ case ir_type_return:
+ case ir_type_if:
+ return NULL;
+
+ case ir_type_function:
+ case ir_type_function_signature:
+ assert(!"Should not get here.");
+ return NULL;
+
+ case ir_type_assignment: {
+ ir_assignment *assign = ir->as_assignment();
+ ir_variable *assignee = assign->lhs->whole_variable_referenced();
+
+ if (assignee == var)
+ return (assign->condition != NULL) ? NULL : assign->rhs;
+
+ break;
+ }
+
+ default:
+ break;
+ }
+ }
+
+ return NULL;
+}
+
+
+static int
+calculate_iterations(ir_rvalue *from, ir_rvalue *to, ir_rvalue *increment,
+ enum ir_expression_operation op, bool continue_from_then)
+{
+ if (from == NULL || to == NULL || increment == NULL)
+ return -1;
+
+ void *mem_ctx = ralloc_context(NULL);
+
+ ir_expression *const sub =
+ new(mem_ctx) ir_expression(ir_binop_sub, from->type, to, from);
+
+ ir_expression *const div =
+ new(mem_ctx) ir_expression(ir_binop_div, sub->type, sub, increment);
+
+ ir_constant *iter = div->constant_expression_value(mem_ctx);
+ if (iter == NULL) {
+ ralloc_free(mem_ctx);
+ return -1;
+ }
+
+ if (!iter->type->is_integer()) {
+ const ir_expression_operation op = iter->type->is_double()
+ ? ir_unop_d2i : ir_unop_f2i;
+ ir_rvalue *cast =
+ new(mem_ctx) ir_expression(op, glsl_type::int_type, iter, NULL);
+
+ iter = cast->constant_expression_value(mem_ctx);
+ }
+
+ int iter_value = iter->get_int_component(0);
+
+ /* Make sure that the calculated number of iterations satisfies the exit
+ * condition. This is needed to catch off-by-one errors and some types of
+ * ill-formed loops. For example, we need to detect that the following
+ * loop does not have a maximum iteration count.
+ *
+ * for (float x = 0.0; x != 0.9; x += 0.2)
+ * ;
+ */
+ const int bias[] = { -1, 0, 1 };
+ bool valid_loop = false;
+
+ for (unsigned i = 0; i < ARRAY_SIZE(bias); i++) {
+ /* Increment may be of type int, uint, float or double. */
+ switch (increment->type->base_type) {
+ case GLSL_TYPE_INT:
+ iter = new(mem_ctx) ir_constant(iter_value + bias[i]);
+ break;
+ case GLSL_TYPE_UINT:
+ iter = new(mem_ctx) ir_constant(unsigned(iter_value + bias[i]));
+ break;
+ case GLSL_TYPE_FLOAT:
+ iter = new(mem_ctx) ir_constant(float(iter_value + bias[i]));
+ break;
+ case GLSL_TYPE_DOUBLE:
+ iter = new(mem_ctx) ir_constant(double(iter_value + bias[i]));
+ break;
+ default:
+ unreachable("Unsupported type for loop iterator.");
+ }
+
+ ir_expression *const mul =
+ new(mem_ctx) ir_expression(ir_binop_mul, increment->type, iter,
+ increment);
+
+ ir_expression *const add =
+ new(mem_ctx) ir_expression(ir_binop_add, mul->type, mul, from);
+
+ ir_expression *cmp =
+ new(mem_ctx) ir_expression(op, glsl_type::bool_type, add, to);
+ if (continue_from_then)
+ cmp = new(mem_ctx) ir_expression(ir_unop_logic_not, cmp);
+
+ ir_constant *const cmp_result = cmp->constant_expression_value(mem_ctx);
+
+ assert(cmp_result != NULL);
+ if (cmp_result->get_bool_component(0)) {
+ iter_value += bias[i];
+ valid_loop = true;
+ break;
+ }
+ }
+
+ ralloc_free(mem_ctx);
+ return (valid_loop) ? iter_value : -1;
+}
+
+static bool
+incremented_before_terminator(ir_loop *loop, ir_variable *var,
+ ir_if *terminator)
+{
+ for (exec_node *node = loop->body_instructions.get_head();
+ !node->is_tail_sentinel();
+ node = node->get_next()) {
+ ir_instruction *ir = (ir_instruction *) node;
+
+ switch (ir->ir_type) {
+ case ir_type_if:
+ if (ir->as_if() == terminator)
+ return false;
+ break;
+
+ case ir_type_assignment: {
+ ir_assignment *assign = ir->as_assignment();
+ ir_variable *assignee = assign->lhs->whole_variable_referenced();
+
+ if (assignee == var) {
+ assert(assign->condition == NULL);
+ return true;
+ }
+
+ break;
+ }
+
+ default:
+ break;
+ }
+ }
+
+ unreachable("Unable to find induction variable");
+}
/**
* Record the fact that the given loop variable was referenced inside the loop.
@@ -133,12 +308,14 @@
loop_terminator *
-loop_variable_state::insert(ir_if *if_stmt)
+loop_variable_state::insert(ir_if *if_stmt, bool continue_from_then)
{
void *mem_ctx = ralloc_parent(this);
loop_terminator *t = new(mem_ctx) loop_terminator();
t->ir = if_stmt;
+ t->continue_from_then = continue_from_then;
+
this->terminators.push_tail(t);
return t;
@@ -295,10 +472,8 @@
ir_if *if_stmt = ((ir_instruction *) node)->as_if();
- if ((if_stmt != NULL) && is_loop_terminator(if_stmt))
- ls->insert(if_stmt);
- else
- break;
+ if (if_stmt != NULL)
+ try_add_loop_terminator(ls, if_stmt);
}
@@ -441,7 +616,11 @@
loop_variable *lv = ls->get(var);
if (lv != NULL && lv->is_induction_var()) {
t->iterations = calculate_iterations(init, limit, lv->increment,
- cmp);
+ cmp, t->continue_from_then);
+
+ if (incremented_before_terminator(ir, var, t->ir)) {
+ t->iterations--;
+ }
if (t->iterations >= 0 &&
(ls->limiting_terminator == NULL ||
@@ -604,31 +783,26 @@
/**
- * Detect whether an if-statement is a loop terminating condition
+ * Detect whether an if-statement is a loop terminating condition and, if so,
+ * add it to the list of loop terminators.
*
* Detects if-statements of the form
*
- * (if (expression bool ...) (break))
+ * (if (expression bool ...) (...then_instrs...break))
+ *
+ * or
+ *
+ * (if (expression bool ...) ... (...else_instrs...break))
*/
-bool
-is_loop_terminator(ir_if *ir)
+void
+try_add_loop_terminator(loop_variable_state *ls, ir_if *ir)
{
- if (!ir->else_instructions.is_empty())
- return false;
-
- ir_instruction *const inst =
- (ir_instruction *) ir->then_instructions.get_head();
- if (inst == NULL)
- return false;
-
- if (inst->ir_type != ir_type_loop_jump)
- return false;
-
- ir_loop_jump *const jump = (ir_loop_jump *) inst;
- if (jump->mode != ir_loop_jump::jump_break)
- return false;
+ ir_instruction *inst = (ir_instruction *) ir->then_instructions.get_tail();
+ ir_instruction *else_inst =
+ (ir_instruction *) ir->else_instructions.get_tail();
- return true;
+ if (is_break(inst) || is_break(else_inst))
+ ls->insert(ir, is_break(else_inst));
}
diff -Nru mesa-17.2.4/src/compiler/glsl/loop_analysis.h mesa-17.3.3/src/compiler/glsl/loop_analysis.h
--- mesa-17.2.4/src/compiler/glsl/loop_analysis.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/loop_analysis.h 2018-01-18 21:30:28.000000000 +0000
@@ -34,34 +34,18 @@
extern class loop_state *
analyze_loop_variables(exec_list *instructions);
-
-/**
- * Fill in loop control fields
- *
- * Based on analysis of loop variables, this function tries to remove
- * redundant sequences in the loop of the form
- *
- * (if (expression bool ...) (break))
- *
- * For example, if it is provable that one loop exit condition will
- * always be satisfied before another, the unnecessary exit condition will be
- * removed.
- */
-extern bool
-set_loop_controls(exec_list *instructions, loop_state *ls);
+static inline bool
+is_break(ir_instruction *ir)
+{
+ return ir != NULL && ir->ir_type == ir_type_loop_jump &&
+ ((ir_loop_jump *) ir)->is_break();
+}
extern bool
unroll_loops(exec_list *instructions, loop_state *ls,
const struct gl_shader_compiler_options *options);
-ir_rvalue *
-find_initial_value(ir_loop *loop, ir_variable *var);
-
-int
-calculate_iterations(ir_rvalue *from, ir_rvalue *to, ir_rvalue *increment,
- enum ir_expression_operation op);
-
/**
* Tracking for all variables used in a loop
@@ -71,7 +55,7 @@
class loop_variable *get(const ir_variable *);
class loop_variable *insert(ir_variable *);
class loop_variable *get_or_insert(ir_variable *, bool in_assignee);
- class loop_terminator *insert(ir_if *);
+ class loop_terminator *insert(ir_if *, bool continue_from_then);
/**
@@ -226,6 +210,9 @@
* terminate the loop (if that is a fixed value). Otherwise -1.
*/
int iterations;
+
+ /* True if the loop continues from the then branch (the else branch breaks) */
+ bool continue_from_then;
};
diff -Nru mesa-17.2.4/src/compiler/glsl/loop_controls.cpp mesa-17.3.3/src/compiler/glsl/loop_controls.cpp
--- mesa-17.2.4/src/compiler/glsl/loop_controls.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/loop_controls.cpp 1970-01-01 00:00:00.000000000 +0000
@@ -1,247 +0,0 @@
-/*
- * Copyright © 2010 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#include
-#include "main/compiler.h"
-#include "compiler/glsl_types.h"
-#include "loop_analysis.h"
-#include "ir_hierarchical_visitor.h"
-
-/**
- * Find an initializer of a variable outside a loop
- *
- * Works backwards from the loop to find the pre-loop value of the variable.
- * This is used, for example, to find the initial value of loop induction
- * variables.
- *
- * \param loop Loop where \c var is an induction variable
- * \param var Variable whose initializer is to be found
- *
- * \return
- * The \c ir_rvalue assigned to the variable outside the loop. May return
- * \c NULL if no initializer can be found.
- */
-ir_rvalue *
-find_initial_value(ir_loop *loop, ir_variable *var)
-{
- for (exec_node *node = loop->prev;
- !node->is_head_sentinel();
- node = node->prev) {
- ir_instruction *ir = (ir_instruction *) node;
-
- switch (ir->ir_type) {
- case ir_type_call:
- case ir_type_loop:
- case ir_type_loop_jump:
- case ir_type_return:
- case ir_type_if:
- return NULL;
-
- case ir_type_function:
- case ir_type_function_signature:
- assert(!"Should not get here.");
- return NULL;
-
- case ir_type_assignment: {
- ir_assignment *assign = ir->as_assignment();
- ir_variable *assignee = assign->lhs->whole_variable_referenced();
-
- if (assignee == var)
- return (assign->condition != NULL) ? NULL : assign->rhs;
-
- break;
- }
-
- default:
- break;
- }
- }
-
- return NULL;
-}
-
-
-int
-calculate_iterations(ir_rvalue *from, ir_rvalue *to, ir_rvalue *increment,
- enum ir_expression_operation op)
-{
- if (from == NULL || to == NULL || increment == NULL)
- return -1;
-
- void *mem_ctx = ralloc_context(NULL);
-
- ir_expression *const sub =
- new(mem_ctx) ir_expression(ir_binop_sub, from->type, to, from);
-
- ir_expression *const div =
- new(mem_ctx) ir_expression(ir_binop_div, sub->type, sub, increment);
-
- ir_constant *iter = div->constant_expression_value();
- if (iter == NULL) {
- ralloc_free(mem_ctx);
- return -1;
- }
-
- if (!iter->type->is_integer()) {
- const ir_expression_operation op = iter->type->is_double()
- ? ir_unop_d2i : ir_unop_f2i;
- ir_rvalue *cast =
- new(mem_ctx) ir_expression(op, glsl_type::int_type, iter, NULL);
-
- iter = cast->constant_expression_value();
- }
-
- int iter_value = iter->get_int_component(0);
-
- /* Make sure that the calculated number of iterations satisfies the exit
- * condition. This is needed to catch off-by-one errors and some types of
- * ill-formed loops. For example, we need to detect that the following
- * loop does not have a maximum iteration count.
- *
- * for (float x = 0.0; x != 0.9; x += 0.2)
- * ;
- */
- const int bias[] = { -1, 0, 1 };
- bool valid_loop = false;
-
- for (unsigned i = 0; i < ARRAY_SIZE(bias); i++) {
- /* Increment may be of type int, uint or float. */
- switch (increment->type->base_type) {
- case GLSL_TYPE_INT:
- iter = new(mem_ctx) ir_constant(iter_value + bias[i]);
- break;
- case GLSL_TYPE_UINT:
- iter = new(mem_ctx) ir_constant(unsigned(iter_value + bias[i]));
- break;
- case GLSL_TYPE_FLOAT:
- iter = new(mem_ctx) ir_constant(float(iter_value + bias[i]));
- break;
- case GLSL_TYPE_DOUBLE:
- iter = new(mem_ctx) ir_constant(double(iter_value + bias[i]));
- break;
- default:
- unreachable("Unsupported type for loop iterator.");
- }
-
- ir_expression *const mul =
- new(mem_ctx) ir_expression(ir_binop_mul, increment->type, iter,
- increment);
-
- ir_expression *const add =
- new(mem_ctx) ir_expression(ir_binop_add, mul->type, mul, from);
-
- ir_expression *const cmp =
- new(mem_ctx) ir_expression(op, glsl_type::bool_type, add, to);
-
- ir_constant *const cmp_result = cmp->constant_expression_value();
-
- assert(cmp_result != NULL);
- if (cmp_result->get_bool_component(0)) {
- iter_value += bias[i];
- valid_loop = true;
- break;
- }
- }
-
- ralloc_free(mem_ctx);
- return (valid_loop) ? iter_value : -1;
-}
-
-namespace {
-
-class loop_control_visitor : public ir_hierarchical_visitor {
-public:
- loop_control_visitor(loop_state *state)
- {
- this->state = state;
- this->progress = false;
- }
-
- virtual ir_visitor_status visit_leave(ir_loop *ir);
-
- loop_state *state;
-
- bool progress;
-};
-
-} /* anonymous namespace */
-
-ir_visitor_status
-loop_control_visitor::visit_leave(ir_loop *ir)
-{
- loop_variable_state *const ls = this->state->get(ir);
-
- /* If we've entered a loop that hasn't been analyzed, something really,
- * really bad has happened.
- */
- if (ls == NULL) {
- assert(ls != NULL);
- return visit_continue;
- }
-
- if (ls->limiting_terminator != NULL) {
- /* If the limiting terminator has an iteration count of zero, then we've
- * proven that the loop cannot run, so delete it.
- */
- int iterations = ls->limiting_terminator->iterations;
- if (iterations == 0) {
- ir->remove();
- this->progress = true;
- return visit_continue;
- }
- }
-
- /* Remove the conditional break statements associated with all terminators
- * that are associated with a fixed iteration count, except for the one
- * associated with the limiting terminator--that one needs to stay, since
- * it terminates the loop. Exception: if the loop still has a normative
- * bound, then that terminates the loop, so we don't even need the limiting
- * terminator.
- */
- foreach_in_list(loop_terminator, t, &ls->terminators) {
- if (t->iterations < 0)
- continue;
-
- if (t != ls->limiting_terminator) {
- t->ir->remove();
-
- assert(ls->num_loop_jumps > 0);
- ls->num_loop_jumps--;
-
- this->progress = true;
- }
- }
-
- return visit_continue;
-}
-
-
-bool
-set_loop_controls(exec_list *instructions, loop_state *ls)
-{
- loop_control_visitor v(ls);
-
- v.run(instructions);
-
- return v.progress;
-}
diff -Nru mesa-17.2.4/src/compiler/glsl/loop_unroll.cpp mesa-17.3.3/src/compiler/glsl/loop_unroll.cpp
--- mesa-17.2.4/src/compiler/glsl/loop_unroll.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/loop_unroll.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -42,7 +42,9 @@
virtual ir_visitor_status visit_leave(ir_loop *ir);
void simple_unroll(ir_loop *ir, int iterations);
void complex_unroll(ir_loop *ir, int iterations,
- bool continue_from_then_branch);
+ bool continue_from_then_branch,
+ bool limiting_term_first,
+ bool lt_continue_from_then_branch);
void splice_post_if_instructions(ir_if *ir_if, exec_list *splice_dest);
loop_state *state;
@@ -53,13 +55,6 @@
} /* anonymous namespace */
-static bool
-is_break(ir_instruction *ir)
-{
- return ir != NULL && ir->ir_type == ir_type_loop_jump
- && ((ir_loop_jump *) ir)->is_break();
-}
-
class loop_unroll_count : public ir_hierarchical_visitor {
public:
int nodes;
@@ -106,7 +101,7 @@
if (options->EmitNoIndirectSampler) {
if ((ir->array->type->is_array() &&
ir->array->type->contains_sampler()) &&
- !ir->array_index->constant_expression_value()) {
+ !ir->array_index->constant_expression_value(ralloc_parent(ir))) {
unsupported_variable_indexing = true;
return visit_continue;
}
@@ -183,6 +178,51 @@
loop_unroll_visitor::simple_unroll(ir_loop *ir, int iterations)
{
void *const mem_ctx = ralloc_parent(ir);
+ loop_variable_state *const ls = this->state->get(ir);
+
+ ir_instruction *first_ir =
+ (ir_instruction *) ir->body_instructions.get_head();
+
+ if (!first_ir) {
+ /* The loop is empty; remove it and return. */
+ ir->remove();
+ return;
+ }
+
+ ir_if *limit_if = NULL;
+ bool exit_branch_has_instructions = false;
+ if (ls->limiting_terminator) {
+ limit_if = ls->limiting_terminator->ir;
+ ir_instruction *ir_if_last = (ir_instruction *)
+ limit_if->then_instructions.get_tail();
+
+ if (is_break(ir_if_last)) {
+ if (ir_if_last != limit_if->then_instructions.get_head())
+ exit_branch_has_instructions = true;
+
+ splice_post_if_instructions(limit_if, &limit_if->else_instructions);
+ ir_if_last->remove();
+ } else {
+ ir_if_last = (ir_instruction *)
+ limit_if->else_instructions.get_tail();
+ assert(is_break(ir_if_last));
+
+ if (ir_if_last != limit_if->else_instructions.get_head())
+ exit_branch_has_instructions = true;
+
+ splice_post_if_instructions(limit_if, &limit_if->then_instructions);
+ ir_if_last->remove();
+ }
+ }
+
+ /* Because 'iterations' is the number of times we pass over the *entire*
+ * loop body before hitting the first break, we need to bump the number of
+ * iterations if the limiting terminator is not the first instruction in
+ * the loop, or if the exit branch contains instructions. This ensures we
+ * execute any instructions before the terminator or in its exit branch.
+ */
+ if (limit_if != first_ir->as_if() || exit_branch_has_instructions)
+ iterations++;
for (int i = 0; i < iterations; i++) {
exec_list copy_list;
@@ -234,11 +274,22 @@
*/
void
loop_unroll_visitor::complex_unroll(ir_loop *ir, int iterations,
- bool continue_from_then_branch)
+ bool second_term_then_continue,
+ bool extra_iteration_required,
+ bool first_term_then_continue)
{
void *const mem_ctx = ralloc_parent(ir);
ir_instruction *ir_to_replace = ir;
+ /* Because 'iterations' is the number of times we pass over the *entire*
+ * loop body before hitting the first break, we need to bump the number of
+ * iterations if the limiting terminator is not the first instruction in
+ * the loop, or if the exit branch contains instructions. This ensures we
+ * execute any instructions before the terminator or in its exit branch.
+ */
+ if (extra_iteration_required)
+ iterations++;
+
for (int i = 0; i < iterations; i++) {
exec_list copy_list;
@@ -248,6 +299,10 @@
ir_if *ir_if = ((ir_instruction *) copy_list.get_tail())->as_if();
assert(ir_if != NULL);
+ exec_list *const first_list = first_term_then_continue
+ ? &ir_if->then_instructions : &ir_if->else_instructions;
+ ir_if = ((ir_instruction *) first_list->get_tail())->as_if();
+
ir_to_replace->insert_before(©_list);
ir_to_replace->remove();
@@ -255,10 +310,10 @@
ir_to_replace =
new(mem_ctx) ir_loop_jump(ir_loop_jump::jump_continue);
- exec_list *const list = (continue_from_then_branch)
+ exec_list *const second_term_continue_list = second_term_then_continue
? &ir_if->then_instructions : &ir_if->else_instructions;
- list->push_tail(ir_to_replace);
+ second_term_continue_list->push_tail(ir_to_replace);
}
ir_to_replace->remove();
@@ -300,12 +355,26 @@
}
}
+static bool
+exit_branch_has_instructions(ir_if *term_if, bool lt_then_continue)
+{
+ if (lt_then_continue) {
+ if (term_if->else_instructions.get_head() ==
+ term_if->else_instructions.get_tail())
+ return false;
+ } else {
+ if (term_if->then_instructions.get_head() ==
+ term_if->then_instructions.get_tail())
+ return false;
+ }
+
+ return true;
+}
ir_visitor_status
loop_unroll_visitor::visit_leave(ir_loop *ir)
{
loop_variable_state *const ls = this->state->get(ir);
- int iterations;
/* If we've entered a loop that hasn't been analyzed, something really,
* really bad has happened.
@@ -315,6 +384,58 @@
return visit_continue;
}
+ if (ls->limiting_terminator != NULL) {
+ /* If the limiting terminator has an iteration count of zero, then we've
+ * proven that the loop cannot run, so delete it.
+ */
+ int iterations = ls->limiting_terminator->iterations;
+ if (iterations == 0) {
+ ir->remove();
+ this->progress = true;
+ return visit_continue;
+ }
+ }
+
+ /* Remove the conditional break statements associated with all terminators
+ * that are associated with a fixed iteration count, except for the one
+ * associated with the limiting terminator--that one needs to stay, since
+ * it terminates the loop. Exception: if the loop still has a normative
+ * bound, then that terminates the loop, so we don't even need the limiting
+ * terminator.
+ */
+ foreach_in_list_safe(loop_terminator, t, &ls->terminators) {
+ if (t->iterations < 0)
+ continue;
+
+ exec_list *branch_instructions;
+ if (t != ls->limiting_terminator) {
+ ir_instruction *ir_if_last = (ir_instruction *)
+ t->ir->then_instructions.get_tail();
+ if (is_break(ir_if_last)) {
+ branch_instructions = &t->ir->else_instructions;
+ } else {
+ branch_instructions = &t->ir->then_instructions;
+ assert(is_break((ir_instruction *)
+ t->ir->else_instructions.get_tail()));
+ }
+
+ exec_list copy_list;
+ copy_list.make_empty();
+ clone_ir_list(ir, ©_list, branch_instructions);
+
+ t->ir->insert_before(©_list);
+ t->ir->remove();
+
+ assert(ls->num_loop_jumps > 0);
+ ls->num_loop_jumps--;
+
+ /* Also remove it from the terminator list */
+ t->remove();
+
+ this->progress = true;
+ }
+ }
+
if (ls->limiting_terminator == NULL) {
ir_instruction *last_ir =
(ir_instruction *) ir->body_instructions.get_tail();
@@ -343,7 +464,7 @@
return visit_continue;
}
- iterations = ls->limiting_terminator->iterations;
+ int iterations = ls->limiting_terminator->iterations;
const int max_iterations = options->MaxUnrollIterations;
@@ -373,7 +494,6 @@
return visit_continue;
if (predicted_num_loop_jumps == 0) {
- ls->limiting_terminator->ir->remove();
simple_unroll(ir, iterations);
return visit_continue;
}
@@ -388,51 +508,71 @@
*/
last_ir->remove();
- ls->limiting_terminator->ir->remove();
simple_unroll(ir, 1);
return visit_continue;
}
- /* recognize loops in the form produced by ir_lower_jumps */
- foreach_in_list(ir_instruction, cur_ir, &ir->body_instructions) {
- /* Skip the limiting terminator, since it will go away when we
- * unroll.
- */
- if (cur_ir == ls->limiting_terminator->ir)
- continue;
+ /* Complex unrolling can only handle two terminators. One with an unknown
+ * iteration count and one with a known iteration count. We have already
+ * made sure we have a known iteration count above and removed any
+ * unreachable terminators with a known count. Here we make sure there
+ * aren't any additional unknown terminators, or any other jumps nested
+ * inside further ifs.
+ */
+ if (ls->num_loop_jumps != 2)
+ return visit_continue;
- ir_if *ir_if = cur_ir->as_if();
- if (ir_if != NULL) {
- /* Determine which if-statement branch, if any, ends with a
- * break. The branch that did *not* have the break will get a
- * temporary continue inserted in each iteration of the loop
- * unroll.
- *
- * Note that since ls->num_loop_jumps is <= 1, it is impossible
- * for both branches to end with a break.
- */
- ir_instruction *ir_if_last =
- (ir_instruction *) ir_if->then_instructions.get_tail();
+ ir_instruction *first_ir =
+ (ir_instruction *) ir->body_instructions.get_head();
- if (is_break(ir_if_last)) {
- ls->limiting_terminator->ir->remove();
- splice_post_if_instructions(ir_if, &ir_if->else_instructions);
- ir_if_last->remove();
- complex_unroll(ir, iterations, false);
+ unsigned term_count = 0;
+ bool first_term_then_continue = false;
+ foreach_in_list(loop_terminator, t, &ls->terminators) {
+ assert(term_count < 2);
+
+ ir_if *ir_if = t->ir->as_if();
+ assert(ir_if != NULL);
+
+ ir_instruction *ir_if_last =
+ (ir_instruction *) ir_if->then_instructions.get_tail();
+
+ if (is_break(ir_if_last)) {
+ splice_post_if_instructions(ir_if, &ir_if->else_instructions);
+ ir_if_last->remove();
+ if (term_count == 1) {
+ bool ebi =
+ exit_branch_has_instructions(ls->limiting_terminator->ir,
+ first_term_then_continue);
+ complex_unroll(ir, iterations, false,
+ first_ir->as_if() != ls->limiting_terminator->ir ||
+ ebi,
+ first_term_then_continue);
return visit_continue;
- } else {
- ir_if_last =
- (ir_instruction *) ir_if->else_instructions.get_tail();
+ }
+ } else {
+ ir_if_last =
+ (ir_instruction *) ir_if->else_instructions.get_tail();
- if (is_break(ir_if_last)) {
- ls->limiting_terminator->ir->remove();
- splice_post_if_instructions(ir_if, &ir_if->then_instructions);
- ir_if_last->remove();
- complex_unroll(ir, iterations, true);
+ assert(is_break(ir_if_last));
+ if (is_break(ir_if_last)) {
+ splice_post_if_instructions(ir_if, &ir_if->then_instructions);
+ ir_if_last->remove();
+ if (term_count == 1) {
+ bool ebi =
+ exit_branch_has_instructions(ls->limiting_terminator->ir,
+ first_term_then_continue);
+ complex_unroll(ir, iterations, true,
+ first_ir->as_if() != ls->limiting_terminator->ir ||
+ ebi,
+ first_term_then_continue);
return visit_continue;
+ } else {
+ first_term_then_continue = true;
}
}
}
+
+ term_count++;
}
/* Did not find the break statement. It must be in a complex if-nesting,
diff -Nru mesa-17.2.4/src/compiler/glsl/lower_buffer_access.cpp mesa-17.3.3/src/compiler/glsl/lower_buffer_access.cpp
--- mesa-17.2.4/src/compiler/glsl/lower_buffer_access.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/lower_buffer_access.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -72,16 +72,22 @@
new(mem_ctx) ir_dereference_record(deref->clone(mem_ctx, NULL),
field->name);
- field_offset =
- glsl_align(field_offset,
- field->type->std140_base_alignment(row_major));
+ unsigned field_align;
+ if (packing == GLSL_INTERFACE_PACKING_STD430)
+ field_align = field->type->std430_base_alignment(row_major);
+ else
+ field_align = field->type->std140_base_alignment(row_major);
+ field_offset = glsl_align(field_offset, field_align);
emit_access(mem_ctx, is_write, field_deref, base_offset,
deref_offset + field_offset,
row_major, 1, packing,
writemask_for_size(field_deref->type->vector_elements));
- field_offset += field->type->std140_size(row_major);
+ if (packing == GLSL_INTERFACE_PACKING_STD430)
+ field_offset += field->type->std430_size(row_major);
+ else
+ field_offset += field->type->std140_size(row_major);
}
return;
}
@@ -252,7 +258,7 @@
ir = record_deref->record;
- const int idx = ir->type->field_index(record_deref->field);
+ const int idx = record_deref->field_idx;
assert(idx >= 0);
const enum glsl_matrix_layout matrix_layout =
@@ -404,7 +410,7 @@
array_index = i2u(array_index);
ir_constant *const_index =
- array_index->constant_expression_value(NULL);
+ array_index->constant_expression_value(mem_ctx, NULL);
if (const_index) {
*const_offset += array_stride * const_index->value.u[0];
} else {
@@ -445,8 +451,8 @@
intra_struct_offset = glsl_align(intra_struct_offset, field_align);
- if (strcmp(struct_type->fields.structure[i].name,
- deref_record->field) == 0) {
+ assert(deref_record->field_idx >= 0);
+ if (i == (unsigned) deref_record->field_idx) {
if (struct_field)
*struct_field = &struct_type->fields.structure[i];
break;
diff -Nru mesa-17.2.4/src/compiler/glsl/lower_buffer_access.h mesa-17.3.3/src/compiler/glsl/lower_buffer_access.h
--- mesa-17.2.4/src/compiler/glsl/lower_buffer_access.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/lower_buffer_access.h 2018-01-18 21:30:28.000000000 +0000
@@ -58,6 +58,9 @@
bool *row_major, int *matrix_columns,
const glsl_struct_field **struct_field,
enum glsl_interface_packing packing);
+
+protected:
+ bool use_std430_as_default;
};
} /* namespace lower_buffer_access */
diff -Nru mesa-17.2.4/src/compiler/glsl/lower_cs_derived.cpp mesa-17.3.3/src/compiler/glsl/lower_cs_derived.cpp
--- mesa-17.2.4/src/compiler/glsl/lower_cs_derived.cpp 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/lower_cs_derived.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,234 @@
+/*
+ * Copyright © 2017 Ilia Mirkin
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file lower_cs_derived.cpp
+ *
+ * For hardware that does not support the gl_GlobalInvocationID and
+ * gl_LocalInvocationIndex system values, replace them with fresh
+ * globals. Note that we can't rely on gl_WorkGroupSize or
+ * gl_LocalGroupSizeARB being available, since they may only have been defined
+ * in a non-main shader.
+ *
+ * [ This can happen if only a secondary shader has the layout(local_size_*)
+ * declaration. ]
+ *
+ * This is meant to be run post-linking.
+ */
+
+#include "glsl_symbol_table.h"
+#include "ir_hierarchical_visitor.h"
+#include "ir.h"
+#include "ir_builder.h"
+#include "linker.h"
+#include "program/prog_statevars.h"
+#include "builtin_functions.h"
+
+using namespace ir_builder;
+
+namespace {
+
+/* Hierarchical visitor that lowers derived compute-shader system values. */
+class lower_cs_derived_visitor : public ir_hierarchical_visitor {
+public:
+   explicit lower_cs_derived_visitor(gl_linked_shader *shader)
+      : progress(false),
+        shader(shader),
+        local_size_variable(shader->Program->info.cs.local_size_variable),
+        main_sig(_mesa_get_main_function_signature(shader->symbols)),
+        gl_WorkGroupSize(NULL),
+        gl_WorkGroupID(NULL),
+        gl_LocalInvocationID(NULL),
+        gl_GlobalInvocationID(NULL),
+        gl_LocalInvocationIndex(NULL)
+   {
+      assert(main_sig);
+   }
+   /* Redirects derefs of the two lowered system values to temporaries. */
+   virtual ir_visitor_status visit(ir_dereference_variable *);
+
+   ir_variable *add_system_value(
+      int slot, const glsl_type *type, const char *name);
+   void find_sysvals();
+   void make_gl_GlobalInvocationID();
+   void make_gl_LocalInvocationIndex();
+   bool progress; /* true once any dereference has been rewritten */
+
+private:
+   gl_linked_shader *shader;
+   bool local_size_variable;
+   ir_function_signature *main_sig;
+   /* Inputs of the lowering: a deref or constant, plus two system values. */
+   ir_rvalue *gl_WorkGroupSize;
+   ir_variable *gl_WorkGroupID;
+   ir_variable *gl_LocalInvocationID;
+   /* Temporaries standing in for the lowered system values. */
+   ir_variable *gl_GlobalInvocationID;
+   ir_variable *gl_LocalInvocationIndex;
+};
+
+} /* anonymous namespace */
+
+/* Declare an implicit, read-only system value at the head of the shader IR. */
+ir_variable *
+lower_cs_derived_visitor::add_system_value(
+   int slot, const glsl_type *type, const char *name)
+{
+   ir_variable *sv = new(shader) ir_variable(type, name, ir_var_system_value);
+   sv->data.location = slot; /* callers pass a SYSTEM_VALUE_* slot here */
+   sv->data.explicit_location = true;
+   sv->data.explicit_index = 0;
+   sv->data.read_only = true;
+   sv->data.how_declared = ir_var_declared_implicitly;
+   shader->ir->push_head(sv);
+   return sv;
+}
+
+/* Find the work-group size/ID and local invocation ID inputs, re-creating
+ * any that are absent.  Does nothing once gl_WorkGroupSize has been set up.
+ */
+void
+lower_cs_derived_visitor::find_sysvals()
+{
+   if (gl_WorkGroupSize)
+      return;
+
+   const char *const size_name =
+      local_size_variable ? "gl_LocalGroupSizeARB" : "gl_WorkGroupSize";
+   ir_variable *const size_var = shader->symbols->get_variable(size_name);
+   if (size_var != NULL)
+      gl_WorkGroupSize = new(shader) ir_dereference_variable(size_var);
+
+   /* The variables may be missing because of dead code elimination or, for
+    * the group size, because the layout was declared in a non-main shader.
+    */
+   gl_WorkGroupID = shader->symbols->get_variable("gl_WorkGroupID");
+   if (gl_WorkGroupID == NULL)
+      gl_WorkGroupID = add_system_value(
+         SYSTEM_VALUE_WORK_GROUP_ID, glsl_type::uvec3_type, "gl_WorkGroupID");
+   gl_LocalInvocationID =
+      shader->symbols->get_variable("gl_LocalInvocationID");
+   if (gl_LocalInvocationID == NULL)
+      gl_LocalInvocationID = add_system_value(
+         SYSTEM_VALUE_LOCAL_INVOCATION_ID, glsl_type::uvec3_type,
+         "gl_LocalInvocationID");
+   if (size_var != NULL)
+      return;
+
+   if (local_size_variable) {
+      ir_variable *const created = add_system_value(
+         SYSTEM_VALUE_LOCAL_GROUP_SIZE, glsl_type::uvec3_type,
+         "gl_LocalGroupSizeARB");
+      gl_WorkGroupSize = new(shader) ir_dereference_variable(created);
+   } else {
+      ir_constant_data data;
+      memset(&data, 0, sizeof(data));
+      for (int i = 0; i < 3; i++)
+         data.u[i] = shader->Program->info.cs.local_size[i];
+      gl_WorkGroupSize = new(shader) ir_constant(glsl_type::uvec3_type, &data);
+   }
+}
+
+/* Create the __GlobalInvocationID temporary (at most once) and make main()
+ * start by computing it from the work-group and local invocation IDs.
+ */
+void
+lower_cs_derived_visitor::make_gl_GlobalInvocationID()
+{
+   if (gl_GlobalInvocationID)
+      return;
+   find_sysvals();
+
+   ir_variable *const global_id = new(shader) ir_variable(
+      glsl_type::uvec3_type, "__GlobalInvocationID", ir_var_temporary);
+   shader->ir->push_head(global_id);
+   gl_GlobalInvocationID = global_id;
+
+   /* global = gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID */
+   ir_expression *const group_base =
+      mul(gl_WorkGroupID, gl_WorkGroupSize->clone(shader, NULL));
+   main_sig->body.push_head(
+      assign(global_id, add(group_base, gl_LocalInvocationID)));
+}
+
+/* Create the __LocalInvocationIndex temporary (at most once) and make main()
+ * start by computing it: id.z * size.x * size.y + id.y * size.x + id.x.
+ */
+void
+lower_cs_derived_visitor::make_gl_LocalInvocationIndex()
+{
+   if (gl_LocalInvocationIndex)
+      return;
+   find_sysvals();
+
+   ir_variable *const local_index = new(shader) ir_variable(
+      glsl_type::uint_type, "__LocalInvocationIndex", ir_var_temporary);
+   shader->ir->push_head(local_index);
+   gl_LocalInvocationIndex = local_index;
+
+   /* The group size rvalue is cloned afresh for every use below. */
+   ir_expression *const z_term =
+      mul(mul(swizzle_z(gl_LocalInvocationID),
+              swizzle_x(gl_WorkGroupSize->clone(shader, NULL))),
+          swizzle_y(gl_WorkGroupSize->clone(shader, NULL)));
+   ir_expression *const y_term =
+      mul(swizzle_y(gl_LocalInvocationID),
+          swizzle_x(gl_WorkGroupSize->clone(shader, NULL)));
+   ir_expression *const yz_sum = add(y_term, z_term);
+
+   /* Summed as (y + z) + x; the assignment goes to the head of main(). */
+   main_sig->body.push_head(
+      assign(local_index, add(yz_sum, swizzle_x(gl_LocalInvocationID))));
+}
+
+/* Redirect dereferences of gl_GlobalInvocationID and gl_LocalInvocationIndex
+ * to the temporaries that replace them, creating those on first use. */
+ir_visitor_status
+lower_cs_derived_visitor::visit(ir_dereference_variable *ir)
+{
+   if (ir->var->data.mode != ir_var_system_value)
+      return visit_continue;
+
+   if (ir->var->data.location == SYSTEM_VALUE_GLOBAL_INVOCATION_ID) {
+      make_gl_GlobalInvocationID();
+      ir->var = gl_GlobalInvocationID;
+      progress = true;
+   } else if (ir->var->data.location == SYSTEM_VALUE_LOCAL_INVOCATION_INDEX) {
+      make_gl_LocalInvocationIndex();
+      ir->var = gl_LocalInvocationIndex;
+      progress = true;
+   }
+   return visit_continue;
+}
+
+/* Entry point: run the lowering on a linked compute shader and report
+ * whether anything changed.  Other stages are left alone. */
+bool
+lower_cs_derived(gl_linked_shader *shader)
+{
+   if (shader->Stage != MESA_SHADER_COMPUTE)
+      return false;
+   lower_cs_derived_visitor lowering(shader);
+   lowering.run(shader->ir);
+   return lowering.progress;
+}
diff -Nru mesa-17.2.4/src/compiler/glsl/lower_discard.cpp mesa-17.3.3/src/compiler/glsl/lower_discard.cpp
--- mesa-17.2.4/src/compiler/glsl/lower_discard.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/lower_discard.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -158,7 +158,7 @@
ir_assignment *assignment =
new(mem_ctx) ir_assignment(new(mem_ctx) ir_dereference_variable(var),
- condition, NULL);
+ condition);
ir->replace_with(assignment);
}
@@ -180,7 +180,7 @@
ir_var_temporary);
ir_assignment *temp_initializer =
new(mem_ctx) ir_assignment(new(mem_ctx) ir_dereference_variable(temp),
- new(mem_ctx) ir_constant(false), NULL);
+ new(mem_ctx) ir_constant(false));
ir->insert_before(temp);
ir->insert_before(temp_initializer);
diff -Nru mesa-17.2.4/src/compiler/glsl/lower_distance.cpp mesa-17.3.3/src/compiler/glsl/lower_distance.cpp
--- mesa-17.2.4/src/compiler/glsl/lower_distance.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/lower_distance.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -235,7 +235,8 @@
old_index = new(ctx) ir_expression(ir_unop_u2i, old_index);
}
- ir_constant *old_index_constant = old_index->constant_expression_value();
+ ir_constant *old_index_constant =
+ old_index->constant_expression_value(ctx);
if (old_index_constant) {
/* gl_ClipDistance is being accessed via a constant index. Don't bother
* creating expressions to calculate the lowered indices. Just create
@@ -648,7 +649,7 @@
}
void
-lower_distance_visitor_counter::handle_rvalue(ir_rvalue **rv)
+lower_distance_visitor_counter::handle_rvalue(ir_rvalue **)
{
return;
}
diff -Nru mesa-17.2.4/src/compiler/glsl/lower_if_to_cond_assign.cpp mesa-17.3.3/src/compiler/glsl/lower_if_to_cond_assign.cpp
--- mesa-17.2.4/src/compiler/glsl/lower_if_to_cond_assign.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/lower_if_to_cond_assign.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -115,7 +115,7 @@
return v.progress;
}
-void
+static void
check_ir_node(ir_instruction *ir, void *data)
{
ir_if_to_cond_assign_visitor *v = (ir_if_to_cond_assign_visitor *)data;
@@ -168,45 +168,45 @@
}
}
-void
+static void
move_block_to_cond_assign(void *mem_ctx,
- ir_if *if_ir, ir_rvalue *cond_expr,
- exec_list *instructions,
- struct set *set)
+ ir_if *if_ir, ir_rvalue *cond_expr,
+ exec_list *instructions,
+ struct set *set)
{
foreach_in_list_safe(ir_instruction, ir, instructions) {
if (ir->ir_type == ir_type_assignment) {
- ir_assignment *assign = (ir_assignment *)ir;
+ ir_assignment *assign = (ir_assignment *)ir;
- if (_mesa_set_search(set, assign) == NULL) {
- _mesa_set_add(set, assign);
+ if (_mesa_set_search(set, assign) == NULL) {
+ _mesa_set_add(set, assign);
- /* If the LHS of the assignment is a condition variable that was
- * previously added, insert an additional assignment of false to
- * the variable.
- */
- const bool assign_to_cv =
- _mesa_set_search(
- set, assign->lhs->variable_referenced()) != NULL;
-
- if (!assign->condition) {
- if (assign_to_cv) {
- assign->rhs =
- new(mem_ctx) ir_expression(ir_binop_logic_and,
- glsl_type::bool_type,
- cond_expr->clone(mem_ctx, NULL),
- assign->rhs);
- } else {
- assign->condition = cond_expr->clone(mem_ctx, NULL);
- }
- } else {
- assign->condition =
- new(mem_ctx) ir_expression(ir_binop_logic_and,
- glsl_type::bool_type,
- cond_expr->clone(mem_ctx, NULL),
- assign->condition);
- }
- }
+ /* If the LHS of the assignment is a condition variable that was
+ * previously added, insert an additional assignment of false to
+ * the variable.
+ */
+ const bool assign_to_cv =
+ _mesa_set_search(
+ set, assign->lhs->variable_referenced()) != NULL;
+
+ if (!assign->condition) {
+ if (assign_to_cv) {
+ assign->rhs =
+ new(mem_ctx) ir_expression(ir_binop_logic_and,
+ glsl_type::bool_type,
+ cond_expr->clone(mem_ctx, NULL),
+ assign->rhs);
+ } else {
+ assign->condition = cond_expr->clone(mem_ctx, NULL);
+ }
+ } else {
+ assign->condition =
+ new(mem_ctx) ir_expression(ir_binop_logic_and,
+ glsl_type::bool_type,
+ cond_expr->clone(mem_ctx, NULL),
+ assign->condition);
+ }
+ }
}
/* Now, move from the if block to the block surrounding it. */
@@ -216,9 +216,8 @@
}
ir_visitor_status
-ir_if_to_cond_assign_visitor::visit_enter(ir_if *ir)
+ir_if_to_cond_assign_visitor::visit_enter(ir_if *)
{
- (void) ir;
this->depth++;
return visit_continue;
@@ -277,8 +276,8 @@
*/
ir_variable *const then_var =
new(mem_ctx) ir_variable(glsl_type::bool_type,
- "if_to_cond_assign_then",
- ir_var_temporary);
+ "if_to_cond_assign_then",
+ ir_var_temporary);
ir->insert_before(then_var);
ir_dereference_variable *then_cond =
@@ -288,8 +287,8 @@
ir->insert_before(assign);
move_block_to_cond_assign(mem_ctx, ir, then_cond,
- &ir->then_instructions,
- this->condition_variables);
+ &ir->then_instructions,
+ this->condition_variables);
/* Add the new condition variable to the hash table. This allows us to
* find this variable when lowering other (enclosing) if-statements.
@@ -303,24 +302,24 @@
*/
if (!ir->else_instructions.is_empty()) {
ir_variable *const else_var =
- new(mem_ctx) ir_variable(glsl_type::bool_type,
- "if_to_cond_assign_else",
- ir_var_temporary);
+ new(mem_ctx) ir_variable(glsl_type::bool_type,
+ "if_to_cond_assign_else",
+ ir_var_temporary);
ir->insert_before(else_var);
ir_dereference_variable *else_cond =
- new(mem_ctx) ir_dereference_variable(else_var);
+ new(mem_ctx) ir_dereference_variable(else_var);
ir_rvalue *inverse =
- new(mem_ctx) ir_expression(ir_unop_logic_not,
- then_cond->clone(mem_ctx, NULL));
+ new(mem_ctx) ir_expression(ir_unop_logic_not,
+ then_cond->clone(mem_ctx, NULL));
assign = new(mem_ctx) ir_assignment(else_cond, inverse);
ir->insert_before(assign);
move_block_to_cond_assign(mem_ctx, ir, else_cond,
- &ir->else_instructions,
- this->condition_variables);
+ &ir->else_instructions,
+ this->condition_variables);
/* Add the new condition variable to the hash table. This allows us to
* find this variable when lowering other (enclosing) if-statements.
diff -Nru mesa-17.2.4/src/compiler/glsl/lower_instructions.cpp mesa-17.3.3/src/compiler/glsl/lower_instructions.cpp
--- mesa-17.2.4/src/compiler/glsl/lower_instructions.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/lower_instructions.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -193,6 +193,7 @@
lower_instructions_visitor::sub_to_add_neg(ir_expression *ir)
{
ir->operation = ir_binop_add;
+ ir->init_num_operands();
ir->operands[1] = new(ir) ir_expression(ir_unop_neg, ir->operands[1]->type,
ir->operands[1], NULL);
this->progress = true;
@@ -211,6 +212,7 @@
/* op0 / op1 -> op0 * (1.0 / op1) */
ir->operation = ir_binop_mul;
+ ir->init_num_operands();
ir->operands[1] = expr;
this->progress = true;
@@ -261,6 +263,7 @@
ir->operation = ir_unop_i2u;
ir->operands[0] = new(ir) ir_expression(ir_unop_f2i, op0);
}
+ ir->init_num_operands();
ir->operands[1] = NULL;
this->progress = true;
@@ -272,6 +275,7 @@
ir_constant *log2_e = new(ir) ir_constant(float(M_LOG2E));
ir->operation = ir_unop_exp2;
+ ir->init_num_operands();
ir->operands[0] = new(ir) ir_expression(ir_binop_mul, ir->operands[0]->type,
ir->operands[0], log2_e);
this->progress = true;
@@ -285,6 +289,7 @@
ir->operands[0]);
ir->operation = ir_unop_exp2;
+ ir->init_num_operands();
ir->operands[0] = new(ir) ir_expression(ir_binop_mul, ir->operands[1]->type,
ir->operands[1], log2_x);
ir->operands[1] = NULL;
@@ -295,6 +300,7 @@
lower_instructions_visitor::log_to_log2(ir_expression *ir)
{
ir->operation = ir_binop_mul;
+ ir->init_num_operands();
ir->operands[0] = new(ir) ir_expression(ir_unop_log2, ir->operands[0]->type,
ir->operands[0], NULL);
ir->operands[1] = new(ir) ir_constant(float(1.0 / M_LOG2E));
@@ -313,10 +319,10 @@
ir_assignment *const assign_x =
new(ir) ir_assignment(new(ir) ir_dereference_variable(x),
- ir->operands[0], NULL);
+ ir->operands[0]);
ir_assignment *const assign_y =
new(ir) ir_assignment(new(ir) ir_dereference_variable(y),
- ir->operands[1], NULL);
+ ir->operands[1]);
this->base_ir->insert_before(assign_x);
this->base_ir->insert_before(assign_y);
@@ -345,6 +351,7 @@
floor_expr);
ir->operation = ir_binop_sub;
+ ir->init_num_operands();
ir->operands[0] = new(ir) ir_dereference_variable(x);
ir->operands[1] = mul_expr;
this->progress = true;
@@ -515,6 +522,7 @@
}
ir->operation = ir_triop_csel;
+ ir->init_num_operands();
ir->operands[0] = gequal(extracted_biased_exp,
new(ir) ir_constant(255, vec_elem));
ir->operands[1] = new(ir) ir_dereference_variable(x);
@@ -640,6 +648,7 @@
}
ir->operation = ir_quadop_vector;
+ ir->init_num_operands();
ir->operands[0] = results[0];
ir->operands[1] = results[1];
ir->operands[2] = results[2];
@@ -716,6 +725,7 @@
/* Put the dvec back together */
ir->operation = ir_quadop_vector;
+ ir->init_num_operands();
ir->operands[0] = results[0];
ir->operands[1] = results[1];
ir->operands[2] = results[2];
@@ -769,6 +779,7 @@
/* For non-zero inputs, shift the exponent down and apply bias. */
ir->operation = ir_triop_csel;
+ ir->init_num_operands();
ir->operands[0] = new(ir) ir_dereference_variable(is_not_zero);
ir->operands[1] = add(exponent_bias, u2i(rshift(high_words, exponent_shift)));
ir->operands[2] = izero;
@@ -789,6 +800,7 @@
ir_rvalue *x_clone = ir->operands[0]->clone(ir, NULL);
ir->operation = ir_unop_i2u;
+ ir->init_num_operands();
ir->operands[0] = b2i(less(add(ir->operands[0], ir->operands[1]), x_clone));
ir->operands[1] = NULL;
@@ -806,6 +818,7 @@
*/
ir->operation = ir_unop_i2u;
+ ir->init_num_operands();
ir->operands[0] = b2i(less(ir->operands[0], ir->operands[1]));
ir->operands[1] = NULL;
@@ -822,6 +835,7 @@
*/
ir->operation = ir_binop_min;
+ ir->init_num_operands();
ir->operands[0] = new(ir) ir_expression(ir_binop_max, ir->operands[0]->type,
ir->operands[0],
new(ir) ir_constant(0.0f));
@@ -852,6 +866,7 @@
}
ir->operation = ir_triop_fma;
+ ir->init_num_operands();
ir->operands[0] = swizzle(ir->operands[0], 0, 1);
ir->operands[1] = swizzle(ir->operands[1], 0, 1);
ir->operands[2] = new(ir) ir_dereference_variable(temp);
@@ -878,6 +893,7 @@
}
ir->operation = ir_triop_fma;
+ ir->init_num_operands();
ir->operands[0] = swizzle(op2, swizval, op0->type->vector_elements);
ir->operands[2] = mul(sub(one, op2->clone(ir, NULL)), op0);
@@ -902,6 +918,7 @@
i.insert_before(assign(frtemp, fract(ir->operands[0])));
ir->operation = ir_binop_add;
+ ir->init_num_operands();
ir->operands[0] = sub(ir->operands[0]->clone(ir, NULL), frtemp);
ir->operands[1] = csel(nequal(frtemp, zero), one, zero->clone(ir, NULL));
@@ -916,6 +933,7 @@
* result = sub(x, frtemp);
*/
ir->operation = ir_binop_sub;
+ ir->init_num_operands();
ir->operands[1] = fract(ir->operands[0]->clone(ir, NULL));
this->progress = true;
@@ -955,6 +973,7 @@
i.insert_before(assign(t2, sub(temp, frtemp)));
ir->operation = ir_triop_csel;
+ ir->init_num_operands();
ir->operands[0] = equal(fract(ir->operands[0]->clone(ir, NULL)),
p5->clone(ir, NULL));
ir->operands[1] = csel(equal(fract(mul(t2, p5->clone(ir, NULL))),
@@ -990,6 +1009,7 @@
i.insert_before(assign(temp, sub(arg->clone(ir, NULL), frtemp)));
ir->operation = ir_triop_csel;
+ ir->init_num_operands();
ir->operands[0] = gequal(arg->clone(ir, NULL), zero);
ir->operands[1] = new (ir) ir_dereference_variable(temp);
ir->operands[2] = add(temp,
@@ -1013,6 +1033,7 @@
ir_constant *neg_one = new(ir) ir_constant(-1.0, arg->type->vector_elements);
ir->operation = ir_triop_csel;
+ ir->init_num_operands();
ir->operands[0] = less(arg->clone(ir, NULL),
zero->clone(ir, NULL));
ir->operands[1] = neg_one;
@@ -1062,6 +1083,7 @@
/* int(((temp + (temp >> 4) & 0xF0F0F0Fu) * 0x1010101u) >> 24); */
ir->operation = ir_unop_u2i;
+ ir->init_num_operands();
ir->operands[0] = rshift(mul(bit_and(add(temp, rshift(temp, c4)), c0F0F0F0F),
c01010101),
c24);
@@ -1105,6 +1127,7 @@
* (value >> offset) & mask;
*/
ir->operation = ir_binop_bit_and;
+ ir->init_num_operands();
ir->operands[0] = rshift(ir->operands[0], ir->operands[1]);
ir->operands[1] = mask;
ir->operands[2] = NULL;
@@ -1135,6 +1158,7 @@
* (bits == 0) ? 0 : e;
*/
ir->operation = ir_triop_csel;
+ ir->init_num_operands();
ir->operands[0] = equal(c0, bits);
ir->operands[1] = c0->clone(ir, NULL);
ir->operands[2] = expr;
@@ -1201,6 +1225,7 @@
/* (base & ~mask) | ((insert << offset) & mask) */
ir->operation = ir_binop_bit_or;
+ ir->init_num_operands();
ir->operands[0] = bit_and(ir->operands[0], bit_not(mask));
ir->operands[1] = bit_and(lshift(ir->operands[1], offset), mask);
ir->operands[2] = NULL;
@@ -1284,10 +1309,12 @@
if (ir->operands[0]->type->base_type == GLSL_TYPE_UINT) {
ir->operation = ir_binop_bit_or;
+ ir->init_num_operands();
ir->operands[0] = rshift(temp, c16);
ir->operands[1] = lshift(temp, c16->clone(ir, NULL));
} else {
ir->operation = ir_unop_u2i;
+ ir->init_num_operands();
ir->operands[0] = bit_or(rshift(temp, c16),
lshift(temp, c16->clone(ir, NULL)));
}
@@ -1368,6 +1395,7 @@
* small.
*/
ir->operation = ir_triop_csel;
+ ir->init_num_operands();
ir->operands[0] = equal(lsb_only, c0);
ir->operands[1] = cminus1;
ir->operands[2] = new(ir) ir_dereference_variable(lsb);
@@ -1468,6 +1496,7 @@
* be negative. It will only be negative (-0x7f, in fact) if temp is 0.
*/
ir->operation = ir_triop_csel;
+ ir->init_num_operands();
ir->operands[0] = less(msb, c0);
ir->operands[1] = cminus1;
ir->operands[2] = new(ir) ir_dereference_variable(msb);
@@ -1600,6 +1629,7 @@
assert(ir->operands[0]->type->base_type == GLSL_TYPE_UINT);
ir->operation = ir_binop_add;
+ ir->init_num_operands();
ir->operands[0] = add(hi, rshift(t1, c16->clone(ir, NULL)));
ir->operands[1] = rshift(t2, c16->clone(ir, NULL));
} else {
@@ -1622,6 +1652,7 @@
u2i(_carry(bit_not(lo), c1)))));
ir->operation = ir_triop_csel;
+ ir->init_num_operands();
ir->operands[0] = new(ir) ir_dereference_variable(different_signs);
ir->operands[1] = new(ir) ir_dereference_variable(neg_hi);
ir->operands[2] = u2i(hi);
diff -Nru mesa-17.2.4/src/compiler/glsl/lower_int64.cpp mesa-17.3.3/src/compiler/glsl/lower_int64.cpp
--- mesa-17.2.4/src/compiler/glsl/lower_int64.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/lower_int64.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -69,7 +69,7 @@
class lower_64bit_visitor : public ir_rvalue_visitor {
public:
lower_64bit_visitor(void *mem_ctx, exec_list *instructions, unsigned lower)
- : progress(false), lower(lower), instructions(instructions),
+ : progress(false), lower(lower),
function_list(), added_functions(&function_list, mem_ctx)
{
functions = _mesa_hash_table_create(mem_ctx,
@@ -111,8 +111,6 @@
private:
unsigned lower; /** Bitfield of which operations to lower */
- exec_list *instructions;
-
/** Hashtable containing all of the known functions in the IR */
struct hash_table *functions;
@@ -258,7 +256,7 @@
ir_expression *ir,
ir_function_signature *callee)
{
- const unsigned num_operands = ir->get_num_operands();
+ const unsigned num_operands = ir->num_operands;
ir_variable *src[4][4];
ir_variable *dst[4];
void *const mem_ctx = ralloc_parent(ir);
@@ -319,7 +317,7 @@
const char *function_name,
function_generator generator)
{
- for (unsigned i = 0; i < ir->get_num_operands(); i++)
+ for (unsigned i = 0; i < ir->num_operands; i++)
if (!ir->operands[i]->type->is_integer_64())
return ir;
diff -Nru mesa-17.2.4/src/compiler/glsl/lower_jumps.cpp mesa-17.3.3/src/compiler/glsl/lower_jumps.cpp
--- mesa-17.2.4/src/compiler/glsl/lower_jumps.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/lower_jumps.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -189,7 +189,7 @@
if(!this->execute_flag) {
exec_list& list = this->loop ? this->loop->body_instructions : signature->body;
this->execute_flag = new(this->signature) ir_variable(glsl_type::bool_type, "execute_flag", ir_var_temporary);
- list.push_head(new(this->signature) ir_assignment(new(this->signature) ir_dereference_variable(execute_flag), new(this->signature) ir_constant(true), 0));
+ list.push_head(new(this->signature) ir_assignment(new(this->signature) ir_dereference_variable(execute_flag), new(this->signature) ir_constant(true)));
list.push_head(this->execute_flag);
}
return this->execute_flag;
@@ -201,7 +201,7 @@
if(!this->break_flag) {
this->break_flag = new(this->signature) ir_variable(glsl_type::bool_type, "break_flag", ir_var_temporary);
this->loop->insert_before(this->break_flag);
- this->loop->insert_before(new(this->signature) ir_assignment(new(this->signature) ir_dereference_variable(break_flag), new(this->signature) ir_constant(false), 0));
+ this->loop->insert_before(new(this->signature) ir_assignment(new(this->signature) ir_dereference_variable(break_flag), new(this->signature) ir_constant(false)));
}
return this->break_flag;
}
@@ -229,7 +229,7 @@
{
if(!this->return_flag) {
this->return_flag = new(this->signature) ir_variable(glsl_type::bool_type, "return_flag", ir_var_temporary);
- this->signature->body.push_head(new(this->signature) ir_assignment(new(this->signature) ir_dereference_variable(return_flag), new(this->signature) ir_constant(false), 0));
+ this->signature->body.push_head(new(this->signature) ir_assignment(new(this->signature) ir_dereference_variable(return_flag), new(this->signature) ir_constant(false)));
this->signature->body.push_head(this->return_flag);
}
return this->return_flag;
@@ -356,8 +356,7 @@
void *ctx = this->function.signature;
return new(ctx) ir_assignment(
new(ctx) ir_dereference_variable(this->loop.get_break_flag()),
- new(ctx) ir_constant(true),
- 0);
+ new(ctx) ir_constant(true));
}
/**
@@ -681,7 +680,7 @@
* this->loop must be initialized even outside of loops.
*/
ir_variable* execute_flag = this->loop.get_execute_flag();
- jumps[lower]->replace_with(new(ir) ir_assignment(new (ir) ir_dereference_variable(execute_flag), new (ir) ir_constant(false), 0));
+ jumps[lower]->replace_with(new(ir) ir_assignment(new (ir) ir_dereference_variable(execute_flag), new (ir) ir_constant(false)));
/* Note: we must update block_records and jumps to reflect
* the fact that the control path has been altered to an
* instruction that clears the execute flag.
diff -Nru mesa-17.2.4/src/compiler/glsl/lower_mat_op_to_vec.cpp mesa-17.3.3/src/compiler/glsl/lower_mat_op_to_vec.cpp
--- mesa-17.2.4/src/compiler/glsl/lower_mat_op_to_vec.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/lower_mat_op_to_vec.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -76,7 +76,7 @@
if (!expr)
return false;
- for (i = 0; i < expr->get_num_operands(); i++) {
+ for (i = 0; i < expr->num_operands; i++) {
if (expr->operands[i]->type->is_matrix())
return true;
}
@@ -294,7 +294,7 @@
static bool
has_matrix_operand(const ir_expression *expr, unsigned &columns)
{
- for (unsigned i = 0; i < expr->get_num_operands(); i++) {
+ for (unsigned i = 0; i < expr->num_operands; i++) {
if (expr->operands[i]->type->is_matrix()) {
columns = expr->operands[i]->type->matrix_columns;
return true;
@@ -318,7 +318,7 @@
if (!has_matrix_operand(orig_expr, matrix_columns))
return visit_continue;
- assert(orig_expr->get_num_operands() <= 2);
+ assert(orig_expr->num_operands <= 2);
mem_ctx = ralloc_parent(orig_assign);
@@ -329,7 +329,7 @@
/* Store the expression operands in temps so we can use them
* multiple times.
*/
- for (i = 0; i < orig_expr->get_num_operands(); i++) {
+ for (i = 0; i < orig_expr->num_operands; i++) {
ir_assignment *assign;
ir_dereference *deref = orig_expr->operands[i]->as_dereference();
diff -Nru mesa-17.2.4/src/compiler/glsl/lower_named_interface_blocks.cpp mesa-17.3.3/src/compiler/glsl/lower_named_interface_blocks.cpp
--- mesa-17.2.4/src/compiler/glsl/lower_named_interface_blocks.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/lower_named_interface_blocks.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -115,6 +115,7 @@
void run(exec_list *instructions);
virtual ir_visitor_status visit_leave(ir_assignment *);
+ virtual ir_visitor_status visit_leave(ir_expression *);
virtual void handle_rvalue(ir_rvalue **rvalue);
};
@@ -238,6 +239,23 @@
return rvalue_visit(ir);
}
+ir_visitor_status
+flatten_named_interface_blocks_declarations::visit_leave(ir_expression *ir)
+{
+ ir_visitor_status status = rvalue_visit(ir);
+
+ if (ir->operation == ir_unop_interpolate_at_centroid ||
+ ir->operation == ir_binop_interpolate_at_offset ||
+ ir->operation == ir_binop_interpolate_at_sample) {
+ const ir_rvalue *val = ir->operands[0];
+
+ /* This disables varying packing for this input. */
+ val->variable_referenced()->data.must_be_shader_input = 1;
+ }
+
+ return status;
+}
+
void
flatten_named_interface_blocks_declarations::handle_rvalue(ir_rvalue **rvalue)
{
@@ -267,7 +285,8 @@
ralloc_asprintf(mem_ctx, "%s %s.%s.%s",
var->data.mode == ir_var_shader_in ? "in" : "out",
var->get_interface_type()->name,
- var->name, ir->field);
+ var->name,
+ ir->record->type->fields.structure[ir->field_idx].name);
/* Find the variable in the set of flattened interface blocks */
hash_entry *entry = _mesa_hash_table_search(interface_namespace,
diff -Nru mesa-17.2.4/src/compiler/glsl/lower_shared_reference.cpp mesa-17.3.3/src/compiler/glsl/lower_shared_reference.cpp
--- mesa-17.2.4/src/compiler/glsl/lower_shared_reference.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/lower_shared_reference.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -33,6 +33,7 @@
#include "lower_buffer_access.h"
#include "ir_builder.h"
+#include "linker.h"
#include "main/macros.h"
#include "util/list.h"
#include "glsl_parser_extras.h"
@@ -478,7 +479,9 @@
} /* unnamed namespace */
void
-lower_shared_reference(struct gl_linked_shader *shader, unsigned *shared_size)
+lower_shared_reference(struct gl_context *ctx,
+ struct gl_shader_program *prog,
+ struct gl_linked_shader *shader)
{
if (shader->Stage != MESA_SHADER_COMPUTE)
return;
@@ -495,5 +498,19 @@
visit_list_elements(&v, shader->ir);
} while (v.progress);
- *shared_size = v.shared_size;
+ prog->Comp.SharedSize = v.shared_size;
+
+ /* Section 19.1 (Compute Shader Variables) of the OpenGL 4.5 (Core Profile)
+ * specification says:
+ *
+ * "There is a limit to the total size of all variables declared as
+ * shared in a single program object. This limit, expressed in units of
+ * basic machine units, may be queried as the value of
+ * MAX_COMPUTE_SHARED_MEMORY_SIZE."
+ */
+ if (prog->Comp.SharedSize > ctx->Const.MaxComputeSharedMemorySize) {
+ linker_error(prog, "Too much shared memory used (%u/%u)\n",
+ prog->Comp.SharedSize,
+ ctx->Const.MaxComputeSharedMemorySize);
+ }
}
diff -Nru mesa-17.2.4/src/compiler/glsl/lower_tess_level.cpp mesa-17.3.3/src/compiler/glsl/lower_tess_level.cpp
--- mesa-17.2.4/src/compiler/glsl/lower_tess_level.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/lower_tess_level.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -264,7 +264,8 @@
ir_dereference *const new_lhs = (ir_dereference *) expr->operands[0];
- ir_constant *old_index_constant = expr->operands[1]->constant_expression_value();
+ ir_constant *old_index_constant =
+ expr->operands[1]->constant_expression_value(mem_ctx);
if (!old_index_constant) {
ir->rhs = new(mem_ctx) ir_expression(ir_triop_vector_insert,
expr->operands[0]->type,
diff -Nru mesa-17.2.4/src/compiler/glsl/lower_texture_projection.cpp mesa-17.3.3/src/compiler/glsl/lower_texture_projection.cpp
--- mesa-17.2.4/src/compiler/glsl/lower_texture_projection.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/lower_texture_projection.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -69,7 +69,7 @@
ir->projector->type,
ir->projector,
NULL);
- ir_assignment *assign = new(mem_ctx) ir_assignment(deref, expr, NULL);
+ ir_assignment *assign = new(mem_ctx) ir_assignment(deref, expr);
base_ir->insert_before(assign);
deref = new(mem_ctx) ir_dereference_variable(var);
diff -Nru mesa-17.2.4/src/compiler/glsl/lower_ubo_reference.cpp mesa-17.3.3/src/compiler/glsl/lower_ubo_reference.cpp
--- mesa-17.2.4/src/compiler/glsl/lower_ubo_reference.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/lower_ubo_reference.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -45,10 +45,12 @@
public lower_buffer_access::lower_buffer_access {
public:
lower_ubo_reference_visitor(struct gl_linked_shader *shader,
- bool clamp_block_indices)
+ bool clamp_block_indices,
+ bool use_std430_as_default)
: shader(shader), clamp_block_indices(clamp_block_indices),
struct_field(NULL), variable(NULL)
{
+ this->use_std430_as_default = use_std430_as_default;
}
void handle_rvalue(ir_rvalue **rvalue);
@@ -345,7 +347,10 @@
unsigned const_offset;
bool row_major;
int matrix_columns;
- enum glsl_interface_packing packing = var->get_interface_type_packing();
+
+ enum glsl_interface_packing packing =
+ var->get_interface_type()->
+ get_internal_ifc_packing(use_std430_as_default);
this->buffer_access_type =
var->is_in_shader_storage_block() ?
@@ -558,7 +563,10 @@
unsigned const_offset;
bool row_major;
int matrix_columns;
- enum glsl_interface_packing packing = var->get_interface_type_packing();
+
+ enum glsl_interface_packing packing =
+ var->get_interface_type()->
+ get_internal_ifc_packing(use_std430_as_default);
this->buffer_access_type = ssbo_store_access;
this->variable = var;
@@ -627,7 +635,7 @@
return;
}
- for (unsigned i = 0; i < ir->get_num_operands(); i++) {
+ for (unsigned i = 0; i < ir->num_operands; i++) {
if (ir->operands[i]->ir_type != ir_type_expression)
continue;
ir_expression *expr = (ir_expression *) ir->operands[i];
@@ -737,8 +745,12 @@
unsigned const_offset;
bool row_major;
int matrix_columns;
- enum glsl_interface_packing packing = var->get_interface_type_packing();
- int unsized_array_stride = calculate_unsized_array_stride(deref, packing);
+
+ enum glsl_interface_packing packing =
+ var->get_interface_type()->
+ get_internal_ifc_packing(use_std430_as_default);
+ int unsized_array_stride =
+ calculate_unsized_array_stride(deref, packing);
this->buffer_access_type = ssbo_unsized_array_length_access;
this->variable = var;
@@ -971,7 +983,10 @@
unsigned const_offset;
bool row_major;
int matrix_columns;
- enum glsl_interface_packing packing = var->get_interface_type_packing();
+
+ enum glsl_interface_packing packing =
+ var->get_interface_type()->
+ get_internal_ifc_packing(use_std430_as_default);
this->buffer_access_type = ssbo_atomic_access;
this->variable = var;
@@ -1108,9 +1123,11 @@
} /* unnamed namespace */
void
-lower_ubo_reference(struct gl_linked_shader *shader, bool clamp_block_indices)
+lower_ubo_reference(struct gl_linked_shader *shader,
+ bool clamp_block_indices, bool use_std430_as_default)
{
- lower_ubo_reference_visitor v(shader, clamp_block_indices);
+ lower_ubo_reference_visitor v(shader, clamp_block_indices,
+ use_std430_as_default);
/* Loop over the instructions lowering references, because we take
* a deref of a UBO array using a UBO dereference as the index will
diff -Nru mesa-17.2.4/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp mesa-17.3.3/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp
--- mesa-17.2.4/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -51,6 +51,10 @@
#include "ir_optimization.h"
#include "compiler/glsl_types.h"
#include "main/macros.h"
+#include "program/prog_instruction.h" /* For SWIZZLE_XXXX */
+#include "ir_builder.h"
+
+using namespace ir_builder;
/**
* Generate a comparison value for a block of indices
@@ -66,23 +70,21 @@
* \param mem_ctx ralloc memory context to be used for all allocations.
*
* \returns
- * An \c ir_rvalue that \b must be cloned for each use in conditional
- * assignments, etc.
+ * An \c ir_variable containing the per-component comparison results. This
+ * must be dereferenced per use.
*/
-ir_rvalue *
-compare_index_block(exec_list *instructions, ir_variable *index,
- unsigned base, unsigned components, void *mem_ctx)
+ir_variable *
+compare_index_block(ir_factory &body, ir_variable *index,
+ unsigned base, unsigned components)
{
- ir_rvalue *broadcast_index = new(mem_ctx) ir_dereference_variable(index);
-
assert(index->type->is_scalar());
- assert(index->type->base_type == GLSL_TYPE_INT || index->type->base_type == GLSL_TYPE_UINT);
+ assert(index->type->base_type == GLSL_TYPE_INT ||
+ index->type->base_type == GLSL_TYPE_UINT);
assert(components >= 1 && components <= 4);
- if (components > 1) {
- const ir_swizzle_mask m = { 0, 0, 0, 0, components, false };
- broadcast_index = new(mem_ctx) ir_swizzle(broadcast_index, m);
- }
+ ir_rvalue *const broadcast_index = components > 1
+ ? swizzle(index, SWIZZLE_XXXX, components)
+ : operand(index).val;
/* Compare the desired index value with the next block of four indices.
*/
@@ -94,26 +96,16 @@
test_indices_data.i[3] = base + 3;
ir_constant *const test_indices =
- new(mem_ctx) ir_constant(broadcast_index->type,
- &test_indices_data);
+ new(body.mem_ctx) ir_constant(broadcast_index->type, &test_indices_data);
+
+ ir_rvalue *const condition_val = equal(broadcast_index, test_indices);
- ir_rvalue *const condition_val =
- new(mem_ctx) ir_expression(ir_binop_equal,
- glsl_type::bvec(components),
- broadcast_index,
- test_indices);
-
- ir_variable *const condition =
- new(mem_ctx) ir_variable(condition_val->type,
- "dereference_condition",
- ir_var_temporary);
- instructions->push_tail(condition);
-
- ir_rvalue *const cond_deref =
- new(mem_ctx) ir_dereference_variable(condition);
- instructions->push_tail(new(mem_ctx) ir_assignment(cond_deref, condition_val, 0));
+ ir_variable *const condition = body.make_temp(condition_val->type,
+ "dereference_condition");
- return cond_deref;
+ body.emit(assign(condition, condition_val));
+
+ return condition;
}
static inline bool
@@ -133,7 +125,7 @@
public:
deref_replacer(const ir_variable *variable_to_replace, ir_rvalue *value)
: variable_to_replace(variable_to_replace), value(value),
- progress(false)
+ progress(false)
{
assert(this->variable_to_replace != NULL);
assert(this->value != NULL);
@@ -143,9 +135,9 @@
{
ir_dereference_variable *const dv = (*rvalue)->as_dereference_variable();
- if ((dv != NULL) && (dv->var == this->variable_to_replace)) {
- this->progress = true;
- *rvalue = this->value->clone(ralloc_parent(*rvalue), NULL);
+ if (dv != NULL && dv->var == this->variable_to_replace) {
+ this->progress = true;
+ *rvalue = this->value->clone(ralloc_parent(*rvalue), NULL);
}
}
@@ -167,10 +159,10 @@
virtual ir_visitor_status visit_enter(ir_dereference_array *ir)
{
- if (is_array_or_matrix(ir->array)
- && (ir->array_index->as_constant() == NULL)) {
- this->deref = ir;
- return visit_stop;
+ if (is_array_or_matrix(ir->array) &&
+ ir->array_index->as_constant() == NULL) {
+ this->deref = ir;
+ return visit_stop;
}
return visit_continue;
@@ -201,18 +193,13 @@
{
}
- void generate(unsigned i, ir_rvalue* condition, exec_list *list) const
+ void generate(unsigned i, ir_rvalue* condition, ir_factory &body) const
{
- /* Just clone the rest of the deref chain when trying to get at the
- * underlying variable.
- */
- void *mem_ctx = ralloc_parent(base_ir);
-
/* Clone the old r-value in its entirety. Then replace any occurances of
* the old variable index with the new constant index.
*/
- ir_dereference *element = this->rvalue->clone(mem_ctx, NULL);
- ir_constant *const index = new(mem_ctx) ir_constant(i);
+ ir_dereference *element = this->rvalue->clone(body.mem_ctx, NULL);
+ ir_constant *const index = body.constant(i);
deref_replacer r(this->old_index, index);
element->accept(&r);
assert(r.progress);
@@ -220,12 +207,11 @@
/* Generate a conditional assignment to (or from) the constant indexed
* array dereference.
*/
- ir_rvalue *variable = new(mem_ctx) ir_dereference_variable(this->var);
ir_assignment *const assignment = (is_write)
- ? new(mem_ctx) ir_assignment(element, variable, condition, write_mask)
- : new(mem_ctx) ir_assignment(variable, element, condition);
+ ? assign(element, this->var, condition, write_mask)
+ : assign(this->var, element, condition);
- list->push_tail(assignment);
+ body.emit(assignment);
}
};
@@ -242,16 +228,16 @@
void *mem_ctx;
switch_generator(const TFunction& generator, ir_variable *index,
- unsigned linear_sequence_max_length,
- unsigned condition_components)
+ unsigned linear_sequence_max_length,
+ unsigned condition_components)
: generator(generator), index(index),
- linear_sequence_max_length(linear_sequence_max_length),
- condition_components(condition_components)
+ linear_sequence_max_length(linear_sequence_max_length),
+ condition_components(condition_components)
{
this->mem_ctx = ralloc_parent(index);
}
- void linear_sequence(unsigned begin, unsigned end, exec_list *list)
+ void linear_sequence(unsigned begin, unsigned end, ir_factory &body)
{
if (begin == end)
return;
@@ -266,66 +252,57 @@
*/
unsigned first;
if (!this->generator.is_write) {
- this->generator.generate(begin, 0, list);
- first = begin + 1;
+ this->generator.generate(begin, 0, body);
+ first = begin + 1;
} else {
- first = begin;
+ first = begin;
}
for (unsigned i = first; i < end; i += 4) {
const unsigned comps = MIN2(condition_components, end - i);
-
- ir_rvalue *const cond_deref =
- compare_index_block(list, index, i, comps, this->mem_ctx);
+ ir_variable *const cond = compare_index_block(body, index, i, comps);
if (comps == 1) {
- this->generator.generate(i, cond_deref->clone(this->mem_ctx, NULL),
- list);
+ this->generator.generate(i,
+ operand(cond).val,
+ body);
} else {
for (unsigned j = 0; j < comps; j++) {
- ir_rvalue *const cond_swiz =
- new(this->mem_ctx) ir_swizzle(cond_deref->clone(this->mem_ctx, NULL),
- j, 0, 0, 0, 1);
-
- this->generator.generate(i + j, cond_swiz, list);
+ this->generator.generate(i + j,
+ swizzle(cond, j, 1),
+ body);
}
}
}
}
- void bisect(unsigned begin, unsigned end, exec_list *list)
+ void bisect(unsigned begin, unsigned end, ir_factory &body)
{
unsigned middle = (begin + end) >> 1;
assert(index->type->is_integer());
ir_constant *const middle_c = (index->type->base_type == GLSL_TYPE_UINT)
- ? new(this->mem_ctx) ir_constant((unsigned)middle)
- : new(this->mem_ctx) ir_constant((int)middle);
-
-
- ir_dereference_variable *deref =
- new(this->mem_ctx) ir_dereference_variable(this->index);
-
- ir_expression *less =
- new(this->mem_ctx) ir_expression(ir_binop_less, glsl_type::bool_type,
- deref, middle_c);
+ ? new(body.mem_ctx) ir_constant((unsigned)middle)
+ : new(body.mem_ctx) ir_constant((int)middle);
- ir_if *if_less = new(this->mem_ctx) ir_if(less);
+ ir_if *if_less = new(body.mem_ctx) ir_if(less(this->index, middle_c));
- generate(begin, middle, &if_less->then_instructions);
- generate(middle, end, &if_less->else_instructions);
+ ir_factory then_body(&if_less->then_instructions, body.mem_ctx);
+ ir_factory else_body(&if_less->else_instructions, body.mem_ctx);
+ generate(begin, middle, then_body);
+ generate(middle, end, else_body);
- list->push_tail(if_less);
+ body.emit(if_less);
}
- void generate(unsigned begin, unsigned end, exec_list *list)
+ void generate(unsigned begin, unsigned end, ir_factory &body)
{
unsigned length = end - begin;
if (length <= this->linear_sequence_max_length)
- return linear_sequence(begin, end, list);
+ return linear_sequence(begin, end, body);
else
- return bisect(begin, end, list);
+ return bisect(begin, end, body);
}
};
@@ -340,13 +317,11 @@
bool lower_output,
bool lower_temp,
bool lower_uniform)
+ : progress(false), stage(stage), lower_inputs(lower_input),
+ lower_outputs(lower_output), lower_temps(lower_temp),
+ lower_uniforms(lower_uniform)
{
- this->progress = false;
- this->stage = stage;
- this->lower_inputs = lower_input;
- this->lower_outputs = lower_output;
- this->lower_temps = lower_temp;
- this->lower_uniforms = lower_uniform;
+ /* empty */
}
bool progress;
@@ -367,19 +342,19 @@
*/
const ir_variable *const var = deref->array->variable_referenced();
if (var == NULL)
- return this->lower_temps;
+ return this->lower_temps;
switch (var->data.mode) {
case ir_var_auto:
case ir_var_temporary:
- return this->lower_temps;
+ return this->lower_temps;
case ir_var_uniform:
case ir_var_shader_storage:
- return this->lower_uniforms;
+ return this->lower_uniforms;
case ir_var_shader_shared:
- return false;
+ return false;
case ir_var_function_in:
case ir_var_const_in:
@@ -435,7 +410,7 @@
return this->lower_outputs;
case ir_var_function_inout:
- return this->lower_temps;
+ return this->lower_temps;
}
assert(!"Should not get here.");
@@ -444,25 +419,27 @@
bool needs_lowering(ir_dereference_array *deref) const
{
- if (deref == NULL || deref->array_index->as_constant()
- || !is_array_or_matrix(deref->array))
- return false;
+ if (deref == NULL || deref->array_index->as_constant() ||
+ !is_array_or_matrix(deref->array))
+ return false;
return this->storage_type_needs_lowering(deref);
}
ir_variable *convert_dereference_array(ir_dereference_array *orig_deref,
- ir_assignment* orig_assign,
- ir_dereference *orig_base)
+ ir_assignment* orig_assign,
+ ir_dereference *orig_base)
{
+ void *const mem_ctx = ralloc_parent(base_ir);
+ exec_list list;
+ ir_factory body(&list, mem_ctx);
+
assert(is_array_or_matrix(orig_deref->array));
const unsigned length = (orig_deref->array->type->is_array())
? orig_deref->array->type->length
: orig_deref->array->type->matrix_columns;
- void *const mem_ctx = ralloc_parent(base_ir);
-
/* Temporary storage for either the result of the dereference of
* the array, or the RHS that's being assigned into the
* dereference of the array.
@@ -470,36 +447,22 @@
ir_variable *var;
if (orig_assign) {
- var = new(mem_ctx) ir_variable(orig_assign->rhs->type,
- "dereference_array_value",
- ir_var_temporary);
- base_ir->insert_before(var);
-
- ir_dereference *lhs = new(mem_ctx) ir_dereference_variable(var);
- ir_assignment *assign = new(mem_ctx) ir_assignment(lhs,
- orig_assign->rhs,
- NULL);
+ var = body.make_temp(orig_assign->rhs->type,
+ "dereference_array_value");
- base_ir->insert_before(assign);
+ body.emit(assign(var, orig_assign->rhs));
} else {
- var = new(mem_ctx) ir_variable(orig_deref->type,
- "dereference_array_value",
- ir_var_temporary);
- base_ir->insert_before(var);
+ var = body.make_temp(orig_deref->type,
+ "dereference_array_value");
}
/* Store the index to a temporary to avoid reusing its tree. */
- ir_variable *index =
- new(mem_ctx) ir_variable(orig_deref->array_index->type,
- "dereference_array_index", ir_var_temporary);
- base_ir->insert_before(index);
-
- ir_dereference *lhs = new(mem_ctx) ir_dereference_variable(index);
- ir_assignment *assign =
- new(mem_ctx) ir_assignment(lhs, orig_deref->array_index, NULL);
- base_ir->insert_before(assign);
+ ir_variable *index = body.make_temp(orig_deref->array_index->type,
+ "dereference_array_index");
+
+ body.emit(assign(index, orig_deref->array_index));
- orig_deref->array_index = lhs->clone(mem_ctx, NULL);
+ orig_deref->array_index = deref(index).val;
assignment_generator ag;
ag.rvalue = orig_base;
@@ -507,10 +470,10 @@
ag.old_index = index;
ag.var = var;
if (orig_assign) {
- ag.is_write = true;
- ag.write_mask = orig_assign->write_mask;
+ ag.is_write = true;
+ ag.write_mask = orig_assign->write_mask;
} else {
- ag.is_write = false;
+ ag.is_write = false;
}
switch_generator sg(ag, index, 4, 4);
@@ -519,28 +482,27 @@
* condition! This is acomplished by wrapping the new conditional
* assignments in an if-statement that uses the original condition.
*/
- if ((orig_assign != NULL) && (orig_assign->condition != NULL)) {
- /* No need to clone the condition because the IR that it hangs on is
- * going to be removed from the instruction sequence.
- */
- ir_if *if_stmt = new(mem_ctx) ir_if(orig_assign->condition);
+ if (orig_assign != NULL && orig_assign->condition != NULL) {
+ /* No need to clone the condition because the IR that it hangs on is
+ * going to be removed from the instruction sequence.
+ */
+ ir_if *if_stmt = new(mem_ctx) ir_if(orig_assign->condition);
+ ir_factory then_body(&if_stmt->then_instructions, body.mem_ctx);
- sg.generate(0, length, &if_stmt->then_instructions);
- base_ir->insert_before(if_stmt);
+ sg.generate(0, length, then_body);
+ body.emit(if_stmt);
} else {
- exec_list list;
-
- sg.generate(0, length, &list);
- base_ir->insert_before(&list);
+ sg.generate(0, length, body);
}
+ base_ir->insert_before(&list);
return var;
}
virtual void handle_rvalue(ir_rvalue **pir)
{
if (this->in_assignee)
- return;
+ return;
if (!*pir)
return;
@@ -548,7 +510,7 @@
ir_dereference_array* orig_deref = (*pir)->as_dereference_array();
if (needs_lowering(orig_deref)) {
ir_variable *var =
- convert_dereference_array(orig_deref, NULL, orig_deref);
+ convert_dereference_array(orig_deref, NULL, orig_deref);
assert(var);
*pir = new(ralloc_parent(base_ir)) ir_dereference_variable(var);
this->progress = true;
@@ -563,7 +525,7 @@
find_variable_index f;
ir->lhs->accept(&f);
- if ((f.deref != NULL) && storage_type_needs_lowering(f.deref)) {
+ if (f.deref != NULL && storage_type_needs_lowering(f.deref)) {
convert_dereference_array(f.deref, ir, ir->lhs);
ir->remove();
this->progress = true;
diff -Nru mesa-17.2.4/src/compiler/glsl/lower_vec_index_to_cond_assign.cpp mesa-17.3.3/src/compiler/glsl/lower_vec_index_to_cond_assign.cpp
--- mesa-17.2.4/src/compiler/glsl/lower_vec_index_to_cond_assign.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/lower_vec_index_to_cond_assign.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -40,6 +40,9 @@
#include "ir_visitor.h"
#include "ir_optimization.h"
#include "compiler/glsl_types.h"
+#include "ir_builder.h"
+
+using namespace ir_builder;
namespace {
@@ -50,8 +53,9 @@
class ir_vec_index_to_cond_assign_visitor : public ir_hierarchical_visitor {
public:
ir_vec_index_to_cond_assign_visitor()
+ : progress(false)
{
- progress = false;
+ /* empty */
}
ir_rvalue *convert_vec_index_to_cond_assign(void *mem_ctx,
@@ -79,63 +83,36 @@
ir_rvalue *orig_index,
const glsl_type *type)
{
- ir_assignment *assign, *value_assign;
- ir_variable *index, *var, *value;
- ir_dereference *deref, *deref_value;
- unsigned i;
-
-
exec_list list;
+ ir_factory body(&list, base_ir);
/* Store the index to a temporary to avoid reusing its tree. */
assert(orig_index->type == glsl_type::int_type ||
orig_index->type == glsl_type::uint_type);
- index = new(base_ir) ir_variable(orig_index->type,
- "vec_index_tmp_i",
- ir_var_temporary);
- list.push_tail(index);
- deref = new(base_ir) ir_dereference_variable(index);
- assign = new(base_ir) ir_assignment(deref, orig_index, NULL);
- list.push_tail(assign);
+ ir_variable *const index =
+ body.make_temp(orig_index->type, "vec_index_tmp_i");
+
+ body.emit(assign(index, orig_index));
/* Store the value inside a temp, thus avoiding matrixes duplication */
- value = new(base_ir) ir_variable(orig_vector->type, "vec_value_tmp",
- ir_var_temporary);
- list.push_tail(value);
- deref_value = new(base_ir) ir_dereference_variable(value);
- value_assign = new(base_ir) ir_assignment(deref_value, orig_vector);
- list.push_tail(value_assign);
+ ir_variable *const value =
+ body.make_temp(orig_vector->type, "vec_value_tmp");
+
+ body.emit(assign(value, orig_vector));
+
/* Temporary where we store whichever value we swizzle out. */
- var = new(base_ir) ir_variable(type, "vec_index_tmp_v",
- ir_var_temporary);
- list.push_tail(var);
+ ir_variable *const var = body.make_temp(type, "vec_index_tmp_v");
/* Generate a single comparison condition "mask" for all of the components
* in the vector.
*/
- ir_rvalue *const cond_deref =
- compare_index_block(&list, index, 0,
- orig_vector->type->vector_elements,
- mem_ctx);
+ ir_variable *const cond =
+ compare_index_block(body, index, 0, orig_vector->type->vector_elements);
/* Generate a conditional move of each vector element to the temp. */
- for (i = 0; i < orig_vector->type->vector_elements; i++) {
- ir_rvalue *condition_swizzle =
- new(base_ir) ir_swizzle(cond_deref->clone(mem_ctx, NULL),
- i, 0, 0, 0, 1);
-
- /* Just clone the rest of the deref chain when trying to get at the
- * underlying variable.
- */
- ir_rvalue *swizzle =
- new(base_ir) ir_swizzle(deref_value->clone(mem_ctx, NULL),
- i, 0, 0, 0, 1);
-
- deref = new(base_ir) ir_dereference_variable(var);
- assign = new(base_ir) ir_assignment(deref, swizzle, condition_swizzle);
- list.push_tail(assign);
- }
+ for (unsigned i = 0; i < orig_vector->type->vector_elements; i++)
+ body.emit(assign(var, swizzle(value, i, 1), swizzle(cond, i, 1)));
/* Put all of the new instructions in the IR stream before the old
* instruction.
@@ -143,7 +120,7 @@
base_ir->insert_before(&list);
this->progress = true;
- return new(base_ir) ir_dereference_variable(var);
+ return deref(var).val;
}
ir_rvalue *
@@ -151,7 +128,36 @@
{
ir_expression *const expr = ir->as_expression();
- if (expr == NULL || expr->operation != ir_binop_vector_extract)
+ if (expr == NULL)
+ return ir;
+
+ if (expr->operation == ir_unop_interpolate_at_centroid ||
+ expr->operation == ir_binop_interpolate_at_offset ||
+ expr->operation == ir_binop_interpolate_at_sample) {
+ /* Lower interpolateAtXxx(some_vec[idx], ...) to
+ * interpolateAtXxx(some_vec, ...)[idx] before lowering to conditional
+ * assignments, to maintain the rule that the interpolant is an l-value
+ * referring to a (part of a) shader input.
+ *
+ * This is required when idx is dynamic (otherwise it gets lowered to
+ * a swizzle).
+ */
+ ir_expression *const interpolant = expr->operands[0]->as_expression();
+ if (!interpolant || interpolant->operation != ir_binop_vector_extract)
+ return ir;
+
+ ir_rvalue *vec_input = interpolant->operands[0];
+ ir_expression *const vec_interpolate =
+ new(base_ir) ir_expression(expr->operation, vec_input->type,
+ vec_input, expr->operands[1]);
+
+ return convert_vec_index_to_cond_assign(ralloc_parent(ir),
+ vec_interpolate,
+ interpolant->operands[1],
+ ir->type);
+ }
+
+ if (expr->operation != ir_binop_vector_extract)
return ir;
return convert_vec_index_to_cond_assign(ralloc_parent(ir),
@@ -163,11 +169,8 @@
ir_visitor_status
ir_vec_index_to_cond_assign_visitor::visit_enter(ir_expression *ir)
{
- unsigned int i;
-
- for (i = 0; i < ir->get_num_operands(); i++) {
+ for (unsigned i = 0; i < ir->num_operands; i++)
ir->operands[i] = convert_vector_extract_to_cond_assign(ir->operands[i]);
- }
return visit_continue;
}
@@ -189,9 +192,8 @@
{
ir->rhs = convert_vector_extract_to_cond_assign(ir->rhs);
- if (ir->condition) {
+ if (ir->condition)
ir->condition = convert_vector_extract_to_cond_assign(ir->condition);
- }
return visit_continue;
}
@@ -203,7 +205,7 @@
ir_rvalue *new_param = convert_vector_extract_to_cond_assign(param);
if (new_param != param) {
- param->replace_with(new_param);
+ param->replace_with(new_param);
}
}
@@ -213,9 +215,8 @@
ir_visitor_status
ir_vec_index_to_cond_assign_visitor::visit_enter(ir_return *ir)
{
- if (ir->value) {
+ if (ir->value)
ir->value = convert_vector_extract_to_cond_assign(ir->value);
- }
return visit_continue;
}
diff -Nru mesa-17.2.4/src/compiler/glsl/lower_vec_index_to_swizzle.cpp mesa-17.3.3/src/compiler/glsl/lower_vec_index_to_swizzle.cpp
--- mesa-17.2.4/src/compiler/glsl/lower_vec_index_to_swizzle.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/lower_vec_index_to_swizzle.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -63,11 +63,12 @@
if (expr == NULL || expr->operation != ir_binop_vector_extract)
return;
- ir_constant *const idx = expr->operands[1]->constant_expression_value();
+ void *mem_ctx = ralloc_parent(expr);
+ ir_constant *const idx =
+ expr->operands[1]->constant_expression_value(mem_ctx);
if (idx == NULL)
return;
- void *ctx = ralloc_parent(expr);
this->progress = true;
/* Page 40 of the GLSL 1.20 spec says:
@@ -87,7 +88,7 @@
const int i = CLAMP(idx->value.i[0], 0,
(int) expr->operands[0]->type->vector_elements - 1);
- *rv = new(ctx) ir_swizzle(expr->operands[0], i, 0, 0, 0, 1);
+ *rv = new(mem_ctx) ir_swizzle(expr->operands[0], i, 0, 0, 0, 1);
}
bool
diff -Nru mesa-17.2.4/src/compiler/glsl/lower_vector.cpp mesa-17.3.3/src/compiler/glsl/lower_vector.cpp
--- mesa-17.2.4/src/compiler/glsl/lower_vector.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/lower_vector.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -58,7 +58,7 @@
* Extended swizzles consist of access of a single vector source (with possible
* per component negation) and the constants -1, 0, or 1.
*/
-bool
+static bool
is_extended_swizzle(ir_expression *ir)
{
/* Track any variables that are accessed by this expression.
@@ -133,7 +133,7 @@
*/
void *const mem_ctx = expr;
- assert(expr->type->vector_elements == expr->get_num_operands());
+ assert(expr->type->vector_elements == expr->num_operands);
/* Generate a temporary with the same type as the ir_quadop_operation.
*/
diff -Nru mesa-17.2.4/src/compiler/glsl/lower_vector_derefs.cpp mesa-17.3.3/src/compiler/glsl/lower_vector_derefs.cpp
--- mesa-17.2.4/src/compiler/glsl/lower_vector_derefs.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/lower_vector_derefs.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -61,8 +61,9 @@
ir_dereference *const new_lhs = (ir_dereference *) deref->array;
ir->set_lhs(new_lhs);
- ir_constant *old_index_constant = deref->array_index->constant_expression_value();
void *mem_ctx = ralloc_parent(ir);
+ ir_constant *old_index_constant =
+ deref->array_index->constant_expression_value(mem_ctx);
if (!old_index_constant) {
ir->rhs = new(mem_ctx) ir_expression(ir_triop_vector_insert,
new_lhs->type,
diff -Nru mesa-17.2.4/src/compiler/glsl/lower_vector_insert.cpp mesa-17.3.3/src/compiler/glsl/lower_vector_insert.cpp
--- mesa-17.2.4/src/compiler/glsl/lower_vector_insert.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/lower_vector_insert.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -65,7 +65,8 @@
factory.mem_ctx = ralloc_parent(expr);
- ir_constant *const idx = expr->operands[2]->constant_expression_value();
+ ir_constant *const idx =
+ expr->operands[2]->constant_expression_value(factory.mem_ctx);
if (idx != NULL) {
/* Replace (vector_insert (vec) (scalar) (index)) with a dereference of
* a new temporary. The new temporary gets assigned as
diff -Nru mesa-17.2.4/src/compiler/glsl/main.cpp mesa-17.3.3/src/compiler/glsl/main.cpp
--- mesa-17.2.4/src/compiler/glsl/main.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/main.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -52,7 +52,7 @@
/**
* \brief Print proper usage and exit with failure.
*/
-void
+static void
usage_fail(const char *name)
{
diff -Nru mesa-17.2.4/src/compiler/glsl/meson.build mesa-17.3.3/src/compiler/glsl/meson.build
--- mesa-17.2.4/src/compiler/glsl/meson.build 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/meson.build 2018-02-01 16:17:32.000000000 +0000
@@ -0,0 +1,246 @@
+# Copyright © 2017 Intel Corporation
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+subdir('glcpp')
+
+glsl_parser = custom_target(
+ 'glsl_parser',
+ input : 'glsl_parser.yy',
+ output : ['glsl_parser.cpp', 'glsl_parser.h'],
+ command : [prog_bison, '-o', '@OUTPUT0@', '-p', '_mesa_glsl_',
+ '--defines=@OUTPUT1@', '@INPUT@'],
+)
+
+glsl_lexer_cpp = custom_target(
+ 'glsl_lexer_cpp',
+ input : 'glsl_lexer.ll',
+ output : 'glsl_lexer.cpp',
+ command : [prog_flex, '-o', '@OUTPUT@', '@INPUT@'],
+)
+
+ir_expression_operation_constant_h = custom_target(
+ 'ir_expression_operation_constant.h',
+ input : 'ir_expression_operation.py',
+ output : 'ir_expression_operation_constant.h',
+ command : [prog_python2, '@INPUT@', 'constant'],
+ capture : true,
+)
+
+ir_expression_operation_strings_h = custom_target(
+ 'ir_expression_operation_strings.h',
+ input : 'ir_expression_operation.py',
+ output : 'ir_expression_operation_strings.h',
+ command : [prog_python2, '@INPUT@', 'strings'],
+ capture : true,
+)
+
+files_libglsl = files(
+ 'ast.h',
+ 'ast_array_index.cpp',
+ 'ast_expr.cpp',
+ 'ast_function.cpp',
+ 'ast_to_hir.cpp',
+ 'ast_type.cpp',
+ 'builtin_functions.cpp',
+ 'builtin_functions.h',
+ 'builtin_int64.h',
+ 'builtin_types.cpp',
+ 'builtin_variables.cpp',
+ 'generate_ir.cpp',
+ 'glsl_parser_extras.cpp',
+ 'glsl_parser_extras.h',
+ 'glsl_symbol_table.cpp',
+ 'glsl_symbol_table.h',
+ 'glsl_to_nir.cpp',
+ 'glsl_to_nir.h',
+ 'hir_field_selection.cpp',
+ 'ir_array_refcount.cpp',
+ 'ir_array_refcount.h',
+ 'ir_basic_block.cpp',
+ 'ir_basic_block.h',
+ 'ir_builder.cpp',
+ 'ir_builder.h',
+ 'ir_clone.cpp',
+ 'ir_constant_expression.cpp',
+ 'ir.cpp',
+ 'ir.h',
+ 'ir_equals.cpp',
+ 'ir_expression_flattening.cpp',
+ 'ir_expression_flattening.h',
+ 'ir_function_can_inline.cpp',
+ 'ir_function_detect_recursion.cpp',
+ 'ir_function_inlining.h',
+ 'ir_function.cpp',
+ 'ir_hierarchical_visitor.cpp',
+ 'ir_hierarchical_visitor.h',
+ 'ir_hv_accept.cpp',
+ 'ir_optimization.h',
+ 'ir_print_visitor.cpp',
+ 'ir_print_visitor.h',
+ 'ir_reader.cpp',
+ 'ir_reader.h',
+ 'ir_rvalue_visitor.cpp',
+ 'ir_rvalue_visitor.h',
+ 'ir_set_program_inouts.cpp',
+ 'ir_uniform.h',
+ 'ir_validate.cpp',
+ 'ir_variable_refcount.cpp',
+ 'ir_variable_refcount.h',
+ 'ir_visitor.h',
+ 'linker.cpp',
+ 'linker.h',
+ 'link_atomics.cpp',
+ 'link_functions.cpp',
+ 'link_interface_blocks.cpp',
+ 'link_uniforms.cpp',
+ 'link_uniform_initializers.cpp',
+ 'link_uniform_block_active_visitor.cpp',
+ 'link_uniform_block_active_visitor.h',
+ 'link_uniform_blocks.cpp',
+ 'link_varyings.cpp',
+ 'link_varyings.h',
+ 'list.h',
+ 'loop_analysis.cpp',
+ 'loop_analysis.h',
+ 'loop_unroll.cpp',
+ 'lower_blend_equation_advanced.cpp',
+ 'lower_buffer_access.cpp',
+ 'lower_buffer_access.h',
+ 'lower_const_arrays_to_uniforms.cpp',
+ 'lower_cs_derived.cpp',
+ 'lower_discard.cpp',
+ 'lower_discard_flow.cpp',
+ 'lower_distance.cpp',
+ 'lower_if_to_cond_assign.cpp',
+ 'lower_instructions.cpp',
+ 'lower_int64.cpp',
+ 'lower_jumps.cpp',
+ 'lower_mat_op_to_vec.cpp',
+ 'lower_noise.cpp',
+ 'lower_offset_array.cpp',
+ 'lower_packed_varyings.cpp',
+ 'lower_named_interface_blocks.cpp',
+ 'lower_packing_builtins.cpp',
+ 'lower_subroutine.cpp',
+ 'lower_tess_level.cpp',
+ 'lower_texture_projection.cpp',
+ 'lower_variable_index_to_cond_assign.cpp',
+ 'lower_vec_index_to_cond_assign.cpp',
+ 'lower_vec_index_to_swizzle.cpp',
+ 'lower_vector.cpp',
+ 'lower_vector_derefs.cpp',
+ 'lower_vector_insert.cpp',
+ 'lower_vertex_id.cpp',
+ 'lower_output_reads.cpp',
+ 'lower_shared_reference.cpp',
+ 'lower_ubo_reference.cpp',
+ 'opt_algebraic.cpp',
+ 'opt_array_splitting.cpp',
+ 'opt_conditional_discard.cpp',
+ 'opt_constant_folding.cpp',
+ 'opt_constant_propagation.cpp',
+ 'opt_constant_variable.cpp',
+ 'opt_copy_propagation.cpp',
+ 'opt_copy_propagation_elements.cpp',
+ 'opt_dead_builtin_variables.cpp',
+ 'opt_dead_builtin_varyings.cpp',
+ 'opt_dead_code.cpp',
+ 'opt_dead_code_local.cpp',
+ 'opt_dead_functions.cpp',
+ 'opt_flatten_nested_if_blocks.cpp',
+ 'opt_flip_matrices.cpp',
+ 'opt_function_inlining.cpp',
+ 'opt_if_simplification.cpp',
+ 'opt_minmax.cpp',
+ 'opt_noop_swizzle.cpp',
+ 'opt_rebalance_tree.cpp',
+ 'opt_redundant_jumps.cpp',
+ 'opt_structure_splitting.cpp',
+ 'opt_swizzle_swizzle.cpp',
+ 'opt_tree_grafting.cpp',
+ 'opt_vectorize.cpp',
+ 'program.h',
+ 'propagate_invariance.cpp',
+ 's_expression.cpp',
+ 's_expression.h',
+ 'string_to_uint_map.cpp',
+ 'string_to_uint_map.h',
+ 'shader_cache.cpp',
+ 'shader_cache.h',
+)
+
+files_libglsl_standalone = files(
+ 'ir_builder_print_visitor.cpp',
+ 'ir_builder_print_visitor.h',
+ 'opt_add_neg_to_sub.h',
+ 'standalone_scaffolding.cpp',
+ 'standalone_scaffolding.h',
+ 'standalone.cpp',
+ 'standalone.h',
+)
+
+libglsl = static_library(
+ 'glsl',
+ [files_libglsl, glsl_parser, glsl_lexer_cpp, ir_expression_operation_h,
+ ir_expression_operation_strings_h, ir_expression_operation_constant_h],
+ c_args : [c_vis_args, c_msvc_compat_args, no_override_init_args],
+ cpp_args : [cpp_vis_args, cpp_msvc_compat_args],
+ link_with : [libnir, libglcpp],
+ include_directories : [inc_common, inc_compiler, inc_nir],
+ build_by_default : false,
+)
+
+libglsl_standalone = static_library(
+ 'glsl_standalone',
+ [files_libglsl_standalone, ir_expression_operation_h],
+ c_args : [c_vis_args, c_msvc_compat_args, no_override_init_args],
+ cpp_args : [cpp_vis_args, cpp_msvc_compat_args],
+ include_directories : [inc_common],
+ link_with : [libglsl, libglsl_util, libmesa_util],
+ dependencies : [dep_thread],
+ build_by_default : false,
+)
+
+glsl_compiler = executable(
+ 'glsl_compiler',
+ 'main.cpp',
+ c_args : [c_vis_args, c_msvc_compat_args, no_override_init_args],
+ cpp_args : [cpp_vis_args, cpp_msvc_compat_args],
+ dependencies : [dep_clock],
+ include_directories : [inc_common],
+ link_with : [libglsl_standalone],
+ build_by_default : false,
+)
+
+glsl_test = executable(
+ 'glsl_test',
+ ['test.cpp', 'test_optpass.cpp', 'test_optpass.h',
+ ir_expression_operation_h],
+ c_args : [c_vis_args, c_msvc_compat_args, no_override_init_args],
+ cpp_args : [cpp_vis_args, cpp_msvc_compat_args],
+ include_directories : [inc_common],
+ dependencies : [dep_clock, dep_thread],
+ link_with : [libglsl, libglsl_standalone, libglsl_util],
+ build_by_default : false,
+)
+
+if with_tests
+ subdir('tests')
+endif
diff -Nru mesa-17.2.4/src/compiler/glsl/opt_algebraic.cpp mesa-17.3.3/src/compiler/glsl/opt_algebraic.cpp
--- mesa-17.2.4/src/compiler/glsl/opt_algebraic.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/opt_algebraic.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -263,9 +263,11 @@
ir2->operands[1]->type->is_matrix())
return false;
+ void *mem_ctx = ralloc_parent(ir2);
+
ir_constant *ir2_const[2];
- ir2_const[0] = ir2->operands[0]->constant_expression_value();
- ir2_const[1] = ir2->operands[1]->constant_expression_value();
+ ir2_const[0] = ir2->operands[0]->constant_expression_value(mem_ctx);
+ ir2_const[1] = ir2->operands[1]->constant_expression_value(mem_ctx);
if (ir2_const[0] && ir2_const[1])
return false;
@@ -328,12 +330,13 @@
}
}
- assert(ir->get_num_operands() <= 4);
- for (unsigned i = 0; i < ir->get_num_operands(); i++) {
+ assert(ir->num_operands <= 4);
+ for (unsigned i = 0; i < ir->num_operands; i++) {
if (ir->operands[i]->type->is_matrix())
return ir;
- op_const[i] = ir->operands[i]->constant_expression_value();
+ op_const[i] =
+ ir->operands[i]->constant_expression_value(ralloc_parent(ir));
op_expr[i] = ir->operands[i]->as_expression();
}
diff -Nru mesa-17.2.4/src/compiler/glsl/opt_array_splitting.cpp mesa-17.3.3/src/compiler/glsl/opt_array_splitting.cpp
--- mesa-17.2.4/src/compiler/glsl/opt_array_splitting.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/opt_array_splitting.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -195,7 +195,7 @@
}
ir_visitor_status
-ir_array_reference_visitor::visit_leave(ir_assignment *ir)
+ir_array_reference_visitor::visit_leave(ir_assignment *)
{
in_whole_array_copy = false;
diff -Nru mesa-17.2.4/src/compiler/glsl/opt_constant_folding.cpp mesa-17.3.3/src/compiler/glsl/opt_constant_folding.cpp
--- mesa-17.2.4/src/compiler/glsl/opt_constant_folding.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/opt_constant_folding.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -74,7 +74,7 @@
*/
ir_expression *expr = (*rvalue)->as_expression();
if (expr) {
- for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
+ for (unsigned int i = 0; i < expr->num_operands; i++) {
if (!expr->operands[i]->as_constant())
return false;
}
@@ -100,7 +100,8 @@
if (var_ref)
return false;
- ir_constant *constant = (*rvalue)->constant_expression_value();
+ ir_constant *constant =
+ (*rvalue)->constant_expression_value(ralloc_parent(*rvalue));
if (constant) {
*rvalue = constant;
return true;
@@ -189,7 +190,7 @@
}
/* Next, see if the call can be replaced with an assignment of a constant */
- ir_constant *const_val = ir->constant_expression_value();
+ ir_constant *const_val = ir->constant_expression_value(ralloc_parent(ir));
if (const_val != NULL) {
ir_assignment *assignment =
diff -Nru mesa-17.2.4/src/compiler/glsl/opt_constant_propagation.cpp mesa-17.3.3/src/compiler/glsl/opt_constant_propagation.cpp
--- mesa-17.2.4/src/compiler/glsl/opt_constant_propagation.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/opt_constant_propagation.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -154,7 +154,8 @@
ir_dereference_variable *var_ref = (*rvalue)->as_dereference_variable();
if (var_ref && !var_ref->type->is_array()) {
- ir_constant *constant = var_ref->constant_expression_value();
+ ir_constant *constant =
+ var_ref->constant_expression_value(ralloc_parent(var_ref));
if (constant) {
*rvalue = constant;
this->progress = true;
diff -Nru mesa-17.2.4/src/compiler/glsl/opt_constant_variable.cpp mesa-17.3.3/src/compiler/glsl/opt_constant_variable.cpp
--- mesa-17.2.4/src/compiler/glsl/opt_constant_variable.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/opt_constant_variable.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -131,7 +131,7 @@
var->data.mode == ir_var_shader_shared)
return visit_continue;
- constval = ir->rhs->constant_expression_value();
+ constval = ir->rhs->constant_expression_value(ralloc_parent(ir));
if (!constval)
return visit_continue;
diff -Nru mesa-17.2.4/src/compiler/glsl/opt_copy_propagation.cpp mesa-17.3.3/src/compiler/glsl/opt_copy_propagation.cpp
--- mesa-17.2.4/src/compiler/glsl/opt_copy_propagation.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/opt_copy_propagation.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -348,20 +348,13 @@
ir_variable *lhs_var = ir->whole_variable_written();
ir_variable *rhs_var = ir->rhs->whole_variable_referenced();
- if ((lhs_var != NULL) && (rhs_var != NULL)) {
- if (lhs_var == rhs_var) {
- /* This is a dumb assignment, but we've conveniently noticed
- * it here. Removing it now would mess up the loop iteration
- * calling us. Just flag it to not execute, and someone else
- * will clean up the mess.
- */
- ir->condition = new(ralloc_parent(ir)) ir_constant(false);
- this->progress = true;
- } else if (lhs_var->data.mode != ir_var_shader_storage &&
- lhs_var->data.mode != ir_var_shader_shared &&
- lhs_var->data.precise == rhs_var->data.precise) {
- assert(lhs_var);
- assert(rhs_var);
+ /* Don't try to remove a dumb assignment of a variable to itself. Removing
+ * it now would mess up the loop iteration calling us.
+ */
+ if (lhs_var != NULL && rhs_var != NULL && lhs_var != rhs_var) {
+ if (lhs_var->data.mode != ir_var_shader_storage &&
+ lhs_var->data.mode != ir_var_shader_shared &&
+ lhs_var->data.precise == rhs_var->data.precise) {
_mesa_hash_table_insert(acp, lhs_var, rhs_var);
}
}
diff -Nru mesa-17.2.4/src/compiler/glsl/opt_dead_builtin_variables.cpp mesa-17.3.3/src/compiler/glsl/opt_dead_builtin_variables.cpp
--- mesa-17.2.4/src/compiler/glsl/opt_dead_builtin_variables.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/opt_dead_builtin_variables.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -62,23 +62,6 @@
* information, so removing these variables from the user shader will
* cause problems later.
*
- * For compute shaders, gl_GlobalInvocationID has some dependencies, so
- * we avoid removing these dependencies.
- *
- * We also avoid removing gl_GlobalInvocationID at this stage because it
- * might be used by a linked shader. In this case it still needs to be
- * initialized by the main function.
- *
- * gl_GlobalInvocationID =
- * gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID
- *
- * Similarly, we initialize gl_LocalInvocationIndex in the main function:
- *
- * gl_LocalInvocationIndex =
- * gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y +
- * gl_LocalInvocationID.y * gl_WorkGroupSize.x +
- * gl_LocalInvocationID.x;
- *
* Matrix uniforms with "Transpose" are not eliminated because there's
* an optimization pass that can turn references to the regular matrix
* into references to the transpose matrix. Eliminating the transpose
@@ -90,11 +73,6 @@
*/
if (strcmp(var->name, "gl_ModelViewProjectionMatrix") == 0
|| strcmp(var->name, "gl_Vertex") == 0
- || strcmp(var->name, "gl_WorkGroupID") == 0
- || strcmp(var->name, "gl_WorkGroupSize") == 0
- || strcmp(var->name, "gl_LocalInvocationID") == 0
- || strcmp(var->name, "gl_GlobalInvocationID") == 0
- || strcmp(var->name, "gl_LocalInvocationIndex") == 0
|| strstr(var->name, "Transpose") != NULL)
continue;
diff -Nru mesa-17.2.4/src/compiler/glsl/opt_dead_code_local.cpp mesa-17.3.3/src/compiler/glsl/opt_dead_code_local.cpp
--- mesa-17.2.4/src/compiler/glsl/opt_dead_code_local.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/opt_dead_code_local.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -173,6 +173,17 @@
bool progress = false;
kill_for_derefs_visitor v(assignments);
+ if (ir->condition == NULL) {
+ /* If this is an assignment of the form "foo = foo;", remove the whole
+ * instruction and be done with it.
+ */
+ const ir_variable *const lhs_var = ir->whole_variable_written();
+ if (lhs_var != NULL && lhs_var == ir->rhs->whole_variable_referenced()) {
+ ir->remove();
+ return true;
+ }
+ }
+
/* Kill assignment entries for things used to produce this assignment. */
ir->rhs->accept(&v);
if (ir->condition) {
diff -Nru mesa-17.2.4/src/compiler/glsl/opt_function_inlining.cpp mesa-17.3.3/src/compiler/glsl/opt_function_inlining.cpp
--- mesa-17.2.4/src/compiler/glsl/opt_function_inlining.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/opt_function_inlining.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -89,7 +89,7 @@
if (ret) {
if (ret->value) {
ir_rvalue *lhs = orig_deref->clone(ctx, NULL);
- ret->replace_with(new(ctx) ir_assignment(lhs, ret->value, NULL));
+ ret->replace_with(new(ctx) ir_assignment(lhs, ret->value));
} else {
/* un-valued return has to be the last return, or we shouldn't
* have reached here. (see can_inline()).
@@ -121,7 +121,7 @@
base_ir->insert_before(index);
assignment = new(ctx) ir_assignment(new(ctx) ir_dereference_variable(index),
- deref->array_index, 0);
+ deref->array_index);
base_ir->insert_before(assignment);
deref->array_index = new(ctx) ir_dereference_variable(index);
@@ -199,7 +199,7 @@
ir_assignment *assign;
assign = new(ctx) ir_assignment(new(ctx) ir_dereference_variable(parameters[i]),
- param, NULL);
+ param);
next_ir->insert_before(assign);
} else {
assert(sig_param->data.mode == ir_var_function_out ||
@@ -215,7 +215,7 @@
ir_assignment *assign;
assign = new(ctx) ir_assignment(new(ctx) ir_dereference_variable(parameters[i]),
- param->clone(ctx, NULL)->as_rvalue(), NULL);
+ param->clone(ctx, NULL)->as_rvalue());
next_ir->insert_before(assign);
}
}
@@ -268,8 +268,7 @@
ir_assignment *assign;
assign = new(ctx) ir_assignment(param,
- new(ctx) ir_dereference_variable(parameters[i]),
- NULL);
+ new(ctx) ir_dereference_variable(parameters[i]));
next_ir->insert_before(assign);
}
diff -Nru mesa-17.2.4/src/compiler/glsl/opt_if_simplification.cpp mesa-17.3.3/src/compiler/glsl/opt_if_simplification.cpp
--- mesa-17.2.4/src/compiler/glsl/opt_if_simplification.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/opt_if_simplification.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -84,7 +84,8 @@
* FINISHME: This can probably be done with some flags, but it would take
* FINISHME: some work to get right.
*/
- ir_constant *condition_constant = ir->condition->constant_expression_value();
+ ir_constant *condition_constant =
+ ir->condition->constant_expression_value(ralloc_parent(ir));
if (condition_constant) {
/* Move the contents of the one branch of the conditional
* that matters out.
diff -Nru mesa-17.2.4/src/compiler/glsl/opt_structure_splitting.cpp mesa-17.3.3/src/compiler/glsl/opt_structure_splitting.cpp
--- mesa-17.2.4/src/compiler/glsl/opt_structure_splitting.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/opt_structure_splitting.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -233,13 +233,9 @@
if (!entry)
return;
- unsigned int i;
- for (i = 0; i < entry->var->type->length; i++) {
- if (strcmp(deref_record->field,
- entry->var->type->fields.structure[i].name) == 0)
- break;
- }
- assert(i != entry->var->type->length);
+ int i = deref_record->field_idx;
+ assert(i >= 0);
+ assert((unsigned) i < entry->var->type->length);
*deref = new(entry->mem_ctx) ir_dereference_variable(entry->components[i]);
}
@@ -289,9 +285,7 @@
type->fields.structure[i].name);
}
- ir->insert_before(new(mem_ctx) ir_assignment(new_lhs,
- new_rhs,
- NULL));
+ ir->insert_before(new(mem_ctx) ir_assignment(new_lhs, new_rhs));
}
ir->remove();
} else {
diff -Nru mesa-17.2.4/src/compiler/glsl/opt_tree_grafting.cpp mesa-17.3.3/src/compiler/glsl/opt_tree_grafting.cpp
--- mesa-17.2.4/src/compiler/glsl/opt_tree_grafting.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/opt_tree_grafting.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -232,7 +232,7 @@
ir_visitor_status
ir_tree_grafting_visitor::visit_enter(ir_expression *ir)
{
- for (unsigned int i = 0; i < ir->get_num_operands(); i++) {
+ for (unsigned int i = 0; i < ir->num_operands; i++) {
if (do_graft(&ir->operands[i]))
return visit_stop;
}
diff -Nru mesa-17.2.4/src/compiler/glsl/shader_cache.cpp mesa-17.3.3/src/compiler/glsl/shader_cache.cpp
--- mesa-17.2.4/src/compiler/glsl/shader_cache.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/shader_cache.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -670,7 +670,7 @@
prog->data->NumUniformStorage = blob_read_uint32(metadata);
prog->data->NumUniformDataSlots = blob_read_uint32(metadata);
- uniforms = rzalloc_array(prog, struct gl_uniform_storage,
+ uniforms = rzalloc_array(prog->data, struct gl_uniform_storage,
prog->data->NumUniformStorage);
prog->data->UniformStorage = uniforms;
@@ -1126,7 +1126,7 @@
prog->data->NumProgramResourceList = blob_read_uint32(metadata);
prog->data->ProgramResourceList =
- ralloc_array(prog, gl_program_resource,
+ ralloc_array(prog->data, gl_program_resource,
prog->data->NumProgramResourceList);
for (unsigned i = 0; i < prog->data->NumProgramResourceList; i++) {
@@ -1356,70 +1356,85 @@
if (memcmp(prog->data->sha1, zero, sizeof(prog->data->sha1)) == 0)
return;
- struct blob *metadata = blob_create();
+ struct blob metadata;
+ blob_init(&metadata);
- write_uniforms(metadata, prog);
+ write_uniforms(&metadata, prog);
- write_hash_tables(metadata, prog);
+ write_hash_tables(&metadata, prog);
- blob_write_uint32(metadata, prog->data->Version);
- blob_write_uint32(metadata, prog->data->linked_stages);
+ blob_write_uint32(&metadata, prog->data->Version);
+ blob_write_uint32(&metadata, prog->data->linked_stages);
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
struct gl_linked_shader *sh = prog->_LinkedShaders[i];
if (sh) {
- write_shader_metadata(metadata, sh);
+ write_shader_metadata(&metadata, sh);
if (sh->Program->info.name)
- blob_write_string(metadata, sh->Program->info.name);
+ blob_write_string(&metadata, sh->Program->info.name);
else
- blob_write_string(metadata, "");
+ blob_write_string(&metadata, "");
if (sh->Program->info.label)
- blob_write_string(metadata, sh->Program->info.label);
+ blob_write_string(&metadata, sh->Program->info.label);
else
- blob_write_string(metadata, "");
+ blob_write_string(&metadata, "");
size_t s_info_size, s_info_ptrs;
get_shader_info_and_pointer_sizes(&s_info_size, &s_info_ptrs,
&sh->Program->info);
/* Store shader info */
- blob_write_bytes(metadata,
+ blob_write_bytes(&metadata,
((char *) &sh->Program->info) + s_info_ptrs,
s_info_size - s_info_ptrs);
}
}
- write_xfb(metadata, prog);
+ write_xfb(&metadata, prog);
- write_uniform_remap_tables(metadata, prog);
+ write_uniform_remap_tables(&metadata, prog);
- write_atomic_buffers(metadata, prog);
+ write_atomic_buffers(&metadata, prog);
- write_buffer_blocks(metadata, prog);
+ write_buffer_blocks(&metadata, prog);
- write_subroutines(metadata, prog);
+ write_subroutines(&metadata, prog);
- write_program_resource_list(metadata, prog);
+ write_program_resource_list(&metadata, prog);
+
+ struct cache_item_metadata cache_item_metadata;
+ cache_item_metadata.type = CACHE_ITEM_TYPE_GLSL;
+ cache_item_metadata.keys =
+ (cache_key *) malloc(prog->NumShaders * sizeof(cache_key));
+ cache_item_metadata.num_keys = prog->NumShaders;
+
+ if (!cache_item_metadata.keys)
+ goto fail;
char sha1_buf[41];
for (unsigned i = 0; i < prog->NumShaders; i++) {
disk_cache_put_key(cache, prog->Shaders[i]->sha1);
+ memcpy(cache_item_metadata.keys[i], prog->Shaders[i]->sha1,
+ sizeof(cache_key));
if (ctx->_Shader->Flags & GLSL_CACHE_INFO) {
_mesa_sha1_format(sha1_buf, prog->Shaders[i]->sha1);
fprintf(stderr, "marking shader: %s\n", sha1_buf);
}
}
- disk_cache_put(cache, prog->data->sha1, metadata->data, metadata->size);
-
- blob_destroy(metadata);
+ disk_cache_put(cache, prog->data->sha1, metadata.data, metadata.size,
+ &cache_item_metadata);
if (ctx->_Shader->Flags & GLSL_CACHE_INFO) {
_mesa_sha1_format(sha1_buf, prog->data->sha1);
fprintf(stderr, "putting program metadata in cache: %s\n", sha1_buf);
}
+
+fail:
+ free(cache_item_metadata.keys);
+ blob_finish(&metadata);
}
bool
@@ -1433,7 +1448,7 @@
return false;
struct disk_cache *cache = ctx->Cache;
- if (!cache || prog->data->cache_fallback || prog->data->skip_cache)
+ if (!cache || prog->data->skip_cache)
return false;
/* Include bindings when creating sha1. These bindings change the resulting
diff -Nru mesa-17.2.4/src/compiler/glsl/standalone.cpp mesa-17.3.3/src/compiler/glsl/standalone.cpp
--- mesa-17.2.4/src/compiler/glsl/standalone.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/standalone.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -100,7 +100,7 @@
set *variables;
};
-void
+static void
init_gl_program(struct gl_program *prog, GLenum target, bool is_arb_asm)
{
prog->RefCount = 1;
@@ -108,7 +108,7 @@
prog->is_arb_asm = is_arb_asm;
}
-struct gl_program *
+static struct gl_program *
new_program(struct gl_context *ctx, GLenum target, GLuint id, bool is_arb_asm)
{
switch (target) {
@@ -253,6 +253,7 @@
case 430:
case 440:
case 450:
+ case 460:
ctx->Const.MaxClipPlanes = 8;
ctx->Const.MaxDrawBuffers = 8;
ctx->Const.MinProgramTexelOffset = -8;
@@ -373,7 +374,7 @@
return text;
}
-void
+static void
compile_shader(struct gl_context *ctx, struct gl_shader *shader)
{
struct _mesa_glsl_parse_state *state =
@@ -418,6 +419,7 @@
case 430:
case 440:
case 450:
+ case 460:
glsl_es = false;
break;
default:
diff -Nru mesa-17.2.4/src/compiler/glsl/tests/blob_test.c mesa-17.3.3/src/compiler/glsl/tests/blob_test.c
--- mesa-17.2.4/src/compiler/glsl/tests/blob_test.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/tests/blob_test.c 2018-01-18 21:30:28.000000000 +0000
@@ -83,7 +83,7 @@
}
static void
-expect_equal_bytes(uint8_t *expected, uint8_t *actual,
+expect_equal_bytes(uint8_t *expected, const uint8_t *actual,
size_t num_bytes, const char *test)
{
size_t i;
@@ -118,44 +118,44 @@
static void
test_write_and_read_functions (void)
{
- struct blob *blob;
+ struct blob blob;
struct blob_reader reader;
- uint8_t *reserved;
+ ssize_t reserved;
size_t str_offset, uint_offset;
uint8_t reserve_buf[sizeof(reserve_test_str)];
- blob = blob_create();
+ blob_init(&blob);
/*** Test blob by writing one of every possible kind of value. */
- blob_write_bytes(blob, bytes_test_str, sizeof(bytes_test_str));
+ blob_write_bytes(&blob, bytes_test_str, sizeof(bytes_test_str));
- reserved = blob_reserve_bytes(blob, sizeof(reserve_test_str));
- memcpy(reserved, reserve_test_str, sizeof(reserve_test_str));
+ reserved = blob_reserve_bytes(&blob, sizeof(reserve_test_str));
+ blob_overwrite_bytes(&blob, reserved, reserve_test_str, sizeof(reserve_test_str));
/* Write a placeholder, (to be replaced later via overwrite_bytes) */
- str_offset = blob->size;
- blob_write_bytes(blob, placeholder_str, sizeof(placeholder_str));
+ str_offset = blob.size;
+ blob_write_bytes(&blob, placeholder_str, sizeof(placeholder_str));
- blob_write_uint32(blob, uint32_test);
+ blob_write_uint32(&blob, uint32_test);
/* Write a placeholder, (to be replaced later via overwrite_uint32) */
- uint_offset = blob->size;
- blob_write_uint32(blob, uint32_placeholder);
+ uint_offset = blob.size;
+ blob_write_uint32(&blob, uint32_placeholder);
- blob_write_uint64(blob, uint64_test);
+ blob_write_uint64(&blob, uint64_test);
- blob_write_intptr(blob, (intptr_t) blob);
+ blob_write_intptr(&blob, (intptr_t) &blob);
- blob_write_string(blob, string_test_str);
+ blob_write_string(&blob, string_test_str);
/* Finally, overwrite our placeholders. */
- blob_overwrite_bytes(blob, str_offset, overwrite_test_str,
+ blob_overwrite_bytes(&blob, str_offset, overwrite_test_str,
sizeof(overwrite_test_str));
- blob_overwrite_uint32(blob, uint_offset, uint32_overwrite);
+ blob_overwrite_uint32(&blob, uint_offset, uint32_overwrite);
/*** Now read each value and verify. */
- blob_reader_init(&reader, blob->data, blob->size);
+ blob_reader_init(&reader, blob.data, blob.size);
expect_equal_str(bytes_test_str,
blob_read_bytes(&reader, sizeof(bytes_test_str)),
@@ -175,7 +175,7 @@
"blob_overwrite_uint32");
expect_equal(uint64_test, blob_read_uint64(&reader),
"blob_write/read_uint64");
- expect_equal((intptr_t) blob, blob_read_intptr(&reader),
+ expect_equal((intptr_t) &blob, blob_read_intptr(&reader),
"blob_write/read_intptr");
expect_equal_str(string_test_str, blob_read_string(&reader),
"blob_write/read_string");
@@ -184,28 +184,28 @@
"read_consumes_all_bytes");
expect_equal(false, reader.overrun, "read_does_not_overrun");
- blob_destroy(blob);
+ blob_finish(&blob);
}
/* Test that data values are written and read with proper alignment. */
static void
test_alignment(void)
{
- struct blob *blob;
+ struct blob blob;
struct blob_reader reader;
uint8_t bytes[] = "ABCDEFGHIJKLMNOP";
size_t delta, last, num_bytes;
- blob = blob_create();
+ blob_init(&blob);
/* First, write an intptr value to the blob and capture that size. This is
* the expected offset between any pair of intptr values (if written with
* alignment).
*/
- blob_write_intptr(blob, (intptr_t) blob);
+ blob_write_intptr(&blob, (intptr_t) &blob);
- delta = blob->size;
- last = blob->size;
+ delta = blob.size;
+ last = blob.size;
/* Then loop doing the following:
*
@@ -215,56 +215,56 @@
* 2. Verify that that write results in an aligned size
*/
for (num_bytes = 1; num_bytes < sizeof(intptr_t); num_bytes++) {
- blob_write_bytes(blob, bytes, num_bytes);
+ blob_write_bytes(&blob, bytes, num_bytes);
- expect_unequal(delta, blob->size - last, "unaligned write of bytes");
+ expect_unequal(delta, blob.size - last, "unaligned write of bytes");
- blob_write_intptr(blob, (intptr_t) blob);
+ blob_write_intptr(&blob, (intptr_t) &blob);
- expect_equal(2 * delta, blob->size - last, "aligned write of intptr");
+ expect_equal(2 * delta, blob.size - last, "aligned write of intptr");
- last = blob->size;
+ last = blob.size;
}
/* Finally, test that reading also does proper alignment. Since we know
* that values were written with all the right alignment, all we have to do
* here is verify that correct values are read.
*/
- blob_reader_init(&reader, blob->data, blob->size);
+ blob_reader_init(&reader, blob.data, blob.size);
- expect_equal((intptr_t) blob, blob_read_intptr(&reader),
+ expect_equal((intptr_t) &blob, blob_read_intptr(&reader),
"read of initial, aligned intptr_t");
for (num_bytes = 1; num_bytes < sizeof(intptr_t); num_bytes++) {
expect_equal_bytes(bytes, blob_read_bytes(&reader, num_bytes),
num_bytes, "unaligned read of bytes");
- expect_equal((intptr_t) blob, blob_read_intptr(&reader),
+ expect_equal((intptr_t) &blob, blob_read_intptr(&reader),
"aligned read of intptr_t");
}
- blob_destroy(blob);
+ blob_finish(&blob);
}
/* Test that we detect overrun. */
static void
test_overrun(void)
{
- struct blob *blob;
+ struct blob blob;
struct blob_reader reader;
uint32_t value = 0xdeadbeef;
- blob = blob_create();
+ blob_init(&blob);
- blob_write_uint32(blob, value);
+ blob_write_uint32(&blob, value);
- blob_reader_init(&reader, blob->data, blob->size);
+ blob_reader_init(&reader, blob.data, blob.size);
expect_equal(value, blob_read_uint32(&reader), "read before overrun");
expect_equal(false, reader.overrun, "overrun flag not set");
expect_equal(0, blob_read_uint32(&reader), "read at overrun");
expect_equal(true, reader.overrun, "overrun flag set");
- blob_destroy(blob);
+ blob_finish(&blob);
}
/* Test that we can read and write some large objects, (exercising the code in
@@ -274,14 +274,14 @@
test_big_objects(void)
{
void *ctx = ralloc_context(NULL);
- struct blob *blob;
+ struct blob blob;
struct blob_reader reader;
int size = 1000;
int count = 1000;
size_t i;
char *buf;
- blob = blob_create();
+ blob_init(&blob);
/* Initialize our buffer. */
buf = ralloc_size(ctx, size);
@@ -291,10 +291,10 @@
/* Write it many times. */
for (i = 0; i < count; i++) {
- blob_write_bytes(blob, buf, size);
+ blob_write_bytes(&blob, buf, size);
}
- blob_reader_init(&reader, blob->data, blob->size);
+ blob_reader_init(&reader, blob.data, blob.size);
/* Read and verify it many times. */
for (i = 0; i < count; i++) {
@@ -308,7 +308,7 @@
expect_equal(false, reader.overrun,
"overrun flag not set reading large objects");
- blob_destroy(blob);
+ blob_finish(&blob);
ralloc_free(ctx);
}
diff -Nru mesa-17.2.4/src/compiler/glsl/tests/cache_test.c mesa-17.3.3/src/compiler/glsl/tests/cache_test.c
--- mesa-17.2.4/src/compiler/glsl/tests/cache_test.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/tests/cache_test.c 2018-01-18 21:30:28.000000000 +0000
@@ -156,9 +156,9 @@
int err;
/* Before doing anything else, ensure that with
- * MESA_GLSL_CACHE_DISABLE set, that disk_cache_create returns NULL.
+ * MESA_GLSL_CACHE_DISABLE set to true, that disk_cache_create returns NULL.
*/
- setenv("MESA_GLSL_CACHE_DISABLE", "1", 1);
+ setenv("MESA_GLSL_CACHE_DISABLE", "true", 1);
cache = disk_cache_create("test", "make_check", 0);
expect_null(cache, "disk_cache_create with MESA_GLSL_CACHE_DISABLE set");
@@ -185,7 +185,8 @@
cache = disk_cache_create("test", "make_check", 0);
expect_non_null(cache, "disk_cache_create with XDG_CACHE_HOME set");
- check_directories_created(CACHE_TEST_TMP "/xdg-cache-home/mesa");
+ check_directories_created(CACHE_TEST_TMP "/xdg-cache-home/"
+ CACHE_DIR_NAME);
disk_cache_destroy(cache);
@@ -202,7 +203,8 @@
cache = disk_cache_create("test", "make_check", 0);
expect_non_null(cache, "disk_cache_create with MESA_GLSL_CACHE_DIR set");
- check_directories_created(CACHE_TEST_TMP "/mesa-glsl-cache-dir/mesa");
+ check_directories_created(CACHE_TEST_TMP "/mesa-glsl-cache-dir/"
+ CACHE_DIR_NAME);
disk_cache_destroy(cache);
}
@@ -266,7 +268,7 @@
expect_equal(size, 0, "disk_cache_get with non-existent item (size)");
/* Simple test of put and get. */
- disk_cache_put(cache, blob_key, blob, sizeof(blob));
+ disk_cache_put(cache, blob_key, blob, sizeof(blob), NULL);
/* disk_cache_put() hands things off to a thread give it some time to
* finish.
@@ -281,7 +283,7 @@
/* Test put and get of a second item. */
disk_cache_compute_key(cache, string, sizeof(string), string_key);
- disk_cache_put(cache, string_key, string, sizeof(string));
+ disk_cache_put(cache, string_key, string, sizeof(string), NULL);
/* disk_cache_put() hands things off to a thread give it some time to
* finish.
@@ -322,7 +324,7 @@
disk_cache_compute_key(cache, one_KB, 1024, one_KB_key);
one_KB_key[0] = blob_key[0];
- disk_cache_put(cache, one_KB_key, one_KB, 1024);
+ disk_cache_put(cache, one_KB_key, one_KB, 1024, NULL);
free(one_KB);
@@ -365,8 +367,8 @@
setenv("MESA_GLSL_CACHE_MAX_SIZE", "1M", 1);
cache = disk_cache_create("test", "make_check", 0);
- disk_cache_put(cache, blob_key, blob, sizeof(blob));
- disk_cache_put(cache, string_key, string, sizeof(string));
+ disk_cache_put(cache, blob_key, blob, sizeof(blob), NULL);
+ disk_cache_put(cache, string_key, string, sizeof(string), NULL);
/* disk_cache_put() hands things off to a thread give it some time to
* finish.
@@ -392,7 +394,7 @@
disk_cache_compute_key(cache, one_MB, 1024 * 1024, one_MB_key);
one_MB_key[0] = blob_key[0];
- disk_cache_put(cache, one_MB_key, one_MB, 1024 * 1024);
+ disk_cache_put(cache, one_MB_key, one_MB, 1024 * 1024, NULL);
free(one_MB);
diff -Nru mesa-17.2.4/src/compiler/glsl/tests/meson.build mesa-17.3.3/src/compiler/glsl/tests/meson.build
--- mesa-17.2.4/src/compiler/glsl/tests/meson.build 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/tests/meson.build 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,76 @@
+# Copyright © 2017 Intel Corporation
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+glsl_blob_test = executable(
+ 'blob_test',
+ 'blob_test.c',
+ c_args : [c_vis_args, c_msvc_compat_args, no_override_init_args],
+ include_directories : [inc_common, inc_compiler],
+ link_with : [libglsl],
+)
+
+glsl_cache_test = executable(
+ 'cache_test',
+ 'cache_test.c',
+ c_args : [c_vis_args, c_msvc_compat_args, no_override_init_args],
+ include_directories : [inc_common, inc_glsl],
+ link_with : [libglsl],
+ dependencies : [dep_clock, dep_thread],
+)
+
+glsl_general_ir_test = executable(
+ 'general_ir_test',
+ ['array_refcount_test.cpp', 'builtin_variable_test.cpp',
+ 'invalidate_locations_test.cpp', 'general_ir_test.cpp',
+ 'lower_int64_test.cpp', 'opt_add_neg_to_sub_test.cpp', 'varyings_test.cpp',
+ ir_expression_operation_h],
+ cpp_args : [cpp_vis_args, cpp_msvc_compat_args],
+ include_directories : [inc_common, inc_glsl],
+ link_with : [libglsl, libglsl_standalone, libglsl_util],
+ dependencies : [dep_clock, dep_thread, idep_gtest],
+)
+
+glsl_uniform_initializer_test = executable(
+ 'uniform_initializer_test',
+ ['copy_constant_to_storage_tests.cpp', 'set_uniform_initializer_tests.cpp',
+ 'uniform_initializer_utils.cpp', 'uniform_initializer_utils.h',
+ ir_expression_operation_h],
+ cpp_args : [cpp_vis_args, cpp_msvc_compat_args],
+ include_directories : [inc_common, inc_glsl],
+ link_with : [libglsl, libglsl_util],
+ dependencies : [dep_thread, idep_gtest],
+)
+
+glsl_sampler_types_test = executable(
+ 'sampler_types_test',
+ ['sampler_types_test.cpp', ir_expression_operation_h],
+ cpp_args : [cpp_vis_args, cpp_msvc_compat_args],
+ include_directories : [inc_common, inc_glsl],
+ link_with : [libglsl, libglsl_util],
+ dependencies : [dep_thread, idep_gtest],
+)
+
+test('blob_test', glsl_blob_test)
+test('cache_test', glsl_cache_test)
+test('general_ir_test', glsl_general_ir_test)
+test('uniform_initializer_test', glsl_uniform_initializer_test)
+test('sampler_types_test', glsl_sampler_types_test)
+
+# TODO: figure out how to get the shell based tests to work?
diff -Nru mesa-17.2.4/src/compiler/glsl/tests/uniform_initializer_utils.cpp mesa-17.3.3/src/compiler/glsl/tests/uniform_initializer_utils.cpp
--- mesa-17.2.4/src/compiler/glsl/tests/uniform_initializer_utils.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl/tests/uniform_initializer_utils.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -215,11 +215,11 @@
unsigned int boolean_true)
{
if (val->type->is_array()) {
- const glsl_type *const element_type = val->array_elements[0]->type;
+ const glsl_type *const element_type = val->const_elements[0]->type;
for (unsigned i = 0; i < storage_array_size; i++) {
verify_data(storage + (i * element_type->components()), 0,
- val->array_elements[i], 0, boolean_true);
+ val->const_elements[i], 0, boolean_true);
}
const unsigned components = element_type->components();
diff -Nru mesa-17.2.4/src/compiler/glsl_types.cpp mesa-17.3.3/src/compiler/glsl_types.cpp
--- mesa-17.2.4/src/compiler/glsl_types.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl_types.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -1999,10 +1999,12 @@
case GLSL_TYPE_ARRAY:
return this->length * this->fields.array->count_attribute_slots(is_vertex_input);
+ case GLSL_TYPE_SUBROUTINE:
+ return 1;
+
case GLSL_TYPE_FUNCTION:
case GLSL_TYPE_ATOMIC_UINT:
case GLSL_TYPE_VOID:
- case GLSL_TYPE_SUBROUTINE:
case GLSL_TYPE_ERROR:
break;
}
diff -Nru mesa-17.2.4/src/compiler/glsl_types.h mesa-17.3.3/src/compiler/glsl_types.h
--- mesa-17.2.4/src/compiler/glsl_types.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/glsl_types.h 2018-01-18 21:30:28.000000000 +0000
@@ -28,6 +28,8 @@
#include
#include
+#include "shader_enums.h"
+
#ifdef __cplusplus
extern "C" {
#endif
@@ -101,13 +103,6 @@
GLSL_SAMPLER_DIM_SUBPASS_MS, /* for multisampled vulkan input attachments */
};
-enum glsl_interface_packing {
- GLSL_INTERFACE_PACKING_STD140,
- GLSL_INTERFACE_PACKING_SHARED,
- GLSL_INTERFACE_PACKING_PACKED,
- GLSL_INTERFACE_PACKING_STD430
-};
-
enum glsl_matrix_layout {
/**
* The layout of the matrix is inherited from the object containing the
@@ -822,6 +817,27 @@
}
/**
+ * Get the type interface packing used internally. For shared and packing
+ * layouts this is implementation defined.
+ */
+ enum glsl_interface_packing get_internal_ifc_packing(bool std430_supported) const
+ {
+ enum glsl_interface_packing packing = this->get_interface_packing();
+ if (packing == GLSL_INTERFACE_PACKING_STD140 ||
+ (!std430_supported &&
+ (packing == GLSL_INTERFACE_PACKING_SHARED ||
+ packing == GLSL_INTERFACE_PACKING_PACKED))) {
+ return GLSL_INTERFACE_PACKING_STD140;
+ } else {
+ assert(packing == GLSL_INTERFACE_PACKING_STD430 ||
+ (std430_supported &&
+ (packing == GLSL_INTERFACE_PACKING_SHARED ||
+ packing == GLSL_INTERFACE_PACKING_PACKED)));
+ return GLSL_INTERFACE_PACKING_STD430;
+ }
+ }
+
+ /**
* Check if the type interface is row major
*/
bool get_interface_row_major() const
diff -Nru mesa-17.2.4/src/compiler/Makefile.am mesa-17.3.3/src/compiler/Makefile.am
--- mesa-17.2.4/src/compiler/Makefile.am 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/Makefile.am 2018-01-18 21:30:28.000000000 +0000
@@ -42,6 +42,7 @@
AM_CFLAGS = \
$(VISIBILITY_CFLAGS) \
+ $(WNO_OVERRIDE_INIT) \
$(MSVC2013_COMPAT_CFLAGS)
AM_CXXFLAGS = \
diff -Nru mesa-17.2.4/src/compiler/Makefile.in mesa-17.3.3/src/compiler/Makefile.in
--- mesa-17.2.4/src/compiler/Makefile.in 2017-10-30 14:49:59.000000000 +0000
+++ mesa-17.3.3/src/compiler/Makefile.in 2018-01-18 21:30:39.000000000 +0000
@@ -177,7 +177,8 @@
noinst_PROGRAMS = glsl_compiler$(EXEEXT) spirv2nir$(EXEEXT)
subdir = src/compiler
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_gnu_make.m4 \
+am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_compile_flag.m4 \
+ $(top_srcdir)/m4/ax_check_gnu_make.m4 \
$(top_srcdir)/m4/ax_check_python_mako_module.m4 \
$(top_srcdir)/m4/ax_gcc_builtin.m4 \
$(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
@@ -209,7 +210,7 @@
am__objects_2 = glsl/glsl_lexer.lo glsl/glsl_parser.lo
am__objects_3 = glsl/ast_array_index.lo glsl/ast_expr.lo \
glsl/ast_function.lo glsl/ast_to_hir.lo glsl/ast_type.lo \
- glsl/blob.lo glsl/builtin_functions.lo glsl/builtin_types.lo \
+ glsl/builtin_functions.lo glsl/builtin_types.lo \
glsl/builtin_variables.lo glsl/generate_ir.lo \
glsl/glsl_parser_extras.lo glsl/glsl_symbol_table.lo \
glsl/glsl_to_nir.lo glsl/hir_field_selection.lo \
@@ -228,10 +229,11 @@
glsl/link_uniform_initializers.lo \
glsl/link_uniform_block_active_visitor.lo \
glsl/link_uniform_blocks.lo glsl/link_varyings.lo \
- glsl/loop_analysis.lo glsl/loop_controls.lo \
- glsl/loop_unroll.lo glsl/lower_blend_equation_advanced.lo \
+ glsl/loop_analysis.lo glsl/loop_unroll.lo \
+ glsl/lower_blend_equation_advanced.lo \
glsl/lower_buffer_access.lo \
- glsl/lower_const_arrays_to_uniforms.lo glsl/lower_discard.lo \
+ glsl/lower_const_arrays_to_uniforms.lo \
+ glsl/lower_cs_derived.lo glsl/lower_discard.lo \
glsl/lower_discard_flow.lo glsl/lower_distance.lo \
glsl/lower_if_to_cond_assign.lo glsl/lower_instructions.lo \
glsl/lower_int64.lo glsl/lower_jumps.lo \
@@ -275,7 +277,7 @@
am_glsl_libstandalone_la_OBJECTS = $(am__objects_5)
glsl_libstandalone_la_OBJECTS = $(am_glsl_libstandalone_la_OBJECTS)
libcompiler_la_LIBADD =
-am__objects_6 = glsl_types.lo nir_types.lo shader_enums.lo
+am__objects_6 = blob.lo glsl_types.lo nir_types.lo shader_enums.lo
am_libcompiler_la_OBJECTS = $(am__objects_6)
libcompiler_la_OBJECTS = $(am_libcompiler_la_OBJECTS)
nir_libnir_la_DEPENDENCIES = libcompiler.la
@@ -283,8 +285,9 @@
nir/nir_dominance.lo nir/nir_from_ssa.lo \
nir/nir_gather_info.lo nir/nir_gs_count_vertices.lo \
nir/nir_inline_functions.lo nir/nir_instr_set.lo \
- nir/nir_intrinsics.lo nir/nir_liveness.lo \
- nir/nir_loop_analyze.lo nir/nir_lower_64bit_packing.lo \
+ nir/nir_intrinsics.lo nir/nir_linking_helpers.lo \
+ nir/nir_liveness.lo nir/nir_loop_analyze.lo \
+ nir/nir_lower_64bit_packing.lo nir/nir_lower_alpha_test.lo \
nir/nir_lower_alu_to_scalar.lo nir/nir_lower_atomics.lo \
nir/nir_lower_atomics_to_ssbo.lo nir/nir_lower_bitmap.lo \
nir/nir_lower_clamp_color_outputs.lo nir/nir_lower_clip.lo \
@@ -303,9 +306,11 @@
nir/nir_lower_phis_to_scalar.lo \
nir/nir_lower_read_invocation_to_scalar.lo \
nir/nir_lower_regs_to_ssa.lo nir/nir_lower_returns.lo \
- nir/nir_lower_samplers.lo nir/nir_lower_system_values.lo \
- nir/nir_lower_tex.lo nir/nir_lower_to_source_mods.lo \
- nir/nir_lower_two_sided_color.lo nir/nir_lower_vars_to_ssa.lo \
+ nir/nir_lower_samplers.lo nir/nir_lower_samplers_as_deref.lo \
+ nir/nir_lower_system_values.lo nir/nir_lower_tex.lo \
+ nir/nir_lower_to_source_mods.lo \
+ nir/nir_lower_two_sided_color.lo \
+ nir/nir_lower_uniforms_to_ubo.lo nir/nir_lower_vars_to_ssa.lo \
nir/nir_lower_var_copies.lo nir/nir_lower_vec_to_movs.lo \
nir/nir_lower_wpos_center.lo nir/nir_lower_wpos_ytransform.lo \
nir/nir_metadata.lo nir/nir_move_vec_src_uses_to_dest.lo \
@@ -841,9 +846,9 @@
NVVIEUX_LIBS = @NVVIEUX_LIBS@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
-OMX_CFLAGS = @OMX_CFLAGS@
-OMX_LIBS = @OMX_LIBS@
-OMX_LIB_INSTALL_DIR = @OMX_LIB_INSTALL_DIR@
+OMX_BELLAGIO_CFLAGS = @OMX_BELLAGIO_CFLAGS@
+OMX_BELLAGIO_LIBS = @OMX_BELLAGIO_LIBS@
+OMX_BELLAGIO_LIB_INSTALL_DIR = @OMX_BELLAGIO_LIB_INSTALL_DIR@
OPENCL_LIBNAME = @OPENCL_LIBNAME@
OPENCL_VERSION = @OPENCL_VERSION@
OSMESA_LIB = @OSMESA_LIB@
@@ -897,6 +902,8 @@
VA_LIB_INSTALL_DIR = @VA_LIB_INSTALL_DIR@
VA_MAJOR = @VA_MAJOR@
VA_MINOR = @VA_MINOR@
+VC5_SIMULATOR_CFLAGS = @VC5_SIMULATOR_CFLAGS@
+VC5_SIMULATOR_LIBS = @VC5_SIMULATOR_LIBS@
VDPAU_CFLAGS = @VDPAU_CFLAGS@
VDPAU_LIBS = @VDPAU_LIBS@
VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@
@@ -908,12 +915,15 @@
VL_CFLAGS = @VL_CFLAGS@
VL_LIBS = @VL_LIBS@
VULKAN_ICD_INSTALL_DIR = @VULKAN_ICD_INSTALL_DIR@
-WAYLAND_CFLAGS = @WAYLAND_CFLAGS@
-WAYLAND_LIBS = @WAYLAND_LIBS@
+WAYLAND_CLIENT_CFLAGS = @WAYLAND_CLIENT_CFLAGS@
+WAYLAND_CLIENT_LIBS = @WAYLAND_CLIENT_LIBS@
WAYLAND_PROTOCOLS_DATADIR = @WAYLAND_PROTOCOLS_DATADIR@
WAYLAND_SCANNER = @WAYLAND_SCANNER@
WAYLAND_SCANNER_CFLAGS = @WAYLAND_SCANNER_CFLAGS@
WAYLAND_SCANNER_LIBS = @WAYLAND_SCANNER_LIBS@
+WAYLAND_SERVER_CFLAGS = @WAYLAND_SERVER_CFLAGS@
+WAYLAND_SERVER_LIBS = @WAYLAND_SERVER_LIBS@
+WNO_OVERRIDE_INIT = @WNO_OVERRIDE_INIT@
X11_INCLUDES = @X11_INCLUDES@
XA_MAJOR = @XA_MAJOR@
XA_MINOR = @XA_MINOR@
@@ -998,6 +1008,8 @@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
LIBCOMPILER_FILES = \
+ blob.c \
+ blob.h \
builtin_type_macros.h \
glsl_types.cpp \
glsl_types.h \
@@ -1016,8 +1028,6 @@
glsl/ast_function.cpp \
glsl/ast_to_hir.cpp \
glsl/ast_type.cpp \
- glsl/blob.c \
- glsl/blob.h \
glsl/builtin_functions.cpp \
glsl/builtin_functions.h \
glsl/builtin_int64.h \
@@ -1079,12 +1089,12 @@
glsl/list.h \
glsl/loop_analysis.cpp \
glsl/loop_analysis.h \
- glsl/loop_controls.cpp \
glsl/loop_unroll.cpp \
glsl/lower_blend_equation_advanced.cpp \
glsl/lower_buffer_access.cpp \
glsl/lower_buffer_access.h \
glsl/lower_const_arrays_to_uniforms.cpp \
+ glsl/lower_cs_derived.cpp \
glsl/lower_discard.cpp \
glsl/lower_discard_flow.cpp \
glsl/lower_distance.cpp \
@@ -1203,10 +1213,12 @@
nir/nir_instr_set.h \
nir/nir_intrinsics.c \
nir/nir_intrinsics.h \
+ nir/nir_linking_helpers.c \
nir/nir_liveness.c \
nir/nir_loop_analyze.c \
nir/nir_loop_analyze.h \
nir/nir_lower_64bit_packing.c \
+ nir/nir_lower_alpha_test.c \
nir/nir_lower_alu_to_scalar.c \
nir/nir_lower_atomics.c \
nir/nir_lower_atomics_to_ssbo.c \
@@ -1235,10 +1247,12 @@
nir/nir_lower_regs_to_ssa.c \
nir/nir_lower_returns.c \
nir/nir_lower_samplers.c \
+ nir/nir_lower_samplers_as_deref.c \
nir/nir_lower_system_values.c \
nir/nir_lower_tex.c \
nir/nir_lower_to_source_mods.c \
nir/nir_lower_two_sided_color.c \
+ nir/nir_lower_uniforms_to_ubo.c \
nir/nir_lower_vars_to_ssa.c \
nir/nir_lower_var_copies.c \
nir/nir_lower_vec_to_movs.c \
@@ -1315,6 +1329,7 @@
AM_CFLAGS = \
$(VISIBILITY_CFLAGS) \
+ $(WNO_OVERRIDE_INIT) \
$(MSVC2013_COMPAT_CFLAGS)
AM_CXXFLAGS = \
@@ -1630,7 +1645,6 @@
glsl/ast_to_hir.lo: glsl/$(am__dirstamp) \
glsl/$(DEPDIR)/$(am__dirstamp)
glsl/ast_type.lo: glsl/$(am__dirstamp) glsl/$(DEPDIR)/$(am__dirstamp)
-glsl/blob.lo: glsl/$(am__dirstamp) glsl/$(DEPDIR)/$(am__dirstamp)
glsl/builtin_functions.lo: glsl/$(am__dirstamp) \
glsl/$(DEPDIR)/$(am__dirstamp)
glsl/builtin_types.lo: glsl/$(am__dirstamp) \
@@ -1700,8 +1714,6 @@
glsl/$(DEPDIR)/$(am__dirstamp)
glsl/loop_analysis.lo: glsl/$(am__dirstamp) \
glsl/$(DEPDIR)/$(am__dirstamp)
-glsl/loop_controls.lo: glsl/$(am__dirstamp) \
- glsl/$(DEPDIR)/$(am__dirstamp)
glsl/loop_unroll.lo: glsl/$(am__dirstamp) \
glsl/$(DEPDIR)/$(am__dirstamp)
glsl/lower_blend_equation_advanced.lo: glsl/$(am__dirstamp) \
@@ -1710,6 +1722,8 @@
glsl/$(DEPDIR)/$(am__dirstamp)
glsl/lower_const_arrays_to_uniforms.lo: glsl/$(am__dirstamp) \
glsl/$(DEPDIR)/$(am__dirstamp)
+glsl/lower_cs_derived.lo: glsl/$(am__dirstamp) \
+ glsl/$(DEPDIR)/$(am__dirstamp)
glsl/lower_discard.lo: glsl/$(am__dirstamp) \
glsl/$(DEPDIR)/$(am__dirstamp)
glsl/lower_discard_flow.lo: glsl/$(am__dirstamp) \
@@ -1858,11 +1872,15 @@
nir/$(DEPDIR)/$(am__dirstamp)
nir/nir_intrinsics.lo: nir/$(am__dirstamp) \
nir/$(DEPDIR)/$(am__dirstamp)
+nir/nir_linking_helpers.lo: nir/$(am__dirstamp) \
+ nir/$(DEPDIR)/$(am__dirstamp)
nir/nir_liveness.lo: nir/$(am__dirstamp) nir/$(DEPDIR)/$(am__dirstamp)
nir/nir_loop_analyze.lo: nir/$(am__dirstamp) \
nir/$(DEPDIR)/$(am__dirstamp)
nir/nir_lower_64bit_packing.lo: nir/$(am__dirstamp) \
nir/$(DEPDIR)/$(am__dirstamp)
+nir/nir_lower_alpha_test.lo: nir/$(am__dirstamp) \
+ nir/$(DEPDIR)/$(am__dirstamp)
nir/nir_lower_alu_to_scalar.lo: nir/$(am__dirstamp) \
nir/$(DEPDIR)/$(am__dirstamp)
nir/nir_lower_atomics.lo: nir/$(am__dirstamp) \
@@ -1918,6 +1936,8 @@
nir/$(DEPDIR)/$(am__dirstamp)
nir/nir_lower_samplers.lo: nir/$(am__dirstamp) \
nir/$(DEPDIR)/$(am__dirstamp)
+nir/nir_lower_samplers_as_deref.lo: nir/$(am__dirstamp) \
+ nir/$(DEPDIR)/$(am__dirstamp)
nir/nir_lower_system_values.lo: nir/$(am__dirstamp) \
nir/$(DEPDIR)/$(am__dirstamp)
nir/nir_lower_tex.lo: nir/$(am__dirstamp) \
@@ -1926,6 +1946,8 @@
nir/$(DEPDIR)/$(am__dirstamp)
nir/nir_lower_two_sided_color.lo: nir/$(am__dirstamp) \
nir/$(DEPDIR)/$(am__dirstamp)
+nir/nir_lower_uniforms_to_ubo.lo: nir/$(am__dirstamp) \
+ nir/$(DEPDIR)/$(am__dirstamp)
nir/nir_lower_vars_to_ssa.lo: nir/$(am__dirstamp) \
nir/$(DEPDIR)/$(am__dirstamp)
nir/nir_lower_var_copies.lo: nir/$(am__dirstamp) \
@@ -2145,6 +2167,7 @@
distclean-compile:
-rm -f *.tab.c
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/blob.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/glsl_types.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nir_types.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/shader_enums.Plo@am__quote@
@@ -2154,7 +2177,6 @@
@AMDEP_TRUE@@am__include@ @am__quote@glsl/$(DEPDIR)/ast_function.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@glsl/$(DEPDIR)/ast_to_hir.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@glsl/$(DEPDIR)/ast_type.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@glsl/$(DEPDIR)/blob.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@glsl/$(DEPDIR)/builtin_functions.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@glsl/$(DEPDIR)/builtin_types.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@glsl/$(DEPDIR)/builtin_variables.Plo@am__quote@
@@ -2195,11 +2217,11 @@
@AMDEP_TRUE@@am__include@ @am__quote@glsl/$(DEPDIR)/link_varyings.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@glsl/$(DEPDIR)/linker.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@glsl/$(DEPDIR)/loop_analysis.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@glsl/$(DEPDIR)/loop_controls.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@glsl/$(DEPDIR)/loop_unroll.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@glsl/$(DEPDIR)/lower_blend_equation_advanced.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@glsl/$(DEPDIR)/lower_buffer_access.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@glsl/$(DEPDIR)/lower_const_arrays_to_uniforms.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@glsl/$(DEPDIR)/lower_cs_derived.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@glsl/$(DEPDIR)/lower_discard.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@glsl/$(DEPDIR)/lower_discard_flow.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@glsl/$(DEPDIR)/lower_distance.Plo@am__quote@
@@ -2288,9 +2310,11 @@
@AMDEP_TRUE@@am__include@ @am__quote@nir/$(DEPDIR)/nir_inline_functions.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@nir/$(DEPDIR)/nir_instr_set.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@nir/$(DEPDIR)/nir_intrinsics.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@nir/$(DEPDIR)/nir_linking_helpers.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@nir/$(DEPDIR)/nir_liveness.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@nir/$(DEPDIR)/nir_loop_analyze.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@nir/$(DEPDIR)/nir_lower_64bit_packing.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@nir/$(DEPDIR)/nir_lower_alpha_test.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@nir/$(DEPDIR)/nir_lower_alu_to_scalar.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@nir/$(DEPDIR)/nir_lower_atomics.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@nir/$(DEPDIR)/nir_lower_atomics_to_ssbo.Plo@am__quote@
@@ -2319,10 +2343,12 @@
@AMDEP_TRUE@@am__include@ @am__quote@nir/$(DEPDIR)/nir_lower_regs_to_ssa.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@nir/$(DEPDIR)/nir_lower_returns.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@nir/$(DEPDIR)/nir_lower_samplers.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@nir/$(DEPDIR)/nir_lower_samplers_as_deref.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@nir/$(DEPDIR)/nir_lower_system_values.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@nir/$(DEPDIR)/nir_lower_tex.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@nir/$(DEPDIR)/nir_lower_to_source_mods.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@nir/$(DEPDIR)/nir_lower_two_sided_color.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@nir/$(DEPDIR)/nir_lower_uniforms_to_ubo.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@nir/$(DEPDIR)/nir_lower_var_copies.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@nir/$(DEPDIR)/nir_lower_vars_to_ssa.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@nir/$(DEPDIR)/nir_lower_vec_to_movs.Plo@am__quote@
diff -Nru mesa-17.2.4/src/compiler/Makefile.sources mesa-17.3.3/src/compiler/Makefile.sources
--- mesa-17.2.4/src/compiler/Makefile.sources 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/Makefile.sources 2018-01-18 21:30:28.000000000 +0000
@@ -1,4 +1,6 @@
LIBCOMPILER_FILES = \
+ blob.c \
+ blob.h \
builtin_type_macros.h \
glsl_types.cpp \
glsl_types.h \
@@ -17,8 +19,6 @@
glsl/ast_function.cpp \
glsl/ast_to_hir.cpp \
glsl/ast_type.cpp \
- glsl/blob.c \
- glsl/blob.h \
glsl/builtin_functions.cpp \
glsl/builtin_functions.h \
glsl/builtin_int64.h \
@@ -80,12 +80,12 @@
glsl/list.h \
glsl/loop_analysis.cpp \
glsl/loop_analysis.h \
- glsl/loop_controls.cpp \
glsl/loop_unroll.cpp \
glsl/lower_blend_equation_advanced.cpp \
glsl/lower_buffer_access.cpp \
glsl/lower_buffer_access.h \
glsl/lower_const_arrays_to_uniforms.cpp \
+ glsl/lower_cs_derived.cpp \
glsl/lower_discard.cpp \
glsl/lower_discard_flow.cpp \
glsl/lower_distance.cpp \
@@ -203,10 +203,12 @@
nir/nir_instr_set.h \
nir/nir_intrinsics.c \
nir/nir_intrinsics.h \
+ nir/nir_linking_helpers.c \
nir/nir_liveness.c \
nir/nir_loop_analyze.c \
nir/nir_loop_analyze.h \
nir/nir_lower_64bit_packing.c \
+ nir/nir_lower_alpha_test.c \
nir/nir_lower_alu_to_scalar.c \
nir/nir_lower_atomics.c \
nir/nir_lower_atomics_to_ssbo.c \
@@ -235,10 +237,12 @@
nir/nir_lower_regs_to_ssa.c \
nir/nir_lower_returns.c \
nir/nir_lower_samplers.c \
+ nir/nir_lower_samplers_as_deref.c \
nir/nir_lower_system_values.c \
nir/nir_lower_tex.c \
nir/nir_lower_to_source_mods.c \
nir/nir_lower_two_sided_color.c \
+ nir/nir_lower_uniforms_to_ubo.c \
nir/nir_lower_vars_to_ssa.c \
nir/nir_lower_var_copies.c \
nir/nir_lower_vec_to_movs.c \
diff -Nru mesa-17.2.4/src/compiler/meson.build mesa-17.3.3/src/compiler/meson.build
--- mesa-17.2.4/src/compiler/meson.build 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/compiler/meson.build 2018-02-01 16:17:32.000000000 +0000
@@ -0,0 +1,68 @@
+# Copyright © 2017 Intel Corporation
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+inc_compiler = include_directories('.')
+inc_nir = include_directories('nir')
+inc_glsl = include_directories('glsl')
+
+files_libcompiler = files(
+ 'blob.c',
+ 'blob.h',
+ 'builtin_type_macros.h',
+ 'glsl_types.cpp',
+ 'glsl_types.h',
+ 'nir_types.cpp',
+ 'nir_types.h',
+ 'shader_enums.c',
+ 'shader_enums.h',
+ 'shader_info.h',
+)
+
+ir_expression_operation_h = custom_target(
+ 'ir_expression_operation.h',
+ input : 'glsl/ir_expression_operation.py',
+ output : 'ir_expression_operation.h',
+ command : [prog_python2, '@INPUT@', 'enum'],
+ capture : true,
+)
+
+libcompiler = static_library(
+ 'compiler',
+ [files_libcompiler, ir_expression_operation_h],
+ include_directories : [inc_mapi, inc_mesa, inc_compiler, inc_common],
+ c_args : [c_vis_args, c_msvc_compat_args, no_override_init_args],
+ cpp_args : [cpp_vis_args, cpp_msvc_compat_args],
+ dependencies : [dep_valgrind],
+ build_by_default : false,
+)
+
+subdir('nir')
+
+spirv2nir = executable(
+ 'spirv2nir',
+ [files('spirv/spirv2nir.c'), dummy_cpp],
+ dependencies : [dep_m, dep_thread],
+ include_directories : [inc_common, inc_nir, include_directories('spirv')],
+ link_with : [libnir, libmesa_util],
+ c_args : [c_vis_args, c_msvc_compat_args, no_override_init_args],
+ build_by_default : false,
+)
+
+subdir('glsl')
diff -Nru mesa-17.2.4/src/compiler/nir/meson.build mesa-17.3.3/src/compiler/nir/meson.build
--- mesa-17.2.4/src/compiler/nir/meson.build 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/compiler/nir/meson.build 2018-02-01 16:17:32.000000000 +0000
@@ -0,0 +1,207 @@
+# Copyright © 2017 Intel Corporation
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+nir_depends = files('nir_opcodes.py')
+
+nir_builder_opcodes_h = custom_target(
+ 'nir_builder_opcodes.h',
+ input : 'nir_builder_opcodes_h.py',
+ output : 'nir_builder_opcodes.h',
+ command : [prog_python2, '@INPUT@'],
+ capture : true,
+ depend_files : nir_depends,
+)
+
+nir_constant_expressions_c = custom_target(
+ 'nir_constant_expressions.c',
+ input : 'nir_constant_expressions.py',
+ output : 'nir_constant_expressions.c',
+ command : [prog_python2, '@INPUT@'],
+ capture : true,
+ depend_files : nir_depends,
+)
+
+nir_opcodes_h = custom_target(
+ 'nir_opcodes.h',
+ input : 'nir_opcodes_h.py',
+ output : 'nir_opcodes.h',
+ command : [prog_python2, '@INPUT@'],
+ capture : true,
+ depend_files : nir_depends,
+)
+
+nir_opcodes_c = custom_target(
+ 'nir_opcodes.c',
+ input : 'nir_opcodes_c.py',
+ output : 'nir_opcodes.c',
+ command : [prog_python2, '@INPUT@'],
+ capture : true,
+ depend_files : nir_depends,
+)
+
+nir_opt_algebraic_c = custom_target(
+ 'nir_opt_algebraic.c',
+ input : 'nir_opt_algebraic.py',
+ output : 'nir_opt_algebraic.c',
+ command : [prog_python2, '@INPUT@'],
+ capture : true,
+ depend_files : files('nir_algebraic.py'),
+)
+
+spirv_info_c = custom_target(
+ 'spirv_info.c',
+ input : files('../spirv/spirv_info_c.py', '../spirv/spirv.core.grammar.json'),
+ output : 'spirv_info.c',
+ command : [prog_python2, '@INPUT0@', '@INPUT1@', '@OUTPUT@'],
+)
+
+files_libnir = files(
+ 'nir.c',
+ 'nir.h',
+ 'nir_builder.h',
+ 'nir_clone.c',
+ 'nir_constant_expressions.h',
+ 'nir_control_flow.c',
+ 'nir_control_flow.h',
+ 'nir_control_flow_private.h',
+ 'nir_dominance.c',
+ 'nir_from_ssa.c',
+ 'nir_gather_info.c',
+ 'nir_gs_count_vertices.c',
+ 'nir_inline_functions.c',
+ 'nir_instr_set.c',
+ 'nir_instr_set.h',
+ 'nir_intrinsics.c',
+ 'nir_intrinsics.h',
+ 'nir_linking_helpers.c',
+ 'nir_liveness.c',
+ 'nir_loop_analyze.c',
+ 'nir_loop_analyze.h',
+ 'nir_lower_64bit_packing.c',
+ 'nir_lower_alu_to_scalar.c',
+ 'nir_lower_alpha_test.c',
+ 'nir_lower_atomics.c',
+ 'nir_lower_atomics_to_ssbo.c',
+ 'nir_lower_bitmap.c',
+ 'nir_lower_clamp_color_outputs.c',
+ 'nir_lower_clip.c',
+ 'nir_lower_clip_cull_distance_arrays.c',
+ 'nir_lower_constant_initializers.c',
+ 'nir_lower_double_ops.c',
+ 'nir_lower_drawpixels.c',
+ 'nir_lower_global_vars_to_local.c',
+ 'nir_lower_gs_intrinsics.c',
+ 'nir_lower_load_const_to_scalar.c',
+ 'nir_lower_locals_to_regs.c',
+ 'nir_lower_idiv.c',
+ 'nir_lower_indirect_derefs.c',
+ 'nir_lower_int64.c',
+ 'nir_lower_io.c',
+ 'nir_lower_io_to_temporaries.c',
+ 'nir_lower_io_to_scalar.c',
+ 'nir_lower_io_types.c',
+ 'nir_lower_passthrough_edgeflags.c',
+ 'nir_lower_patch_vertices.c',
+ 'nir_lower_phis_to_scalar.c',
+ 'nir_lower_read_invocation_to_scalar.c',
+ 'nir_lower_regs_to_ssa.c',
+ 'nir_lower_returns.c',
+ 'nir_lower_samplers.c',
+ 'nir_lower_samplers_as_deref.c',
+ 'nir_lower_system_values.c',
+ 'nir_lower_tex.c',
+ 'nir_lower_to_source_mods.c',
+ 'nir_lower_two_sided_color.c',
+ 'nir_lower_uniforms_to_ubo.c',
+ 'nir_lower_vars_to_ssa.c',
+ 'nir_lower_var_copies.c',
+ 'nir_lower_vec_to_movs.c',
+ 'nir_lower_wpos_center.c',
+ 'nir_lower_wpos_ytransform.c',
+ 'nir_metadata.c',
+ 'nir_move_vec_src_uses_to_dest.c',
+ 'nir_normalize_cubemap_coords.c',
+ 'nir_opt_conditional_discard.c',
+ 'nir_opt_constant_folding.c',
+ 'nir_opt_copy_prop_vars.c',
+ 'nir_opt_copy_propagate.c',
+ 'nir_opt_cse.c',
+ 'nir_opt_dce.c',
+ 'nir_opt_dead_cf.c',
+ 'nir_opt_gcm.c',
+ 'nir_opt_global_to_local.c',
+ 'nir_opt_if.c',
+ 'nir_opt_intrinsics.c',
+ 'nir_opt_loop_unroll.c',
+ 'nir_opt_move_comparisons.c',
+ 'nir_opt_peephole_select.c',
+ 'nir_opt_remove_phis.c',
+ 'nir_opt_trivial_continues.c',
+ 'nir_opt_undef.c',
+ 'nir_phi_builder.c',
+ 'nir_phi_builder.h',
+ 'nir_print.c',
+ 'nir_propagate_invariant.c',
+ 'nir_remove_dead_variables.c',
+ 'nir_repair_ssa.c',
+ 'nir_search.c',
+ 'nir_search.h',
+ 'nir_search_helpers.h',
+ 'nir_split_var_copies.c',
+ 'nir_sweep.c',
+ 'nir_to_lcssa.c',
+ 'nir_validate.c',
+ 'nir_vla.h',
+ 'nir_worklist.c',
+ 'nir_worklist.h',
+ '../spirv/GLSL.std.450.h',
+ '../spirv/nir_spirv.h',
+ '../spirv/spirv.h',
+ '../spirv/spirv_info.h',
+ '../spirv/spirv_to_nir.c',
+ '../spirv/vtn_alu.c',
+ '../spirv/vtn_cfg.c',
+ '../spirv/vtn_glsl450.c',
+ '../spirv/vtn_private.h',
+ '../spirv/vtn_variables.c',
+)
+
+libnir = static_library(
+ 'nir',
+ [files_libnir, spirv_info_c, nir_opt_algebraic_c, nir_opcodes_c,
+ nir_opcodes_h, nir_constant_expressions_c, nir_builder_opcodes_h],
+ include_directories : [inc_common, inc_compiler, include_directories('../spirv')],
+ c_args : [c_vis_args, c_msvc_compat_args, no_override_init_args],
+ link_with : libcompiler,
+ build_by_default : false,
+)
+
+if with_tests
+ nir_control_flow_test = executable(
+ 'nir_control_flow_test',
+ [files('tests/control_flow_tests.cpp'), nir_opcodes_h],
+ c_args : [c_vis_args, c_msvc_compat_args, no_override_init_args],
+ include_directories : [inc_common],
+ dependencies : [dep_thread, idep_gtest],
+ link_with : [libmesa_util, libnir],
+ )
+
+ test('nir_control_flow', nir_control_flow_test)
+endif
diff -Nru mesa-17.2.4/src/compiler/nir/nir_builder.h mesa-17.3.3/src/compiler/nir/nir_builder.h
--- mesa-17.2.4/src/compiler/nir/nir_builder.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/nir/nir_builder.h 2018-01-18 21:30:28.000000000 +0000
@@ -643,4 +643,29 @@
nir_builder_instr_insert(build, &jump->instr);
}
+/**
+ * Emit the comparison "src0 <func> src1" for a compare_func value.
+ *
+ * NEVER and ALWAYS yield the integer immediates 0 and ~0; every other
+ * function is emitted as a float comparison.  GREATER and LEQUAL are
+ * expressed through flt/fge with the two operands swapped.
+ */
+static inline nir_ssa_def *
+nir_compare_func(nir_builder *b, enum compare_func func,
+                 nir_ssa_def *src0, nir_ssa_def *src1)
+{
+   switch (func) {
+   case COMPARE_FUNC_NEVER:    return nir_imm_int(b, 0);
+   case COMPARE_FUNC_ALWAYS:   return nir_imm_int(b, ~0);
+   case COMPARE_FUNC_LESS:     return nir_flt(b, src0, src1);
+   case COMPARE_FUNC_GREATER:  return nir_flt(b, src1, src0);
+   case COMPARE_FUNC_GEQUAL:   return nir_fge(b, src0, src1);
+   case COMPARE_FUNC_LEQUAL:   return nir_fge(b, src1, src0);
+   case COMPARE_FUNC_EQUAL:    return nir_feq(b, src0, src1);
+   case COMPARE_FUNC_NOTEQUAL: return nir_fne(b, src0, src1);
+   }
+
+   unreachable("bad compare func");
+}
+
#endif /* NIR_BUILDER_H */
diff -Nru mesa-17.2.4/src/compiler/nir/nir.c mesa-17.3.3/src/compiler/nir/nir.c
--- mesa-17.2.4/src/compiler/nir/nir.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/nir/nir.c 2018-01-18 21:30:28.000000000 +0000
@@ -44,8 +44,12 @@
shader->options = options;
- if (si)
+ if (si) {
+ assert(si->stage == stage);
shader->info = *si;
+ } else {
+ shader->info.stage = stage;
+ }
exec_list_make_empty(&shader->functions);
exec_list_make_empty(&shader->registers);
@@ -58,8 +62,6 @@
shader->num_uniforms = 0;
shader->num_shared = 0;
- shader->stage = stage;
-
return shader;
}
@@ -143,7 +145,7 @@
break;
case nir_var_shared:
- assert(shader->stage == MESA_SHADER_COMPUTE);
+ assert(shader->info.stage == MESA_SHADER_COMPUTE);
exec_list_push_tail(&shader->shared, &var->node);
break;
@@ -162,8 +164,10 @@
var->type = type;
var->data.mode = mode;
- if ((mode == nir_var_shader_in && shader->stage != MESA_SHADER_VERTEX) ||
- (mode == nir_var_shader_out && shader->stage != MESA_SHADER_FRAGMENT))
+ if ((mode == nir_var_shader_in &&
+ shader->info.stage != MESA_SHADER_VERTEX) ||
+ (mode == nir_var_shader_out &&
+ shader->info.stage != MESA_SHADER_FRAGMENT))
var->data.interpolation = INTERP_MODE_SMOOTH;
if (mode == nir_var_shader_in || mode == nir_var_uniform)
@@ -542,6 +546,28 @@
}
+/* Append a source of type src_type to tex: reallocate the source array one
+ * entry larger, move the existing sources over, then install the new one.
+ */
void
+nir_tex_instr_add_src(nir_tex_instr *tex, nir_tex_src_type src_type,
+                      nir_src src)
+{
+   const unsigned old_count = tex->num_srcs;
+   nir_tex_src *grown = rzalloc_array(tex, nir_tex_src, old_count + 1);
+
+   for (unsigned s = 0; s < old_count; s++) {
+      grown[s].src_type = tex->src[s].src_type;
+      nir_instr_move_src(&tex->instr, &grown[s].src, &tex->src[s].src);
+   }
+
+   ralloc_free(tex->src);
+   tex->src = grown;
+   tex->src[old_count].src_type = src_type;
+   nir_instr_rewrite_src(&tex->instr, &tex->src[old_count].src, src);
+   tex->num_srcs = old_count + 1;
+}
+
+void
nir_tex_instr_remove_src(nir_tex_instr *tex, unsigned src_idx)
{
assert(src_idx < tex->num_srcs);
@@ -1509,6 +1535,19 @@
src_add_all_uses(dest->reg.indirect, instr, NULL);
}
+/* Swap *deref for new_deref (either may be NULL): the old chain is visited
+ * with remove_use_cb, then the new chain with add_use_cb and instr. */
+void
+nir_instr_rewrite_deref(nir_instr *instr, nir_deref_var **deref,
+                        nir_deref_var *new_deref)
+{
+   if (*deref != NULL)
+      visit_deref_src(*deref, remove_use_cb, NULL);
+   *deref = new_deref;
+   if (new_deref != NULL)
+      visit_deref_src(new_deref, add_use_cb, instr);
+}
+
/* note: does *not* take ownership of 'name' */
void
nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
@@ -1878,6 +1917,8 @@
return nir_intrinsic_load_base_vertex;
case SYSTEM_VALUE_INVOCATION_ID:
return nir_intrinsic_load_invocation_id;
+ case SYSTEM_VALUE_FRAG_COORD:
+ return nir_intrinsic_load_frag_coord;
case SYSTEM_VALUE_FRONT_FACE:
return nir_intrinsic_load_front_face;
case SYSTEM_VALUE_SAMPLE_ID:
@@ -1913,15 +1954,15 @@
case SYSTEM_VALUE_SUBGROUP_INVOCATION:
return nir_intrinsic_load_subgroup_invocation;
case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
- return nir_intrinsic_load_subgroup_eq_mask;
+ return nir_intrinsic_load_subgroup_eq_mask;
case SYSTEM_VALUE_SUBGROUP_GE_MASK:
- return nir_intrinsic_load_subgroup_ge_mask;
+ return nir_intrinsic_load_subgroup_ge_mask;
case SYSTEM_VALUE_SUBGROUP_GT_MASK:
- return nir_intrinsic_load_subgroup_gt_mask;
+ return nir_intrinsic_load_subgroup_gt_mask;
case SYSTEM_VALUE_SUBGROUP_LE_MASK:
- return nir_intrinsic_load_subgroup_le_mask;
+ return nir_intrinsic_load_subgroup_le_mask;
case SYSTEM_VALUE_SUBGROUP_LT_MASK:
- return nir_intrinsic_load_subgroup_lt_mask;
+ return nir_intrinsic_load_subgroup_lt_mask;
default:
unreachable("system value does not directly correspond to intrinsic");
}
@@ -1945,6 +1986,8 @@
return SYSTEM_VALUE_BASE_VERTEX;
case nir_intrinsic_load_invocation_id:
return SYSTEM_VALUE_INVOCATION_ID;
+ case nir_intrinsic_load_frag_coord:
+ return SYSTEM_VALUE_FRAG_COORD;
case nir_intrinsic_load_front_face:
return SYSTEM_VALUE_FRONT_FACE;
case nir_intrinsic_load_sample_id:
diff -Nru mesa-17.2.4/src/compiler/nir/nir_clone.c mesa-17.3.3/src/compiler/nir/nir_clone.c
--- mesa-17.2.4/src/compiler/nir/nir_clone.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/nir/nir_clone.c 2018-01-18 21:30:28.000000000 +0000
@@ -737,7 +737,7 @@
clone_state state;
init_clone_state(&state, NULL, true, false);
- nir_shader *ns = nir_shader_create(mem_ctx, s->stage, s->options, NULL);
+ nir_shader *ns = nir_shader_create(mem_ctx, s->info.stage, s->options, NULL);
state.ns = ns;
clone_var_list(&state, &ns->uniforms, &s->uniforms);
diff -Nru mesa-17.2.4/src/compiler/nir/nir_constant_expressions.c mesa-17.3.3/src/compiler/nir/nir_constant_expressions.c
--- mesa-17.2.4/src/compiler/nir/nir_constant_expressions.c 2017-10-30 14:50:47.000000000 +0000
+++ mesa-17.3.3/src/compiler/nir/nir_constant_expressions.c 2018-01-18 21:31:04.000000000 +0000
@@ -1936,12 +1936,12 @@
unsigned base = src0, insert = src1;
int offset = src2, bits = src3;
if (bits == 0) {
- dst = 0;
+ dst = base;
} else if (offset < 0 || bits < 0 || bits + offset > 32) {
dst = 0;
} else {
unsigned mask = ((1ull << bits) - 1) << offset;
- dst = (base & ~mask) | ((insert << bits) & mask);
+ dst = (base & ~mask) | ((insert << offset) & mask);
}
@@ -3084,7 +3084,7 @@
const float src0 =
_mesa_half_to_float(_src[0].u16[_i]);
- float16_t dst = bit_size == 64 ? fabs(src0) : fabsf(src0);
+ float16_t dst = fabs(src0);
_dst_val.u16[_i] = _mesa_float_to_half(dst);
}
@@ -3100,7 +3100,7 @@
const float32_t src0 =
_src[0].f32[_i];
- float32_t dst = bit_size == 64 ? fabs(src0) : fabsf(src0);
+ float32_t dst = fabs(src0);
_dst_val.f32[_i] = dst;
}
@@ -3116,7 +3116,7 @@
const float64_t src0 =
_src[0].f64[_i];
- float64_t dst = bit_size == 64 ? fabs(src0) : fabsf(src0);
+ float64_t dst = fabs(src0);
_dst_val.f64[_i] = dst;
}
diff -Nru mesa-17.2.4/src/compiler/nir/nir_gather_info.c mesa-17.3.3/src/compiler/nir/nir_gather_info.c
--- mesa-17.2.4/src/compiler/nir/nir_gather_info.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/nir/nir_gather_info.c 2018-01-18 21:30:28.000000000 +0000
@@ -53,7 +53,7 @@
else
shader->info.inputs_read |= bitfield;
- if (shader->stage == MESA_SHADER_FRAGMENT) {
+ if (shader->info.stage == MESA_SHADER_FRAGMENT) {
shader->info.fs.uses_sample_qualifier |= var->data.sample;
}
} else {
@@ -79,7 +79,7 @@
{
const struct glsl_type *type = var->type;
- if (nir_is_per_vertex_io(var, shader->stage)) {
+ if (nir_is_per_vertex_io(var, shader->info.stage)) {
assert(glsl_type_is_array(type));
type = glsl_get_array_element(type);
}
@@ -129,7 +129,7 @@
nir_variable *var = deref->var;
const struct glsl_type *type = var->type;
- if (nir_is_per_vertex_io(var, shader->stage)) {
+ if (nir_is_per_vertex_io(var, shader->info.stage)) {
assert(glsl_type_is_array(type));
type = glsl_get_array_element(type);
}
@@ -196,7 +196,7 @@
switch (instr->intrinsic) {
case nir_intrinsic_discard:
case nir_intrinsic_discard_if:
- assert(shader->stage == MESA_SHADER_FRAGMENT);
+ assert(shader->info.stage == MESA_SHADER_FRAGMENT);
shader->info.fs.uses_discard = true;
break;
@@ -214,7 +214,7 @@
/* We need to track which input_reads bits correspond to a
* dvec3/dvec4 input attribute */
- if (shader->stage == MESA_SHADER_VERTEX &&
+ if (shader->info.stage == MESA_SHADER_VERTEX &&
var->data.mode == nir_var_shader_in &&
glsl_type_is_dual_slot(glsl_without_array(var->type))) {
for (uint i = 0; i < glsl_count_attribute_slots(var->type, false); i++) {
@@ -227,6 +227,7 @@
}
case nir_intrinsic_load_draw_id:
+ case nir_intrinsic_load_frag_coord:
case nir_intrinsic_load_front_face:
case nir_intrinsic_load_vertex_id:
case nir_intrinsic_load_vertex_id_zero_base:
@@ -251,7 +252,7 @@
case nir_intrinsic_end_primitive:
case nir_intrinsic_end_primitive_with_counter:
- assert(shader->stage == MESA_SHADER_GEOMETRY);
+ assert(shader->info.stage == MESA_SHADER_GEOMETRY);
shader->info.gs.uses_end_primitive = 1;
break;
@@ -263,8 +264,20 @@
static void
gather_tex_info(nir_tex_instr *instr, nir_shader *shader)
{
- if (instr->op == nir_texop_tg4)
+ switch (instr->op) {
+ case nir_texop_tg4:
shader->info.uses_texture_gather = true;
+ break;
+ case nir_texop_txf:
+ case nir_texop_txf_ms:
+ case nir_texop_txf_ms_mcs:
+ shader->info.textures_used_by_txf |=
+ ((1 << MAX2(instr->texture_array_size, 1)) - 1) <<
+ instr->texture_index;
+ break;
+ default:
+ break;
+ }
}
static void
@@ -314,7 +327,7 @@
shader->info.patch_inputs_read = 0;
shader->info.patch_outputs_written = 0;
shader->info.system_values_read = 0;
- if (shader->stage == MESA_SHADER_FRAGMENT) {
+ if (shader->info.stage == MESA_SHADER_FRAGMENT) {
shader->info.fs.uses_sample_qualifier = false;
}
nir_foreach_block(block, entrypoint) {
diff -Nru mesa-17.2.4/src/compiler/nir/nir.h mesa-17.3.3/src/compiler/nir/nir.h
--- mesa-17.2.4/src/compiler/nir/nir.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/nir/nir.h 2018-01-18 21:30:28.000000000 +0000
@@ -41,9 +41,9 @@
#include "compiler/shader_info.h"
#include
-#ifdef DEBUG
+#ifndef NDEBUG
#include "util/debug.h"
-#endif /* DEBUG */
+#endif /* NDEBUG */
#include "nir_opcodes.h"
@@ -192,6 +192,16 @@
unsigned invariant:1;
/**
+ * When separate shader programs are enabled, only input/outputs between
+ * the stages of a multi-stage separate program can be safely removed
+ * from the shader interface. Other input/outputs must remain active.
+ *
+ * This is also used to make sure xfb varyings that are unused by the
+ * fragment shader are not removed.
+ */
+ unsigned always_active_io:1;
+
+ /**
* Interpolation mode for shader inputs / outputs
*
* \sa glsl_interp_mode
@@ -220,7 +230,7 @@
* be tightly packed. In other words, consecutive array elements
* should be stored one component apart, rather than one slot apart.
*/
- bool compact:1;
+ unsigned compact:1;
/**
* Whether this is a fragment shader output implicitly initialized with
@@ -1204,7 +1214,6 @@
* - nir_texop_txf_ms
* - nir_texop_txs
* - nir_texop_lod
- * - nir_texop_tg4
* - nir_texop_query_levels
* - nir_texop_texture_samples
* - nir_texop_samples_identical
@@ -1369,6 +1378,10 @@
return -1;
}
+void nir_tex_instr_add_src(nir_tex_instr *tex,
+ nir_tex_src_type src_type,
+ nir_src src);
+
void nir_tex_instr_remove_src(nir_tex_instr *tex, unsigned src_idx);
typedef struct {
@@ -1890,9 +1903,6 @@
* access plus one
*/
unsigned num_inputs, num_uniforms, num_outputs, num_shared;
-
- /** The shader stage, such as MESA_SHADER_VERTEX. */
- gl_shader_stage stage;
} nir_shader;
static inline nir_function_impl *
@@ -2211,6 +2221,8 @@
void nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src);
void nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest,
nir_dest new_dest);
+void nir_instr_rewrite_deref(nir_instr *instr, nir_deref_var **deref,
+ nir_deref_var *new_deref);
void nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
unsigned num_components, unsigned bit_size,
@@ -2329,7 +2341,7 @@
static inline void nir_metadata_check_validation_flag(nir_shader *shader) { (void) shader; }
static inline bool should_clone_nir(void) { return false; }
static inline bool should_print_nir(void) { return false; }
-#endif /* DEBUG */
+#endif /* NDEBUG */
#define _PASS(nir, do_pass) do { \
do_pass \
@@ -2405,6 +2417,9 @@
void nir_assign_var_locations(struct exec_list *var_list, unsigned *size,
int (*type_size)(const struct glsl_type *));
+/* Some helpers to do very simple linking */
+bool nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer);
+
typedef enum {
/* If set, this forces all non-flat fragment shader inputs to be
* interpolated as if with the "sample" qualifier. This requires
@@ -2432,14 +2447,19 @@
bool nir_move_vec_src_uses_to_dest(nir_shader *shader);
bool nir_lower_vec_to_movs(nir_shader *shader);
+void nir_lower_alpha_test(nir_shader *shader, enum compare_func func,
+ bool alpha_to_one);
bool nir_lower_alu_to_scalar(nir_shader *shader);
bool nir_lower_load_const_to_scalar(nir_shader *shader);
bool nir_lower_read_invocation_to_scalar(nir_shader *shader);
bool nir_lower_phis_to_scalar(nir_shader *shader);
void nir_lower_io_to_scalar(nir_shader *shader, nir_variable_mode mask);
+void nir_lower_io_to_scalar_early(nir_shader *shader, nir_variable_mode mask);
bool nir_lower_samplers(nir_shader *shader,
const struct gl_shader_program *shader_program);
+bool nir_lower_samplers_as_deref(nir_shader *shader,
+ const struct gl_shader_program *shader_program);
bool nir_lower_system_values(nir_shader *shader);
@@ -2577,6 +2597,7 @@
bool nir_lower_atomics(nir_shader *shader,
const struct gl_shader_program *shader_program);
bool nir_lower_atomics_to_ssbo(nir_shader *shader, unsigned ssbo_offset);
+bool nir_lower_uniforms_to_ubo(nir_shader *shader);
bool nir_lower_to_source_mods(nir_shader *shader);
bool nir_lower_gs_intrinsics(nir_shader *shader);
diff -Nru mesa-17.2.4/src/compiler/nir/nir_intrinsics.h mesa-17.3.3/src/compiler/nir/nir_intrinsics.h
--- mesa-17.2.4/src/compiler/nir/nir_intrinsics.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/nir/nir_intrinsics.h 2018-01-18 21:30:28.000000000 +0000
@@ -322,6 +322,7 @@
idx0, idx1, idx2, \
NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+SYSTEM_VALUE(frag_coord, 4, 0, xx, xx, xx)
SYSTEM_VALUE(front_face, 1, 0, xx, xx, xx)
SYSTEM_VALUE(vertex_id, 1, 0, xx, xx, xx)
SYSTEM_VALUE(vertex_id_zero_base, 1, 0, xx, xx, xx)
diff -Nru mesa-17.2.4/src/compiler/nir/nir_linking_helpers.c mesa-17.3.3/src/compiler/nir/nir_linking_helpers.c
--- mesa-17.2.4/src/compiler/nir/nir_linking_helpers.c 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/compiler/nir/nir_linking_helpers.c 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,152 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+#include "util/set.h"
+#include "util/hash_table.h"
+
+/* This file contains various little helpers for doing simple linking in
+ * NIR. Eventually, we'll probably want a full-blown varying packing
+ * implementation in here. Right now, it just deletes unused things.
+ */
+
+/**
+ * Compute the slot bitmask of a variable, as used by the inputs_read,
+ * outputs_written and system_values_read bitfields.  Patch variables and
+ * variables with a negative location produce an empty mask. */
+static uint64_t
+get_variable_io_mask(nir_variable *var, gl_shader_stage stage)
+{
+   /* TODO: add support for tess patches */
+   if (var->data.patch || var->data.location < 0)
+      return 0;
+
+   assert(var->data.mode == nir_var_shader_in ||
+          var->data.mode == nir_var_shader_out ||
+          var->data.mode == nir_var_system_value);
+   assert(var->data.location >= 0);
+
+   const struct glsl_type *slot_type = var->type;
+   if (nir_is_per_vertex_io(var, stage)) {
+      assert(glsl_type_is_array(slot_type));
+      slot_type = glsl_get_array_element(slot_type);
+   }
+   const uint64_t slot_bits =
+      (1ull << glsl_count_attribute_slots(slot_type, false)) - 1;
+   return slot_bits << var->data.location;
+}
+
+/* Record, per location_frac, the io mask of every shader output that the
+ * shader itself reads back through a load_var intrinsic. */
+static void
+tcs_add_output_reads(nir_shader *shader, uint64_t *read)
+{
+   nir_foreach_function(function, shader) {
+      if (!function->impl)
+         continue;
+      nir_foreach_block(block, function->impl) {
+         nir_foreach_instr(instr, block) {
+            if (instr->type != nir_instr_type_intrinsic)
+               continue;
+
+            nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);
+            if (load->intrinsic != nir_intrinsic_load_var)
+               continue;
+
+            nir_variable *var = load->variables[0]->var;
+            if (var->data.mode != nir_var_shader_out)
+               continue;
+            read[var->data.location_frac] |=
+               get_variable_io_mask(var, shader->info.stage);
+         }
+      }
+   }
+}
+
+/* Demote to a global every variable in var_list whose io mask shares no bit
+ * with used_by_other_stage[location_frac].  Patch variables, locations in
+ * [0, VARYING_SLOT_VAR0) and always_active_io variables are left alone.
+ * Returns true if any variable was demoted.
+ */
+static bool
+remove_unused_io_vars(nir_shader *shader, struct exec_list *var_list,
+                      uint64_t *used_by_other_stage)
+{
+   bool progress = false;
+
+   nir_foreach_variable_safe(var, var_list) {
+      const bool below_var0 =
+         var->data.location >= 0 && var->data.location < VARYING_SLOT_VAR0;
+      /* TODO: add patch support */
+      if (var->data.patch || below_var0 || var->data.always_active_io)
+         continue;
+
+      const uint64_t var_mask = get_variable_io_mask(var, shader->info.stage);
+      if (used_by_other_stage[var->data.location_frac] & var_mask)
+         continue;
+
+      /* This one is invalid, make it a global variable instead */
+      var->data.location = 0;
+      var->data.mode = nir_var_global;
+      exec_node_remove(&var->node);
+      exec_list_push_tail(&shader->globals, &var->node);
+
+      progress = true;
+   }
+
+   return progress;
+}
+
+/**
+ * Demote producer outputs the consumer never reads, and consumer inputs the
+ * producer never writes, to globals.  Returns true if anything changed.
+ */
+bool
+nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer)
+{
+   assert(producer->info.stage != MESA_SHADER_FRAGMENT);
+   assert(consumer->info.stage != MESA_SHADER_VERTEX);
+
+   uint64_t read[4] = { 0 }, written[4] = { 0 };
+   nir_foreach_variable(out_var, &producer->outputs) {
+      written[out_var->data.location_frac] |=
+         get_variable_io_mask(out_var, producer->info.stage);
+   }
+   nir_foreach_variable(in_var, &consumer->inputs) {
+      read[in_var->data.location_frac] |=
+         get_variable_io_mask(in_var, consumer->info.stage);
+   }
+
+   /* Each TCS invocation can read data written by other TCS invocations,
+    * so even if the outputs are not used by the TES we must also make
+    * sure they are not read by the TCS before demoting them to globals.
+    */
+   if (producer->info.stage == MESA_SHADER_TESS_CTRL)
+      tcs_add_output_reads(producer, read);
+   /* Both lists must be processed, so no short-circuiting here. */
+   const bool outputs_changed =
+      remove_unused_io_vars(producer, &producer->outputs, read);
+   const bool inputs_changed =
+      remove_unused_io_vars(consumer, &consumer->inputs, written);
+   return outputs_changed || inputs_changed;
+}
diff -Nru mesa-17.2.4/src/compiler/nir/nir_lower_alpha_test.c mesa-17.3.3/src/compiler/nir/nir_lower_alpha_test.c
--- mesa-17.2.4/src/compiler/nir/nir_lower_alpha_test.c 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/compiler/nir/nir_lower_alpha_test.c 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,111 @@
+/*
+ * Copyright © 2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * @file
+ *
+ * Implements GL alpha testing by comparing the output color's alpha to the
+ * alpha_ref intrinsic and emitting a discard based on it.
+ *
+ * The alpha_to_one value overrides the source alpha to 1.0 to implement
+ * GL_SAMPLE_ALPHA_TO_ONE, which applies before the alpha test (and would be
+ * rather silly to use with alpha test, but the spec permits).
+ */
+
+#include "nir/nir.h"
+#include "nir/nir_builder.h"
+
+/* Before every store to FRAG_RESULT_COLOR / FRAG_RESULT_DATA0, emit a
+ * discard_if that kills the fragment when "alpha <func> alpha_ref" fails. */
+void
+nir_lower_alpha_test(nir_shader *shader, enum compare_func func,
+                     bool alpha_to_one)
+{
+   assert(shader->info.stage == MESA_SHADER_FRAGMENT);
+
+   nir_foreach_function(function, shader) {
+      nir_function_impl *impl = function->impl;
+      if (!impl) /* functions without an impl have no body to walk */
+         continue;
+
+      nir_builder b;
+      nir_builder_init(&b, impl);
+
+      nir_foreach_block(block, impl) {
+         nir_foreach_instr_safe(instr, block) {
+            if (instr->type != nir_instr_type_intrinsic)
+               continue;
+
+            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+            nir_variable *out = NULL;
+
+            switch (intr->intrinsic) {
+            case nir_intrinsic_store_var:
+               out = intr->variables[0]->var;
+               break;
+            case nir_intrinsic_store_output:
+               /* already had i/o lowered.. lookup the matching output var: */
+               nir_foreach_variable(var, &shader->outputs) {
+                  int drvloc = var->data.driver_location;
+                  if (nir_intrinsic_base(intr) == drvloc) {
+                     out = var;
+                     break;
+                  }
+               }
+               assume(out);
+               break;
+            default:
+               continue;
+            }
+
+            if (out->data.mode != nir_var_shader_out)
+               continue;
+
+            if (out->data.location != FRAG_RESULT_COLOR &&
+                out->data.location != FRAG_RESULT_DATA0)
+               continue;
+
+            b.cursor = nir_before_instr(&intr->instr);
+
+            /* alpha_to_one replaces the stored alpha with 1.0 for the test */
+            nir_ssa_def *alpha = alpha_to_one ?
+               nir_imm_float(&b, 1.0) :
+               nir_channel(&b, nir_ssa_for_src(&b, intr->src[0], 4), 3);
+
+            nir_ssa_def *condition =
+               nir_compare_func(&b, func,
+                                alpha, nir_load_alpha_ref_float(&b));
+            nir_intrinsic_instr *discard =
+               nir_intrinsic_instr_create(b.shader,
+                                          nir_intrinsic_discard_if);
+            discard->num_components = 1;
+            discard->src[0] = nir_src_for_ssa(nir_inot(&b, condition));
+            nir_builder_instr_insert(&b, &discard->instr);
+            shader->info.fs.uses_discard = true;
+         }
+      }
+
+      nir_metadata_preserve(impl, nir_metadata_block_index |
+                                  nir_metadata_dominance);
+   }
+}
diff -Nru mesa-17.2.4/src/compiler/nir/nir_lower_atomics.c mesa-17.3.3/src/compiler/nir/nir_lower_atomics.c
--- mesa-17.2.4/src/compiler/nir/nir_lower_atomics.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/nir/nir_lower_atomics.c 2018-01-18 21:30:28.000000000 +0000
@@ -100,7 +100,7 @@
nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(mem_ctx, op);
nir_intrinsic_set_base(new_instr,
- shader_program->data->UniformStorage[uniform_loc].opaque[shader->stage].index);
+ shader_program->data->UniformStorage[uniform_loc].opaque[shader->info.stage].index);
nir_load_const_instr *offset_const =
nir_load_const_instr_create(mem_ctx, 1, 32);
diff -Nru mesa-17.2.4/src/compiler/nir/nir_lower_bitmap.c mesa-17.3.3/src/compiler/nir/nir_lower_bitmap.c
--- mesa-17.2.4/src/compiler/nir/nir_lower_bitmap.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/nir/nir_lower_bitmap.c 2018-01-18 21:30:28.000000000 +0000
@@ -133,7 +133,7 @@
nir_lower_bitmap(nir_shader *shader,
const nir_lower_bitmap_options *options)
{
- assert(shader->stage == MESA_SHADER_FRAGMENT);
+ assert(shader->info.stage == MESA_SHADER_FRAGMENT);
lower_bitmap_impl(nir_shader_get_entrypoint(shader), options);
}
diff -Nru mesa-17.2.4/src/compiler/nir/nir_lower_clamp_color_outputs.c mesa-17.3.3/src/compiler/nir/nir_lower_clamp_color_outputs.c
--- mesa-17.2.4/src/compiler/nir/nir_lower_clamp_color_outputs.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/nir/nir_lower_clamp_color_outputs.c 2018-01-18 21:30:28.000000000 +0000
@@ -33,7 +33,7 @@
static bool
is_color_output(lower_state *state, nir_variable *out)
{
- switch (state->shader->stage) {
+ switch (state->shader->info.stage) {
case MESA_SHADER_VERTEX:
case MESA_SHADER_GEOMETRY:
switch (out->data.location) {
diff -Nru mesa-17.2.4/src/compiler/nir/nir_lower_clip_cull_distance_arrays.c mesa-17.3.3/src/compiler/nir/nir_lower_clip_cull_distance_arrays.c
--- mesa-17.2.4/src/compiler/nir/nir_lower_clip_cull_distance_arrays.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/nir/nir_lower_clip_cull_distance_arrays.c 2018-01-18 21:30:28.000000000 +0000
@@ -48,7 +48,7 @@
* array length.
*/
const struct glsl_type *type = var->type;
- if (nir_is_per_vertex_io(var, nir->stage))
+ if (nir_is_per_vertex_io(var, nir->info.stage))
type = glsl_get_array_element(type);
assert(glsl_type_is_array(type));
@@ -158,7 +158,7 @@
cull->data.location = VARYING_SLOT_CLIP_DIST0;
} else {
/* Turn the ClipDistance array into a combined one */
- update_type(clip, nir->stage, clip_array_size + cull_array_size);
+ update_type(clip, nir->info.stage, clip_array_size + cull_array_size);
/* Rewrite CullDistance to reference the combined array */
nir_foreach_function(function, nir) {
@@ -194,10 +194,10 @@
{
bool progress = false;
- if (nir->stage <= MESA_SHADER_GEOMETRY)
+ if (nir->info.stage <= MESA_SHADER_GEOMETRY)
progress |= combine_clip_cull(nir, &nir->outputs, true);
- if (nir->stage > MESA_SHADER_VERTEX)
+ if (nir->info.stage > MESA_SHADER_VERTEX)
progress |= combine_clip_cull(nir, &nir->inputs, false);
return progress;
diff -Nru mesa-17.2.4/src/compiler/nir/nir_lower_drawpixels.c mesa-17.3.3/src/compiler/nir/nir_lower_drawpixels.c
--- mesa-17.2.4/src/compiler/nir/nir_lower_drawpixels.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/nir/nir_lower_drawpixels.c 2018-01-18 21:30:28.000000000 +0000
@@ -252,7 +252,7 @@
.shader = shader,
};
- assert(shader->stage == MESA_SHADER_FRAGMENT);
+ assert(shader->info.stage == MESA_SHADER_FRAGMENT);
nir_foreach_function(function, shader) {
if (function->impl)
diff -Nru mesa-17.2.4/src/compiler/nir/nir_lower_io.c mesa-17.3.3/src/compiler/nir/nir_lower_io.c
--- mesa-17.2.4/src/compiler/nir/nir_lower_io.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/nir/nir_lower_io.c 2018-01-18 21:30:28.000000000 +0000
@@ -167,7 +167,7 @@
nir_intrinsic_op op;
switch (mode) {
case nir_var_shader_in:
- if (nir->stage == MESA_SHADER_FRAGMENT &&
+ if (nir->info.stage == MESA_SHADER_FRAGMENT &&
nir->options->use_interpolated_input_intrinsics &&
var->data.interpolation != INTERP_MODE_FLAT) {
assert(vertex_index == NULL);
@@ -412,7 +412,7 @@
b->cursor = nir_before_instr(instr);
- const bool per_vertex = nir_is_per_vertex_io(var, b->shader->stage);
+ const bool per_vertex = nir_is_per_vertex_io(var, b->shader->info.stage);
nir_ssa_def *offset;
nir_ssa_def *vertex_index = NULL;
diff -Nru mesa-17.2.4/src/compiler/nir/nir_lower_io_to_scalar.c mesa-17.3.3/src/compiler/nir/nir_lower_io_to_scalar.c
--- mesa-17.2.4/src/compiler/nir/nir_lower_io_to_scalar.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/nir/nir_lower_io_to_scalar.c 2018-01-18 21:30:28.000000000 +0000
@@ -127,3 +127,257 @@
}
}
}
+
+/* Look up (creating a zeroed one on first use) the four-entry array of
+ * per-channel variables associated with var in ht. */
+static nir_variable **
+get_channel_variables(struct hash_table *ht, nir_variable *var)
+{
+   struct hash_entry *entry = _mesa_hash_table_search(ht, var);
+   if (entry)
+      return (nir_variable **) entry->data;
+
+   nir_variable **chan_vars =
+      (nir_variable **) calloc(4, sizeof(nir_variable *));
+   _mesa_hash_table_insert(ht, var, chan_vars);
+   return chan_vars;
+}
+
+/*
+ * Like nir_deref_clone(), except that each cloned link takes its type from
+ * the new parent deref instead of from the source deref.  This is what we
+ * want when splitting vectors: the chain must end up with the scalar type
+ * of the new parent rather than the old vector type.
+ */
+static nir_deref_array *
+clone_deref_array(const nir_deref_array *darr, nir_deref *parent)
+{
+   nir_deref_array *copy = nir_deref_array_create(parent);
+   copy->deref.type = glsl_get_array_element(parent->type);
+
+   if (darr->deref.child) {
+      nir_deref_array *child_copy =
+         clone_deref_array(nir_deref_as_array(darr->deref.child), &copy->deref);
+      copy->deref.child = &child_copy->deref;
+   }
+
+   copy->deref_array_type = darr->deref_array_type;
+   copy->base_offset = darr->base_offset;
+   if (copy->deref_array_type == nir_deref_array_type_indirect)
+      nir_src_copy(&copy->indirect, &darr->indirect, parent);
+   return copy;
+}
+
+/* Replace a vector load/interp of var with one scalar intrinsic per
+ * component, each reading its own single-channel clone of var, and rewrite
+ * all uses of the old result to a vec built from the scalar results.
+ */
+static void
+lower_load_to_scalar_early(nir_builder *b, nir_intrinsic_instr *intr,
+                           nir_variable *var, struct hash_table *split_inputs,
+                           struct hash_table *split_outputs)
+{
+   b->cursor = nir_before_instr(&intr->instr);
+
+   assert(intr->dest.is_ssa);
+
+   struct hash_table *split_vars =
+      var->data.mode == nir_var_shader_in ? split_inputs : split_outputs;
+   nir_variable **chan_vars = get_channel_variables(split_vars, var);
+
+   nir_ssa_def *loads[4];
+   for (unsigned i = 0; i < intr->num_components; i++) {
+      const unsigned chan = var->data.location_frac + i;
+      /* Create the per-channel variable the first time it is needed. */
+      nir_variable *chan_var = chan_vars[chan];
+      if (!chan_var) {
+         chan_var = nir_variable_clone(var, b->shader);
+         chan_var->data.location_frac = chan;
+         chan_var->type = glsl_channel_type(chan_var->type);
+         chan_vars[chan] = chan_var;
+         nir_shader_add_variable(b->shader, chan_var);
+      }
+
+      nir_intrinsic_instr *chan_intr =
+         nir_intrinsic_instr_create(b->shader, intr->intrinsic);
+      nir_ssa_dest_init(&chan_intr->instr, &chan_intr->dest,
+                        1, intr->dest.ssa.bit_size, NULL);
+      chan_intr->num_components = 1;
+      chan_intr->variables[0] = nir_deref_var_create(chan_intr, chan_var);
+
+      nir_deref *src_child = intr->variables[0]->deref.child;
+      if (src_child) {
+         chan_intr->variables[0]->deref.child =
+            &clone_deref_array(nir_deref_as_array(src_child),
+                               &chan_intr->variables[0]->deref)->deref;
+      }
+
+      /* The at_offset/at_sample variants get their source copied as well. */
+      if (intr->intrinsic == nir_intrinsic_interp_var_at_offset ||
+          intr->intrinsic == nir_intrinsic_interp_var_at_sample)
+         nir_src_copy(chan_intr->src, intr->src, &chan_intr->instr);
+
+      nir_builder_instr_insert(b, &chan_intr->instr);
+      loads[i] = &chan_intr->dest.ssa;
+   }
+
+   nir_ssa_def *vec = nir_vec(b, loads, intr->num_components);
+   nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(vec));
+
+   /* Remove the old load intrinsic */
+   nir_instr_remove(&intr->instr);
+}
+
+/* Replace a vector store_var to var with one single-component store per
+ * channel enabled in the write mask, each to its own one-channel clone. */
+static void
+lower_store_output_to_scalar_early(nir_builder *b, nir_intrinsic_instr *intr,
+                                   nir_variable *var,
+                                   struct hash_table *split_outputs)
+{
+   b->cursor = nir_before_instr(&intr->instr);
+
+   nir_ssa_def *value = nir_ssa_for_src(b, intr->src[0], intr->num_components);
+   nir_variable **chan_vars = get_channel_variables(split_outputs, var);
+
+   for (unsigned i = 0; i < intr->num_components; i++) {
+      if (!(nir_intrinsic_write_mask(intr) & (1 << i)))
+         continue;
+
+      const unsigned chan = var->data.location_frac + i;
+      nir_variable *chan_var = chan_vars[chan];
+      if (!chan_var) {
+         chan_var = nir_variable_clone(var, b->shader);
+         chan_var->data.location_frac = chan;
+         chan_var->type = glsl_channel_type(chan_var->type);
+         chan_vars[chan] = chan_var;
+         nir_shader_add_variable(b->shader, chan_var);
+      }
+
+      nir_intrinsic_instr *chan_intr =
+         nir_intrinsic_instr_create(b->shader, intr->intrinsic);
+      chan_intr->num_components = 1;
+      nir_intrinsic_set_write_mask(chan_intr, 0x1);
+      chan_intr->variables[0] = nir_deref_var_create(chan_intr, chan_var);
+      chan_intr->src[0] = nir_src_for_ssa(nir_channel(b, value, i));
+
+      nir_deref *src_child = intr->variables[0]->deref.child;
+      if (src_child) {
+         chan_intr->variables[0]->deref.child =
+            &clone_deref_array(nir_deref_as_array(src_child),
+                               &chan_intr->variables[0]->deref)->deref;
+      }
+
+      nir_builder_instr_insert(b, &chan_intr->instr);
+   }
+
+   /* Remove the old store intrinsic */
+   nir_instr_remove(&intr->instr);
+}
+
+/*
+ * Split vector load_var/store_var/interp_var_at_* on the modes selected by
+ * 'mask' into scalar ones; intended to run before nir_lower_io() is called.
+ */
+void
+nir_lower_io_to_scalar_early(nir_shader *shader, nir_variable_mode mask)
+{
+ struct hash_table *split_inputs = /* original var -> its channel-variable array */
+ _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+ struct hash_table *split_outputs = /* same mapping, for output variables */
+ _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+
+ nir_foreach_function(function, shader) {
+ if (function->impl) {
+ nir_builder b;
+ nir_builder_init(&b, function->impl);
+
+ nir_foreach_block(block, function->impl) {
+ nir_foreach_instr_safe(instr, block) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+
+ if (intr->num_components == 1) /* already scalar */
+ continue;
+
+ if (intr->intrinsic != nir_intrinsic_load_var &&
+ intr->intrinsic != nir_intrinsic_store_var &&
+ intr->intrinsic != nir_intrinsic_interp_var_at_centroid &&
+ intr->intrinsic != nir_intrinsic_interp_var_at_sample &&
+ intr->intrinsic != nir_intrinsic_interp_var_at_offset)
+ continue;
+
+ nir_variable *var = intr->variables[0]->var;
+ nir_variable_mode mode = var->data.mode;
+
+ /* TODO: add patch support */
+ if (var->data.patch)
+ continue;
+
+ /* TODO: add doubles support */
+ if (glsl_type_is_64bit(glsl_without_array(var->type)))
+ continue;
+
+ if (var->data.location < VARYING_SLOT_VAR0 && /* locations in [0, VAR0) are never split */
+ var->data.location >= 0)
+ continue;
+
+ /* Don't bother splitting if we can't opt away any unused
+ * components.
+ */
+ if (var->data.always_active_io)
+ continue;
+
+ /* Skip types we cannot split */
+ if (glsl_type_is_matrix(glsl_without_array(var->type)) ||
+ glsl_type_is_struct(glsl_without_array(var->type)))
+ continue;
+
+ switch (intr->intrinsic) {
+ case nir_intrinsic_interp_var_at_centroid:
+ case nir_intrinsic_interp_var_at_sample:
+ case nir_intrinsic_interp_var_at_offset:
+ case nir_intrinsic_load_var:
+ if ((mask & nir_var_shader_in && mode == nir_var_shader_in) ||
+ (mask & nir_var_shader_out && mode == nir_var_shader_out))
+ lower_load_to_scalar_early(&b, intr, var, split_inputs,
+ split_outputs);
+ break;
+ case nir_intrinsic_store_var:
+ if (mask & nir_var_shader_out &&
+ mode == nir_var_shader_out)
+ lower_store_output_to_scalar_early(&b, intr, var,
+ split_outputs);
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ /* Unlink every split input variable from its list and free its channel array */
+ struct hash_entry *entry;
+ hash_table_foreach(split_inputs, entry) {
+ nir_variable *var = (nir_variable *) entry->key;
+ exec_node_remove(&var->node);
+
+ free(entry->data);
+ }
+
+ /* Unlink every split output variable from its list and free its channel array */
+ hash_table_foreach(split_outputs, entry) {
+ nir_variable *var = (nir_variable *) entry->key;
+ exec_node_remove(&var->node);
+
+ free(entry->data);
+ }
+
+ _mesa_hash_table_destroy(split_inputs, NULL);
+ _mesa_hash_table_destroy(split_outputs, NULL);
+}
diff -Nru mesa-17.2.4/src/compiler/nir/nir_lower_io_to_temporaries.c mesa-17.3.3/src/compiler/nir/nir_lower_io_to_temporaries.c
--- mesa-17.2.4/src/compiler/nir/nir_lower_io_to_temporaries.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/nir/nir_lower_io_to_temporaries.c 2018-01-18 21:30:28.000000000 +0000
@@ -76,7 +76,7 @@
static void
emit_output_copies_impl(struct lower_io_state *state, nir_function_impl *impl)
{
- if (state->shader->stage == MESA_SHADER_GEOMETRY) {
+ if (state->shader->info.stage == MESA_SHADER_GEOMETRY) {
/* For geometry shaders, we have to emit the output copies right
* before each EmitVertex call.
*/
@@ -152,7 +152,7 @@
{
struct lower_io_state state;
- if (shader->stage == MESA_SHADER_TESS_CTRL)
+ if (shader->info.stage == MESA_SHADER_TESS_CTRL)
return;
state.shader = shader;
diff -Nru mesa-17.2.4/src/compiler/nir/nir_lower_io_types.c mesa-17.3.3/src/compiler/nir/nir_lower_io_types.c
--- mesa-17.2.4/src/compiler/nir/nir_lower_io_types.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/nir/nir_lower_io_types.c 2018-01-18 21:30:28.000000000 +0000
@@ -131,7 +131,7 @@
(var->data.mode != nir_var_shader_out))
continue;
- bool vs_in = (state->shader->stage == MESA_SHADER_VERTEX) &&
+ bool vs_in = (state->shader->info.stage == MESA_SHADER_VERTEX) &&
(var->data.mode == nir_var_shader_in);
if (glsl_count_attribute_slots(var->type, vs_in) == 1)
continue;
diff -Nru mesa-17.2.4/src/compiler/nir/nir_lower_samplers_as_deref.c mesa-17.3.3/src/compiler/nir/nir_lower_samplers_as_deref.c
--- mesa-17.2.4/src/compiler/nir/nir_lower_samplers_as_deref.c 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/compiler/nir/nir_lower_samplers_as_deref.c 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,243 @@
+/*
+ * Copyright (C) 2005-2007 Brian Paul All Rights Reserved.
+ * Copyright (C) 2008 VMware, Inc. All Rights Reserved.
+ * Copyright © 2014 Intel Corporation
+ * Copyright © 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file
+ *
+ * Lower sampler and image references of (non-bindless) uniforms by removing
+ * struct dereferences, and synthesizing new uniform variables without structs
+ * if required.
+ *
+ * This will allow backends to have a simple, uniform treatment of bindless and
+ * non-bindless samplers and images.
+ *
+ * Example:
+ *
+ * struct S {
+ * sampler2D tex[2];
+ * sampler2D other;
+ * };
+ * uniform S s[2];
+ *
+ * tmp = texture(s[n].tex[m], coord);
+ *
+ * Becomes:
+ *
+ * decl_var uniform INTERP_MODE_NONE sampler2D[2][2] lower@s.tex (...)
+ *
+ * vec1 32 ssa_idx = $(2 * n + m)
+ * vec4 32 ssa_out = tex ssa_coord (coord), lower@s.tex[n][m] (texture), lower@s.tex[n][m] (sampler)
+ *
+ * and lower@s.tex has var->data.binding set to the base index as defined by
+ * the opaque uniform mapping.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+#include "compiler/glsl/ir_uniform.h"
+
+#include "main/compiler.h"
+#include "main/mtypes.h"
+#include "program/prog_parameter.h"
+#include "program/program.h"
+
+struct lower_samplers_as_deref_state {
+ nir_shader *shader;
+ const struct gl_shader_program *shader_program;
+ struct hash_table *remap_table;
+};
+
+/* Recursively strip struct derefs from the chain below `tail`, adding each
+ * skipped member's location offset to *location and ".name" to *path.
+ */
+static void
+remove_struct_derefs(nir_deref *tail,
+ struct lower_samplers_as_deref_state *state,
+ nir_builder *b, char **path, unsigned *location)
+{
+ if (!tail->child)
+ return;
+
+ switch (tail->child->deref_type) {
+ case nir_deref_type_array: {
+ unsigned length = glsl_get_length(tail->type);
+ remove_struct_derefs(tail->child, state, b, path, location);
+ /* The child's type may have changed; rebuild an array of it here. */
+ tail->type = glsl_get_array_instance(tail->child->type, length);
+ break;
+ }
+
+ case nir_deref_type_struct: {
+ nir_deref_struct *deref_struct = nir_deref_as_struct(tail->child);
+ /* Fold the selected member into the running location and name. */
+ *location += glsl_get_record_location_offset(tail->type, deref_struct->index);
+ ralloc_asprintf_append(path, ".%s",
+ glsl_get_struct_elem_name(tail->type, deref_struct->index));
+ /* Lower the rest of the chain before unlinking this struct deref. */
+ remove_struct_derefs(tail->child, state, b, path, location);
+ /* Drop the struct deref and re-parent. */
+ ralloc_steal(tail, tail->child->child);
+ tail->type = tail->child->type;
+ tail->child = tail->child->child;
+ break;
+ }
+ default:
+ unreachable("Invalid deref type");
+ break;
+ }
+}
+
+/* Strip struct derefs from `deref` and assign its binding; a deref that did
+ * go through a struct is redirected to a cached "lower@..." uniform.
+ */
+static void
+lower_deref(nir_deref_var *deref,
+            struct lower_samplers_as_deref_state *state,
+            nir_builder *b)
+{
+   nir_variable *src_var = deref->var;
+   const struct glsl_type *type_before = deref->deref.type;
+   const gl_shader_stage stage = state->shader->info.stage;
+   unsigned location = src_var->data.location;
+
+   assert(src_var->data.mode == nir_var_uniform);
+
+   char *path = ralloc_asprintf(state->remap_table, "lower@%s", src_var->name);
+   remove_struct_derefs(&deref->deref, state, b, &path, &location);
+
+   assert(location < state->shader_program->data->NumUniformStorage &&
+          state->shader_program->data->UniformStorage[location].opaque[stage].active);
+   const unsigned binding =
+      state->shader_program->data->UniformStorage[location].opaque[stage].index;
+
+   /* Fast path: We did not encounter any struct derefs. */
+   if (deref->deref.type == type_before) {
+      src_var->data.binding = binding;
+      return;
+   }
+
+   const uint32_t hash = _mesa_key_hash_string(path);
+   struct hash_entry *cached =
+      _mesa_hash_table_search_pre_hashed(state->remap_table, hash, path);
+   nir_variable *lowered = cached ? (nir_variable *)cached->data : NULL;
+   if (!cached) {
+      /* First use of this path: create the flattened uniform and remember it. */
+      lowered = nir_variable_create(state->shader, nir_var_uniform,
+                                    deref->deref.type, path);
+      lowered->data.binding = binding;
+      _mesa_hash_table_insert_pre_hashed(state->remap_table, hash, path, lowered);
+   }
+   deref->var = lowered;
+}
+
+/* Lower a texture instruction's deref(s); always reports progress. */
+static bool
+lower_sampler(nir_tex_instr *instr, struct lower_samplers_as_deref_state *state,
+              nir_builder *b)
+{
+   /* In GLSL, we only fill out the texture field. The sampler is inferred */
+   assert(instr->texture != NULL);
+   assert(instr->sampler == NULL);
+
+   b->cursor = nir_before_instr(&instr->instr);
+   lower_deref(instr->texture, state, b);
+
+   if (instr->op == nir_texop_txf_ms || instr->op == nir_texop_txf_ms_mcs ||
+       instr->op == nir_texop_samples_identical) {
+      assert(!instr->sampler);
+      return true;
+   }
+   /* Every other op gets its sampler deref from a clone of the texture deref. */
+   nir_deref_var *sampler = nir_deref_var_clone(instr->texture, instr);
+   nir_instr_rewrite_deref(&instr->instr, &instr->sampler, sampler);
+   return true;
+}
+
+/* Lower variables[0] of an image_* intrinsic; false for any other intrinsic. */
+static bool
+lower_intrinsic(nir_intrinsic_instr *instr,
+                struct lower_samplers_as_deref_state *state,
+                nir_builder *b)
+{
+   if (instr->intrinsic != nir_intrinsic_image_load &&
+       instr->intrinsic != nir_intrinsic_image_store &&
+       instr->intrinsic != nir_intrinsic_image_atomic_add &&
+       instr->intrinsic != nir_intrinsic_image_atomic_min &&
+       instr->intrinsic != nir_intrinsic_image_atomic_max &&
+       instr->intrinsic != nir_intrinsic_image_atomic_and &&
+       instr->intrinsic != nir_intrinsic_image_atomic_or &&
+       instr->intrinsic != nir_intrinsic_image_atomic_xor &&
+       instr->intrinsic != nir_intrinsic_image_atomic_exchange &&
+       instr->intrinsic != nir_intrinsic_image_atomic_comp_swap &&
+       instr->intrinsic != nir_intrinsic_image_size)
+      return false;
+
+   b->cursor = nir_before_instr(&instr->instr);
+   lower_deref(instr->variables[0], state, b);
+   return true;
+}
+
+/* Hand every tex and intrinsic instruction in `impl` to its lowering helper. */
+static bool
+lower_impl(nir_function_impl *impl, struct lower_samplers_as_deref_state *state)
+{
+   bool changed = false;
+   nir_builder builder;
+   nir_builder_init(&builder, impl);
+
+   nir_foreach_block(block, impl) {
+      nir_foreach_instr(instr, block) {
+         if (instr->type == nir_instr_type_intrinsic)
+            changed |= lower_intrinsic(nir_instr_as_intrinsic(instr), state, &builder);
+         else if (instr->type == nir_instr_type_tex)
+            changed |= lower_sampler(nir_instr_as_tex(instr), state, &builder);
+      }
+   }
+   return changed;
+}
+
+/* Pass entry point: lower every function implementation in `shader`. */
+bool
+nir_lower_samplers_as_deref(nir_shader *shader,
+                            const struct gl_shader_program *shader_program)
+{
+   struct lower_samplers_as_deref_state state = {
+      .shader = shader,
+      .shader_program = shader_program,
+      .remap_table = _mesa_hash_table_create(NULL, _mesa_key_hash_string,
+                                             _mesa_key_string_equal),
+   };
+   bool any_progress = false;
+
+   nir_foreach_function(func, shader) {
+      if (func->impl)
+         any_progress |= lower_impl(func->impl, &state);
+   }
+
+   /* keys are freed automatically by ralloc */
+   _mesa_hash_table_destroy(state.remap_table, NULL);
+   return any_progress;
+}
diff -Nru mesa-17.2.4/src/compiler/nir/nir_lower_samplers.c mesa-17.3.3/src/compiler/nir/nir_lower_samplers.c
--- mesa-17.2.4/src/compiler/nir/nir_lower_samplers.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/nir/nir_lower_samplers.c 2018-01-18 21:30:28.000000000 +0000
@@ -109,32 +109,9 @@
assert(array_elements >= 1);
indirect = nir_umin(b, indirect, nir_imm_int(b, array_elements - 1));
- /* First, we have to resize the array of texture sources */
- nir_tex_src *new_srcs = rzalloc_array(instr, nir_tex_src,
- instr->num_srcs + 2);
-
- for (unsigned i = 0; i < instr->num_srcs; i++) {
- new_srcs[i].src_type = instr->src[i].src_type;
- nir_instr_move_src(&instr->instr, &new_srcs[i].src,
- &instr->src[i].src);
- }
-
- ralloc_free(instr->src);
- instr->src = new_srcs;
-
- /* Now we can go ahead and move the source over to being a
- * first-class texture source.
- */
- instr->src[instr->num_srcs].src_type = nir_tex_src_texture_offset;
- instr->num_srcs++;
- nir_instr_rewrite_src(&instr->instr,
- &instr->src[instr->num_srcs - 1].src,
+ nir_tex_instr_add_src(instr, nir_tex_src_texture_offset,
nir_src_for_ssa(indirect));
-
- instr->src[instr->num_srcs].src_type = nir_tex_src_sampler_offset;
- instr->num_srcs++;
- nir_instr_rewrite_src(&instr->instr,
- &instr->src[instr->num_srcs - 1].src,
+ nir_tex_instr_add_src(instr, nir_tex_src_sampler_offset,
nir_src_for_ssa(indirect));
instr->texture_array_size = array_elements;
@@ -180,7 +157,8 @@
nir_foreach_function(function, shader) {
if (function->impl)
- progress |= lower_impl(function->impl, shader_program, shader->stage);
+ progress |= lower_impl(function->impl, shader_program,
+ shader->info.stage);
}
return progress;
diff -Nru mesa-17.2.4/src/compiler/nir/nir_lower_tex.c mesa-17.3.3/src/compiler/nir/nir_lower_tex.c
--- mesa-17.2.4/src/compiler/nir/nir_lower_tex.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/nir/nir_lower_tex.c 2018-01-18 21:30:28.000000000 +0000
@@ -813,6 +813,20 @@
progress = true;
continue;
}
+
+ /* TXF, TXS and TXL require a LOD but not everything we implement using those
+ * three opcodes provides one. Provide a default LOD of 0.
+ */
+ if ((nir_tex_instr_src_index(tex, nir_tex_src_lod) == -1) &&
+ (tex->op == nir_texop_txf || tex->op == nir_texop_txs ||
+ tex->op == nir_texop_txl || tex->op == nir_texop_query_levels ||
+ (tex->op == nir_texop_tex &&
+ b->shader->info.stage != MESA_SHADER_FRAGMENT))) {
+ b->cursor = nir_before_instr(&tex->instr);
+ nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(nir_imm_int(b, 0)));
+ progress = true;
+ continue;
+ }
}
return progress;
diff -Nru mesa-17.2.4/src/compiler/nir/nir_lower_two_sided_color.c mesa-17.3.3/src/compiler/nir/nir_lower_two_sided_color.c
--- mesa-17.2.4/src/compiler/nir/nir_lower_two_sided_color.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/nir/nir_lower_two_sided_color.c 2018-01-18 21:30:28.000000000 +0000
@@ -193,7 +193,7 @@
.shader = shader,
};
- if (shader->stage != MESA_SHADER_FRAGMENT)
+ if (shader->info.stage != MESA_SHADER_FRAGMENT)
return;
if (setup_inputs(&state) != 0)
diff -Nru mesa-17.2.4/src/compiler/nir/nir_lower_uniforms_to_ubo.c mesa-17.3.3/src/compiler/nir/nir_lower_uniforms_to_ubo.c
--- mesa-17.2.4/src/compiler/nir/nir_lower_uniforms_to_ubo.c 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/compiler/nir/nir_lower_uniforms_to_ubo.c 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,97 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Remap load_uniform intrinsics to UBO accesses of UBO binding point 0. Both
+ * the base and the offset are interpreted as 16-byte units.
+ *
+ * Simultaneously, remap existing UBO accesses by increasing their binding
+ * point by 1.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+/* Rewrite one intrinsic: load_ubo has its index source (src[0]) incremented
+ * by 1; load_uniform is replaced by a load_ubo of index 0. True if rewritten. */
+static bool
+lower_instr(nir_intrinsic_instr *instr, nir_builder *b)
+{
+ b->cursor = nir_before_instr(&instr->instr);
+ if (instr->intrinsic == nir_intrinsic_load_ubo) {
+ nir_ssa_def *old_idx = nir_ssa_for_src(b, instr->src[0], 1);
+ nir_ssa_def *new_idx = nir_iadd(b, old_idx, nir_imm_int(b, 1));
+ nir_instr_rewrite_src(&instr->instr, &instr->src[0],
+ nir_src_for_ssa(new_idx));
+ return true;
+ }
+
+ if (instr->intrinsic == nir_intrinsic_load_uniform) {
+ nir_ssa_def *ubo_idx = nir_imm_int(b, 0);
+ nir_ssa_def *ubo_offset =
+ nir_imul(b, nir_imm_int(b, 16),
+ nir_iadd(b, nir_imm_int(b, nir_intrinsic_base(instr)),
+ nir_ssa_for_src(b, instr->src[0], 1)));
+ /* ubo_offset = 16 * (base + src[0]); now build the replacement load. */
+ nir_intrinsic_instr *load =
+ nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_ubo);
+ load->num_components = instr->num_components;
+ load->src[0] = nir_src_for_ssa(ubo_idx);
+ load->src[1] = nir_src_for_ssa(ubo_offset);
+ nir_ssa_dest_init(&load->instr, &load->dest,
+ load->num_components, instr->dest.ssa.bit_size,
+ instr->dest.ssa.name);
+ nir_builder_instr_insert(b, &load->instr);
+ nir_ssa_def_rewrite_uses(&instr->dest.ssa, nir_src_for_ssa(&load->dest.ssa));
+ /* Uses were redirected to the new load, so drop the old intrinsic. */
+ nir_instr_remove(&instr->instr);
+ return true;
+ }
+ return false;
+}
+
+/* Run lower_instr() on every intrinsic, then preserve block/dominance metadata. */
+bool
+nir_lower_uniforms_to_ubo(nir_shader *shader)
+{
+   bool changed = false;
+
+   nir_foreach_function(func, shader) {
+      nir_function_impl *impl = func->impl;
+      if (!impl)
+         continue;
+
+      nir_builder b;
+      nir_builder_init(&b, impl);
+      nir_foreach_block(blk, impl) {
+         nir_foreach_instr_safe(ins, blk) {
+            if (ins->type == nir_instr_type_intrinsic)
+               changed |= lower_instr(nir_instr_as_intrinsic(ins), &b);
+         }
+      }
+      nir_metadata_preserve(impl, nir_metadata_block_index |
+                                  nir_metadata_dominance);
+   }
+   return changed;
+}
+
diff -Nru mesa-17.2.4/src/compiler/nir/nir_lower_wpos_center.c mesa-17.3.3/src/compiler/nir/nir_lower_wpos_center.c
--- mesa-17.2.4/src/compiler/nir/nir_lower_wpos_center.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/nir/nir_lower_wpos_center.c 2018-01-18 21:30:28.000000000 +0000
@@ -105,7 +105,7 @@
bool progress = false;
nir_builder b;
- assert(shader->stage == MESA_SHADER_FRAGMENT);
+ assert(shader->info.stage == MESA_SHADER_FRAGMENT);
nir_foreach_function(function, shader) {
if (function->impl) {
diff -Nru mesa-17.2.4/src/compiler/nir/nir_lower_wpos_ytransform.c mesa-17.3.3/src/compiler/nir/nir_lower_wpos_ytransform.c
--- mesa-17.2.4/src/compiler/nir/nir_lower_wpos_ytransform.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/nir/nir_lower_wpos_ytransform.c 2018-01-18 21:30:28.000000000 +0000
@@ -302,8 +302,10 @@
nir_deref_var *dvar = intr->variables[0];
nir_variable *var = dvar->var;
- if (var->data.mode == nir_var_shader_in &&
- var->data.location == VARYING_SLOT_POS) {
+ if ((var->data.mode == nir_var_shader_in &&
+ var->data.location == VARYING_SLOT_POS) ||
+ (var->data.mode == nir_var_system_value &&
+ var->data.location == SYSTEM_VALUE_FRAG_COORD)) {
/* gl_FragCoord should not have array/struct derefs: */
assert(dvar->deref.child == NULL);
lower_fragcoord(state, intr);
@@ -346,7 +348,7 @@
.shader = shader,
};
- assert(shader->stage == MESA_SHADER_FRAGMENT);
+ assert(shader->info.stage == MESA_SHADER_FRAGMENT);
nir_foreach_function(function, shader) {
if (function->impl)
diff -Nru mesa-17.2.4/src/compiler/nir/nir_metadata.c mesa-17.3.3/src/compiler/nir/nir_metadata.c
--- mesa-17.2.4/src/compiler/nir/nir_metadata.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/nir/nir_metadata.c 2018-01-18 21:30:28.000000000 +0000
@@ -59,7 +59,7 @@
impl->valid_metadata &= preserved;
}
-#ifdef DEBUG
+#ifndef NDEBUG
/**
* Make sure passes properly invalidate metadata (part 1).
*
diff -Nru mesa-17.2.4/src/compiler/nir/nir_opcodes.py mesa-17.3.3/src/compiler/nir/nir_opcodes.py
--- mesa-17.2.4/src/compiler/nir/nir_opcodes.py 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/nir/nir_opcodes.py 2018-01-18 21:30:28.000000000 +0000
@@ -156,7 +156,7 @@
"((src0 == 0.0f) ? 0.0f : ((src0 > 0.0f) ? 1.0f : -1.0f))"))
unop("isign", tint, "(src0 == 0) ? 0 : ((src0 > 0) ? 1 : -1)")
unop("iabs", tint, "(src0 < 0) ? -src0 : src0")
-unop("fabs", tfloat, "bit_size == 64 ? fabs(src0) : fabsf(src0)")
+unop("fabs", tfloat, "fabs(src0)")
unop("fsat", tfloat, ("bit_size == 64 ? " +
"((src0 > 1.0) ? 1.0 : ((src0 <= 0.0) ? 0.0 : src0)) : " +
"((src0 > 1.0f) ? 1.0f : ((src0 <= 0.0f) ? 0.0f : src0))"))
@@ -717,12 +717,12 @@
unsigned base = src0, insert = src1;
int offset = src2, bits = src3;
if (bits == 0) {
- dst = 0;
+ dst = base;
} else if (offset < 0 || bits < 0 || bits + offset > 32) {
dst = 0;
} else {
unsigned mask = ((1ull << bits) - 1) << offset;
- dst = (base & ~mask) | ((insert << bits) & mask);
+ dst = (base & ~mask) | ((insert << offset) & mask);
}
""")
diff -Nru mesa-17.2.4/src/compiler/nir/nir_opt_intrinsics.c mesa-17.3.3/src/compiler/nir/nir_opt_intrinsics.c
--- mesa-17.2.4/src/compiler/nir/nir_opt_intrinsics.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/nir/nir_opt_intrinsics.c 2018-01-18 21:30:28.000000000 +0000
@@ -28,6 +28,26 @@
* \file nir_opt_intrinsics.c
*/
+/* Build (base_mask << count) masked to the low subgroup_size bits, i.e. ANDed
+ * with ~0ull >> (64 - subgroup_size). */
+static nir_ssa_def *
+high_subgroup_mask(nir_builder *b,
+ nir_ssa_def *count,
+ uint64_t base_mask)
+{
+ /* group_mask could probably be calculated more efficiently, but we must not
+ * shift by 64 when the subgroup size is 64: the GLSL shift operator is
+ * undefined in that case. If efficiency mattered this should be done further
+ * down anyway, as the subgroup size is likely known at compile time. */
+ nir_ssa_def *subgroup_size = nir_load_subgroup_size(b);
+ nir_ssa_def *all_bits = nir_imm_int64(b, ~0ull);
+ nir_ssa_def *shift = nir_isub(b, nir_imm_int(b, 64), subgroup_size);
+ nir_ssa_def *group_mask = nir_ushr(b, all_bits, shift);
+ nir_ssa_def *higher_bits = nir_ishl(b, nir_imm_int64(b, base_mask), count);
+
+ return nir_iand(b, higher_bits, group_mask);
+}
+
static bool
opt_intrinsics_impl(nir_function_impl *impl)
{
@@ -95,10 +115,10 @@
replacement = nir_ishl(&b, nir_imm_int64(&b, 1ull), count);
break;
case nir_intrinsic_load_subgroup_ge_mask:
- replacement = nir_ishl(&b, nir_imm_int64(&b, ~0ull), count);
+ replacement = high_subgroup_mask(&b, count, ~0ull);
break;
case nir_intrinsic_load_subgroup_gt_mask:
- replacement = nir_ishl(&b, nir_imm_int64(&b, ~1ull), count);
+ replacement = high_subgroup_mask(&b, count, ~1ull);
break;
case nir_intrinsic_load_subgroup_le_mask:
replacement = nir_inot(&b, nir_ishl(&b, nir_imm_int64(&b, ~1ull), count));
diff -Nru mesa-17.2.4/src/compiler/nir/nir_opt_loop_unroll.c mesa-17.3.3/src/compiler/nir/nir_opt_loop_unroll.c
--- mesa-17.2.4/src/compiler/nir/nir_opt_loop_unroll.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/nir/nir_opt_loop_unroll.c 2018-01-18 21:30:28.000000000 +0000
@@ -33,8 +33,10 @@
* to give about the same results. Around 5 instructions per node. But some
* loops that would unroll with GLSL IR fail to unroll if we set this to 25 so
* we set it to 26.
+ * This was bumped to 96 because it unrolled more loops with a positive
+ * effect (vulkan ssao demo).
*/
-#define LOOP_UNROLL_LIMIT 26
+#define LOOP_UNROLL_LIMIT 96
/* Prepare this loop for unrolling by first converting to lcssa and then
* converting the phis from the loops first block and the block that follows
diff -Nru mesa-17.2.4/src/compiler/nir/nir_print.c mesa-17.3.3/src/compiler/nir/nir_print.c
--- mesa-17.2.4/src/compiler/nir/nir_print.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/nir/nir_print.c 2018-01-18 21:30:28.000000000 +0000
@@ -416,7 +416,7 @@
const char *loc = NULL;
char buf[4];
- switch (state->shader->stage) {
+ switch (state->shader->info.stage) {
case MESA_SHADER_VERTEX:
if (var->data.mode == nir_var_shader_in)
loc = gl_vert_attrib_name(var->data.location);
@@ -447,7 +447,31 @@
loc = buf;
}
- fprintf(fp, " (%s, %u, %u)%s", loc, var->data.driver_location, var->data.binding,
+ /* For shader I/O vars that have been split to components or packed,
+ * print the fractional location within the input/output.
+ */
+ unsigned int num_components =
+ glsl_get_components(glsl_without_array(var->type));
+ const char *components = NULL;
+ char components_local[6] = {'.' /* the rest is 0-filled */};
+ switch (var->data.mode) {
+ case nir_var_shader_in:
+ case nir_var_shader_out:
+ if (num_components < 4 && num_components != 0) {
+ const char *xyzw = "xyzw";
+ for (int i = 0; i < num_components; i++)
+ components_local[i + 1] = xyzw[i + var->data.location_frac];
+
+ components = components_local;
+ }
+ break;
+ default:
+ break;
+ }
+
+ fprintf(fp, " (%s%s, %u, %u)%s", loc,
+ components ? components : "",
+ var->data.driver_location, var->data.binding,
var->data.compact ? " compact" : "");
}
@@ -1157,7 +1181,7 @@
state.annotations = annotations;
- fprintf(fp, "shader: %s\n", gl_shader_stage_name(shader->stage));
+ fprintf(fp, "shader: %s\n", gl_shader_stage_name(shader->info.stage));
if (shader->info.name)
fprintf(fp, "name: %s\n", shader->info.name);
@@ -1165,7 +1189,7 @@
if (shader->info.label)
fprintf(fp, "label: %s\n", shader->info.label);
- switch (shader->stage) {
+ switch (shader->info.stage) {
case MESA_SHADER_COMPUTE:
fprintf(fp, "local-size: %u, %u, %u%s\n",
shader->info.cs.local_size[0],
diff -Nru mesa-17.2.4/src/compiler/nir/nir_remove_dead_variables.c mesa-17.3.3/src/compiler/nir/nir_remove_dead_variables.c
--- mesa-17.2.4/src/compiler/nir/nir_remove_dead_variables.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/nir/nir_remove_dead_variables.c 2018-01-18 21:30:28.000000000 +0000
@@ -28,7 +28,8 @@
#include "nir.h"
static void
-add_var_use_intrinsic(nir_intrinsic_instr *instr, struct set *live)
+add_var_use_intrinsic(nir_intrinsic_instr *instr, struct set *live,
+ nir_variable_mode modes)
{
unsigned num_vars = nir_intrinsic_infos[instr->intrinsic].num_variables;
@@ -47,6 +48,14 @@
break;
}
+ /* This pass can't be used on I/O variables after they've been lowered. */
+ case nir_intrinsic_load_input:
+ assert(!(modes & nir_var_shader_in));
+ break;
+ case nir_intrinsic_store_output:
+ assert(!(modes & nir_var_shader_out));
+ break;
+
default:
for (unsigned i = 0; i < num_vars; i++) {
_mesa_set_add(live, instr->variables[i]->var);
@@ -84,7 +93,7 @@
}
static void
-add_var_use_shader(nir_shader *shader, struct set *live)
+add_var_use_shader(nir_shader *shader, struct set *live, nir_variable_mode modes)
{
nir_foreach_function(function, shader) {
if (function->impl) {
@@ -92,7 +101,8 @@
nir_foreach_instr(instr, block) {
switch(instr->type) {
case nir_instr_type_intrinsic:
- add_var_use_intrinsic(nir_instr_as_intrinsic(instr), live);
+ add_var_use_intrinsic(nir_instr_as_intrinsic(instr), live,
+ modes);
break;
case nir_instr_type_call:
@@ -162,7 +172,7 @@
struct set *live =
_mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
- add_var_use_shader(shader, live);
+ add_var_use_shader(shader, live, modes);
if (modes & nir_var_uniform)
progress = remove_dead_vars(&shader->uniforms, live) || progress;
diff -Nru mesa-17.2.4/src/compiler/nir/nir_validate.c mesa-17.3.3/src/compiler/nir/nir_validate.c
--- mesa-17.2.4/src/compiler/nir/nir_validate.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/nir/nir_validate.c 2018-01-18 21:30:28.000000000 +0000
@@ -35,7 +35,7 @@
/* Since this file is just a pile of asserts, don't bother compiling it if
* we're not building a debug build.
*/
-#ifdef DEBUG
+#ifndef NDEBUG
/*
* Per-register validation state.
@@ -973,7 +973,7 @@
assert(glsl_type_is_array(var->type));
const struct glsl_type *type = glsl_get_array_element(var->type);
- if (nir_is_per_vertex_io(var, state->shader->stage)) {
+ if (nir_is_per_vertex_io(var, state->shader->info.stage)) {
assert(glsl_type_is_array(type));
assert(glsl_type_is_scalar(glsl_get_array_element(type)));
} else {
diff -Nru mesa-17.2.4/src/compiler/nir_types.cpp mesa-17.3.3/src/compiler/nir_types.cpp
--- mesa-17.2.4/src/compiler/nir_types.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/nir_types.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -49,6 +49,13 @@
}
const glsl_type *
+glsl_get_array_instance(const glsl_type *type, unsigned array_size)
+{
+ /* Forwards to the static member glsl_type::get_array_instance(). */
+ return glsl_type::get_array_instance(type, array_size);
+}
+
+const glsl_type *
glsl_get_struct_field(const glsl_type *type, unsigned index)
{
return type->fields.structure[index].type;
@@ -143,6 +150,12 @@
}
bool
+glsl_type_is_64bit(const glsl_type *type)
+{
+ return type->is_64bit(); /* forwards to glsl_type::is_64bit() */
+}
+
+bool
glsl_type_is_void(const glsl_type *type)
{
return type->is_void();
@@ -384,3 +397,30 @@
return glsl_type::get_instance(type->base_type, type->matrix_columns,
type->vector_elements);
}
+
+/* Return the type of a single channel of `t`.
+ *
+ * UINT, INT, FLOAT, BOOL, DOUBLE, UINT64 and INT64 base types map to the
+ * matching glsl_*_type(); arrays keep their length and recurse on the element.
+ */
+const glsl_type *
+glsl_channel_type(const glsl_type *t)
+{
+   /* Arrays: same length, element type replaced by its channel type. */
+   if (glsl_get_base_type(t) == GLSL_TYPE_ARRAY) {
+      const glsl_type *elem = glsl_channel_type(glsl_get_array_element(t));
+      return glsl_array_type(elem, glsl_get_length(t));
+   }
+
+   switch (glsl_get_base_type(t)) {
+   case GLSL_TYPE_UINT:   return glsl_uint_type();
+   case GLSL_TYPE_INT:    return glsl_int_type();
+   case GLSL_TYPE_FLOAT:  return glsl_float_type();
+   case GLSL_TYPE_BOOL:   return glsl_bool_type();
+   case GLSL_TYPE_DOUBLE: return glsl_double_type();
+   case GLSL_TYPE_UINT64: return glsl_uint64_t_type();
+   case GLSL_TYPE_INT64:  return glsl_int64_t_type();
+   default:
+      unreachable("Unhandled base type glsl_channel_type()");
+   }
+}
diff -Nru mesa-17.2.4/src/compiler/nir_types.h mesa-17.3.3/src/compiler/nir_types.h
--- mesa-17.2.4/src/compiler/nir_types.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/nir_types.h 2018-01-18 21:30:28.000000000 +0000
@@ -48,6 +48,8 @@
const struct glsl_type *glsl_get_array_element(const struct glsl_type *type);
const struct glsl_type *glsl_without_array(const struct glsl_type *type);
+const struct glsl_type *glsl_get_array_instance(const struct glsl_type *type,
+ unsigned array_size);
const struct glsl_type *glsl_get_column_type(const struct glsl_type *type);
@@ -104,6 +106,7 @@
return 0;
}
+bool glsl_type_is_64bit(const struct glsl_type *type);
bool glsl_type_is_void(const struct glsl_type *type);
bool glsl_type_is_error(const struct glsl_type *type);
bool glsl_type_is_vector(const struct glsl_type *type);
@@ -160,6 +163,8 @@
const struct glsl_type *glsl_transposed_type(const struct glsl_type *type);
+const struct glsl_type *glsl_channel_type(const struct glsl_type *type);
+
#ifdef __cplusplus
}
#endif
diff -Nru mesa-17.2.4/src/compiler/shader_enums.h mesa-17.3.3/src/compiler/shader_enums.h
--- mesa-17.2.4/src/compiler/shader_enums.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/shader_enums.h 2018-01-18 21:30:28.000000000 +0000
@@ -567,6 +567,13 @@
INTERP_MODE_COUNT /**< Number of interpolation qualifiers */
};
+enum glsl_interface_packing {
+ GLSL_INTERFACE_PACKING_STD140,
+ GLSL_INTERFACE_PACKING_SHARED,
+ GLSL_INTERFACE_PACKING_PACKED,
+ GLSL_INTERFACE_PACKING_STD430
+};
+
const char *glsl_interp_mode_name(enum glsl_interp_mode qual);
/**
@@ -663,6 +670,23 @@
TESS_SPACING_FRACTIONAL_EVEN,
};
+/**
+ * A compare function enum for use in compiler lowering passes. This is in
+ * the same order as GL's compare functions (shifted down by GL_NEVER), and is
+ * exactly the same as gallium's PIPE_FUNC_*.
+ */
+enum compare_func
+{
+ COMPARE_FUNC_NEVER,
+ COMPARE_FUNC_LESS,
+ COMPARE_FUNC_EQUAL,
+ COMPARE_FUNC_LEQUAL,
+ COMPARE_FUNC_GREATER,
+ COMPARE_FUNC_NOTEQUAL,
+ COMPARE_FUNC_GEQUAL,
+ COMPARE_FUNC_ALWAYS,
+};
+
#ifdef __cplusplus
} /* extern "C" */
#endif
diff -Nru mesa-17.2.4/src/compiler/shader_info.h mesa-17.3.3/src/compiler/shader_info.h
--- mesa-17.2.4/src/compiler/shader_info.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/shader_info.h 2018-01-18 21:30:28.000000000 +0000
@@ -70,6 +70,9 @@
/* Whether or not this shader ever uses textureGather() */
bool uses_texture_gather;
+ /** Bitfield of which textures are used by texelFetch() */
+ uint32_t textures_used_by_txf;
+
/* The size of the gl_ClipDistance[] array, if declared. */
unsigned clip_distance_array_size;
diff -Nru mesa-17.2.4/src/compiler/spirv/spirv_to_nir.c mesa-17.3.3/src/compiler/spirv/spirv_to_nir.c
--- mesa-17.2.4/src/compiler/spirv/spirv_to_nir.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/spirv/spirv_to_nir.c 2018-01-18 21:30:28.000000000 +0000
@@ -262,7 +262,7 @@
if (strcmp((const char *)&w[2], "GLSL.std.450") == 0) {
val->ext_handler = vtn_handle_glsl450_instruction;
} else {
- assert(!"Unsupported extension");
+ unreachable("Unsupported extension");
}
break;
}
@@ -724,7 +724,7 @@
case SpvImageFormatR16ui: return 0x8234; /* GL_R16UI */
case SpvImageFormatR8ui: return 0x8232; /* GL_R8UI */
default:
- assert(!"Invalid image format");
+ unreachable("Invalid image format");
return 0;
}
}
@@ -919,7 +919,7 @@
else if (dim == GLSL_SAMPLER_DIM_SUBPASS)
dim = GLSL_SAMPLER_DIM_SUBPASS_MS;
else
- assert(!"Unsupported multisampled image type");
+ unreachable("Unsupported multisampled image type");
}
val->type->image_format = translate_image_format(format);
@@ -934,7 +934,7 @@
val->type->type = glsl_image_type(dim, is_array,
glsl_get_base_type(sampled_type));
} else {
- assert(!"We need to know if the image will be sampled");
+ unreachable("We need to know if the image will be sampled");
}
break;
}
@@ -1378,7 +1378,7 @@
break;
case SpvOpConstantSampler:
- assert(!"OpConstantSampler requires Kernel Capability");
+ unreachable("OpConstantSampler requires Kernel Capability");
break;
default:
@@ -1490,6 +1490,8 @@
struct vtn_value *val =
vtn_push_value(b, w[2], vtn_value_type_sampled_image);
val->sampled_image = ralloc(b, struct vtn_sampled_image);
+ val->sampled_image->type =
+ vtn_value(b, w[1], vtn_value_type_type)->type;
val->sampled_image->image =
vtn_value(b, w[3], vtn_value_type_pointer)->pointer;
val->sampled_image->sampler =
@@ -1516,16 +1518,12 @@
sampled = *sampled_val->sampled_image;
} else {
assert(sampled_val->value_type == vtn_value_type_pointer);
+ sampled.type = sampled_val->pointer->type;
sampled.image = NULL;
sampled.sampler = sampled_val->pointer;
}
- const struct glsl_type *image_type;
- if (sampled.image) {
- image_type = sampled.image->var->var->interface_type;
- } else {
- image_type = sampled.sampler->var->var->interface_type;
- }
+ const struct glsl_type *image_type = sampled.type->type;
const enum glsl_sampler_dim sampler_dim = glsl_get_sampler_dim(image_type);
const bool is_array = glsl_sampler_type_is_array(image_type);
const bool is_shadow = glsl_sampler_type_is_shadow(image_type);
@@ -1757,6 +1755,7 @@
case nir_texop_txb:
case nir_texop_txl:
case nir_texop_txd:
+ case nir_texop_tg4:
/* These operations require a sampler */
instr->sampler = nir_deref_var_clone(sampler, instr);
break;
@@ -1764,7 +1763,6 @@
case nir_texop_txf_ms:
case nir_texop_txs:
case nir_texop_lod:
- case nir_texop_tg4:
case nir_texop_query_levels:
case nir_texop_texture_samples:
case nir_texop_samples_identical:
@@ -2627,7 +2625,7 @@
case SpvExecutionModeOutputTriangleStrip:
return 5; /* GL_TRIANGLE_STRIP */
default:
- assert(!"Invalid primitive type");
+ unreachable("Invalid primitive type");
return 4;
}
}
@@ -2647,7 +2645,7 @@
case SpvExecutionModeInputTrianglesAdjacency:
return 6;
default:
- assert(!"Invalid GS input mode");
+ unreachable("Invalid GS input mode");
return 0;
}
}
@@ -2707,6 +2705,7 @@
case SpvCapabilitySampled1D:
case SpvCapabilityImage1D:
case SpvCapabilitySampledCubeArray:
+ case SpvCapabilityImageCubeArray:
case SpvCapabilitySampledBuffer:
case SpvCapabilityImageBuffer:
case SpvCapabilityImageQuery:
@@ -2730,7 +2729,6 @@
case SpvCapabilityAtomicStorage:
case SpvCapabilityInt16:
case SpvCapabilityStorageImageMultisample:
- case SpvCapabilityImageCubeArray:
case SpvCapabilityInt8:
case SpvCapabilitySparseResidency:
case SpvCapabilityMinLod:
@@ -2802,7 +2800,8 @@
case SpvOpMemoryModel:
assert(w[1] == SpvAddressingModelLogical);
- assert(w[2] == SpvMemoryModelGLSL450);
+ assert(w[2] == SpvMemoryModelSimple ||
+ w[2] == SpvMemoryModelGLSL450);
break;
case SpvOpEntryPoint: {
@@ -2863,34 +2862,34 @@
break;
case SpvExecutionModeEarlyFragmentTests:
- assert(b->shader->stage == MESA_SHADER_FRAGMENT);
+ assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
b->shader->info.fs.early_fragment_tests = true;
break;
case SpvExecutionModeInvocations:
- assert(b->shader->stage == MESA_SHADER_GEOMETRY);
+ assert(b->shader->info.stage == MESA_SHADER_GEOMETRY);
b->shader->info.gs.invocations = MAX2(1, mode->literals[0]);
break;
case SpvExecutionModeDepthReplacing:
- assert(b->shader->stage == MESA_SHADER_FRAGMENT);
+ assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_ANY;
break;
case SpvExecutionModeDepthGreater:
- assert(b->shader->stage == MESA_SHADER_FRAGMENT);
+ assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_GREATER;
break;
case SpvExecutionModeDepthLess:
- assert(b->shader->stage == MESA_SHADER_FRAGMENT);
+ assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_LESS;
break;
case SpvExecutionModeDepthUnchanged:
- assert(b->shader->stage == MESA_SHADER_FRAGMENT);
+ assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_UNCHANGED;
break;
case SpvExecutionModeLocalSize:
- assert(b->shader->stage == MESA_SHADER_COMPUTE);
+ assert(b->shader->info.stage == MESA_SHADER_COMPUTE);
b->shader->info.cs.local_size[0] = mode->literals[0];
b->shader->info.cs.local_size[1] = mode->literals[1];
b->shader->info.cs.local_size[2] = mode->literals[2];
@@ -2899,11 +2898,11 @@
break; /* Nothing to do with this */
case SpvExecutionModeOutputVertices:
- if (b->shader->stage == MESA_SHADER_TESS_CTRL ||
- b->shader->stage == MESA_SHADER_TESS_EVAL) {
+ if (b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
+ b->shader->info.stage == MESA_SHADER_TESS_EVAL) {
b->shader->info.tess.tcs_vertices_out = mode->literals[0];
} else {
- assert(b->shader->stage == MESA_SHADER_GEOMETRY);
+ assert(b->shader->info.stage == MESA_SHADER_GEOMETRY);
b->shader->info.gs.vertices_out = mode->literals[0];
}
break;
@@ -2915,12 +2914,12 @@
case SpvExecutionModeInputTrianglesAdjacency:
case SpvExecutionModeQuads:
case SpvExecutionModeIsolines:
- if (b->shader->stage == MESA_SHADER_TESS_CTRL ||
- b->shader->stage == MESA_SHADER_TESS_EVAL) {
+ if (b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
+ b->shader->info.stage == MESA_SHADER_TESS_EVAL) {
b->shader->info.tess.primitive_mode =
gl_primitive_from_spv_execution_mode(mode->exec_mode);
} else {
- assert(b->shader->stage == MESA_SHADER_GEOMETRY);
+ assert(b->shader->info.stage == MESA_SHADER_GEOMETRY);
b->shader->info.gs.vertices_in =
vertices_in_from_spv_execution_mode(mode->exec_mode);
}
@@ -2929,44 +2928,39 @@
case SpvExecutionModeOutputPoints:
case SpvExecutionModeOutputLineStrip:
case SpvExecutionModeOutputTriangleStrip:
- assert(b->shader->stage == MESA_SHADER_GEOMETRY);
+ assert(b->shader->info.stage == MESA_SHADER_GEOMETRY);
b->shader->info.gs.output_primitive =
gl_primitive_from_spv_execution_mode(mode->exec_mode);
break;
case SpvExecutionModeSpacingEqual:
- assert(b->shader->stage == MESA_SHADER_TESS_CTRL ||
- b->shader->stage == MESA_SHADER_TESS_EVAL);
+ assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
+ b->shader->info.stage == MESA_SHADER_TESS_EVAL);
b->shader->info.tess.spacing = TESS_SPACING_EQUAL;
break;
case SpvExecutionModeSpacingFractionalEven:
- assert(b->shader->stage == MESA_SHADER_TESS_CTRL ||
- b->shader->stage == MESA_SHADER_TESS_EVAL);
+ assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
+ b->shader->info.stage == MESA_SHADER_TESS_EVAL);
b->shader->info.tess.spacing = TESS_SPACING_FRACTIONAL_EVEN;
break;
case SpvExecutionModeSpacingFractionalOdd:
- assert(b->shader->stage == MESA_SHADER_TESS_CTRL ||
- b->shader->stage == MESA_SHADER_TESS_EVAL);
+ assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
+ b->shader->info.stage == MESA_SHADER_TESS_EVAL);
b->shader->info.tess.spacing = TESS_SPACING_FRACTIONAL_ODD;
break;
case SpvExecutionModeVertexOrderCw:
- assert(b->shader->stage == MESA_SHADER_TESS_CTRL ||
- b->shader->stage == MESA_SHADER_TESS_EVAL);
- /* Vulkan's notion of CCW seems to match the hardware backends,
- * but be the opposite of OpenGL. Currently NIR follows GL semantics,
- * so we set it backwards here.
- */
- b->shader->info.tess.ccw = true;
+ assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
+ b->shader->info.stage == MESA_SHADER_TESS_EVAL);
+ b->shader->info.tess.ccw = false;
break;
case SpvExecutionModeVertexOrderCcw:
- assert(b->shader->stage == MESA_SHADER_TESS_CTRL ||
- b->shader->stage == MESA_SHADER_TESS_EVAL);
- /* Backwards; see above */
- b->shader->info.tess.ccw = false;
+ assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
+ b->shader->info.stage == MESA_SHADER_TESS_EVAL);
+ b->shader->info.tess.ccw = true;
break;
case SpvExecutionModePointMode:
- assert(b->shader->stage == MESA_SHADER_TESS_CTRL ||
- b->shader->stage == MESA_SHADER_TESS_EVAL);
+ assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL ||
+ b->shader->info.stage == MESA_SHADER_TESS_EVAL);
b->shader->info.tess.point_mode = true;
break;
@@ -2975,7 +2969,7 @@
break;
case SpvExecutionModeXfb:
- assert(!"Unhandled execution mode");
+ unreachable("Unhandled execution mode");
break;
case SpvExecutionModeVecTypeHint:
@@ -3009,7 +3003,7 @@
case SpvOpMemberDecorate:
case SpvOpGroupDecorate:
case SpvOpGroupMemberDecorate:
- assert(!"Invalid opcode types and variables section");
+ unreachable("Invalid opcode types and variables section");
break;
case SpvOpTypeVoid:
diff -Nru mesa-17.2.4/src/compiler/spirv/vtn_cfg.c mesa-17.3.3/src/compiler/spirv/vtn_cfg.c
--- mesa-17.2.4/src/compiler/spirv/vtn_cfg.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/spirv/vtn_cfg.c 2018-01-18 21:30:28.000000000 +0000
@@ -356,8 +356,16 @@
switch_case, switch_break,
loop_break, loop_cont);
- if (if_stmt->then_type == vtn_branch_type_none &&
- if_stmt->else_type == vtn_branch_type_none) {
+ if (then_block == else_block) {
+ block->branch_type = if_stmt->then_type;
+ if (block->branch_type == vtn_branch_type_none) {
+ block = then_block;
+ continue;
+ } else {
+ return;
+ }
+ } else if (if_stmt->then_type == vtn_branch_type_none &&
+ if_stmt->else_type == vtn_branch_type_none) {
/* Neither side of the if is something we can short-circuit. */
assert((*block->merge & SpvOpCodeMask) == SpvOpSelectionMerge);
struct vtn_block *merge_block =
diff -Nru mesa-17.2.4/src/compiler/spirv/vtn_private.h mesa-17.3.3/src/compiler/spirv/vtn_private.h
--- mesa-17.2.4/src/compiler/spirv/vtn_private.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/spirv/vtn_private.h 2018-01-18 21:30:28.000000000 +0000
@@ -411,6 +411,7 @@
};
struct vtn_sampled_image {
+ struct vtn_type *type; /* vtn type; type->type is read as the GLSL image/sampler type */
struct vtn_pointer *image; /* Image or array of images */
struct vtn_pointer *sampler; /* Sampler */
};
diff -Nru mesa-17.2.4/src/compiler/spirv/vtn_variables.c mesa-17.3.3/src/compiler/spirv/vtn_variables.c
--- mesa-17.2.4/src/compiler/spirv/vtn_variables.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/compiler/spirv/vtn_variables.c 2018-01-18 21:30:28.000000000 +0000
@@ -102,10 +102,15 @@
if (link.mode == vtn_access_mode_literal) {
return nir_imm_int(&b->nb, link.id * stride);
} else if (stride == 1) {
- return vtn_ssa_value(b, link.id)->def;
+ nir_ssa_def *ssa = vtn_ssa_value(b, link.id)->def;
+ if (ssa->bit_size != 32)
+ ssa = nir_u2u32(&b->nb, ssa);
+ return ssa;
} else {
- return nir_imul(&b->nb, vtn_ssa_value(b, link.id)->def,
- nir_imm_int(&b->nb, stride));
+ nir_ssa_def *src0 = vtn_ssa_value(b, link.id)->def;
+ if (src0->bit_size != 32)
+ src0 = nir_u2u32(&b->nb, src0);
+ return nir_imul(&b->nb, src0, nir_imm_int(&b->nb, stride));
}
}
@@ -513,35 +518,37 @@
*index_out = get_vulkan_resource_index(b, ptr, &type, &idx);
nir_ssa_def *offset = nir_imm_int(&b->nb, 0);
- for (; idx < ptr->chain->length; idx++) {
- enum glsl_base_type base_type = glsl_get_base_type(type->type);
- switch (base_type) {
- case GLSL_TYPE_UINT:
- case GLSL_TYPE_INT:
- case GLSL_TYPE_UINT64:
- case GLSL_TYPE_INT64:
- case GLSL_TYPE_FLOAT:
- case GLSL_TYPE_DOUBLE:
- case GLSL_TYPE_BOOL:
- case GLSL_TYPE_ARRAY:
- offset = nir_iadd(&b->nb, offset,
- vtn_access_link_as_ssa(b, ptr->chain->link[idx],
- type->stride));
-
- type = type->array_element;
- break;
-
- case GLSL_TYPE_STRUCT: {
- assert(ptr->chain->link[idx].mode == vtn_access_mode_literal);
- unsigned member = ptr->chain->link[idx].id;
- offset = nir_iadd(&b->nb, offset,
- nir_imm_int(&b->nb, type->offsets[member]));
- type = type->members[member];
- break;
- }
+ if (ptr->chain) {
+ for (; idx < ptr->chain->length; idx++) {
+ enum glsl_base_type base_type = glsl_get_base_type(type->type);
+ switch (base_type) {
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_UINT64:
+ case GLSL_TYPE_INT64:
+ case GLSL_TYPE_FLOAT:
+ case GLSL_TYPE_DOUBLE:
+ case GLSL_TYPE_BOOL:
+ case GLSL_TYPE_ARRAY:
+ offset = nir_iadd(&b->nb, offset,
+ vtn_access_link_as_ssa(b, ptr->chain->link[idx],
+ type->stride));
+
+ type = type->array_element;
+ break;
+
+ case GLSL_TYPE_STRUCT: {
+ assert(ptr->chain->link[idx].mode == vtn_access_mode_literal);
+ unsigned member = ptr->chain->link[idx].id;
+ offset = nir_iadd(&b->nb, offset,
+ nir_imm_int(&b->nb, type->offsets[member]));
+ type = type->members[member];
+ break;
+ }
- default:
- unreachable("Invalid type for deref");
+ default:
+ unreachable("Invalid type for deref");
+ }
}
}
@@ -599,7 +606,7 @@
return type->stride * glsl_get_length(type->type);
default:
- assert(!"Invalid block type");
+ unreachable("Invalid block type");
return 0;
}
}
@@ -825,7 +832,7 @@
&access_offset, &access_size);
break;
default:
- assert(!"Invalid block variable mode");
+ unreachable("Invalid block variable mode");
}
nir_ssa_def *offset, *index = NULL;
@@ -1043,7 +1050,7 @@
set_mode_system_value(mode);
break;
case SpvBuiltInPrimitiveId:
- if (b->shader->stage == MESA_SHADER_FRAGMENT) {
+ if (b->shader->info.stage == MESA_SHADER_FRAGMENT) {
assert(*mode == nir_var_shader_in);
*location = VARYING_SLOT_PRIMITIVE_ID;
} else if (*mode == nir_var_shader_out) {
@@ -1059,18 +1066,18 @@
break;
case SpvBuiltInLayer:
*location = VARYING_SLOT_LAYER;
- if (b->shader->stage == MESA_SHADER_FRAGMENT)
+ if (b->shader->info.stage == MESA_SHADER_FRAGMENT)
*mode = nir_var_shader_in;
- else if (b->shader->stage == MESA_SHADER_GEOMETRY)
+ else if (b->shader->info.stage == MESA_SHADER_GEOMETRY)
*mode = nir_var_shader_out;
else
unreachable("invalid stage for SpvBuiltInLayer");
break;
case SpvBuiltInViewportIndex:
*location = VARYING_SLOT_VIEWPORT;
- if (b->shader->stage == MESA_SHADER_GEOMETRY)
+ if (b->shader->info.stage == MESA_SHADER_GEOMETRY)
*mode = nir_var_shader_out;
- else if (b->shader->stage == MESA_SHADER_FRAGMENT)
+ else if (b->shader->info.stage == MESA_SHADER_FRAGMENT)
*mode = nir_var_shader_in;
else
unreachable("invalid stage for SpvBuiltInViewportIndex");
@@ -1350,11 +1357,11 @@
if (dec->decoration == SpvDecorationLocation) {
unsigned location = dec->literals[0];
bool is_vertex_input;
- if (b->shader->stage == MESA_SHADER_FRAGMENT &&
+ if (b->shader->info.stage == MESA_SHADER_FRAGMENT &&
vtn_var->mode == vtn_variable_mode_output) {
is_vertex_input = false;
location += FRAG_RESULT_DATA0;
- } else if (b->shader->stage == MESA_SHADER_VERTEX &&
+ } else if (b->shader->info.stage == MESA_SHADER_VERTEX &&
vtn_var->mode == vtn_variable_mode_input) {
is_vertex_input = true;
location += VERT_ATTRIB_GENERIC0;
@@ -1425,7 +1432,7 @@
mode = vtn_variable_mode_ssbo;
nir_mode = 0;
} else {
- assert(!"Invalid uniform variable type");
+ unreachable("Invalid uniform variable type");
}
break;
case SpvStorageClassStorageBuffer:
@@ -1440,7 +1447,7 @@
mode = vtn_variable_mode_sampler;
nir_mode = nir_var_uniform;
} else {
- assert(!"Invalid uniform constant variable type");
+ unreachable("Invalid uniform constant variable type");
}
break;
case SpvStorageClassPushConstant:
@@ -1648,7 +1655,7 @@
int array_length = -1;
struct vtn_type *interface_type = var->type;
- if (is_per_vertex_inout(var, b->shader->stage)) {
+ if (is_per_vertex_inout(var, b->shader->info.stage)) {
/* In Geometry shaders (and some tessellation), inputs come
* in per-vertex arrays. However, some builtins come in
* non-per-vertex, hence the need for the is_array check. In
@@ -1800,6 +1807,7 @@
struct vtn_value *val =
vtn_push_value(b, w[2], vtn_value_type_sampled_image);
val->sampled_image = ralloc(b, struct vtn_sampled_image);
+ val->sampled_image->type = base_val->sampled_image->type;
val->sampled_image->image =
vtn_pointer_dereference(b, base_val->sampled_image->image, chain);
val->sampled_image->sampler = base_val->sampled_image->sampler;
diff -Nru mesa-17.2.4/src/egl/drivers/dri2/egl_dri2.c mesa-17.3.3/src/egl/drivers/dri2/egl_dri2.c
--- mesa-17.2.4/src/egl/drivers/dri2/egl_dri2.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/egl/drivers/dri2/egl_dri2.c 2018-01-18 21:30:28.000000000 +0000
@@ -25,8 +25,6 @@
* Kristian Høgsberg
*/
-#define WL_HIDE_DEPRECATED
-
#include
#include
#include
@@ -46,7 +44,6 @@
#endif
#include
#include
-#include "GL/mesa_glinterop.h"
#include
#include
@@ -61,9 +58,11 @@
#endif
#include "egl_dri2.h"
+#include "GL/mesa_glinterop.h"
#include "loader/loader.h"
#include "util/u_atomic.h"
#include "util/u_vector.h"
+#include "mapi/glapi/glapi.h"
/* The kernel header drm_fourcc.h defines the DRM formats below. We duplicate
* some of the definitions here so that building Mesa won't bleeding-edge
@@ -93,6 +92,8 @@
#define DRM_FORMAT_MOD_INVALID ((1ULL<<56) - 1)
#endif
+#define NUM_ATTRIBS 10
+
static void
dri_set_background_context(void *loaderPrivate)
{
@@ -102,6 +103,26 @@
_eglBindContextToThread(ctx, t);
}
+/**
+ * Call the glFlush entry point obtained from glapi.
+ *
+ * The entry point is looked up with _glapi_get_proc_address() the first
+ * time it is needed and cached in a function-local static; lookup and
+ * cache are guarded by a mutex. If the lookup fails a warning is logged
+ * and the function returns without flushing.
+ */
+static void
+dri2_gl_flush(void)
+{
+   static void (*glFlush)(void);
+   static mtx_t glFlushMutex = _MTX_INITIALIZER_NP;
+   void (*flush)(void);
+
+   mtx_lock(&glFlushMutex);
+   if (!glFlush)
+      glFlush = _glapi_get_proc_address("glFlush");
+   /* Take a private copy while the lock is still held so the checks below
+    * never read the shared static outside the critical section.
+    */
+   flush = glFlush;
+   mtx_unlock(&glFlushMutex);
+
+   /* if glFlush is not available things are horribly broken */
+   if (!flush) {
+      _eglLog(_EGL_WARNING, "DRI2: failed to find glFlush entry point");
+      return;
+   }
+
+   flush();
+}
+
static GLboolean
dri_is_thread_safe(void *loaderPrivate)
{
@@ -435,6 +456,7 @@
{ __DRI2_FENCE, 1, offsetof(struct dri2_egl_display, fence) },
{ __DRI2_RENDERER_QUERY, 1, offsetof(struct dri2_egl_display, rendererQuery) },
{ __DRI2_INTEROP, 1, offsetof(struct dri2_egl_display, interop) },
+ { __DRI_IMAGE, 1, offsetof(struct dri2_egl_display, image) },
{ NULL, 0, 0 }
};
@@ -667,6 +689,11 @@
disp->Extensions.KHR_no_config_context = EGL_TRUE;
disp->Extensions.KHR_surfaceless_context = EGL_TRUE;
+ /* Report back to EGL the bitmask of priorities supported */
+ disp->Extensions.IMG_context_priority =
+ dri2_renderer_query_integer(dri2_dpy,
+ __DRI2_RENDERER_HAS_CONTEXT_PRIORITY);
+
if (dri2_renderer_query_integer(dri2_dpy,
__DRI2_RENDERER_HAS_FRAMEBUFFER_SRGB))
disp->Extensions.KHR_gl_colorspace = EGL_TRUE;
@@ -741,6 +768,41 @@
}
}
+/**
+ * Fill in the display's minimum, maximum and default swap interval.
+ *
+ * The driconf "vblank_mode" option is queried when the display has a
+ * config interface; otherwise DRI_CONF_VBLANK_DEF_INTERVAL_1 is assumed.
+ *
+ * \param disp               display whose dri2 data is updated
+ * \param max_swap_interval  upper bound used by every mode except
+ *                           DRI_CONF_VBLANK_NEVER, which forces 0
+ */
+void
+dri2_setup_swap_interval(_EGLDisplay *disp, int max_swap_interval)
+{
+   struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
+   GLint vblank_mode = DRI_CONF_VBLANK_DEF_INTERVAL_1;
+   /* Start from the DEF_INTERVAL_1 values; each mode below only overrides
+    * the fields where it differs.
+    */
+   int lowest = 0;
+   int highest = max_swap_interval;
+   int initial = 1;
+
+   /* Allow driconf to override applications.*/
+   if (dri2_dpy->config)
+      dri2_dpy->config->configQueryi(dri2_dpy->dri_screen,
+                                     "vblank_mode", &vblank_mode);
+
+   switch (vblank_mode) {
+   case DRI_CONF_VBLANK_NEVER:
+      highest = 0;
+      initial = 0;
+      break;
+   case DRI_CONF_VBLANK_ALWAYS_SYNC:
+      lowest = 1;
+      break;
+   case DRI_CONF_VBLANK_DEF_INTERVAL_0:
+      initial = 0;
+      break;
+   case DRI_CONF_VBLANK_DEF_INTERVAL_1:
+   default:
+      break;
+   }
+
+   dri2_dpy->min_swap_interval = lowest;
+   dri2_dpy->max_swap_interval = highest;
+   dri2_dpy->default_swap_interval = initial;
+}
+
/* All platforms but DRM call this function to create the screen and populate
* the driver_configs. DRM inherits that information from its display - GBM.
*/
@@ -842,14 +904,6 @@
return EGL_TRUE;
}
- /* not until swrast_dri is supported */
- if (disp->Options.UseFallback)
- return EGL_FALSE;
-
- /* Nothing to initialize for a test only display */
- if (disp->Options.TestOnly)
- return EGL_TRUE;
-
switch (disp->Platform) {
#ifdef HAVE_SURFACELESS_PLATFORM
case _EGL_PLATFORM_SURFACELESS:
@@ -881,17 +935,13 @@
return EGL_FALSE;
}
- if (ret) {
- dri2_dpy = dri2_egl_display(disp);
-
- if (!dri2_dpy) {
- return EGL_FALSE;
- }
+ if (!ret)
+ return EGL_FALSE;
- dri2_dpy->ref_count++;
- }
+ dri2_dpy = dri2_egl_display(disp);
+ dri2_dpy->ref_count++;
- return ret;
+ return EGL_TRUE;
}
/**
@@ -922,8 +972,11 @@
{
struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
- if (dri2_dpy->own_dri_screen)
+ if (dri2_dpy->own_dri_screen) {
+ if (dri2_dpy->vtbl->close_screen_notify)
+ dri2_dpy->vtbl->close_screen_notify(disp);
dri2_dpy->core->destroyScreen(dri2_dpy->dri_screen);
+ }
if (dri2_dpy->fd >= 0)
close(dri2_dpy->fd);
if (dri2_dpy->driver)
@@ -988,6 +1041,40 @@
disp->DriverData = NULL;
}
+/**
+ * Return the surface's local buffer for attachment \p att, allocating it
+ * through the DRI2 allocateBuffer hook on first use.
+ *
+ * \param dri2_surf  surface owning the local_buffers cache
+ * \param att        attachment index into local_buffers
+ * \param format     format passed through to allocateBuffer
+ * \return the cached or newly allocated buffer; NULL when \p att is out
+ *         of range, or whatever allocateBuffer returned on a fresh
+ *         allocation
+ */
+__DRIbuffer *
+dri2_egl_surface_alloc_local_buffer(struct dri2_egl_surface *dri2_surf,
+                                    unsigned int att, unsigned int format)
+{
+   struct dri2_egl_display *dri2_dpy =
+      dri2_egl_display(dri2_surf->base.Resource.Display);
+
+   if (att >= ARRAY_SIZE(dri2_surf->local_buffers))
+      return NULL;
+
+   /* Already allocated for this attachment: reuse it. */
+   if (dri2_surf->local_buffers[att])
+      return dri2_surf->local_buffers[att];
+
+   dri2_surf->local_buffers[att] =
+      dri2_dpy->dri2->allocateBuffer(dri2_dpy->dri_screen, att, format,
+                                     dri2_surf->base.Width,
+                                     dri2_surf->base.Height);
+
+   return dri2_surf->local_buffers[att];
+}
+
+/**
+ * Release every buffer held in the surface's local_buffers cache through
+ * the DRI2 releaseBuffer hook and clear the corresponding slots.
+ *
+ * \param dri2_surf  surface whose local buffers are released
+ */
+void
+dri2_egl_surface_free_local_buffers(struct dri2_egl_surface *dri2_surf)
+{
+   struct dri2_egl_display *dri2_dpy =
+      dri2_egl_display(dri2_surf->base.Resource.Display);
+
+   /* Unsigned index: ARRAY_SIZE() is presumably sizeof-based and therefore
+    * unsigned, so a signed counter would give a mixed-sign comparison.
+    */
+   for (unsigned i = 0; i < ARRAY_SIZE(dri2_surf->local_buffers); i++) {
+      if (dri2_surf->local_buffers[i]) {
+         dri2_dpy->dri2->releaseBuffer(dri2_dpy->dri_screen,
+                                       dri2_surf->local_buffers[i]);
+         dri2_surf->local_buffers[i] = NULL;
+      }
+   }
+}
+
/**
* Called via eglTerminate(), drv->API.Terminate().
*
@@ -1077,7 +1164,7 @@
{
int pos = 0;
- assert(*num_attribs >= 8);
+ assert(*num_attribs >= NUM_ATTRIBS);
ctx_attribs[pos++] = __DRI_CTX_ATTRIB_MAJOR_VERSION;
ctx_attribs[pos++] = dri2_ctx->base.ClientMajorVersion;
@@ -1114,6 +1201,28 @@
ctx_attribs[pos++] = __DRI_CTX_RESET_LOSE_CONTEXT;
}
+ if (dri2_ctx->base.ContextPriority != EGL_CONTEXT_PRIORITY_MEDIUM_IMG) {
+ unsigned val;
+
+ switch (dri2_ctx->base.ContextPriority) {
+ case EGL_CONTEXT_PRIORITY_HIGH_IMG:
+ val = __DRI_CTX_PRIORITY_HIGH;
+ break;
+ case EGL_CONTEXT_PRIORITY_MEDIUM_IMG:
+ val = __DRI_CTX_PRIORITY_MEDIUM;
+ break;
+ case EGL_CONTEXT_PRIORITY_LOW_IMG:
+ val = __DRI_CTX_PRIORITY_LOW;
+ break;
+ default:
+ _eglError(EGL_BAD_CONFIG, "eglCreateContext");
+ return false;
+ }
+
+ ctx_attribs[pos++] = __DRI_CTX_ATTRIB_PRIORITY;
+ ctx_attribs[pos++] = val;
+ }
+
*num_attribs = pos;
return true;
@@ -1228,8 +1337,8 @@
if (dri2_dpy->image_driver) {
unsigned error;
- unsigned num_attribs = 8;
- uint32_t ctx_attribs[8];
+ unsigned num_attribs = NUM_ATTRIBS;
+ uint32_t ctx_attribs[NUM_ATTRIBS];
if (!dri2_fill_context_attribs(dri2_ctx, dri2_dpy, ctx_attribs,
&num_attribs))
@@ -1248,8 +1357,8 @@
} else if (dri2_dpy->dri2) {
if (dri2_dpy->dri2->base.version >= 3) {
unsigned error;
- unsigned num_attribs = 8;
- uint32_t ctx_attribs[8];
+ unsigned num_attribs = NUM_ATTRIBS;
+ uint32_t ctx_attribs[NUM_ATTRIBS];
if (!dri2_fill_context_attribs(dri2_ctx, dri2_dpy, ctx_attribs,
&num_attribs))
@@ -1277,8 +1386,8 @@
assert(dri2_dpy->swrast);
if (dri2_dpy->swrast->base.version >= 3) {
unsigned error;
- unsigned num_attribs = 8;
- uint32_t ctx_attribs[8];
+ unsigned num_attribs = NUM_ATTRIBS;
+ uint32_t ctx_attribs[NUM_ATTRIBS];
if (!dri2_fill_context_attribs(dri2_ctx, dri2_dpy, ctx_attribs,
&num_attribs))
@@ -1331,6 +1440,45 @@
return EGL_TRUE;
}
+/**
+ * Initialise the dri2 out-fence state of a surface, then hand over to
+ * _eglInitSurface() for the generic part.
+ *
+ * Out fences are enabled only when the caller asks for them and the fence
+ * extension is present at version 2 or later, provides get_capabilities,
+ * and reports __DRI_FENCE_CAP_NATIVE_FD for the screen.
+ *
+ * \return the result of _eglInitSurface()
+ */
+EGLBoolean
+dri2_init_surface(_EGLSurface *surf, _EGLDisplay *dpy, EGLint type,
+                  _EGLConfig *conf, const EGLint *attrib_list,
+                  EGLBoolean enable_out_fence)
+{
+   struct dri2_egl_display *dri2_dpy = dri2_egl_display(dpy);
+   struct dri2_egl_surface *dri2_surf = dri2_egl_surface(surf);
+   bool native_fence_fd;
+
+   /* No fence fd is held yet, and out fences stay off unless supported. */
+   dri2_surf->out_fence_fd = -1;
+   dri2_surf->enable_out_fence = false;
+
+   native_fence_fd =
+      dri2_dpy->fence &&
+      dri2_dpy->fence->base.version >= 2 &&
+      dri2_dpy->fence->get_capabilities &&
+      (dri2_dpy->fence->get_capabilities(dri2_dpy->dri_screen) &
+       __DRI_FENCE_CAP_NATIVE_FD);
+
+   if (native_fence_fd)
+      dri2_surf->enable_out_fence = enable_out_fence;
+
+   return _eglInitSurface(surf, dpy, type, conf, attrib_list);
+}
+
+/**
+ * Replace the surface's out-fence file descriptor.
+ *
+ * Any descriptor currently stored (>= 0) is closed before \p fence_fd is
+ * stored in its place; passing -1 therefore just drops the old one.
+ */
+static void
+dri2_surface_set_out_fence_fd(_EGLSurface *surf, int fence_fd)
+{
+   struct dri2_egl_surface *dri2_surf = dri2_egl_surface(surf);
+   const int previous_fd = dri2_surf->out_fence_fd;
+
+   if (previous_fd >= 0)
+      close(previous_fd);
+
+   dri2_surf->out_fence_fd = fence_fd;
+}
+
+/**
+ * Tear down the out-fence state of a surface: close the stored fence fd,
+ * if any, and switch out fences off.
+ */
+void
+dri2_fini_surface(_EGLSurface *surf)
+{
+   /* Storing -1 closes whatever descriptor was held before. */
+   dri2_surface_set_out_fence_fd(surf, -1);
+   dri2_egl_surface(surf)->enable_out_fence = false;
+}
+
static EGLBoolean
dri2_destroy_surface(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surf)
{
@@ -1342,6 +1490,28 @@
return dri2_dpy->vtbl->destroy_surface(drv, dpy, surf);
}
+/**
+ * Refresh the out-fence fd stored on \p surf.
+ *
+ * Does nothing when out fences are disabled for the surface. Otherwise a
+ * fence is created on \p ctx's DRI context, its fd is extracted and stored
+ * on the surface, and the fence object is destroyed again. If no fence
+ * could be created the stored fd is reset to -1.
+ */
+static void
+dri2_surf_update_fence_fd(_EGLContext *ctx,
+                          _EGLDisplay *dpy, _EGLSurface *surf)
+{
+   struct dri2_egl_surface *dri2_surf = dri2_egl_surface(surf);
+   struct dri2_egl_display *dri2_dpy = dri2_egl_display(dpy);
+   __DRIcontext *dri_ctx = dri2_egl_context(ctx)->dri_context;
+   void *fence;
+   int new_fd;
+
+   if (!dri2_surf->enable_out_fence)
+      return;
+
+   fence = dri2_dpy->fence->create_fence_fd(dri_ctx, -1);
+   if (!fence) {
+      dri2_surface_set_out_fence_fd(surf, -1);
+      return;
+   }
+
+   /* Fetch the fd before destroying the fence object it came from. */
+   new_fd = dri2_dpy->fence->get_fence_fd(dri2_dpy->dri_screen, fence);
+   dri2_dpy->fence->destroy_fence(dri2_dpy->dri_screen, fence);
+
+   dri2_surface_set_out_fence_fd(surf, new_fd);
+}
+
/**
* Called via eglMakeCurrent(), drv->API.MakeCurrent().
*/
@@ -1349,7 +1519,6 @@
dri2_make_current(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *dsurf,
_EGLSurface *rsurf, _EGLContext *ctx)
{
- struct dri2_egl_driver *dri2_drv = dri2_egl_driver(drv);
struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
struct dri2_egl_context *dri2_ctx = dri2_egl_context(ctx);
_EGLContext *old_ctx;
@@ -1370,7 +1539,7 @@
/* flush before context switch */
if (old_ctx)
- dri2_drv->glFlush();
+ dri2_gl_flush();
ddraw = (dsurf) ? dri2_dpy->vtbl->get_dri_drawable(dsurf) : NULL;
rdraw = (rsurf) ? dri2_dpy->vtbl->get_dri_drawable(rsurf) : NULL;
@@ -1378,6 +1547,9 @@
if (old_ctx) {
__DRIcontext *old_cctx = dri2_egl_context(old_ctx)->dri_context;
+
+ if (old_dsurf)
+ dri2_surf_update_fence_fd(old_ctx, disp, old_dsurf);
dri2_dpy->core->unbindContext(old_cctx);
}
@@ -1433,9 +1605,7 @@
static _EGLProc
dri2_get_proc_address(_EGLDriver *drv, const char *procname)
{
- struct dri2_egl_driver *dri2_drv = dri2_egl_driver(drv);
-
- return dri2_drv->get_proc_address(procname);
+ return _glapi_get_proc_address(procname);
}
static _EGLSurface*
@@ -1471,6 +1641,8 @@
EGLint interval)
{
struct dri2_egl_display *dri2_dpy = dri2_egl_display(dpy);
+ if (!dri2_dpy->vtbl->swap_interval)
+ return EGL_TRUE;
return dri2_dpy->vtbl->swap_interval(drv, dpy, surf, interval);
}
@@ -1514,6 +1686,10 @@
dri2_swap_buffers(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surf)
{
struct dri2_egl_display *dri2_dpy = dri2_egl_display(dpy);
+ _EGLContext *ctx = _eglGetCurrentContext();
+
+ if (ctx && surf)
+ dri2_surf_update_fence_fd(ctx, dpy, surf);
return dri2_dpy->vtbl->swap_buffers(drv, dpy, surf);
}
@@ -1523,6 +1699,10 @@
const EGLint *rects, EGLint n_rects)
{
struct dri2_egl_display *dri2_dpy = dri2_egl_display(dpy);
+ _EGLContext *ctx = _eglGetCurrentContext();
+
+ if (ctx && surf)
+ dri2_surf_update_fence_fd(ctx, dpy, surf);
return dri2_dpy->vtbl->swap_buffers_with_damage(drv, dpy, surf,
rects, n_rects);
}
@@ -1707,6 +1887,29 @@
return &dri2_img->base;
}
+/**
+ * Map a __DRI_IMAGE_ERROR_* code from the DRI Image extension onto the
+ * matching EGL error code.
+ *
+ * Codes without a mapping trip an assert and otherwise yield
+ * EGL_BAD_ALLOC.
+ */
+static EGLint
+egl_error_from_dri_image_error(int dri_error)
+{
+   if (dri_error == __DRI_IMAGE_ERROR_SUCCESS)
+      return EGL_SUCCESS;
+   if (dri_error == __DRI_IMAGE_ERROR_BAD_ALLOC)
+      return EGL_BAD_ALLOC;
+   if (dri_error == __DRI_IMAGE_ERROR_BAD_MATCH)
+      return EGL_BAD_MATCH;
+   if (dri_error == __DRI_IMAGE_ERROR_BAD_PARAMETER)
+      return EGL_BAD_PARAMETER;
+   if (dri_error == __DRI_IMAGE_ERROR_BAD_ACCESS)
+      return EGL_BAD_ACCESS;
+
+   /* Not a code this translation knows about. */
+   assert(0);
+   return EGL_BAD_ALLOC;
+}
+
static _EGLImage *
dri2_create_image_khr_renderbuffer(_EGLDisplay *disp, _EGLContext *ctx,
EGLClientBuffer buffer,
@@ -1727,9 +1930,27 @@
return EGL_NO_IMAGE_KHR;
}
- dri_image =
- dri2_dpy->image->createImageFromRenderbuffer(dri2_ctx->dri_context,
- renderbuffer, NULL);
+ if (dri2_dpy->image->base.version >= 17 &&
+ dri2_dpy->image->createImageFromRenderbuffer2) {
+ unsigned error = ~0;
+
+ dri_image = dri2_dpy->image->createImageFromRenderbuffer2(
+ dri2_ctx->dri_context, renderbuffer, NULL, &error);
+
+ assert(!!dri_image == (error == __DRI_IMAGE_ERROR_SUCCESS));
+
+ if (!dri_image) {
+ _eglError(egl_error_from_dri_image_error(error), "dri2_create_image_khr");
+ return EGL_NO_IMAGE_KHR;
+ }
+ } else {
+ dri_image = dri2_dpy->image->createImageFromRenderbuffer(
+ dri2_ctx->dri_context, renderbuffer, NULL);
+ if (!dri_image) {
+ _eglError(EGL_BAD_ALLOC, "dri2_create_image_khr");
+ return EGL_NO_IMAGE_KHR;
+ }
+ }
return dri2_create_image_from_dri(disp, dri_image);
}
@@ -1809,35 +2030,10 @@
static void
dri2_create_image_khr_texture_error(int dri_error)
{
- EGLint egl_error;
-
- switch (dri_error) {
- case __DRI_IMAGE_ERROR_SUCCESS:
- return;
-
- case __DRI_IMAGE_ERROR_BAD_ALLOC:
- egl_error = EGL_BAD_ALLOC;
- break;
-
- case __DRI_IMAGE_ERROR_BAD_MATCH:
- egl_error = EGL_BAD_MATCH;
- break;
-
- case __DRI_IMAGE_ERROR_BAD_PARAMETER:
- egl_error = EGL_BAD_PARAMETER;
- break;
-
- case __DRI_IMAGE_ERROR_BAD_ACCESS:
- egl_error = EGL_BAD_ACCESS;
- break;
-
- default:
- assert(0);
- egl_error = EGL_BAD_MATCH;
- break;
- }
+ EGLint egl_error = egl_error_from_dri_image_error(dri_error);
- _eglError(egl_error, "dri2_create_image_khr_texture");
+ if (egl_error != EGL_SUCCESS)
+ _eglError(egl_error, "dri2_create_image_khr_texture");
}
static _EGLImage *
@@ -2133,6 +2329,24 @@
return 0;
}
+ for (unsigned i = plane_n; i < DMA_BUF_MAX_PLANES; i++) {
+ /**
+ * The modifiers extension spec says:
+ *
+ * "Modifiers may modify any attribute of a buffer import, including
+ * but not limited to adding extra planes to a format which
+ * otherwise does not have those planes. As an example, a modifier
+ * may add a plane for an external compression buffer to a
+ * single-plane format. The exact meaning and effect of any
+ * modifier is canonically defined by drm_fourcc.h, not as part of
+ * this extension."
+ */
+ if (attrs->DMABufPlaneModifiersLo[i].IsPresent &&
+ attrs->DMABufPlaneModifiersHi[i].IsPresent) {
+ plane_n = i + 1;
+ }
+ }
+
/**
* The spec says:
*
@@ -2159,25 +2373,7 @@
for (unsigned i = plane_n; i < DMA_BUF_MAX_PLANES; ++i) {
if (attrs->DMABufPlaneFds[i].IsPresent ||
attrs->DMABufPlaneOffsets[i].IsPresent ||
- attrs->DMABufPlanePitches[i].IsPresent ||
- attrs->DMABufPlaneModifiersLo[i].IsPresent ||
- attrs->DMABufPlaneModifiersHi[i].IsPresent) {
-
- /**
- * The modifiers extension spec says:
- *
- * "Modifiers may modify any attribute of a buffer import, including
- * but not limited to adding extra planes to a format which
- * otherwise does not have those planes. As an example, a modifier
- * may add a plane for an external compression buffer to a
- * single-plane format. The exact meaning and effect of any
- * modifier is canonically defined by drm_fourcc.h, not as part of
- * this extension."
- */
- if (attrs->DMABufPlaneModifiersLo[i].IsPresent &&
- attrs->DMABufPlaneModifiersHi[i].IsPresent)
- continue;
-
+ attrs->DMABufPlanePitches[i].IsPresent) {
_eglError(EGL_BAD_ATTRIBUTE, "too many plane attributes");
return 0;
}
@@ -2589,17 +2785,16 @@
dri2_dpy->image->destroyImage(buffer->driver_buffer);
}
-static struct wayland_drm_callbacks wl_drm_callbacks = {
- .authenticate = NULL,
- .reference_buffer = dri2_wl_reference_buffer,
- .release_buffer = dri2_wl_release_buffer
-};
-
static EGLBoolean
dri2_bind_wayland_display_wl(_EGLDriver *drv, _EGLDisplay *disp,
struct wl_display *wl_dpy)
{
struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
+ const struct wayland_drm_callbacks wl_drm_callbacks = {
+ .authenticate = (int(*)(void *, uint32_t)) dri2_dpy->vtbl->authenticate,
+ .reference_buffer = dri2_wl_reference_buffer,
+ .release_buffer = dri2_wl_release_buffer
+ };
int flags = 0;
uint64_t cap;
@@ -2608,9 +2803,6 @@
if (dri2_dpy->wl_server_drm)
return EGL_FALSE;
- wl_drm_callbacks.authenticate =
- (int(*)(void *, uint32_t)) dri2_dpy->vtbl->authenticate;
-
if (drmGetCap(dri2_dpy->fd, DRM_CAP_PRIME, &cap) == 0 &&
cap == (DRM_PRIME_CAP_IMPORT | DRM_PRIME_CAP_EXPORT) &&
dri2_dpy->image->base.version >= 7 &&
@@ -2871,7 +3063,6 @@
EGLint flags, EGLTime timeout)
{
_EGLContext *ctx = _eglGetCurrentContext();
- struct dri2_egl_driver *dri2_drv = dri2_egl_driver(drv);
struct dri2_egl_display *dri2_dpy = dri2_egl_display(dpy);
struct dri2_egl_context *dri2_ctx = dri2_egl_context(ctx);
struct dri2_egl_sync *dri2_sync = dri2_egl_sync(sync);
@@ -2910,7 +3101,7 @@
if (dri2_ctx && dri2_sync->base.SyncStatus == EGL_UNSIGNALED_KHR &&
(flags & EGL_SYNC_FLUSH_COMMANDS_BIT_KHR)) {
/* flush context if EGL_SYNC_FLUSH_COMMANDS_BIT_KHR is set */
- dri2_drv->glFlush();
+ dri2_gl_flush();
}
/* if timeout is EGL_FOREVER_KHR, it should wait without any timeout.*/
@@ -3023,136 +3214,68 @@
return dri2_dpy->interop->export_object(dri2_ctx->dri_context, in, out);
}
-static void
-dri2_unload(_EGLDriver *drv)
-{
- struct dri2_egl_driver *dri2_drv = dri2_egl_driver(drv);
-
- dlclose(dri2_drv->handle);
- free(dri2_drv);
-}
-
-static EGLBoolean
-dri2_load(_EGLDriver *drv)
-{
- struct dri2_egl_driver *dri2_drv = dri2_egl_driver(drv);
-#ifdef HAVE_ANDROID_PLATFORM
- const char *libname = "libglapi.so";
-#elif defined(__APPLE__)
- const char *libname = "libglapi.0.dylib";
-#elif defined(__CYGWIN__)
- const char *libname = "cygglapi-0.dll";
-#else
- const char *libname = "libglapi.so.0";
-#endif
- void *handle;
-
- /* RTLD_GLOBAL to make sure glapi symbols are visible to DRI drivers */
- handle = dlopen(libname, RTLD_LAZY | RTLD_GLOBAL);
- if (!handle) {
- _eglLog(_EGL_WARNING, "DRI2: failed to open glapi provider");
- goto no_handle;
- }
-
- dri2_drv->get_proc_address = (_EGLProc (*)(const char *))
- dlsym(handle, "_glapi_get_proc_address");
-
- /* if glapi is not available, loading DRI drivers will fail */
- if (!dri2_drv->get_proc_address) {
- _eglLog(_EGL_WARNING, "DRI2: failed to find _glapi_get_proc_address");
- goto no_symbol;
- }
-
- dri2_drv->glFlush = (void (*)(void))
- dri2_drv->get_proc_address("glFlush");
-
- /* if glFlush is not available things are horribly broken */
- if (!dri2_drv->glFlush) {
- _eglLog(_EGL_WARNING, "DRI2: failed to find glFlush entry point");
- goto no_symbol;
- }
-
- dri2_drv->handle = handle;
- return EGL_TRUE;
-
-no_symbol:
- dlclose(handle);
-no_handle:
- return EGL_FALSE;
-}
-
/**
* This is the main entrypoint into the driver, called by libEGL.
* Create a new _EGLDriver object and init its dispatch table.
*/
_EGLDriver *
-_eglBuiltInDriverDRI2(const char *args)
+_eglBuiltInDriver(void)
{
- struct dri2_egl_driver *dri2_drv;
-
- (void) args;
-
- dri2_drv = calloc(1, sizeof *dri2_drv);
+ _EGLDriver *dri2_drv = calloc(1, sizeof *dri2_drv);
if (!dri2_drv)
return NULL;
- if (!dri2_load(&dri2_drv->base)) {
- free(dri2_drv);
- return NULL;
- }
-
- _eglInitDriverFallbacks(&dri2_drv->base);
- dri2_drv->base.API.Initialize = dri2_initialize;
- dri2_drv->base.API.Terminate = dri2_terminate;
- dri2_drv->base.API.CreateContext = dri2_create_context;
- dri2_drv->base.API.DestroyContext = dri2_destroy_context;
- dri2_drv->base.API.MakeCurrent = dri2_make_current;
- dri2_drv->base.API.CreateWindowSurface = dri2_create_window_surface;
- dri2_drv->base.API.CreatePixmapSurface = dri2_create_pixmap_surface;
- dri2_drv->base.API.CreatePbufferSurface = dri2_create_pbuffer_surface;
- dri2_drv->base.API.DestroySurface = dri2_destroy_surface;
- dri2_drv->base.API.GetProcAddress = dri2_get_proc_address;
- dri2_drv->base.API.WaitClient = dri2_wait_client;
- dri2_drv->base.API.WaitNative = dri2_wait_native;
- dri2_drv->base.API.BindTexImage = dri2_bind_tex_image;
- dri2_drv->base.API.ReleaseTexImage = dri2_release_tex_image;
- dri2_drv->base.API.SwapInterval = dri2_swap_interval;
- dri2_drv->base.API.SwapBuffers = dri2_swap_buffers;
- dri2_drv->base.API.SwapBuffersWithDamageEXT = dri2_swap_buffers_with_damage;
- dri2_drv->base.API.SwapBuffersRegionNOK = dri2_swap_buffers_region;
- dri2_drv->base.API.SetDamageRegion = dri2_set_damage_region;
- dri2_drv->base.API.PostSubBufferNV = dri2_post_sub_buffer;
- dri2_drv->base.API.CopyBuffers = dri2_copy_buffers,
- dri2_drv->base.API.QueryBufferAge = dri2_query_buffer_age;
- dri2_drv->base.API.CreateImageKHR = dri2_create_image;
- dri2_drv->base.API.DestroyImageKHR = dri2_destroy_image_khr;
- dri2_drv->base.API.CreateWaylandBufferFromImageWL = dri2_create_wayland_buffer_from_image;
- dri2_drv->base.API.QuerySurface = dri2_query_surface;
+ _eglInitDriverFallbacks(dri2_drv);
+ dri2_drv->API.Initialize = dri2_initialize;
+ dri2_drv->API.Terminate = dri2_terminate;
+ dri2_drv->API.CreateContext = dri2_create_context;
+ dri2_drv->API.DestroyContext = dri2_destroy_context;
+ dri2_drv->API.MakeCurrent = dri2_make_current;
+ dri2_drv->API.CreateWindowSurface = dri2_create_window_surface;
+ dri2_drv->API.CreatePixmapSurface = dri2_create_pixmap_surface;
+ dri2_drv->API.CreatePbufferSurface = dri2_create_pbuffer_surface;
+ dri2_drv->API.DestroySurface = dri2_destroy_surface;
+ dri2_drv->API.GetProcAddress = dri2_get_proc_address;
+ dri2_drv->API.WaitClient = dri2_wait_client;
+ dri2_drv->API.WaitNative = dri2_wait_native;
+ dri2_drv->API.BindTexImage = dri2_bind_tex_image;
+ dri2_drv->API.ReleaseTexImage = dri2_release_tex_image;
+ dri2_drv->API.SwapInterval = dri2_swap_interval;
+ dri2_drv->API.SwapBuffers = dri2_swap_buffers;
+ dri2_drv->API.SwapBuffersWithDamageEXT = dri2_swap_buffers_with_damage;
+ dri2_drv->API.SwapBuffersRegionNOK = dri2_swap_buffers_region;
+ dri2_drv->API.SetDamageRegion = dri2_set_damage_region;
+ dri2_drv->API.PostSubBufferNV = dri2_post_sub_buffer;
+ dri2_drv->API.CopyBuffers = dri2_copy_buffers,
+ dri2_drv->API.QueryBufferAge = dri2_query_buffer_age;
+ dri2_drv->API.CreateImageKHR = dri2_create_image;
+ dri2_drv->API.DestroyImageKHR = dri2_destroy_image_khr;
+ dri2_drv->API.CreateWaylandBufferFromImageWL = dri2_create_wayland_buffer_from_image;
+ dri2_drv->API.QuerySurface = dri2_query_surface;
#ifdef HAVE_LIBDRM
- dri2_drv->base.API.CreateDRMImageMESA = dri2_create_drm_image_mesa;
- dri2_drv->base.API.ExportDRMImageMESA = dri2_export_drm_image_mesa;
- dri2_drv->base.API.ExportDMABUFImageQueryMESA = dri2_export_dma_buf_image_query_mesa;
- dri2_drv->base.API.ExportDMABUFImageMESA = dri2_export_dma_buf_image_mesa;
- dri2_drv->base.API.QueryDmaBufFormatsEXT = dri2_query_dma_buf_formats;
- dri2_drv->base.API.QueryDmaBufModifiersEXT = dri2_query_dma_buf_modifiers;
+ dri2_drv->API.CreateDRMImageMESA = dri2_create_drm_image_mesa;
+ dri2_drv->API.ExportDRMImageMESA = dri2_export_drm_image_mesa;
+ dri2_drv->API.ExportDMABUFImageQueryMESA = dri2_export_dma_buf_image_query_mesa;
+ dri2_drv->API.ExportDMABUFImageMESA = dri2_export_dma_buf_image_mesa;
+ dri2_drv->API.QueryDmaBufFormatsEXT = dri2_query_dma_buf_formats;
+ dri2_drv->API.QueryDmaBufModifiersEXT = dri2_query_dma_buf_modifiers;
#endif
#ifdef HAVE_WAYLAND_PLATFORM
- dri2_drv->base.API.BindWaylandDisplayWL = dri2_bind_wayland_display_wl;
- dri2_drv->base.API.UnbindWaylandDisplayWL = dri2_unbind_wayland_display_wl;
- dri2_drv->base.API.QueryWaylandBufferWL = dri2_query_wayland_buffer_wl;
-#endif
- dri2_drv->base.API.GetSyncValuesCHROMIUM = dri2_get_sync_values_chromium;
- dri2_drv->base.API.CreateSyncKHR = dri2_create_sync;
- dri2_drv->base.API.ClientWaitSyncKHR = dri2_client_wait_sync;
- dri2_drv->base.API.SignalSyncKHR = dri2_signal_sync;
- dri2_drv->base.API.WaitSyncKHR = dri2_server_wait_sync;
- dri2_drv->base.API.DestroySyncKHR = dri2_destroy_sync;
- dri2_drv->base.API.GLInteropQueryDeviceInfo = dri2_interop_query_device_info;
- dri2_drv->base.API.GLInteropExportObject = dri2_interop_export_object;
- dri2_drv->base.API.DupNativeFenceFDANDROID = dri2_dup_native_fence_fd;
+ dri2_drv->API.BindWaylandDisplayWL = dri2_bind_wayland_display_wl;
+ dri2_drv->API.UnbindWaylandDisplayWL = dri2_unbind_wayland_display_wl;
+ dri2_drv->API.QueryWaylandBufferWL = dri2_query_wayland_buffer_wl;
+#endif
+ dri2_drv->API.GetSyncValuesCHROMIUM = dri2_get_sync_values_chromium;
+ dri2_drv->API.CreateSyncKHR = dri2_create_sync;
+ dri2_drv->API.ClientWaitSyncKHR = dri2_client_wait_sync;
+ dri2_drv->API.SignalSyncKHR = dri2_signal_sync;
+ dri2_drv->API.WaitSyncKHR = dri2_server_wait_sync;
+ dri2_drv->API.DestroySyncKHR = dri2_destroy_sync;
+ dri2_drv->API.GLInteropQueryDeviceInfo = dri2_interop_query_device_info;
+ dri2_drv->API.GLInteropExportObject = dri2_interop_export_object;
+ dri2_drv->API.DupNativeFenceFDANDROID = dri2_dup_native_fence_fd;
- dri2_drv->base.Name = "DRI2";
- dri2_drv->base.Unload = dri2_unload;
+ dri2_drv->Name = "DRI2";
- return &dri2_drv->base;
+ return dri2_drv;
}
diff -Nru mesa-17.2.4/src/egl/drivers/dri2/egl_dri2_fallbacks.h mesa-17.3.3/src/egl/drivers/dri2/egl_dri2_fallbacks.h
--- mesa-17.2.4/src/egl/drivers/dri2/egl_dri2_fallbacks.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/egl/drivers/dri2/egl_dri2_fallbacks.h 2018-01-18 21:30:28.000000000 +0000
@@ -56,20 +56,6 @@
}
static inline EGLBoolean
-dri2_fallback_swap_interval(_EGLDriver *drv, _EGLDisplay *dpy,
- _EGLSurface *surf, EGLint interval)
-{
- if (interval > surf->Config->MaxSwapInterval)
- interval = surf->Config->MaxSwapInterval;
- else if (interval < surf->Config->MinSwapInterval)
- interval = surf->Config->MinSwapInterval;
-
- surf->SwapInterval = interval;
-
- return EGL_TRUE;
-}
-
-static inline EGLBoolean
dri2_fallback_swap_buffers_with_damage(_EGLDriver *drv, _EGLDisplay *dpy,
_EGLSurface *surf,
const EGLint *rects, EGLint n_rects)
diff -Nru mesa-17.2.4/src/egl/drivers/dri2/egl_dri2.h mesa-17.3.3/src/egl/drivers/dri2/egl_dri2.h
--- mesa-17.2.4/src/egl/drivers/dri2/egl_dri2.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/egl/drivers/dri2/egl_dri2.h 2018-01-18 21:30:28.000000000 +0000
@@ -44,7 +44,7 @@
#ifdef HAVE_WAYLAND_PLATFORM
#include
-#include "wayland-egl-priv.h"
+#include "wayland-egl-backend.h"
/* forward declarations of protocol elements */
struct zwp_linux_dmabuf_v1;
#endif
@@ -79,15 +79,6 @@
struct wl_buffer;
-struct dri2_egl_driver
-{
- _EGLDriver base;
-
- void *handle;
- _EGLProc (*get_proc_address)(const char *procname);
- void (*glFlush)(void);
-};
-
struct dri2_egl_display_vtbl {
int (*authenticate)(_EGLDisplay *disp, uint32_t id);
@@ -154,6 +145,8 @@
EGLuint64KHR *sbc);
__DRIdrawable *(*get_dri_drawable)(_EGLSurface *surf);
+
+ void (*close_screen_notify)(_EGLDisplay *dpy);
};
struct dri2_egl_display
@@ -283,8 +276,10 @@
struct gbm_dri_surface *gbm_surf;
#endif
+ /* EGL-owned buffers */
+ __DRIbuffer *local_buffers[__DRI_BUFFER_COUNT];
+
#if defined(HAVE_WAYLAND_PLATFORM) || defined(HAVE_DRM_PLATFORM)
- __DRIbuffer *dri_buffers[__DRI_BUFFER_COUNT];
struct {
#ifdef HAVE_WAYLAND_PLATFORM
struct wl_buffer *wl_buffer;
@@ -309,9 +304,6 @@
__DRIimage *dri_image_back;
__DRIimage *dri_image_front;
- /* EGL-owned buffers */
- __DRIbuffer *local_buffers[__DRI_BUFFER_COUNT];
-
/* Used to record all the buffers created by ANativeWindow and their ages.
* Usually Android uses at most triple buffers in ANativeWindow
* so hardcode the number of color_buffers to 3.
@@ -326,6 +318,8 @@
__DRIimage *front;
unsigned int visual;
#endif
+ int out_fence_fd;
+ EGLBoolean enable_out_fence;
};
struct dri2_egl_config
@@ -370,6 +364,9 @@
void
dri2_setup_screen(_EGLDisplay *disp);
+void
+dri2_setup_swap_interval(_EGLDisplay *disp, int max_swap_interval);
+
EGLBoolean
dri2_load_driver_swrast(_EGLDisplay *disp);
@@ -451,4 +448,18 @@
void
dri2_display_destroy(_EGLDisplay *disp);
+__DRIbuffer *
+dri2_egl_surface_alloc_local_buffer(struct dri2_egl_surface *dri2_surf,
+ unsigned int att, unsigned int format);
+
+void
+dri2_egl_surface_free_local_buffers(struct dri2_egl_surface *dri2_surf);
+
+EGLBoolean
+dri2_init_surface(_EGLSurface *surf, _EGLDisplay *dpy, EGLint type,
+ _EGLConfig *conf, const EGLint *attrib_list, EGLBoolean enable_out_fence);
+
+void
+dri2_fini_surface(_EGLSurface *surf);
+
#endif /* EGL_DRI2_INCLUDED */
diff -Nru mesa-17.2.4/src/egl/drivers/dri2/linux-dmabuf-unstable-v1-client-protocol.h mesa-17.3.3/src/egl/drivers/dri2/linux-dmabuf-unstable-v1-client-protocol.h
--- mesa-17.2.4/src/egl/drivers/dri2/linux-dmabuf-unstable-v1-client-protocol.h 2017-10-30 14:50:58.000000000 +0000
+++ mesa-17.3.3/src/egl/drivers/dri2/linux-dmabuf-unstable-v1-client-protocol.h 2018-01-18 21:31:11.000000000 +0000
@@ -1,4 +1,4 @@
-/* Generated by wayland-scanner 1.14.0 */
+/* Generated by wayland-scanner 1.14.90 */
#ifndef LINUX_DMABUF_UNSTABLE_V1_CLIENT_PROTOCOL_H
#define LINUX_DMABUF_UNSTABLE_V1_CLIENT_PROTOCOL_H
diff -Nru mesa-17.2.4/src/egl/drivers/dri2/linux-dmabuf-unstable-v1-protocol.c mesa-17.3.3/src/egl/drivers/dri2/linux-dmabuf-unstable-v1-protocol.c
--- mesa-17.2.4/src/egl/drivers/dri2/linux-dmabuf-unstable-v1-protocol.c 2017-10-30 14:50:58.000000000 +0000
+++ mesa-17.3.3/src/egl/drivers/dri2/linux-dmabuf-unstable-v1-protocol.c 2018-01-18 21:31:11.000000000 +0000
@@ -1,4 +1,4 @@
-/* Generated by wayland-scanner 1.14.0 */
+/* Generated by wayland-scanner 1.14.90 */
/*
* Copyright © 2014, 2015 Collabora, Ltd.
diff -Nru mesa-17.2.4/src/egl/drivers/dri2/platform_android.c mesa-17.3.3/src/egl/drivers/dri2/platform_android.c
--- mesa-17.2.4/src/egl/drivers/dri2/platform_android.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/egl/drivers/dri2/platform_android.c 2018-01-18 21:30:28.000000000 +0000
@@ -229,19 +229,18 @@
*/
mtx_unlock(&disp->Mutex);
- /* Queue the buffer without a sync fence. This informs the ANativeWindow
- * that it may access the buffer immediately.
+ /* Queue the buffer with stored out fence fd. The ANativeWindow or buffer
+ * consumer may choose to wait for the fence to signal before accessing
+ * it. If fence fd value is -1, buffer can be accessed by consumer
+ * immediately. Consumer or application shouldn't rely on timestamp
+ * associated with fence if the fence fd is -1.
*
- * From ANativeWindow::dequeueBuffer:
- *
- * The fenceFd argument specifies a libsync fence file descriptor for
- * a fence that must signal before the buffer can be accessed. If
- * the buffer can be accessed immediately then a value of -1 should
- * be used. The caller must not use the file descriptor after it
- * is passed to queueBuffer, and the ANativeWindow implementation
- * is responsible for closing it.
+ * Ownership of fd is transferred to consumer after queueBuffer and the
+ * consumer is responsible for closing it. Caller must not use the fd
+ * after passing it to queueBuffer.
*/
- int fence_fd = -1;
+ int fence_fd = dri2_surf->out_fence_fd;
+ dri2_surf->out_fence_fd = -1;
dri2_surf->window->queueBuffer(dri2_surf->window, dri2_surf->buffer,
fence_fd);
@@ -263,48 +262,17 @@
droid_window_cancel_buffer(struct dri2_egl_surface *dri2_surf)
{
int ret;
+ int fence_fd = dri2_surf->out_fence_fd;
- ret = dri2_surf->window->cancelBuffer(dri2_surf->window, dri2_surf->buffer, -1);
+ dri2_surf->out_fence_fd = -1;
+ ret = dri2_surf->window->cancelBuffer(dri2_surf->window,
+ dri2_surf->buffer, fence_fd);
if (ret < 0) {
_eglLog(_EGL_WARNING, "ANativeWindow::cancelBuffer failed");
dri2_surf->base.Lost = EGL_TRUE;
}
}
-static __DRIbuffer *
-droid_alloc_local_buffer(struct dri2_egl_surface *dri2_surf,
- unsigned int att, unsigned int format)
-{
- struct dri2_egl_display *dri2_dpy =
- dri2_egl_display(dri2_surf->base.Resource.Display);
-
- if (att >= ARRAY_SIZE(dri2_surf->local_buffers))
- return NULL;
-
- if (!dri2_surf->local_buffers[att]) {
- dri2_surf->local_buffers[att] =
- dri2_dpy->dri2->allocateBuffer(dri2_dpy->dri_screen, att, format,
- dri2_surf->base.Width, dri2_surf->base.Height);
- }
-
- return dri2_surf->local_buffers[att];
-}
-
-static void
-droid_free_local_buffers(struct dri2_egl_surface *dri2_surf)
-{
- struct dri2_egl_display *dri2_dpy =
- dri2_egl_display(dri2_surf->base.Resource.Display);
-
- for (int i = 0; i < ARRAY_SIZE(dri2_surf->local_buffers); i++) {
- if (dri2_surf->local_buffers[i]) {
- dri2_dpy->dri2->releaseBuffer(dri2_dpy->dri_screen,
- dri2_surf->local_buffers[i]);
- dri2_surf->local_buffers[i] = NULL;
- }
- }
-}
-
static _EGLSurface *
droid_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type,
_EGLConfig *conf, void *native_window,
@@ -323,13 +291,13 @@
return NULL;
}
- if (!_eglInitSurface(&dri2_surf->base, disp, type, conf, attrib_list))
+ if (!dri2_init_surface(&dri2_surf->base, disp, type, conf, attrib_list, true))
goto cleanup_surface;
if (type == EGL_WINDOW_BIT) {
int format;
- if (!window || window->common.magic != ANDROID_NATIVE_WINDOW_MAGIC) {
+ if (window->common.magic != ANDROID_NATIVE_WINDOW_MAGIC) {
_eglError(EGL_BAD_NATIVE_WINDOW, "droid_create_surface");
goto cleanup_surface;
}
@@ -400,7 +368,7 @@
struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
struct dri2_egl_surface *dri2_surf = dri2_egl_surface(surf);
- droid_free_local_buffers(dri2_surf);
+ dri2_egl_surface_free_local_buffers(dri2_surf);
if (dri2_surf->base.Type == EGL_WINDOW_BIT) {
if (dri2_surf->buffer)
@@ -423,6 +391,7 @@
dri2_dpy->core->destroyDrawable(dri2_surf->dri_drawable);
+ dri2_fini_surface(surf);
free(dri2_surf);
return EGL_TRUE;
@@ -447,7 +416,7 @@
/* free outdated buffers and update the surface size */
if (dri2_surf->base.Width != dri2_surf->buffer->width ||
dri2_surf->base.Height != dri2_surf->buffer->height) {
- droid_free_local_buffers(dri2_surf);
+ dri2_egl_surface_free_local_buffers(dri2_surf);
dri2_surf->base.Width = dri2_surf->buffer->width;
dri2_surf->base.Height = dri2_surf->buffer->height;
}
@@ -970,7 +939,7 @@
case __DRI_BUFFER_ACCUM:
case __DRI_BUFFER_DEPTH_STENCIL:
case __DRI_BUFFER_HIZ:
- local = droid_alloc_local_buffer(dri2_surf,
+ local = dri2_egl_surface_alloc_local_buffer(dri2_surf,
attachments[i], attachments[i + 1]);
if (local) {
@@ -1013,6 +982,18 @@
return dri2_surf->buffers;
}
+static unsigned
+droid_get_capability(void *loaderPrivate, enum dri_loader_cap cap)
+{
+ /* Note: loaderPrivate is _EGLDisplay* */
+ switch (cap) {
+ case DRI_LOADER_CAP_RGBA_ORDERING:
+ return 1;
+ default:
+ return 0;
+ }
+}
+
static EGLBoolean
droid_add_configs_for_visuals(_EGLDriver *drv, _EGLDisplay *dpy)
{
@@ -1106,7 +1087,6 @@
.create_pbuffer_surface = droid_create_pbuffer_surface,
.destroy_surface = droid_destroy_surface,
.create_image = droid_create_image_khr,
- .swap_interval = dri2_fallback_swap_interval,
.swap_buffers = droid_swap_buffers,
.swap_buffers_with_damage = dri2_fallback_swap_buffers_with_damage,
.swap_buffers_region = dri2_fallback_swap_buffers_region,
@@ -1125,18 +1105,20 @@
};
static const __DRIdri2LoaderExtension droid_dri2_loader_extension = {
- .base = { __DRI_DRI2_LOADER, 3 },
+ .base = { __DRI_DRI2_LOADER, 4 },
.getBuffers = NULL,
.flushFrontBuffer = droid_flush_front_buffer,
.getBuffersWithFormat = droid_get_buffers_with_format,
+ .getCapability = droid_get_capability,
};
static const __DRIimageLoaderExtension droid_image_loader_extension = {
- .base = { __DRI_IMAGE_LOADER, 1 },
+ .base = { __DRI_IMAGE_LOADER, 2 },
.getBuffers = droid_image_get_buffers,
.flushFrontBuffer = droid_flush_front_buffer,
+ .getCapability = droid_get_capability,
};
static const __DRIextension *droid_dri2_loader_extensions[] = {
@@ -1154,12 +1136,16 @@
};
EGLBoolean
-dri2_initialize_android(_EGLDriver *drv, _EGLDisplay *dpy)
+dri2_initialize_android(_EGLDriver *drv, _EGLDisplay *disp)
{
struct dri2_egl_display *dri2_dpy;
const char *err;
int ret;
+ /* Not supported yet */
+ if (disp->Options.UseFallback)
+ return EGL_FALSE;
+
loader_set_logger(_eglLog);
dri2_dpy = calloc(1, sizeof(*dri2_dpy));
@@ -1174,7 +1160,7 @@
goto cleanup;
}
- dpy->DriverData = (void *) dri2_dpy;
+ disp->DriverData = (void *) dri2_dpy;
dri2_dpy->fd = droid_open_device(dri2_dpy);
if (dri2_dpy->fd < 0) {
@@ -1194,41 +1180,41 @@
* the __DRI_DRI2_LOADER extension */
if (!dri2_dpy->is_render_node) {
dri2_dpy->loader_extensions = droid_dri2_loader_extensions;
- if (!dri2_load_driver(dpy)) {
+ if (!dri2_load_driver(disp)) {
err = "DRI2: failed to load driver";
goto cleanup;
}
} else {
dri2_dpy->loader_extensions = droid_image_loader_extensions;
- if (!dri2_load_driver_dri3(dpy)) {
+ if (!dri2_load_driver_dri3(disp)) {
err = "DRI3: failed to load driver";
goto cleanup;
}
}
- if (!dri2_create_screen(dpy)) {
+ if (!dri2_create_screen(disp)) {
err = "DRI2: failed to create screen";
goto cleanup;
}
- if (!dri2_setup_extensions(dpy)) {
+ if (!dri2_setup_extensions(disp)) {
err = "DRI2: failed to setup extensions";
goto cleanup;
}
- dri2_setup_screen(dpy);
+ dri2_setup_screen(disp);
- if (!droid_add_configs_for_visuals(drv, dpy)) {
+ if (!droid_add_configs_for_visuals(drv, disp)) {
err = "DRI2: failed to add configs";
goto cleanup;
}
- dpy->Extensions.ANDROID_framebuffer_target = EGL_TRUE;
- dpy->Extensions.ANDROID_image_native_buffer = EGL_TRUE;
- dpy->Extensions.ANDROID_recordable = EGL_TRUE;
- dpy->Extensions.EXT_buffer_age = EGL_TRUE;
+ disp->Extensions.ANDROID_framebuffer_target = EGL_TRUE;
+ disp->Extensions.ANDROID_image_native_buffer = EGL_TRUE;
+ disp->Extensions.ANDROID_recordable = EGL_TRUE;
+ disp->Extensions.EXT_buffer_age = EGL_TRUE;
#if ANDROID_API_LEVEL >= 23
- dpy->Extensions.KHR_partial_update = EGL_TRUE;
+ disp->Extensions.KHR_partial_update = EGL_TRUE;
#endif
/* Fill vtbl last to prevent accidentally calling virtual function during
@@ -1239,6 +1225,6 @@
return EGL_TRUE;
cleanup:
- dri2_display_destroy(dpy);
+ dri2_display_destroy(disp);
return _eglError(EGL_NOT_INITIALIZED, err);
}
diff -Nru mesa-17.2.4/src/egl/drivers/dri2/platform_drm.c mesa-17.3.3/src/egl/drivers/dri2/platform_drm.c
--- mesa-17.2.4/src/egl/drivers/dri2/platform_drm.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/egl/drivers/dri2/platform_drm.c 2018-01-18 21:30:28.000000000 +0000
@@ -91,14 +91,14 @@
}
static _EGLSurface *
-dri2_drm_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type,
- _EGLConfig *conf, void *native_window,
- const EGLint *attrib_list)
+dri2_drm_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp,
+ _EGLConfig *conf, void *native_surface,
+ const EGLint *attrib_list)
{
struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
struct dri2_egl_config *dri2_conf = dri2_egl_config(conf);
struct dri2_egl_surface *dri2_surf;
- struct gbm_surface *window = native_window;
+ struct gbm_surface *surface = native_surface;
struct gbm_dri_surface *surf;
const __DRIconfig *config;
@@ -110,25 +110,14 @@
return NULL;
}
- if (!_eglInitSurface(&dri2_surf->base, disp, type, conf, attrib_list))
+ if (!dri2_init_surface(&dri2_surf->base, disp, EGL_WINDOW_BIT, conf, attrib_list, false))
goto cleanup_surf;
- switch (type) {
- case EGL_WINDOW_BIT:
- if (!window) {
- _eglError(EGL_BAD_NATIVE_WINDOW, "dri2_create_surface");
- goto cleanup_surf;
- }
-
- surf = gbm_dri_surface(window);
- dri2_surf->gbm_surf = surf;
- dri2_surf->base.Width = surf->base.width;
- dri2_surf->base.Height = surf->base.height;
- surf->dri_private = dri2_surf;
- break;
- default:
- goto cleanup_surf;
- }
+ surf = gbm_dri_surface(surface);
+ dri2_surf->gbm_surf = surf;
+ dri2_surf->base.Width = surf->base.width;
+ dri2_surf->base.Height = surf->base.height;
+ surf->dri_private = dri2_surf;
config = dri2_get_dri_config(dri2_conf, EGL_WINDOW_BIT,
dri2_surf->base.GLColorspace);
@@ -160,15 +149,6 @@
}
static _EGLSurface *
-dri2_drm_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp,
- _EGLConfig *conf, void *native_window,
- const EGLint *attrib_list)
-{
- return dri2_drm_create_surface(drv, disp, EGL_WINDOW_BIT, conf,
- native_window, attrib_list);
-}
-
-static _EGLSurface *
dri2_drm_create_pixmap_surface(_EGLDriver *drv, _EGLDisplay *disp,
_EGLConfig *conf, void *native_window,
const EGLint *attrib_list)
@@ -196,12 +176,9 @@
gbm_bo_destroy(dri2_surf->color_buffers[i].bo);
}
- for (unsigned i = 0; i < __DRI_BUFFER_COUNT; i++) {
- if (dri2_surf->dri_buffers[i])
- dri2_dpy->dri2->releaseBuffer(dri2_dpy->dri_screen,
- dri2_surf->dri_buffers[i]);
- }
+ dri2_egl_surface_free_local_buffers(dri2_surf);
+ dri2_fini_surface(surf);
free(surf);
return EGL_TRUE;
@@ -291,39 +268,18 @@
buffer->flags = 0;
}
-static int
-get_aux_bo(struct dri2_egl_surface *dri2_surf,
- unsigned int attachment, unsigned int format, __DRIbuffer *buffer)
-{
- struct dri2_egl_display *dri2_dpy =
- dri2_egl_display(dri2_surf->base.Resource.Display);
- __DRIbuffer *b = dri2_surf->dri_buffers[attachment];
-
- if (b == NULL) {
- b = dri2_dpy->dri2->allocateBuffer(dri2_dpy->dri_screen,
- attachment, format,
- dri2_surf->base.Width,
- dri2_surf->base.Height);
- dri2_surf->dri_buffers[attachment] = b;
- }
- if (b == NULL)
- return -1;
-
- memcpy(buffer, b, sizeof *buffer);
-
- return 0;
-}
-
static __DRIbuffer *
dri2_drm_get_buffers_with_format(__DRIdrawable *driDrawable,
- int *width, int *height,
- unsigned int *attachments, int count,
- int *out_count, void *loaderPrivate)
+ int *width, int *height,
+ unsigned int *attachments, int count,
+ int *out_count, void *loaderPrivate)
{
struct dri2_egl_surface *dri2_surf = loaderPrivate;
int i, j;
for (i = 0, j = 0; i < 2 * count; i += 2, j++) {
+ __DRIbuffer *local;
+
assert(attachments[i] < __DRI_BUFFER_COUNT);
assert(j < ARRAY_SIZE(dri2_surf->buffers));
@@ -336,11 +292,14 @@
back_bo_to_dri_buffer(dri2_surf, &dri2_surf->buffers[j]);
break;
default:
- if (get_aux_bo(dri2_surf, attachments[i], attachments[i + 1],
- &dri2_surf->buffers[j]) < 0) {
- _eglError(EGL_BAD_ALLOC, "failed to allocate aux buffer");
+ local = dri2_egl_surface_alloc_local_buffer(dri2_surf, attachments[i],
+ attachments[i + 1]);
+
+ if (!local) {
+ _eglError(EGL_BAD_ALLOC, "failed to allocate local buffer");
return NULL;
}
+ dri2_surf->buffers[j] = *local;
break;
}
}
@@ -526,6 +485,9 @@
struct dri2_egl_surface *dri2_surf = loaderPrivate;
int internal_stride;
struct gbm_dri_bo *bo;
+ uint32_t bpp;
+ int x_bytes, width_bytes;
+ char *src, *dst;
if (op != __DRI_SWRAST_IMAGE_OP_DRAW &&
op != __DRI_SWRAST_IMAGE_OP_SWAP)
@@ -535,14 +497,26 @@
return;
bo = gbm_dri_bo(dri2_surf->current->bo);
+
+ bpp = gbm_bo_get_bpp(&bo->base);
+ if (bpp == 0)
+ return;
+
+ x_bytes = x * (bpp >> 3);
+ width_bytes = width * (bpp >> 3);
+
if (gbm_dri_bo_map_dumb(bo) == NULL)
return;
internal_stride = bo->base.stride;
+ dst = bo->map + x_bytes + (y * internal_stride);
+ src = data;
+
for (int i = 0; i < height; i++) {
- memcpy(bo->map + (x + i) * internal_stride + y,
- data + i * stride, stride);
+ memcpy(dst, src, width_bytes);
+ dst += internal_stride;
+ src += stride;
}
gbm_dri_bo_unmap_dumb(bo);
@@ -560,20 +534,35 @@
struct dri2_egl_surface *dri2_surf = loaderPrivate;
int internal_stride, stride;
struct gbm_dri_bo *bo;
+ uint32_t bpp;
+ int x_bytes, width_bytes;
+ char *src, *dst;
if (get_swrast_front_bo(dri2_surf) < 0)
return;
bo = gbm_dri_bo(dri2_surf->current->bo);
- if (gbm_dri_bo_map_dumb(bo) == NULL)
+
+ bpp = gbm_bo_get_bpp(&bo->base);
+ if (bpp == 0)
return;
+ x_bytes = x * (bpp >> 3);
+ width_bytes = width * (bpp >> 3);
+
internal_stride = bo->base.stride;
- stride = width * 4;
+ stride = width_bytes;
+
+ if (gbm_dri_bo_map_dumb(bo) == NULL)
+ return;
+
+ dst = data;
+ src = bo->map + x_bytes + (y * internal_stride);
for (int i = 0; i < height; i++) {
- memcpy(data + i * stride,
- bo->map + (x + i) * internal_stride + y, stride);
+ memcpy(dst, src, width_bytes);
+ dst += stride;
+ src += internal_stride;
}
gbm_dri_bo_unmap_dumb(bo);
@@ -644,7 +633,6 @@
.create_pbuffer_surface = dri2_fallback_create_pbuffer_surface,
.destroy_surface = dri2_drm_destroy_surface,
.create_image = dri2_drm_create_image_khr,
- .swap_interval = dri2_fallback_swap_interval,
.swap_buffers = dri2_drm_swap_buffers,
.swap_buffers_with_damage = dri2_fallback_swap_buffers_with_damage,
.swap_buffers_region = dri2_fallback_swap_buffers_region,
@@ -664,6 +652,10 @@
struct gbm_device *gbm;
const char *err;
+ /* Not supported yet */
+ if (disp->Options.UseFallback)
+ return EGL_FALSE;
+
loader_set_logger(_eglLog);
dri2_dpy = calloc(1, sizeof *dri2_dpy);
@@ -679,8 +671,6 @@
int n = snprintf(buf, sizeof(buf), DRM_DEV_NAME, DRM_DIR_NAME, 0);
if (n != -1 && n < sizeof(buf))
dri2_dpy->fd = loader_open_device(buf);
- if (dri2_dpy->fd < 0)
- dri2_dpy->fd = loader_open_device("/dev/dri/card0");
gbm = gbm_create_device(dri2_dpy->fd);
if (gbm == NULL) {
err = "DRI2: failed to create gbm device";
diff -Nru mesa-17.2.4/src/egl/drivers/dri2/platform_surfaceless.c mesa-17.3.3/src/egl/drivers/dri2/platform_surfaceless.c
--- mesa-17.2.4/src/egl/drivers/dri2/platform_surfaceless.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/egl/drivers/dri2/platform_surfaceless.c 2018-01-18 21:30:28.000000000 +0000
@@ -124,7 +124,7 @@
return NULL;
}
- if (!_eglInitSurface(&dri2_surf->base, disp, type, conf, attrib_list))
+ if (!dri2_init_surface(&dri2_surf->base, disp, type, conf, attrib_list, false))
goto cleanup_surface;
config = dri2_get_dri_config(dri2_conf, type,
@@ -165,6 +165,7 @@
dri2_dpy->core->destroyDrawable(dri2_surf->dri_drawable);
+ dri2_fini_surface(surf);
free(dri2_surf);
return EGL_TRUE;
}
@@ -234,7 +235,6 @@
.create_pbuffer_surface = dri2_surfaceless_create_pbuffer_surface,
.destroy_surface = surfaceless_destroy_surface,
.create_image = dri2_create_image_khr,
- .swap_interval = dri2_fallback_swap_interval,
.swap_buffers = surfaceless_swap_buffers,
.swap_buffers_with_damage = dri2_fallback_swap_buffers_with_damage,
.swap_buffers_region = dri2_fallback_swap_buffers_region,
@@ -267,12 +267,53 @@
NULL,
};
+static bool
+surfaceless_probe_device(_EGLDisplay *dpy, bool swrast)
+{
+ struct dri2_egl_display *dri2_dpy = dpy->DriverData;
+ const int limit = 64;
+ const int base = 128;
+ int fd;
+ int i;
+
+ for (i = 0; i < limit; ++i) {
+ char *card_path;
+ if (asprintf(&card_path, DRM_RENDER_DEV_NAME, DRM_DIR_NAME, base + i) < 0)
+ continue;
+
+ fd = loader_open_device(card_path);
+ free(card_path);
+ if (fd < 0)
+ continue;
+
+ if (swrast)
+ dri2_dpy->driver_name = strdup("kms_swrast");
+ else
+ dri2_dpy->driver_name = loader_get_driver_for_fd(fd);
+ if (!dri2_dpy->driver_name) {
+ close(fd);
+ continue;
+ }
+
+ dri2_dpy->fd = fd;
+ if (dri2_load_driver_dri3(dpy))
+ return true;
+
+ close(fd);
+ dri2_dpy->fd = -1;
+ free(dri2_dpy->driver_name);
+ dri2_dpy->driver_name = NULL;
+ }
+
+ return false;
+}
+
EGLBoolean
dri2_initialize_surfaceless(_EGLDriver *drv, _EGLDisplay *disp)
{
struct dri2_egl_display *dri2_dpy;
const char* err;
- int driver_loaded = 0;
+ bool driver_loaded = false;
loader_set_logger(_eglLog);
@@ -283,33 +324,14 @@
dri2_dpy->fd = -1;
disp->DriverData = (void *) dri2_dpy;
- const int limit = 64;
- const int base = 128;
- for (int i = 0; i < limit; ++i) {
- char *card_path;
- if (asprintf(&card_path, DRM_RENDER_DEV_NAME, DRM_DIR_NAME, base + i) < 0)
- continue;
-
- dri2_dpy->fd = loader_open_device(card_path);
-
- free(card_path);
- if (dri2_dpy->fd < 0)
- continue;
-
- dri2_dpy->driver_name = loader_get_driver_for_fd(dri2_dpy->fd);
- if (dri2_dpy->driver_name) {
- if (dri2_load_driver_dri3(disp)) {
- driver_loaded = 1;
- break;
- }
- free(dri2_dpy->driver_name);
- dri2_dpy->driver_name = NULL;
- }
- close(dri2_dpy->fd);
- dri2_dpy->fd = -1;
+ if (!disp->Options.UseFallback) {
+ driver_loaded = surfaceless_probe_device(disp, false);
+ if (!driver_loaded)
+ _eglLog(_EGL_WARNING,
+ "No hardware driver found, falling back to software rendering");
}
- if (!driver_loaded) {
+ if (!driver_loaded && !surfaceless_probe_device(disp, true)) {
err = "DRI2: failed to load driver";
goto cleanup;
}
diff -Nru mesa-17.2.4/src/egl/drivers/dri2/platform_wayland.c mesa-17.3.3/src/egl/drivers/dri2/platform_wayland.c
--- mesa-17.2.4/src/egl/drivers/dri2/platform_wayland.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/egl/drivers/dri2/platform_wayland.c 2018-01-18 21:30:28.000000000 +0000
@@ -63,10 +63,6 @@
HAS_RGB565 = 4,
};
-static EGLBoolean
-dri2_wl_swap_interval(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf,
- EGLint interval);
-
static int
roundtrip(struct dri2_egl_display *dri2_dpy)
{
@@ -146,7 +142,7 @@
return NULL;
}
- if (!_eglInitSurface(&dri2_surf->base, disp, EGL_WINDOW_BIT, conf, attrib_list))
+ if (!dri2_init_surface(&dri2_surf->base, disp, EGL_WINDOW_BIT, conf, attrib_list, false))
goto cleanup_surf;
if (dri2_dpy->wl_dmabuf || dri2_dpy->wl_drm) {
@@ -166,12 +162,6 @@
dri2_surf->format = WL_SHM_FORMAT_ARGB8888;
}
- if (!window) {
- _eglError(EGL_BAD_NATIVE_WINDOW, "dri2_create_surface");
- goto cleanup_surf;
- }
-
- dri2_surf->wl_win = window;
dri2_surf->wl_queue = wl_display_create_queue(dri2_dpy->wl_dpy);
if (!dri2_surf->wl_queue) {
_eglError(EGL_BAD_ALLOC, "dri2_create_surface");
@@ -204,18 +194,15 @@
wl_proxy_set_queue((struct wl_proxy *)dri2_surf->wl_surface_wrapper,
dri2_surf->wl_queue);
+ dri2_surf->wl_win = window;
dri2_surf->wl_win->private = dri2_surf;
dri2_surf->wl_win->destroy_window_callback = destroy_window_callback;
-
- dri2_surf->base.Width = -1;
- dri2_surf->base.Height = -1;
+ if (dri2_dpy->flush)
+ dri2_surf->wl_win->resize_callback = resize_callback;
config = dri2_get_dri_config(dri2_conf, EGL_WINDOW_BIT,
dri2_surf->base.GLColorspace);
- if (dri2_dpy->flush)
- dri2_surf->wl_win->resize_callback = resize_callback;
-
if (dri2_dpy->image_driver)
createNewDrawable = dri2_dpy->image_driver->createNewDrawable;
else if (dri2_dpy->dri2)
@@ -230,8 +217,7 @@
goto cleanup_surf_wrapper;
}
- dri2_wl_swap_interval(drv, disp, &dri2_surf->base,
- dri2_dpy->default_swap_interval);
+ dri2_surf->base.SwapInterval = dri2_dpy->default_swap_interval;
return &dri2_surf->base;
@@ -291,13 +277,8 @@
dri2_surf->color_buffers[i].data_size);
}
- if (dri2_dpy->dri2) {
- for (int i = 0; i < __DRI_BUFFER_COUNT; i++)
- if (dri2_surf->dri_buffers[i] &&
- dri2_surf->dri_buffers[i]->attachment != __DRI_BUFFER_BACK_LEFT)
- dri2_dpy->dri2->releaseBuffer(dri2_dpy->dri_screen,
- dri2_surf->dri_buffers[i]);
- }
+ if (dri2_dpy->dri2)
+ dri2_egl_surface_free_local_buffers(dri2_surf);
if (dri2_surf->throttle_callback)
wl_callback_destroy(dri2_surf->throttle_callback);
@@ -314,6 +295,7 @@
wl_proxy_wrapper_destroy(dri2_surf->wl_drm_wrapper);
wl_event_queue_destroy(dri2_surf->wl_queue);
+ dri2_fini_surface(surf);
free(surf);
return EGL_TRUE;
@@ -344,13 +326,8 @@
dri2_surf->color_buffers[i].locked = false;
}
- if (dri2_dpy->dri2) {
- for (int i = 0; i < __DRI_BUFFER_COUNT; i++)
- if (dri2_surf->dri_buffers[i] &&
- dri2_surf->dri_buffers[i]->attachment != __DRI_BUFFER_BACK_LEFT)
- dri2_dpy->dri2->releaseBuffer(dri2_dpy->dri_screen,
- dri2_surf->dri_buffers[i]);
- }
+ if (dri2_dpy->dri2)
+ dri2_egl_surface_free_local_buffers(dri2_surf);
}
static int
@@ -510,29 +487,6 @@
}
static int
-get_aux_bo(struct dri2_egl_surface *dri2_surf,
- unsigned int attachment, unsigned int format, __DRIbuffer *buffer)
-{
- struct dri2_egl_display *dri2_dpy =
- dri2_egl_display(dri2_surf->base.Resource.Display);
- __DRIbuffer *b = dri2_surf->dri_buffers[attachment];
-
- if (b == NULL) {
- b = dri2_dpy->dri2->allocateBuffer(dri2_dpy->dri_screen,
- attachment, format,
- dri2_surf->base.Width,
- dri2_surf->base.Height);
- dri2_surf->dri_buffers[attachment] = b;
- }
- if (b == NULL)
- return -1;
-
- memcpy(buffer, b, sizeof *buffer);
-
- return 0;
-}
-
-static int
update_buffers(struct dri2_egl_surface *dri2_surf)
{
struct dri2_egl_display *dri2_dpy =
@@ -586,16 +540,21 @@
return NULL;
for (i = 0, j = 0; i < 2 * count; i += 2, j++) {
+ __DRIbuffer *local;
+
switch (attachments[i]) {
case __DRI_BUFFER_BACK_LEFT:
back_bo_to_dri_buffer(dri2_surf, &dri2_surf->buffers[j]);
break;
default:
- if (get_aux_bo(dri2_surf, attachments[i], attachments[i + 1],
- &dri2_surf->buffers[j]) < 0) {
- _eglError(EGL_BAD_ALLOC, "failed to allocate aux buffer");
+ local = dri2_egl_surface_alloc_local_buffer(dri2_surf, attachments[i],
+ attachments[i + 1]);
+
+ if (!local) {
+ _eglError(EGL_BAD_ALLOC, "failed to allocate local buffer");
return NULL;
}
+ dri2_surf->buffers[j] = *local;
break;
}
}
@@ -712,6 +671,35 @@
.done = wayland_throttle_callback
};
+static EGLBoolean
+get_fourcc(struct dri2_egl_display *dri2_dpy,
+ __DRIimage *image, int *fourcc)
+{
+ EGLBoolean query;
+ int dri_format;
+
+ query = dri2_dpy->image->queryImage(image, __DRI_IMAGE_ATTRIB_FOURCC,
+ fourcc);
+ if (query)
+ return true;
+
+ query = dri2_dpy->image->queryImage(image, __DRI_IMAGE_ATTRIB_FORMAT,
+ &dri_format);
+ if (!query)
+ return false;
+
+ switch (dri_format) {
+ case __DRI_IMAGE_FORMAT_ARGB8888:
+ *fourcc = __DRI_IMAGE_FOURCC_ARGB8888;
+ return true;
+ case __DRI_IMAGE_FORMAT_XRGB8888:
+ *fourcc = __DRI_IMAGE_FOURCC_XRGB8888;
+ return true;
+ default:
+ return false;
+ }
+}
+
static struct wl_buffer *
create_wl_buffer(struct dri2_egl_display *dri2_dpy,
struct dri2_egl_surface *dri2_surf,
@@ -725,8 +713,7 @@
query = dri2_dpy->image->queryImage(image, __DRI_IMAGE_ATTRIB_WIDTH, &width);
query &= dri2_dpy->image->queryImage(image, __DRI_IMAGE_ATTRIB_HEIGHT,
&height);
- query &= dri2_dpy->image->queryImage(image, __DRI_IMAGE_ATTRIB_FOURCC,
- &fourcc);
+ query &= get_fourcc(dri2_dpy, image, &fourcc);
if (!query)
return NULL;
@@ -969,7 +956,7 @@
static EGLBoolean
dri2_wl_swap_buffers(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *draw)
{
- return dri2_wl_swap_buffers_with_damage (drv, disp, draw, NULL, 0);
+ return dri2_wl_swap_buffers_with_damage(drv, disp, draw, NULL, 0);
}
static struct wl_buffer *
@@ -1000,7 +987,7 @@
buffer = create_wl_buffer(dri2_dpy, NULL, image);
/* The buffer object will have been created with our internal event queue
- * because it is using the wl_drm object as a proxy factory. We want the
+ * because it is using wl_dmabuf/wl_drm as a proxy factory. We want the
* buffer to be used by the application so we'll reset it to the display's
* default event queue. This isn't actually racy, as the only event the
* buffer can get is a buffer release, which doesn't happen with an explicit
@@ -1186,58 +1173,15 @@
.global_remove = registry_handle_global_remove
};
-static EGLBoolean
-dri2_wl_swap_interval(_EGLDriver *drv,
- _EGLDisplay *disp,
- _EGLSurface *surf,
- EGLint interval)
-{
- if (interval > surf->Config->MaxSwapInterval)
- interval = surf->Config->MaxSwapInterval;
- else if (interval < surf->Config->MinSwapInterval)
- interval = surf->Config->MinSwapInterval;
-
- surf->SwapInterval = interval;
-
- return EGL_TRUE;
-}
-
static void
-dri2_wl_setup_swap_interval(struct dri2_egl_display *dri2_dpy)
+dri2_wl_setup_swap_interval(_EGLDisplay *disp)
{
- GLint vblank_mode = DRI_CONF_VBLANK_DEF_INTERVAL_1;
-
/* We can't use values greater than 1 on Wayland because we are using the
* frame callback to synchronise the frame and the only way we be sure to
* get a frame callback is to attach a new buffer. Therefore we can't just
* sit drawing nothing to wait until the next ‘n’ frame callbacks */
- if (dri2_dpy->config)
- dri2_dpy->config->configQueryi(dri2_dpy->dri_screen,
- "vblank_mode", &vblank_mode);
- switch (vblank_mode) {
- case DRI_CONF_VBLANK_NEVER:
- dri2_dpy->min_swap_interval = 0;
- dri2_dpy->max_swap_interval = 0;
- dri2_dpy->default_swap_interval = 0;
- break;
- case DRI_CONF_VBLANK_ALWAYS_SYNC:
- dri2_dpy->min_swap_interval = 1;
- dri2_dpy->max_swap_interval = 1;
- dri2_dpy->default_swap_interval = 1;
- break;
- case DRI_CONF_VBLANK_DEF_INTERVAL_0:
- dri2_dpy->min_swap_interval = 0;
- dri2_dpy->max_swap_interval = 1;
- dri2_dpy->default_swap_interval = 0;
- break;
- default:
- case DRI_CONF_VBLANK_DEF_INTERVAL_1:
- dri2_dpy->min_swap_interval = 0;
- dri2_dpy->max_swap_interval = 1;
- dri2_dpy->default_swap_interval = 1;
- break;
- }
+ dri2_setup_swap_interval(disp, 1);
}
static const struct dri2_egl_display_vtbl dri2_wl_display_vtbl = {
@@ -1247,7 +1191,6 @@
.create_pbuffer_surface = dri2_fallback_create_pbuffer_surface,
.destroy_surface = dri2_wl_destroy_surface,
.create_image = dri2_create_image_khr,
- .swap_interval = dri2_wl_swap_interval,
.swap_buffers = dri2_wl_swap_buffers,
.swap_buffers_with_damage = dri2_wl_swap_buffers_with_damage,
.swap_buffers_region = dri2_fallback_swap_buffers_region,
@@ -1418,7 +1361,7 @@
dri2_setup_screen(disp);
- dri2_wl_setup_swap_interval(dri2_dpy);
+ dri2_wl_setup_swap_interval(disp);
/* To use Prime, we must have _DRI_IMAGE v7 at least.
* createImageFromFds support indicates that Prime export/import
@@ -1565,7 +1508,7 @@
static int
os_create_anonymous_file(off_t size)
{
- static const char template[] = "/mesa-shared-XXXXXX";
+ static const char templ[] = "/mesa-shared-XXXXXX";
const char *path;
char *name;
int fd;
@@ -1577,12 +1520,12 @@
return -1;
}
- name = malloc(strlen(path) + sizeof(template));
+ name = malloc(strlen(path) + sizeof(templ));
if (!name)
return -1;
strcpy(name, path);
- strcat(name, template);
+ strcat(name, templ);
fd = create_tmpfile_cloexec(name);
@@ -1947,8 +1890,7 @@
.create_pixmap_surface = dri2_wl_create_pixmap_surface,
.create_pbuffer_surface = dri2_fallback_create_pbuffer_surface,
.destroy_surface = dri2_wl_destroy_surface,
- .create_image = dri2_fallback_create_image_khr,
- .swap_interval = dri2_wl_swap_interval,
+ .create_image = dri2_create_image_khr,
.swap_buffers = dri2_wl_swrast_swap_buffers,
.swap_buffers_with_damage = dri2_fallback_swap_buffers_with_damage,
.swap_buffers_region = dri2_fallback_swap_buffers_region,
@@ -1971,6 +1913,7 @@
static const __DRIextension *swrast_loader_extensions[] = {
&swrast_loader_extension.base,
+ &image_lookup_extension.base,
NULL,
};
@@ -2032,7 +1975,7 @@
dri2_setup_screen(disp);
- dri2_wl_setup_swap_interval(dri2_dpy);
+ dri2_wl_setup_swap_interval(disp);
if (!dri2_wl_add_configs_for_visuals(drv, disp)) {
_eglError(EGL_NOT_INITIALIZED, "DRI2: failed to add configs");
@@ -2054,17 +1997,13 @@
EGLBoolean
dri2_initialize_wayland(_EGLDriver *drv, _EGLDisplay *disp)
{
- EGLBoolean initialized = EGL_TRUE;
+ EGLBoolean initialized = EGL_FALSE;
- int hw_accel = (getenv("LIBGL_ALWAYS_SOFTWARE") == NULL);
+ if (!disp->Options.UseFallback)
+ initialized = dri2_initialize_wayland_drm(drv, disp);
- if (hw_accel) {
- if (!dri2_initialize_wayland_drm(drv, disp)) {
- initialized = dri2_initialize_wayland_swrast(drv, disp);
- }
- } else {
+ if (!initialized)
initialized = dri2_initialize_wayland_swrast(drv, disp);
- }
return initialized;
diff -Nru mesa-17.2.4/src/egl/drivers/dri2/platform_x11.c mesa-17.3.3/src/egl/drivers/dri2/platform_x11.c
--- mesa-17.2.4/src/egl/drivers/dri2/platform_x11.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/egl/drivers/dri2/platform_x11.c 2018-01-18 21:30:28.000000000 +0000
@@ -40,6 +40,7 @@
#endif
#include
#include
+#include "util/debug.h"
#include "util/macros.h"
#include "egl_dri2.h"
@@ -99,8 +100,8 @@
xcb_free_gc(dri2_dpy->conn, dri2_surf->swapgc);
}
-static void
-swrastGetDrawableInfo(__DRIdrawable * draw,
+static bool
+x11_get_drawable_info(__DRIdrawable * draw,
int *x, int *y, int *w, int *h,
void *loaderPrivate)
{
@@ -110,14 +111,15 @@
xcb_get_geometry_cookie_t cookie;
xcb_get_geometry_reply_t *reply;
xcb_generic_error_t *error;
+ bool ret;
- *x = *y = *w = *h = 0;
cookie = xcb_get_geometry (dri2_dpy->conn, dri2_surf->drawable);
reply = xcb_get_geometry_reply (dri2_dpy->conn, cookie, &error);
if (reply == NULL)
- return;
+ return false;
if (error != NULL) {
+ ret = false;
_eglLog(_EGL_WARNING, "error in xcb_get_geometry");
free(error);
} else {
@@ -125,8 +127,19 @@
*y = reply->y;
*w = reply->width;
*h = reply->height;
+ ret = true;
}
free(reply);
+ return ret;
+}
+
+static void
+swrastGetDrawableInfo(__DRIdrawable * draw,
+ int *x, int *y, int *w, int *h,
+ void *loaderPrivate)
+{
+ *x = *y = *w = *h = 0;
+ x11_get_drawable_info(draw, x, y, w, h, loaderPrivate);
}
static void
@@ -210,12 +223,8 @@
xcb_get_geometry_cookie_t cookie;
xcb_get_geometry_reply_t *reply;
xcb_generic_error_t *error;
- xcb_drawable_t drawable;
const __DRIconfig *config;
- STATIC_ASSERT(sizeof(uintptr_t) == sizeof(native_surface));
- drawable = (uintptr_t) native_surface;
-
(void) drv;
dri2_surf = malloc(sizeof *dri2_surf);
@@ -224,7 +233,7 @@
return NULL;
}
- if (!_eglInitSurface(&dri2_surf->base, disp, type, conf, attrib_list))
+ if (!dri2_init_surface(&dri2_surf->base, disp, type, conf, attrib_list, false))
goto cleanup_surf;
dri2_surf->region = XCB_NONE;
@@ -234,14 +243,8 @@
dri2_surf->drawable, dri2_dpy->screen->root,
dri2_surf->base.Width, dri2_surf->base.Height);
} else {
- if (!drawable) {
- if (type == EGL_WINDOW_BIT)
- _eglError(EGL_BAD_NATIVE_WINDOW, "dri2_create_surface");
- else
- _eglError(EGL_BAD_NATIVE_PIXMAP, "dri2_create_surface");
- goto cleanup_surf;
- }
- dri2_surf->drawable = drawable;
+ STATIC_ASSERT(sizeof(uintptr_t) == sizeof(native_surface));
+ dri2_surf->drawable = (uintptr_t) native_surface;
}
config = dri2_get_dri_config(dri2_conf, type,
@@ -369,7 +372,7 @@
_EGLConfig *conf, const EGLint *attrib_list)
{
return dri2_x11_create_surface(drv, disp, EGL_PBUFFER_BIT, conf,
- XCB_WINDOW_NONE, attrib_list);
+ NULL, attrib_list);
}
static EGLBoolean
@@ -392,6 +395,7 @@
if (surf->Type == EGL_PBUFFER_BIT)
xcb_free_pixmap (dri2_dpy->conn, dri2_surf->drawable);
+ dri2_fini_surface(surf);
free(surf);
return EGL_TRUE;
@@ -412,15 +416,14 @@
{
struct dri2_egl_display *dri2_dpy = dri2_egl_display(dpy);
struct dri2_egl_surface *dri2_surf = dri2_egl_surface(surf);
- int x, y, w = -1, h = -1;
+ int x, y, w, h;
__DRIdrawable *drawable = dri2_dpy->vtbl->get_dri_drawable(surf);
switch (attribute) {
case EGL_WIDTH:
case EGL_HEIGHT:
- swrastGetDrawableInfo(drawable, &x, &y, &w, &h, dri2_surf);
- if (w != -1 && h != -1) {
+ if (x11_get_drawable_info(drawable, &x, &y, &w, &h, dri2_surf)) {
surf->Width = w;
surf->Height = h;
}
@@ -957,16 +960,9 @@
struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
struct dri2_egl_surface *dri2_surf = dri2_egl_surface(surf);
- if (interval > surf->Config->MaxSwapInterval)
- interval = surf->Config->MaxSwapInterval;
- else if (interval < surf->Config->MinSwapInterval)
- interval = surf->Config->MinSwapInterval;
-
- if (interval != surf->SwapInterval && dri2_dpy->swap_available)
+ if (dri2_dpy->swap_available)
xcb_dri2_swap_interval(dri2_dpy->conn, dri2_surf->drawable, interval);
- surf->SwapInterval = interval;
-
return EGL_TRUE;
}
@@ -1134,8 +1130,7 @@
.create_pixmap_surface = dri2_x11_create_pixmap_surface,
.create_pbuffer_surface = dri2_x11_create_pbuffer_surface,
.destroy_surface = dri2_x11_destroy_surface,
- .create_image = dri2_fallback_create_image_khr,
- .swap_interval = dri2_fallback_swap_interval,
+ .create_image = dri2_create_image_khr,
.swap_buffers = dri2_x11_swap_buffers,
.set_damage_region = dri2_fallback_set_damage_region,
.swap_buffers_region = dri2_fallback_swap_buffers_region,
@@ -1179,6 +1174,7 @@
static const __DRIextension *swrast_loader_extensions[] = {
&swrast_loader_extension.base,
+ &image_lookup_extension.base,
NULL,
};
@@ -1268,9 +1264,9 @@
}
static void
-dri2_x11_setup_swap_interval(struct dri2_egl_display *dri2_dpy)
+dri2_x11_setup_swap_interval(_EGLDisplay *disp)
{
- GLint vblank_mode = DRI_CONF_VBLANK_DEF_INTERVAL_1;
+ struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
int arbitrary_max_interval = 1000;
/* default behavior for no SwapBuffers support: no vblank syncing
@@ -1284,34 +1280,9 @@
return;
/* If we do have swapbuffers, then we can support pretty much any swap
- * interval, but we allow driconf to override applications.
+ * interval.
*/
- if (dri2_dpy->config)
- dri2_dpy->config->configQueryi(dri2_dpy->dri_screen,
- "vblank_mode", &vblank_mode);
- switch (vblank_mode) {
- case DRI_CONF_VBLANK_NEVER:
- dri2_dpy->min_swap_interval = 0;
- dri2_dpy->max_swap_interval = 0;
- dri2_dpy->default_swap_interval = 0;
- break;
- case DRI_CONF_VBLANK_ALWAYS_SYNC:
- dri2_dpy->min_swap_interval = 1;
- dri2_dpy->max_swap_interval = arbitrary_max_interval;
- dri2_dpy->default_swap_interval = 1;
- break;
- case DRI_CONF_VBLANK_DEF_INTERVAL_0:
- dri2_dpy->min_swap_interval = 0;
- dri2_dpy->max_swap_interval = arbitrary_max_interval;
- dri2_dpy->default_swap_interval = 0;
- break;
- default:
- case DRI_CONF_VBLANK_DEF_INTERVAL_1:
- dri2_dpy->min_swap_interval = 0;
- dri2_dpy->max_swap_interval = arbitrary_max_interval;
- dri2_dpy->default_swap_interval = 1;
- break;
- }
+ dri2_setup_swap_interval(disp, arbitrary_max_interval);
}
#ifdef HAVE_DRI3
@@ -1356,7 +1327,7 @@
dri2_setup_screen(disp);
- dri2_x11_setup_swap_interval(dri2_dpy);
+ dri2_x11_setup_swap_interval(disp);
if (!dri2_dpy->is_different_gpu)
disp->Extensions.KHR_image_pixmap = EGL_TRUE;
@@ -1417,6 +1388,7 @@
static const __DRIextension *dri2_loader_extensions[] = {
&dri2_loader_extension.base,
&image_lookup_extension.base,
+ &use_invalidate.base,
&background_callable_extension.base,
NULL,
};
@@ -1456,7 +1428,7 @@
dri2_setup_screen(disp);
- dri2_x11_setup_swap_interval(dri2_dpy);
+ dri2_x11_setup_swap_interval(disp);
disp->Extensions.KHR_image_pixmap = EGL_TRUE;
disp->Extensions.NOK_swap_region = EGL_TRUE;
@@ -1488,9 +1460,9 @@
{
EGLBoolean initialized = EGL_FALSE;
- if (!getenv("LIBGL_ALWAYS_SOFTWARE")) {
+ if (!disp->Options.UseFallback) {
#ifdef HAVE_DRI3
- if (!getenv("LIBGL_DRI3_DISABLE"))
+ if (!env_var_as_boolean("LIBGL_DRI3_DISABLE", false))
initialized = dri2_initialize_x11_dri3(drv, disp);
#endif
diff -Nru mesa-17.2.4/src/egl/drivers/dri2/platform_x11_dri3.c mesa-17.3.3/src/egl/drivers/dri2/platform_x11_dri3.c
--- mesa-17.2.4/src/egl/drivers/dri2/platform_x11_dri3.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/egl/drivers/dri2/platform_x11_dri3.c 2018-01-18 21:30:28.000000000 +0000
@@ -45,43 +45,14 @@
return (struct dri3_egl_surface *)(((void*) draw) - offset);
}
-static int
-egl_dri3_get_swap_interval(struct loader_dri3_drawable *draw)
-{
- struct dri3_egl_surface *dri3_surf = loader_drawable_to_egl_surface(draw);
-
- return dri3_surf->base.SwapInterval;
-}
-
-static int
-egl_dri3_clamp_swap_interval(struct loader_dri3_drawable *draw, int interval)
-{
- struct dri3_egl_surface *dri3_surf = loader_drawable_to_egl_surface(draw);
-
- if (interval > dri3_surf->base.Config->MaxSwapInterval)
- interval = dri3_surf->base.Config->MaxSwapInterval;
- else if (interval < dri3_surf->base.Config->MinSwapInterval)
- interval = dri3_surf->base.Config->MinSwapInterval;
-
- return interval;
-}
-
-static void
-egl_dri3_set_swap_interval(struct loader_dri3_drawable *draw, int interval)
-{
- struct dri3_egl_surface *dri3_surf = loader_drawable_to_egl_surface(draw);
-
- dri3_surf->base.SwapInterval = interval;
-}
-
static void
egl_dri3_set_drawable_size(struct loader_dri3_drawable *draw,
int width, int height)
{
struct dri3_egl_surface *dri3_surf = loader_drawable_to_egl_surface(draw);
- dri3_surf->base.Width = width;
- dri3_surf->base.Height = height;
+ dri3_surf->surf.base.Width = width;
+ dri3_surf->surf.base.Height = height;
}
static bool
@@ -90,7 +61,7 @@
struct dri3_egl_surface *dri3_surf = loader_drawable_to_egl_surface(draw);
_EGLContext *ctx = _eglGetCurrentContext();
- return ctx->Resource.Display == dri3_surf->base.Resource.Display;
+ return ctx->Resource.Display == dri3_surf->surf.base.Resource.Display;
}
static __DRIcontext *
@@ -104,34 +75,19 @@
return dri2_ctx->dri_context;
}
-static __DRIscreen *
-egl_dri3_get_dri_screen(struct loader_dri3_drawable *draw)
-{
- _EGLContext *ctx = _eglGetCurrentContext();
- struct dri2_egl_context *dri2_ctx;
- if (!ctx)
- return NULL;
- dri2_ctx = dri2_egl_context(ctx);
- return dri2_egl_display(dri2_ctx->base.Resource.Display)->dri_screen;
-}
-
static void
egl_dri3_flush_drawable(struct loader_dri3_drawable *draw, unsigned flags)
{
struct dri3_egl_surface *dri3_surf = loader_drawable_to_egl_surface(draw);
- _EGLDisplay *disp = dri3_surf->base.Resource.Display;
+ _EGLDisplay *disp = dri3_surf->surf.base.Resource.Display;
- dri2_flush_drawable_for_swapbuffers(disp, &dri3_surf->base);
+ dri2_flush_drawable_for_swapbuffers(disp, &dri3_surf->surf.base);
}
static const struct loader_dri3_vtable egl_dri3_vtable = {
- .get_swap_interval = egl_dri3_get_swap_interval,
- .clamp_swap_interval = egl_dri3_clamp_swap_interval,
- .set_swap_interval = egl_dri3_set_swap_interval,
.set_drawable_size = egl_dri3_set_drawable_size,
.in_current_context = egl_dri3_in_current_context,
.get_dri_context = egl_dri3_get_dri_context,
- .get_dri_screen = egl_dri3_get_dri_screen,
.flush_drawable = egl_dri3_flush_drawable,
.show_fps = NULL,
};
@@ -145,6 +101,7 @@
loader_dri3_drawable_fini(&dri3_surf->loader_drawable);
+ dri2_fini_surface(surf);
free(surf);
return EGL_TRUE;
@@ -156,6 +113,7 @@
{
struct dri3_egl_surface *dri3_surf = dri3_egl_surface(surf);
+ dri3_surf->surf.base.SwapInterval = interval;
loader_dri3_set_swap_interval(&dri3_surf->loader_drawable, interval);
return EGL_TRUE;
@@ -172,9 +130,6 @@
const __DRIconfig *dri_config;
xcb_drawable_t drawable;
- STATIC_ASSERT(sizeof(uintptr_t) == sizeof(native_surface));
- drawable = (uintptr_t) native_surface;
-
(void) drv;
dri3_surf = calloc(1, sizeof *dri3_surf);
@@ -183,18 +138,21 @@
return NULL;
}
- if (!_eglInitSurface(&dri3_surf->base, disp, type, conf, attrib_list))
+ if (!dri2_init_surface(&dri3_surf->surf.base, disp, type, conf, attrib_list, false))
goto cleanup_surf;
if (type == EGL_PBUFFER_BIT) {
drawable = xcb_generate_id(dri2_dpy->conn);
xcb_create_pixmap(dri2_dpy->conn, conf->BufferSize,
drawable, dri2_dpy->screen->root,
- dri3_surf->base.Width, dri3_surf->base.Height);
+ dri3_surf->surf.base.Width, dri3_surf->surf.base.Height);
+ } else {
+ STATIC_ASSERT(sizeof(uintptr_t) == sizeof(native_surface));
+ drawable = (uintptr_t) native_surface;
}
dri_config = dri2_get_dri_config(dri2_conf, type,
- dri3_surf->base.GLColorspace);
+ dri3_surf->surf.base.GLColorspace);
if (loader_dri3_drawable_init(dri2_dpy->conn, drawable,
dri2_dpy->dri_screen,
@@ -206,7 +164,7 @@
goto cleanup_pixmap;
}
- return &dri3_surf->base;
+ return &dri3_surf->surf.base;
cleanup_pixmap:
if (type == EGL_PBUFFER_BIT)
@@ -269,7 +227,7 @@
_EGLConfig *conf, const EGLint *attrib_list)
{
return dri3_create_surface(drv, disp, EGL_PBUFFER_BIT, conf,
- XCB_WINDOW_NONE, attrib_list);
+ NULL, attrib_list);
}
static EGLBoolean
@@ -346,8 +304,6 @@
_EGLContext *ctx, EGLenum target,
EGLClientBuffer buffer, const EGLint *attr_list)
{
- (void) drv;
-
switch (target) {
case EGL_NATIVE_PIXMAP_KHR:
return dri3_create_image_khr_pixmap(disp, ctx, buffer, attr_list);
@@ -444,6 +400,14 @@
return dri3_surf->loader_drawable.dri_drawable;
}
+static void
+dri3_close_screen_notify(_EGLDisplay *dpy)
+{
+ struct dri2_egl_display *dri2_dpy = dri2_egl_display(dpy);
+
+ loader_dri3_close_screen(dri2_dpy->dri_screen);
+}
+
struct dri2_egl_display_vtbl dri3_x11_display_vtbl = {
.authenticate = dri3_authenticate,
.create_window_surface = dri3_create_window_surface,
@@ -463,6 +427,7 @@
.create_wayland_buffer_from_image = dri2_fallback_create_wayland_buffer_from_image,
.get_sync_values = dri3_get_sync_values,
.get_dri_drawable = dri3_get_dri_drawable,
+ .close_screen_notify = dri3_close_screen_notify,
};
EGLBoolean
diff -Nru mesa-17.2.4/src/egl/drivers/dri2/platform_x11_dri3.h mesa-17.3.3/src/egl/drivers/dri2/platform_x11_dri3.h
--- mesa-17.2.4/src/egl/drivers/dri2/platform_x11_dri3.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/egl/drivers/dri2/platform_x11_dri3.h 2018-01-18 21:30:28.000000000 +0000
@@ -28,7 +28,7 @@
_EGL_DRIVER_TYPECAST(dri3_egl_surface, _EGLSurface, obj)
struct dri3_egl_surface {
- _EGLSurface base;
+ struct dri2_egl_surface surf;
struct loader_dri3_drawable loader_drawable;
};
diff -Nru mesa-17.2.4/src/egl/drivers/haiku/egl_haiku.cpp mesa-17.3.3/src/egl/drivers/haiku/egl_haiku.cpp
--- mesa-17.2.4/src/egl/drivers/haiku/egl_haiku.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/egl/drivers/haiku/egl_haiku.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -54,11 +54,6 @@
_EGL_DRIVER_STANDARD_TYPECASTS(haiku_egl)
-struct haiku_egl_driver
-{
- _EGLDriver base;
-};
-
struct haiku_egl_config
{
_EGLConfig base;
@@ -308,48 +303,38 @@
}
-extern "C"
-void
-haiku_unload(_EGLDriver* drv)
-{
-
-}
-
-
/**
* This is the main entrypoint into the driver, called by libEGL.
* Create a new _EGLDriver object and init its dispatch table.
*/
extern "C"
_EGLDriver*
-_eglBuiltInDriverHaiku(const char *args)
+_eglBuiltInDriver(void)
{
CALLED();
- struct haiku_egl_driver* driver;
- driver = (struct haiku_egl_driver*) calloc(1, sizeof(*driver));
+ _EGLDriver* driver = (_EGLDriver*) calloc(1, sizeof(*driver)); // C++: void* needs an explicit cast
if (!driver) {
_eglError(EGL_BAD_ALLOC, "_eglBuiltInDriverHaiku");
return NULL;
}
- _eglInitDriverFallbacks(&driver->base);
- driver->base.API.Initialize = init_haiku;
- driver->base.API.Terminate = haiku_terminate;
- driver->base.API.CreateContext = haiku_create_context;
- driver->base.API.DestroyContext = haiku_destroy_context;
- driver->base.API.MakeCurrent = haiku_make_current;
- driver->base.API.CreateWindowSurface = haiku_create_window_surface;
- driver->base.API.CreatePixmapSurface = haiku_create_pixmap_surface;
- driver->base.API.CreatePbufferSurface = haiku_create_pbuffer_surface;
- driver->base.API.DestroySurface = haiku_destroy_surface;
+ _eglInitDriverFallbacks(driver);
+ driver->API.Initialize = init_haiku;
+ driver->API.Terminate = haiku_terminate;
+ driver->API.CreateContext = haiku_create_context;
+ driver->API.DestroyContext = haiku_destroy_context;
+ driver->API.MakeCurrent = haiku_make_current;
+ driver->API.CreateWindowSurface = haiku_create_window_surface;
+ driver->API.CreatePixmapSurface = haiku_create_pixmap_surface;
+ driver->API.CreatePbufferSurface = haiku_create_pbuffer_surface;
+ driver->API.DestroySurface = haiku_destroy_surface;
- driver->base.API.SwapBuffers = haiku_swap_buffers;
+ driver->API.SwapBuffers = haiku_swap_buffers;
- driver->base.Name = "Haiku";
- driver->base.Unload = haiku_unload;
+ driver->Name = "Haiku";
TRACE("API Calls defined\n");
- return &driver->base;
+ return driver;
}
diff -Nru mesa-17.2.4/src/egl/main/eglapi.c mesa-17.3.3/src/egl/main/eglapi.c
--- mesa-17.2.4/src/egl/main/eglapi.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/egl/main/eglapi.c 2018-01-18 21:30:28.000000000 +0000
@@ -88,7 +88,6 @@
#include
#include "c99_compat.h"
#include "c11/threads.h"
-#include "GL/mesa_glinterop.h"
#include "util/macros.h"
#include "eglglobals.h"
@@ -102,6 +101,7 @@
#include "eglimage.h"
#include "eglsync.h"
+#include "GL/mesa_glinterop.h"
/**
* Macros to help return an API entrypoint.
@@ -274,8 +274,7 @@
return EGL_TRUE;
}
- _eglDebugReportFull(EGL_BAD_ALLOC, funcName, funcName,
- EGL_DEBUG_MSG_CRITICAL_KHR, NULL, NULL);
+ _eglDebugReport(EGL_BAD_ALLOC, funcName, EGL_DEBUG_MSG_CRITICAL_KHR, NULL);
return EGL_FALSE;
}
@@ -377,7 +376,7 @@
static EGLDisplay
_eglGetPlatformDisplayCommon(EGLenum platform, void *native_display,
- const EGLint *attrib_list)
+ const EGLint *attrib_list)
{
_EGLDisplay *dpy;
@@ -491,6 +490,8 @@
_EGL_CHECK_EXTENSION(EXT_image_dma_buf_import_modifiers);
_EGL_CHECK_EXTENSION(EXT_swap_buffers_with_damage);
+ _EGL_CHECK_EXTENSION(IMG_context_priority);
+
_EGL_CHECK_EXTENSION(KHR_cl_event2);
_EGL_CHECK_EXTENSION(KHR_config_attribs);
_EGL_CHECK_EXTENSION(KHR_create_context);
@@ -584,7 +585,7 @@
RETURN_EGL_ERROR(NULL, EGL_BAD_DISPLAY, EGL_FALSE);
if (!disp->Initialized) {
- if (!_eglMatchDriver(disp, EGL_FALSE))
+ if (!_eglMatchDriver(disp))
RETURN_EGL_ERROR(disp, EGL_NOT_INITIALIZED, EGL_FALSE);
/* limit to APIs supported by core */
@@ -616,8 +617,8 @@
_eglCreateExtensionsString(disp);
_eglCreateAPIsString(disp);
snprintf(disp->VersionString, sizeof(disp->VersionString),
- "%d.%d (%s)", disp->Version / 10, disp->Version % 10,
- disp->Driver->Name);
+ "%d.%d (%s)", disp->Version / 10, disp->Version % 10,
+ disp->Driver->Name);
}
/* Update applications version of major and minor if not NULL */
@@ -715,7 +716,7 @@
_EGL_CHECK_DISPLAY(disp, EGL_FALSE, drv);
ret = drv->API.ChooseConfig(drv, disp, attrib_list, configs,
- config_size, num_config);
+ config_size, num_config);
RETURN_EGL_EVAL(disp, ret);
}
@@ -978,13 +979,13 @@
_fixupNativePixmap(_EGLDisplay *disp, void *native_pixmap)
{
#ifdef HAVE_X11_PLATFORM
- /* The `native_pixmap` parameter for the X11 platform differs between
- * eglCreatePixmapSurface() and eglCreatePlatformPixmapSurfaceEXT(). In
- * eglCreatePixmapSurface(), the type of `native_pixmap` is an Xlib
- * `Pixmap`. In eglCreatePlatformPixmapSurfaceEXT(), the type is
- * `Pixmap*`. Convert `Pixmap*` to `Pixmap` because that's what
- * dri2_x11_create_pixmap_surface() expects.
- */
+ /* The `native_pixmap` parameter for the X11 platform differs between
+ * eglCreatePixmapSurface() and eglCreatePlatformPixmapSurfaceEXT(). In
+ * eglCreatePixmapSurface(), the type of `native_pixmap` is an Xlib
+ * `Pixmap`. In eglCreatePlatformPixmapSurfaceEXT(), the type is
+ * `Pixmap*`. Convert `Pixmap*` to `Pixmap` because that's what
+ * dri2_x11_create_pixmap_surface() expects.
+ */
if (disp && disp->Platform == _EGL_PLATFORM_X11 && native_pixmap != NULL)
return (void *)(* (Pixmap*) native_pixmap);
#endif
@@ -1021,6 +1022,9 @@
if ((conf->SurfaceType & EGL_PIXMAP_BIT) == 0)
RETURN_EGL_ERROR(disp, EGL_BAD_MATCH, EGL_NO_SURFACE);
+ if (native_pixmap == NULL)
+ RETURN_EGL_ERROR(disp, EGL_BAD_NATIVE_PIXMAP, EGL_NO_SURFACE);
+
surf = drv->API.CreatePixmapSurface(drv, disp, conf, native_pixmap,
attrib_list);
ret = (surf) ? _eglLinkSurface(surf) : EGL_NO_SURFACE;
@@ -1038,13 +1042,13 @@
_EGL_FUNC_START(disp, EGL_OBJECT_DISPLAY_KHR, NULL, EGL_NO_SURFACE);
STATIC_ASSERT(sizeof(void*) == sizeof(pixmap));
return _eglCreatePixmapSurfaceCommon(disp, config, (void*) pixmap,
- attrib_list);
+ attrib_list);
}
static EGLSurface EGLAPIENTRY
eglCreatePlatformPixmapSurfaceEXT(EGLDisplay dpy, EGLConfig config,
- void *native_pixmap,
- const EGLint *attrib_list)
+ void *native_pixmap,
+ const EGLint *attrib_list)
{
_EGLDisplay *disp = _eglLockDisplay(dpy);
@@ -1201,7 +1205,17 @@
if (_eglGetSurfaceHandle(surf) == EGL_NO_SURFACE)
RETURN_EGL_ERROR(disp, EGL_BAD_SURFACE, EGL_FALSE);
- ret = drv->API.SwapInterval(drv, disp, surf, interval);
+ interval = CLAMP(interval,
+ surf->Config->MinSwapInterval,
+ surf->Config->MaxSwapInterval);
+
+ if (surf->SwapInterval != interval)
+ ret = drv->API.SwapInterval(drv, disp, surf, interval);
+ else
+ ret = EGL_TRUE;
+
+ if (ret)
+ surf->SwapInterval = interval;
RETURN_EGL_EVAL(disp, ret);
}
@@ -1631,7 +1645,7 @@
static EGLImage
_eglCreateImageCommon(_EGLDisplay *disp, EGLContext ctx, EGLenum target,
- EGLClientBuffer buffer, const EGLint *attr_list)
+ EGLClientBuffer buffer, const EGLint *attr_list)
{
_EGLContext *context = _eglLookupContext(ctx, disp);
_EGLDriver *drv;
@@ -1649,8 +1663,8 @@
if (ctx != EGL_NO_CONTEXT && target == EGL_LINUX_DMA_BUF_EXT)
RETURN_EGL_ERROR(disp, EGL_BAD_PARAMETER, EGL_NO_IMAGE_KHR);
- img = drv->API.CreateImageKHR(drv,
- disp, context, target, buffer, attr_list);
+ img = drv->API.CreateImageKHR(drv, disp, context, target,
+ buffer, attr_list);
ret = (img) ? _eglLinkImage(img) : EGL_NO_IMAGE_KHR;
RETURN_EGL_EVAL(disp, ret);
@@ -1758,7 +1772,7 @@
/* return an error if the client API doesn't support GL_OES_EGL_sync */
if (ctx && (ctx->Resource.Display != disp ||
- ctx->ClientAPI != EGL_OPENGL_ES_API))
+ ctx->ClientAPI != EGL_OPENGL_ES_API))
RETURN_EGL_ERROR(disp, EGL_BAD_MATCH, EGL_NO_SYNC_KHR);
switch (type) {
@@ -2073,7 +2087,7 @@
static EGLBoolean EGLAPIENTRY
eglSwapBuffersRegionNOK(EGLDisplay dpy, EGLSurface surface,
- EGLint numRects, const EGLint *rects)
+ EGLint numRects, const EGLint *rects)
{
_EGLContext *ctx = _eglGetCurrentContext();
_EGLDisplay *disp = _eglLockDisplay(dpy);
@@ -2121,7 +2135,7 @@
static EGLBoolean EGLAPIENTRY
eglExportDRMImageMESA(EGLDisplay dpy, EGLImage image,
- EGLint *name, EGLint *handle, EGLint *stride)
+ EGLint *name, EGLint *handle, EGLint *stride)
{
_EGLDisplay *disp = _eglLockDisplay(dpy);
_EGLImage *img = _eglLookupImage(image, disp);
@@ -2323,7 +2337,7 @@
static EGLint EGLAPIENTRY
eglLabelObjectKHR(EGLDisplay dpy, EGLenum objectType, EGLObjectKHR object,
- EGLLabelKHR label)
+ EGLLabelKHR label)
{
_EGLDisplay *disp = NULL;
_EGLResourceType type;
@@ -2381,16 +2395,9 @@
RETURN_EGL_ERROR(disp, EGL_BAD_PARAMETER, EGL_BAD_PARAMETER);
}
-static EGLBoolean
-_validDebugMessageLevel(EGLAttrib level)
-{
- return (level >= EGL_DEBUG_MSG_CRITICAL_KHR &&
- level <= EGL_DEBUG_MSG_INFO_KHR);
-}
-
static EGLint EGLAPIENTRY
eglDebugMessageControlKHR(EGLDEBUGPROCKHR callback,
- const EGLAttrib *attrib_list)
+ const EGLAttrib *attrib_list)
{
unsigned int newEnabled;
@@ -2403,20 +2410,24 @@
int i;
for (i = 0; attrib_list[i] != EGL_NONE; i += 2) {
- if (_validDebugMessageLevel(attrib_list[i])) {
+ switch (attrib_list[i]) {
+ case EGL_DEBUG_MSG_CRITICAL_KHR:
+ case EGL_DEBUG_MSG_ERROR_KHR:
+ case EGL_DEBUG_MSG_WARN_KHR:
+ case EGL_DEBUG_MSG_INFO_KHR:
if (attrib_list[i + 1])
newEnabled |= DebugBitFromType(attrib_list[i]);
else
newEnabled &= ~DebugBitFromType(attrib_list[i]);
- continue;
+ break;
+ default:
+ // On error, set the last error code, call the current
+ // debug callback, and return the error code.
+ mtx_unlock(_eglGlobal.Mutex);
+ _eglReportError(EGL_BAD_ATTRIBUTE, NULL,
+ "Invalid attribute 0x%04lx", (unsigned long) attrib_list[i]);
+ return EGL_BAD_ATTRIBUTE;
}
-
- // On error, set the last error code, call the current
- // debug callback, and return the error code.
- mtx_unlock(_eglGlobal.Mutex);
- _eglReportError(EGL_BAD_ATTRIBUTE, NULL,
- "Invalid attribute 0x%04lx", (unsigned long) attrib_list[i]);
- return EGL_BAD_ATTRIBUTE;
}
}
@@ -2439,25 +2450,25 @@
mtx_lock(_eglGlobal.Mutex);
- do {
- if (_validDebugMessageLevel(attribute)) {
- if (_eglGlobal.debugTypesEnabled & DebugBitFromType(attribute))
- *value = EGL_TRUE;
- else
- *value = EGL_FALSE;
- break;
- }
-
- if (attribute == EGL_DEBUG_CALLBACK_KHR) {
- *value = (EGLAttrib) _eglGlobal.debugCallback;
- break;
- }
-
+ switch (attribute) {
+ case EGL_DEBUG_MSG_CRITICAL_KHR:
+ case EGL_DEBUG_MSG_ERROR_KHR:
+ case EGL_DEBUG_MSG_WARN_KHR:
+ case EGL_DEBUG_MSG_INFO_KHR:
+ if (_eglGlobal.debugTypesEnabled & DebugBitFromType(attribute))
+ *value = EGL_TRUE;
+ else
+ *value = EGL_FALSE;
+ break;
+ case EGL_DEBUG_CALLBACK_KHR:
+ *value = (EGLAttrib) _eglGlobal.debugCallback;
+ break;
+ default:
mtx_unlock(_eglGlobal.Mutex);
_eglReportError(EGL_BAD_ATTRIBUTE, NULL,
- "Invalid attribute 0x%04lx", (unsigned long) attribute);
+ "Invalid attribute 0x%04lx", (unsigned long) attribute);
return EGL_FALSE;
- } while (0);
+ }
mtx_unlock(_eglGlobal.Mutex);
return EGL_TRUE;
diff -Nru mesa-17.2.4/src/egl/main/eglconfig.c mesa-17.3.3/src/egl/main/eglconfig.c
--- mesa-17.2.4/src/egl/main/eglconfig.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/egl/main/eglconfig.c 2018-01-18 21:30:28.000000000 +0000
@@ -118,15 +118,16 @@
}
-enum {
- /* types */
+enum type {
ATTRIB_TYPE_INTEGER,
ATTRIB_TYPE_BOOLEAN,
ATTRIB_TYPE_BITMASK,
ATTRIB_TYPE_ENUM,
ATTRIB_TYPE_PSEUDO, /* non-queryable */
ATTRIB_TYPE_PLATFORM, /* platform-dependent */
- /* criteria */
+};
+
+enum criterion {
ATTRIB_CRITERION_EXACT,
ATTRIB_CRITERION_ATLEAST,
ATTRIB_CRITERION_MASK,
@@ -138,8 +139,8 @@
/* EGL spec Table 3.1 and 3.4 */
static const struct {
EGLint attr;
- EGLint type;
- EGLint criterion;
+ enum type type;
+ enum criterion criterion;
EGLint default_value;
} _eglValidationTable[] =
{
@@ -355,9 +356,6 @@
if (val != 0)
valid = EGL_FALSE;
break;
- default:
- assert(0);
- break;
}
if (!valid && for_matching) {
@@ -465,8 +463,8 @@
case ATTRIB_CRITERION_SPECIAL:
/* ignored here */
break;
- default:
- assert(0);
+ case ATTRIB_CRITERION_IGNORE:
+ unreachable("already handled above");
break;
}
diff -Nru mesa-17.2.4/src/egl/main/eglcontext.c mesa-17.3.3/src/egl/main/eglcontext.c
--- mesa-17.2.4/src/egl/main/eglcontext.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/egl/main/eglcontext.c 2018-01-18 21:30:28.000000000 +0000
@@ -332,6 +332,60 @@
ctx->NoError = !!val;
break;
+ case EGL_CONTEXT_PRIORITY_LEVEL_IMG:
+ /* The EGL_IMG_context_priority spec says:
+ *
+ * "EGL_CONTEXT_PRIORITY_LEVEL_IMG determines the priority level of
+ * the context to be created. This attribute is a hint, as an
+ * implementation may not support multiple contexts at some
+ * priority levels and system policy may limit access to high
+ * priority contexts to appropriate system privilege level. The
+ * default value for EGL_CONTEXT_PRIORITY_LEVEL_IMG is
+ * EGL_CONTEXT_PRIORITY_MEDIUM_IMG."
+ */
+ {
+ int bit;
+
+ switch (val) {
+ case EGL_CONTEXT_PRIORITY_HIGH_IMG:
+ bit = __EGL_CONTEXT_PRIORITY_HIGH_BIT;
+ break;
+ case EGL_CONTEXT_PRIORITY_MEDIUM_IMG:
+ bit = __EGL_CONTEXT_PRIORITY_MEDIUM_BIT;
+ break;
+ case EGL_CONTEXT_PRIORITY_LOW_IMG:
+ bit = __EGL_CONTEXT_PRIORITY_LOW_BIT;
+ break;
+ default:
+ bit = -1;
+ break;
+ }
+
+ if (bit < 0) {
+ err = EGL_BAD_ATTRIBUTE;
+ break;
+ }
+
+ /* "This extension allows an EGLContext to be created with a
+ * priority hint. It is possible that an implementation will not
+ * honour the hint, especially if there are constraints on the
+ * number of high priority contexts available in the system, or
+ * system policy limits access to high priority contexts to
+ * appropriate system privilege level. A query is provided to find
+ * the real priority level assigned to the context after creation."
+ *
+ * We currently assume that the driver applies the priority hint
+ * and filters out any it cannot handle during the screen setup,
+ * e.g. dri2_setup_screen(). As such we can mask any change that
+ * the driver would fail, and ctx->ContextPriority matches the
+ * hint applied to the driver/hardware backend.
+ */
+ if (dpy->Extensions.IMG_context_priority & (1 << bit))
+ ctx->ContextPriority = val;
+
+ break;
+ }
+
default:
err = EGL_BAD_ATTRIBUTE;
break;
@@ -533,6 +587,7 @@
ctx->Flags = 0;
ctx->Profile = EGL_CONTEXT_OPENGL_CORE_PROFILE_BIT_KHR;
ctx->ResetNotificationStrategy = EGL_NO_RESET_NOTIFICATION_KHR;
+ ctx->ContextPriority = EGL_CONTEXT_PRIORITY_MEDIUM_IMG;
err = _eglParseContextAttribList(ctx, dpy, attrib_list);
if (err == EGL_SUCCESS && ctx->Config) {
@@ -598,6 +653,9 @@
case EGL_RENDER_BUFFER:
*value = _eglQueryContextRenderBuffer(c);
break;
+ case EGL_CONTEXT_PRIORITY_LEVEL_IMG:
+ *value = c->ContextPriority;
+ break;
default:
return _eglError(EGL_BAD_ATTRIBUTE, "eglQueryContext");
}
diff -Nru mesa-17.2.4/src/egl/main/eglcontext.h mesa-17.3.3/src/egl/main/eglcontext.h
--- mesa-17.2.4/src/egl/main/eglcontext.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/egl/main/eglcontext.h 2018-01-18 21:30:28.000000000 +0000
@@ -62,6 +62,7 @@
EGLint Flags;
EGLint Profile;
EGLint ResetNotificationStrategy;
+ EGLint ContextPriority;
EGLBoolean NoError;
/* The real render buffer when a window surface is bound */
diff -Nru mesa-17.2.4/src/egl/main/eglcurrent.c mesa-17.3.3/src/egl/main/eglcurrent.c
--- mesa-17.2.4/src/egl/main/eglcurrent.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/egl/main/eglcurrent.c 2018-01-18 21:30:28.000000000 +0000
@@ -281,80 +281,49 @@
{
if (errCode != EGL_SUCCESS) {
EGLint type;
- if (errCode == EGL_BAD_ALLOC) {
+ if (errCode == EGL_BAD_ALLOC)
type = EGL_DEBUG_MSG_CRITICAL_KHR;
- } else {
+ else
type = EGL_DEBUG_MSG_ERROR_KHR;
- }
- _eglDebugReport(errCode, msg, type, NULL);
+ _eglDebugReport(errCode, NULL, type, msg);
} else
_eglInternalError(errCode, msg);
return EGL_FALSE;
}
-/**
- * Returns the label set for the current thread.
- */
-EGLLabelKHR
-_eglGetThreadLabel(void)
-{
- _EGLThreadInfo *t = _eglGetCurrentThread();
- return t->Label;
-}
-
-static void
-_eglDebugReportFullv(EGLenum error, const char *command, const char *funcName,
- EGLint type, EGLLabelKHR objectLabel, const char *message, va_list args)
+void
+_eglDebugReport(EGLenum error, const char *funcName,
+ EGLint type, const char *message, ...)
{
+ _EGLThreadInfo *thr = _eglGetCurrentThread();
EGLDEBUGPROCKHR callback = NULL;
+ va_list args;
+
+ if (funcName == NULL)
+ funcName = thr->CurrentFuncName;
mtx_lock(_eglGlobal.Mutex);
- if (_eglGlobal.debugTypesEnabled & DebugBitFromType(type)) {
+ if (_eglGlobal.debugTypesEnabled & DebugBitFromType(type))
callback = _eglGlobal.debugCallback;
- }
+
mtx_unlock(_eglGlobal.Mutex);
if (callback != NULL) {
char *buf = NULL;
if (message != NULL) {
- if (vasprintf(&buf, message, args) < 0) {
+ va_start(args, message);
+ if (vasprintf(&buf, message, args) < 0)
buf = NULL;
- }
+
+ va_end(args);
}
- callback(error, command, type, _eglGetThreadLabel(), objectLabel, buf);
+ callback(error, funcName, type, thr->Label, thr->CurrentObjectLabel, buf);
free(buf);
}
- if (type == EGL_DEBUG_MSG_CRITICAL_KHR || type == EGL_DEBUG_MSG_ERROR_KHR) {
+ if (type == EGL_DEBUG_MSG_CRITICAL_KHR || type == EGL_DEBUG_MSG_ERROR_KHR)
_eglInternalError(error, funcName);
- }
-}
-
-void
-_eglDebugReportFull(EGLenum error, const char *command, const char *funcName,
- EGLint type, EGLLabelKHR objectLabel, const char *message, ...)
-{
- va_list args;
- va_start(args, message);
- _eglDebugReportFullv(error, command, funcName, type, objectLabel, message, args);
- va_end(args);
-}
-
-void
-_eglDebugReport(EGLenum error, const char *funcName,
- EGLint type, const char *message, ...)
-{
- _EGLThreadInfo *thr = _eglGetCurrentThread();
- va_list args;
-
- if (funcName == NULL) {
- funcName = thr->CurrentFuncName;
- }
-
- va_start(args, message);
- _eglDebugReportFullv(error, thr->CurrentFuncName, funcName, type, thr->CurrentObjectLabel, message, args);
- va_end(args);
}
diff -Nru mesa-17.2.4/src/egl/main/eglcurrent.h mesa-17.3.3/src/egl/main/eglcurrent.h
--- mesa-17.2.4/src/egl/main/eglcurrent.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/egl/main/eglcurrent.h 2018-01-18 21:30:28.000000000 +0000
@@ -99,13 +99,6 @@
extern EGLBoolean
_eglError(EGLint errCode, const char *msg);
-extern EGLLabelKHR
-_eglGetThreadLabel(void);
-
-extern void
-_eglDebugReportFull(EGLenum error, const char *command, const char *funcName,
- EGLint type, EGLLabelKHR objectLabel, const char *message, ...);
-
extern void
_eglDebugReport(EGLenum error, const char *funcName,
EGLint type, const char *message, ...);
diff -Nru mesa-17.2.4/src/egl/main/egldisplay.h mesa-17.3.3/src/egl/main/egldisplay.h
--- mesa-17.2.4/src/egl/main/egldisplay.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/egl/main/egldisplay.h 2018-01-18 21:30:28.000000000 +0000
@@ -105,6 +105,11 @@
EGLBoolean EXT_image_dma_buf_import_modifiers;
EGLBoolean EXT_swap_buffers_with_damage;
+ unsigned int IMG_context_priority;
+#define __EGL_CONTEXT_PRIORITY_LOW_BIT 0
+#define __EGL_CONTEXT_PRIORITY_MEDIUM_BIT 1
+#define __EGL_CONTEXT_PRIORITY_HIGH_BIT 2
+
EGLBoolean KHR_cl_event2;
EGLBoolean KHR_config_attribs;
EGLBoolean KHR_create_context;
@@ -152,7 +157,6 @@
/* options that affect how the driver initializes the display */
struct {
- EGLBoolean TestOnly; /**< Driver should not set fields when true */
EGLBoolean UseFallback; /**< Use fallback driver (sw or less features) */
void *Platform; /**< Platform-specific options */
} Options;
@@ -168,7 +172,6 @@
char ClientAPIsString[100]; /**< EGL_CLIENT_APIS */
char ExtensionsString[_EGL_MAX_EXTENSIONS_LEN]; /**< EGL_EXTENSIONS */
- _EGLArray *Screens;
_EGLArray *Configs;
/* lists of resources */
diff -Nru mesa-17.2.4/src/egl/main/egldriver.c mesa-17.3.3/src/egl/main/egldriver.c
--- mesa-17.2.4/src/egl/main/egldriver.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/egl/main/egldriver.c 2018-01-18 21:30:28.000000000 +0000
@@ -44,250 +44,48 @@
#include "egldriver.h"
#include "egllog.h"
-typedef struct _egl_module {
- char *Name;
- _EGLMain_t BuiltIn;
- _EGLDriver *Driver;
-} _EGLModule;
+#include "util/debug.h"
static mtx_t _eglModuleMutex = _MTX_INITIALIZER_NP;
-static _EGLArray *_eglModules;
+static _EGLDriver *_eglDriver;
-const struct {
- const char *name;
- _EGLMain_t main;
-} _eglBuiltInDrivers[] = {
-#ifdef _EGL_BUILT_IN_DRIVER_DRI2
- { "egl_dri2", _eglBuiltInDriverDRI2 },
-#endif
-#ifdef _EGL_BUILT_IN_DRIVER_HAIKU
- { "egl_haiku", _eglBuiltInDriverHaiku },
-#endif
- { NULL, NULL }
-};
-
-/**
- * Load a module and create the driver object.
- */
-static EGLBoolean
-_eglLoadModule(_EGLModule *mod)
-{
- _EGLDriver *drv;
-
- if (mod->Driver)
- return EGL_TRUE;
-
- if (!mod->BuiltIn)
- return EGL_FALSE;
-
- drv = mod->BuiltIn(NULL);
- if (!drv || !drv->Name)
- return EGL_FALSE;
-
- mod->Driver = drv;
-
- return EGL_TRUE;
-}
-
-
-/**
- * Unload a module.
- */
-static void
-_eglUnloadModule(_EGLModule *mod)
-{
- /* destroy the driver */
- if (mod->Driver && mod->Driver->Unload)
- mod->Driver->Unload(mod->Driver);
-
- mod->Driver = NULL;
-}
-
-
-/**
- * Add a module to the module array.
- */
-static _EGLModule *
-_eglAddModule(const char *name)
-{
- _EGLModule *mod;
- EGLint i;
-
- if (!_eglModules) {
- _eglModules = _eglCreateArray("Module", 8);
- if (!_eglModules)
- return NULL;
- }
-
- /* find duplicates */
- for (i = 0; i < _eglModules->Size; i++) {
- mod = _eglModules->Elements[i];
- if (strcmp(mod->Name, name) == 0)
- return mod;
- }
-
- /* allocate a new one */
- mod = calloc(1, sizeof(*mod));
- if (mod) {
- mod->Name = strdup(name);
- if (!mod->Name) {
- free(mod);
- mod = NULL;
- }
- }
- if (mod) {
- _eglAppendArray(_eglModules, (void *) mod);
- _eglLog(_EGL_DEBUG, "added %s to module array", mod->Name);
- }
-
- return mod;
-}
-
-
-/**
- * Free a module.
- */
-static void
-_eglFreeModule(void *module)
-{
- _EGLModule *mod = (_EGLModule *) module;
-
- _eglUnloadModule(mod);
- free(mod->Name);
- free(mod);
-}
-
-
-/**
- * Add the user driver to the module array.
- *
- * The user driver is specified by EGL_DRIVER.
- */
-static EGLBoolean
-_eglAddUserDriver(void)
-{
- char *env;
-
- env = getenv("EGL_DRIVER");
- if (env) {
- EGLint i;
-
- for (i = 0; _eglBuiltInDrivers[i].name; i++) {
- if (!strcmp(_eglBuiltInDrivers[i].name, env)) {
- _EGLModule *mod = _eglAddModule(env);
- if (mod)
- mod->BuiltIn = _eglBuiltInDrivers[i].main;
-
- return EGL_TRUE;
- }
- }
- }
-
- return EGL_FALSE;
-}
-
-
-/**
- * Add built-in drivers to the module array.
- */
-static void
-_eglAddBuiltInDrivers(void)
+static _EGLDriver *
+_eglGetDriver(void)
{
- _EGLModule *mod;
- EGLint i;
-
- for (i = 0; _eglBuiltInDrivers[i].name; i++) {
- mod = _eglAddModule(_eglBuiltInDrivers[i].name);
- if (mod)
- mod->BuiltIn = _eglBuiltInDrivers[i].main;
- }
-}
-
+ mtx_lock(&_eglModuleMutex);
-/**
- * Add drivers to the module array. Drivers will be loaded as they are matched
- * to displays.
- */
-static EGLBoolean
-_eglAddDrivers(void)
-{
- if (_eglModules)
- return EGL_TRUE;
+ if (!_eglDriver)
+ _eglDriver = _eglBuiltInDriver();
- if (!_eglAddUserDriver()) {
- /*
- * Add other drivers only when EGL_DRIVER is not set. The order here
- * decides the priorities.
- */
- _eglAddBuiltInDrivers();
- }
+ mtx_unlock(&_eglModuleMutex);
- return (_eglModules != NULL);
+ return _eglDriver;
}
-
-/**
- * A helper function for _eglMatchDriver. It finds the first driver that can
- * initialize the display and return.
- */
static _EGLDriver *
_eglMatchAndInitialize(_EGLDisplay *dpy)
{
- _EGLDriver *drv = NULL;
- EGLint i = 0;
-
- if (!_eglAddDrivers()) {
- _eglLog(_EGL_WARNING, "failed to find any driver");
- return NULL;
- }
-
- if (dpy->Driver) {
- drv = dpy->Driver;
- /* no re-matching? */
- if (!drv->API.Initialize(drv, dpy))
- drv = NULL;
- return drv;
- }
-
- while (i < _eglModules->Size) {
- _EGLModule *mod = (_EGLModule *) _eglModules->Elements[i];
+ if (_eglGetDriver())
+ if (_eglDriver->API.Initialize(_eglDriver, dpy))
+ return _eglDriver;
- if (!_eglLoadModule(mod)) {
- /* remove invalid modules */
- _eglEraseArray(_eglModules, i, _eglFreeModule);
- continue;
- }
-
- if (mod->Driver->API.Initialize(mod->Driver, dpy)) {
- drv = mod->Driver;
- break;
- }
- else {
- i++;
- }
- }
-
- return drv;
+ return NULL;
}
-
/**
- * Match a display to a driver. The display is initialized unless test_only is
- * true. The matching is done by finding the first driver that can initialize
- * the display.
+ * Match a display to a driver. The matching is done by finding the first
+ * driver that can initialize the display.
*/
_EGLDriver *
-_eglMatchDriver(_EGLDisplay *dpy, EGLBoolean test_only)
+_eglMatchDriver(_EGLDisplay *dpy)
{
_EGLDriver *best_drv;
assert(!dpy->Initialized);
- mtx_lock(&_eglModuleMutex);
-
/* set options */
- dpy->Options.TestOnly = test_only;
- dpy->Options.UseFallback = EGL_FALSE;
+ dpy->Options.UseFallback =
+ env_var_as_boolean("LIBGL_ALWAYS_SOFTWARE", false);
best_drv = _eglMatchAndInitialize(dpy);
if (!best_drv) {
@@ -295,49 +93,25 @@
best_drv = _eglMatchAndInitialize(dpy);
}
- mtx_unlock(&_eglModuleMutex);
-
if (best_drv) {
- _eglLog(_EGL_DEBUG, "the best driver is %s%s",
- best_drv->Name, (test_only) ? " (test only) " : "");
- if (!test_only) {
- dpy->Driver = best_drv;
- dpy->Initialized = EGL_TRUE;
- }
+ _eglLog(_EGL_DEBUG, "the best driver is %s",
+ best_drv->Name);
+ dpy->Driver = best_drv;
+ dpy->Initialized = EGL_TRUE;
}
return best_drv;
}
-
__eglMustCastToProperFunctionPointerType
_eglGetDriverProc(const char *procname)
{
- EGLint i;
- _EGLProc proc = NULL;
-
- if (!_eglModules) {
- /* load the driver for the default display */
- EGLDisplay egldpy = eglGetDisplay(EGL_DEFAULT_DISPLAY);
- _EGLDisplay *dpy = _eglLookupDisplay(egldpy);
- if (!dpy || !_eglMatchDriver(dpy, EGL_TRUE))
- return NULL;
- }
+ if (_eglGetDriver())
+ return _eglDriver->API.GetProcAddress(_eglDriver, procname);
- for (i = 0; i < _eglModules->Size; i++) {
- _EGLModule *mod = (_EGLModule *) _eglModules->Elements[i];
-
- if (!mod->Driver)
- break;
- proc = mod->Driver->API.GetProcAddress(mod->Driver, procname);
- if (proc)
- break;
- }
-
- return proc;
+ return NULL;
}
-
/**
* Unload all drivers.
*/
@@ -345,8 +119,6 @@
_eglUnloadDrivers(void)
{
/* this is called at atexit time */
- if (_eglModules) {
- _eglDestroyArray(_eglModules, _eglFreeModule);
- _eglModules = NULL;
- }
+ free(_eglDriver);
+ _eglDriver = NULL;
}
diff -Nru mesa-17.2.4/src/egl/main/egldriver.h mesa-17.3.3/src/egl/main/egldriver.h
--- mesa-17.2.4/src/egl/main/egldriver.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/egl/main/egldriver.h 2018-01-18 21:30:28.000000000 +0000
@@ -70,9 +70,6 @@
_EGL_DRIVER_TYPECAST(drvname ## _config, _EGLConfig, obj)
-typedef _EGLDriver *(*_EGLMain_t)(const char *args);
-
-
/**
* Base class for device drivers.
*/
@@ -80,27 +77,16 @@
{
const char *Name; /**< name of this driver */
- /**
- * Release the driver resource.
- *
- * It is called before dlclose().
- */
- void (*Unload)(_EGLDriver *drv);
-
_EGLAPI API; /**< EGL API dispatch table */
};
-extern _EGLDriver *
-_eglBuiltInDriverDRI2(const char *args);
-
-
extern _EGLDriver*
-_eglBuiltInDriverHaiku(const char* args);
+_eglBuiltInDriver(void);
extern _EGLDriver *
-_eglMatchDriver(_EGLDisplay *dpy, EGLBoolean test_only);
+_eglMatchDriver(_EGLDisplay *dpy);
extern __eglMustCastToProperFunctionPointerType
diff -Nru mesa-17.2.4/src/egl/main/eglfallbacks.c mesa-17.3.3/src/egl/main/eglfallbacks.c
--- mesa-17.2.4/src/egl/main/eglfallbacks.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/egl/main/eglfallbacks.c 2018-01-18 21:30:28.000000000 +0000
@@ -49,8 +49,6 @@
void
_eglInitDriverFallbacks(_EGLDriver *drv)
{
- memset(&drv->API, 0, sizeof(drv->API));
-
/* the driver has to implement these */
drv->API.Initialize = NULL;
drv->API.Terminate = NULL;
diff -Nru mesa-17.2.4/src/egl/main/eglglobals.c mesa-17.3.3/src/egl/main/eglglobals.c
--- mesa-17.2.4/src/egl/main/eglglobals.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/egl/main/eglglobals.c 2018-01-18 21:30:28.000000000 +0000
@@ -49,22 +49,22 @@
struct _egl_global _eglGlobal =
{
- &_eglGlobalMutex, /* Mutex */
- NULL, /* DisplayList */
- 2, /* NumAtExitCalls */
- {
+ .Mutex = &_eglGlobalMutex,
+ .DisplayList = NULL,
+ .NumAtExitCalls = 2,
+ .AtExitCalls = {
/* default AtExitCalls, called in reverse order */
_eglUnloadDrivers, /* always called last */
_eglFiniDisplay
},
- /* ClientOnlyExtensionString */
+ .ClientOnlyExtensionString =
"EGL_EXT_client_extensions"
" EGL_EXT_platform_base"
" EGL_KHR_client_get_all_proc_addresses"
" EGL_KHR_debug",
- /* PlatformExtensionString */
+ .PlatformExtensionString =
#ifdef HAVE_WAYLAND_PLATFORM
" EGL_EXT_platform_wayland"
#endif
@@ -79,10 +79,10 @@
#endif
"",
- NULL, /* ClientExtensionsString */
+ .ClientExtensionString = NULL,
- NULL, /* debugCallback */
- _EGL_DEBUG_BIT_CRITICAL | _EGL_DEBUG_BIT_ERROR, /* debugTypesEnabled */
+ .debugCallback = NULL,
+ .debugTypesEnabled = _EGL_DEBUG_BIT_CRITICAL | _EGL_DEBUG_BIT_ERROR,
};
diff -Nru mesa-17.2.4/src/egl/main/eglimage.c mesa-17.3.3/src/egl/main/eglimage.c
--- mesa-17.2.4/src/egl/main/eglimage.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/egl/main/eglimage.c 2018-01-18 21:30:28.000000000 +0000
@@ -41,7 +41,7 @@
switch (attr) {
case EGL_IMAGE_PRESERVED_KHR:
if (!dpy->Extensions.KHR_image_base)
- return EGL_BAD_PARAMETER;
+ return EGL_BAD_PARAMETER;
attrs->ImagePreserved = val;
break;
@@ -144,9 +144,9 @@
attrs->DMABufPlanePitches[0].IsPresent = EGL_TRUE;
break;
case EGL_DMA_BUF_PLANE1_FD_EXT:
- attrs->DMABufPlaneFds[1].Value = val;
- attrs->DMABufPlaneFds[1].IsPresent = EGL_TRUE;
- break;
+ attrs->DMABufPlaneFds[1].Value = val;
+ attrs->DMABufPlaneFds[1].IsPresent = EGL_TRUE;
+ break;
case EGL_DMA_BUF_PLANE1_OFFSET_EXT:
attrs->DMABufPlaneOffsets[1].Value = val;
attrs->DMABufPlaneOffsets[1].IsPresent = EGL_TRUE;
@@ -288,30 +288,30 @@
err = _eglParseKHRImageAttribs(attrs, dpy, attr, val);
if (err == EGL_SUCCESS)
- continue;
+ continue;
err = _eglParseMESADrmImageAttribs(attrs, dpy, attr, val);
if (err == EGL_SUCCESS)
- continue;
+ continue;
err = _eglParseWLBindWaylandDisplayAttribs(attrs, dpy, attr, val);
if (err == EGL_SUCCESS)
- continue;
+ continue;
err = _eglParseEXTImageDmaBufImportAttribs(attrs, dpy, attr, val);
if (err == EGL_SUCCESS)
- continue;
+ continue;
/* EXT_image_dma_buf_import states that if invalid value is provided for
* its attributes, we should return EGL_BAD_ATTRIBUTE.
* Bail out ASAP, since follow-up calls can return another EGL_BAD error.
*/
if (err == EGL_BAD_ATTRIBUTE)
- return _eglError(err, __func__);
+ return _eglError(err, __func__);
err = _eglParseEXTImageDmaBufImportModifiersAttribs(attrs, dpy, attr, val);
if (err == EGL_SUCCESS)
- continue;
+ continue;
return _eglError(err, __func__);
}
diff -Nru mesa-17.2.4/src/egl/main/eglsurface.c mesa-17.3.3/src/egl/main/eglsurface.c
--- mesa-17.2.4/src/egl/main/eglsurface.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/egl/main/eglsurface.c 2018-01-18 21:30:28.000000000 +0000
@@ -45,22 +45,6 @@
#include "eglsurface.h"
-static void
-_eglClampSwapInterval(_EGLSurface *surf, EGLint interval)
-{
- EGLint bound = surf->Config->MaxSwapInterval;
- if (interval >= bound) {
- interval = bound;
- }
- else {
- bound = surf->Config->MinSwapInterval;
- if (interval < bound)
- interval = bound;
- }
- surf->SwapInterval = interval;
-}
-
-
/**
* Parse the list of surface attributes and return the proper error code.
*/
@@ -319,7 +303,7 @@
surf->BufferAgeRead = EGL_FALSE;
/* the default swap interval is 1 */
- _eglClampSwapInterval(surf, 1);
+ surf->SwapInterval = 1;
err = _eglParseSurfaceAttribList(surf, attrib_list);
if (err != EGL_SUCCESS)
@@ -565,6 +549,5 @@
_eglSwapInterval(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surf,
EGLint interval)
{
- _eglClampSwapInterval(surf, interval);
return EGL_TRUE;
}
diff -Nru mesa-17.2.4/src/egl/Makefile.am mesa-17.3.3/src/egl/Makefile.am
--- mesa-17.2.4/src/egl/Makefile.am 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/egl/Makefile.am 2018-01-18 21:30:28.000000000 +0000
@@ -27,6 +27,7 @@
AM_CFLAGS = \
-I$(top_srcdir)/include \
+ -I$(top_srcdir)/src/mapi \
-I$(top_srcdir)/src/egl/main \
-I$(top_srcdir)/src/gbm/main \
-I$(top_srcdir)/src \
@@ -45,6 +46,7 @@
$(LIBEGL_C_FILES)
libEGL_common_la_LIBADD = \
+ $(top_builddir)/src/util/libmesautil.la \
$(EGL_LIB_DEPS)
dri2_backend_FILES =
@@ -78,11 +80,12 @@
drivers/dri2/linux-dmabuf-unstable-v1-protocol.lo: drivers/dri2/linux-dmabuf-unstable-v1-client-protocol.h
drivers/dri2/egl_dri2.lo: drivers/dri2/linux-dmabuf-unstable-v1-client-protocol.h
-AM_CFLAGS += $(WAYLAND_CFLAGS)
-libEGL_common_la_LIBADD += $(WAYLAND_LIBS)
+AM_CFLAGS += $(WAYLAND_CLIENT_CFLAGS)
+libEGL_common_la_LIBADD += $(WAYLAND_CLIENT_LIBS)
libEGL_common_la_LIBADD += $(LIBDRM_LIBS)
+AM_CFLAGS += $(WAYLAND_SERVER_CFLAGS)
libEGL_common_la_LIBADD += $(top_builddir)/src/egl/wayland/wayland-drm/libwayland-drm.la
-libEGL_common_la_LIBADD += $(top_builddir)/src/util/libmesautil.la
+libEGL_common_la_LIBADD += $(WAYLAND_SERVER_LIBS)
dri2_backend_FILES += \
drivers/dri2/platform_wayland.c \
drivers/dri2/linux-dmabuf-unstable-v1-protocol.c \
@@ -120,8 +123,12 @@
$(dri2_backend_FILES) \
$(dri3_backend_FILES)
-libEGL_common_la_LIBADD += $(top_builddir)/src/loader/libloader.la
-libEGL_common_la_LIBADD += $(DLOPEN_LIBS) $(LIBDRM_LIBS) $(CLOCK_LIB)
+libEGL_common_la_LIBADD += \
+ $(top_builddir)/src/loader/libloader.la \
+ $(top_builddir)/src/util/libxmlconfig.la \
+ $(DLOPEN_LIBS) \
+ $(LIBDRM_LIBS) \
+ $(CLOCK_LIB)
GLVND_GEN_DEPS = generate/gen_egl_dispatch.py \
generate/egl.xml generate/eglFunctionList.py generate/genCommon.py \
@@ -157,7 +164,9 @@
main/egldispatchstubs.c \
g_egldispatchstubs.c \
g_egldispatchstubs.h
-libEGL_mesa_la_LIBADD = libEGL_common.la
+libEGL_mesa_la_LIBADD = \
+ libEGL_common.la \
+ $(top_builddir)/src/mapi/shared-glapi/libglapi.la
libEGL_mesa_la_LDFLAGS = \
-no-undefined \
-version-number 0 \
@@ -169,7 +178,9 @@
lib_LTLIBRARIES = libEGL.la
libEGL_la_SOURCES =
-libEGL_la_LIBADD = libEGL_common.la
+libEGL_la_LIBADD = \
+ libEGL_common.la \
+ $(top_builddir)/src/mapi/shared-glapi/libglapi.la
libEGL_la_LDFLAGS = \
-no-undefined \
-version-number 1:0 \
diff -Nru mesa-17.2.4/src/egl/Makefile.in mesa-17.3.3/src/egl/Makefile.in
--- mesa-17.2.4/src/egl/Makefile.in 2017-10-30 14:49:59.000000000 +0000
+++ mesa-17.3.3/src/egl/Makefile.in 2018-01-18 21:30:39.000000000 +0000
@@ -123,11 +123,12 @@
@HAVE_DRI3_TRUE@@HAVE_PLATFORM_X11_TRUE@ drivers/dri2/platform_x11_dri3.h
@HAVE_DRI3_TRUE@@HAVE_PLATFORM_X11_TRUE@am__append_5 = $(top_builddir)/src/loader/libloader_dri3_helper.la
-@HAVE_PLATFORM_WAYLAND_TRUE@am__append_6 = $(WAYLAND_CFLAGS)
-@HAVE_PLATFORM_WAYLAND_TRUE@am__append_7 = $(WAYLAND_LIBS) \
+@HAVE_PLATFORM_WAYLAND_TRUE@am__append_6 = $(WAYLAND_CLIENT_CFLAGS) \
+@HAVE_PLATFORM_WAYLAND_TRUE@ $(WAYLAND_SERVER_CFLAGS)
+@HAVE_PLATFORM_WAYLAND_TRUE@am__append_7 = $(WAYLAND_CLIENT_LIBS) \
@HAVE_PLATFORM_WAYLAND_TRUE@ $(LIBDRM_LIBS) \
@HAVE_PLATFORM_WAYLAND_TRUE@ $(top_builddir)/src/egl/wayland/wayland-drm/libwayland-drm.la \
-@HAVE_PLATFORM_WAYLAND_TRUE@ $(top_builddir)/src/util/libmesautil.la
+@HAVE_PLATFORM_WAYLAND_TRUE@ $(WAYLAND_SERVER_LIBS)
@HAVE_PLATFORM_WAYLAND_TRUE@am__append_8 = \
@HAVE_PLATFORM_WAYLAND_TRUE@ drivers/dri2/platform_wayland.c \
@HAVE_PLATFORM_WAYLAND_TRUE@ drivers/dri2/linux-dmabuf-unstable-v1-protocol.c \
@@ -144,7 +145,8 @@
subdir = src/egl
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_gnu_make.m4 \
+am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_compile_flag.m4 \
+ $(top_srcdir)/m4/ax_check_gnu_make.m4 \
$(top_srcdir)/m4/ax_check_python_mako_module.m4 \
$(top_srcdir)/m4/ax_gcc_builtin.m4 \
$(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
@@ -190,7 +192,8 @@
am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(pkgconfigdir)" \
"$(DESTDIR)$(vendorjsondir)" "$(DESTDIR)$(egldir)"
LTLIBRARIES = $(lib_LTLIBRARIES) $(noinst_LTLIBRARIES)
-@USE_LIBGLVND_FALSE@libEGL_la_DEPENDENCIES = libEGL_common.la
+@USE_LIBGLVND_FALSE@libEGL_la_DEPENDENCIES = libEGL_common.la \
+@USE_LIBGLVND_FALSE@ $(top_builddir)/src/mapi/shared-glapi/libglapi.la
am_libEGL_la_OBJECTS =
libEGL_la_OBJECTS = $(am_libEGL_la_OBJECTS)
AM_V_lt = $(am__v_lt_@AM_V@)
@@ -207,13 +210,15 @@
@HAVE_PLATFORM_WAYLAND_TRUE@ $(am__DEPENDENCIES_1) \
@HAVE_PLATFORM_WAYLAND_TRUE@ $(am__DEPENDENCIES_1) \
@HAVE_PLATFORM_WAYLAND_TRUE@ $(top_builddir)/src/egl/wayland/wayland-drm/libwayland-drm.la \
-@HAVE_PLATFORM_WAYLAND_TRUE@ $(top_builddir)/src/util/libmesautil.la
+@HAVE_PLATFORM_WAYLAND_TRUE@ $(am__DEPENDENCIES_1)
@HAVE_PLATFORM_ANDROID_TRUE@am__DEPENDENCIES_4 = \
@HAVE_PLATFORM_ANDROID_TRUE@ $(am__DEPENDENCIES_1)
-libEGL_common_la_DEPENDENCIES = $(am__DEPENDENCIES_1) \
+libEGL_common_la_DEPENDENCIES = \
+ $(top_builddir)/src/util/libmesautil.la $(am__DEPENDENCIES_1) \
$(am__DEPENDENCIES_2) $(am__append_5) $(am__DEPENDENCIES_3) \
$(am__append_9) $(am__DEPENDENCIES_4) \
- $(top_builddir)/src/loader/libloader.la $(am__DEPENDENCIES_1) \
+ $(top_builddir)/src/loader/libloader.la \
+ $(top_builddir)/src/util/libxmlconfig.la $(am__DEPENDENCIES_1) \
$(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1)
am__libEGL_common_la_SOURCES_DIST = main/eglapi.c main/eglapi.h \
main/eglarray.c main/eglarray.h main/eglconfig.c \
@@ -257,7 +262,8 @@
am_libEGL_common_la_OBJECTS = $(am__objects_1) $(am__objects_2) \
$(am__objects_8) $(am__objects_10)
libEGL_common_la_OBJECTS = $(am_libEGL_common_la_OBJECTS)
-@USE_LIBGLVND_TRUE@libEGL_mesa_la_DEPENDENCIES = libEGL_common.la
+@USE_LIBGLVND_TRUE@libEGL_mesa_la_DEPENDENCIES = libEGL_common.la \
+@USE_LIBGLVND_TRUE@ $(top_builddir)/src/mapi/shared-glapi/libglapi.la
am__libEGL_mesa_la_SOURCES_DIST = main/eglglvnd.c \
main/egldispatchstubs.h main/egldispatchstubs.c \
g_egldispatchstubs.c g_egldispatchstubs.h
@@ -654,9 +660,9 @@
NVVIEUX_LIBS = @NVVIEUX_LIBS@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
-OMX_CFLAGS = @OMX_CFLAGS@
-OMX_LIBS = @OMX_LIBS@
-OMX_LIB_INSTALL_DIR = @OMX_LIB_INSTALL_DIR@
+OMX_BELLAGIO_CFLAGS = @OMX_BELLAGIO_CFLAGS@
+OMX_BELLAGIO_LIBS = @OMX_BELLAGIO_LIBS@
+OMX_BELLAGIO_LIB_INSTALL_DIR = @OMX_BELLAGIO_LIB_INSTALL_DIR@
OPENCL_LIBNAME = @OPENCL_LIBNAME@
OPENCL_VERSION = @OPENCL_VERSION@
OSMESA_LIB = @OSMESA_LIB@
@@ -710,6 +716,8 @@
VA_LIB_INSTALL_DIR = @VA_LIB_INSTALL_DIR@
VA_MAJOR = @VA_MAJOR@
VA_MINOR = @VA_MINOR@
+VC5_SIMULATOR_CFLAGS = @VC5_SIMULATOR_CFLAGS@
+VC5_SIMULATOR_LIBS = @VC5_SIMULATOR_LIBS@
VDPAU_CFLAGS = @VDPAU_CFLAGS@
VDPAU_LIBS = @VDPAU_LIBS@
VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@
@@ -721,12 +729,15 @@
VL_CFLAGS = @VL_CFLAGS@
VL_LIBS = @VL_LIBS@
VULKAN_ICD_INSTALL_DIR = @VULKAN_ICD_INSTALL_DIR@
-WAYLAND_CFLAGS = @WAYLAND_CFLAGS@
-WAYLAND_LIBS = @WAYLAND_LIBS@
+WAYLAND_CLIENT_CFLAGS = @WAYLAND_CLIENT_CFLAGS@
+WAYLAND_CLIENT_LIBS = @WAYLAND_CLIENT_LIBS@
WAYLAND_PROTOCOLS_DATADIR = @WAYLAND_PROTOCOLS_DATADIR@
WAYLAND_SCANNER = @WAYLAND_SCANNER@
WAYLAND_SCANNER_CFLAGS = @WAYLAND_SCANNER_CFLAGS@
WAYLAND_SCANNER_LIBS = @WAYLAND_SCANNER_LIBS@
+WAYLAND_SERVER_CFLAGS = @WAYLAND_SERVER_CFLAGS@
+WAYLAND_SERVER_LIBS = @WAYLAND_SERVER_LIBS@
+WNO_OVERRIDE_INIT = @WNO_OVERRIDE_INIT@
X11_INCLUDES = @X11_INCLUDES@
XA_MAJOR = @XA_MAJOR@
XA_MINOR = @XA_MINOR@
@@ -847,9 +858,10 @@
MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
BUILT_SOURCES = g_egldispatchstubs.c g_egldispatchstubs.h
-AM_CFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/src/egl/main \
- -I$(top_srcdir)/src/gbm/main -I$(top_srcdir)/src $(DEFINES) \
- $(VISIBILITY_CFLAGS) $(LIBDRM_CFLAGS) $(EGL_CFLAGS) \
+AM_CFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/src/mapi \
+ -I$(top_srcdir)/src/egl/main -I$(top_srcdir)/src/gbm/main \
+ -I$(top_srcdir)/src $(DEFINES) $(VISIBILITY_CFLAGS) \
+ $(LIBDRM_CFLAGS) $(EGL_CFLAGS) \
-D_EGL_NATIVE_PLATFORM=$(EGL_NATIVE_PLATFORM) $(am__append_1) \
$(am__append_6) $(am__append_12) -I$(top_srcdir)/src/loader \
-I$(top_builddir)/src/egl/drivers/dri2 \
@@ -869,10 +881,12 @@
libEGL_common_la_SOURCES = $(LIBEGL_C_FILES) \
$(dri2_backend_core_FILES) $(dri2_backend_FILES) \
$(dri3_backend_FILES)
-libEGL_common_la_LIBADD = $(EGL_LIB_DEPS) $(am__append_2) \
- $(am__append_5) $(am__append_7) $(am__append_9) \
- $(am__append_13) $(top_builddir)/src/loader/libloader.la \
- $(DLOPEN_LIBS) $(LIBDRM_LIBS) $(CLOCK_LIB)
+libEGL_common_la_LIBADD = $(top_builddir)/src/util/libmesautil.la \
+ $(EGL_LIB_DEPS) $(am__append_2) $(am__append_5) \
+ $(am__append_7) $(am__append_9) $(am__append_13) \
+ $(top_builddir)/src/loader/libloader.la \
+ $(top_builddir)/src/util/libxmlconfig.la $(DLOPEN_LIBS) \
+ $(LIBDRM_LIBS) $(CLOCK_LIB)
dri2_backend_FILES = $(am__append_3) $(am__append_8) $(am__append_10) \
$(am__append_11) $(am__append_14)
dri3_backend_FILES = $(am__append_4)
@@ -894,7 +908,10 @@
@USE_LIBGLVND_TRUE@ g_egldispatchstubs.c \
@USE_LIBGLVND_TRUE@ g_egldispatchstubs.h
-@USE_LIBGLVND_TRUE@libEGL_mesa_la_LIBADD = libEGL_common.la
+@USE_LIBGLVND_TRUE@libEGL_mesa_la_LIBADD = \
+@USE_LIBGLVND_TRUE@ libEGL_common.la \
+@USE_LIBGLVND_TRUE@ $(top_builddir)/src/mapi/shared-glapi/libglapi.la
+
@USE_LIBGLVND_TRUE@libEGL_mesa_la_LDFLAGS = \
@USE_LIBGLVND_TRUE@ -no-undefined \
@USE_LIBGLVND_TRUE@ -version-number 0 \
@@ -903,7 +920,10 @@
@USE_LIBGLVND_TRUE@ $(LD_NO_UNDEFINED)
@USE_LIBGLVND_FALSE@libEGL_la_SOURCES =
-@USE_LIBGLVND_FALSE@libEGL_la_LIBADD = libEGL_common.la
+@USE_LIBGLVND_FALSE@libEGL_la_LIBADD = \
+@USE_LIBGLVND_FALSE@ libEGL_common.la \
+@USE_LIBGLVND_FALSE@ $(top_builddir)/src/mapi/shared-glapi/libglapi.la
+
@USE_LIBGLVND_FALSE@libEGL_la_LDFLAGS = \
@USE_LIBGLVND_FALSE@ -no-undefined \
@USE_LIBGLVND_FALSE@ -version-number 1:0 \
diff -Nru mesa-17.2.4/src/egl/meson.build mesa-17.3.3/src/egl/meson.build
--- mesa-17.2.4/src/egl/meson.build 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/egl/meson.build 2018-02-01 16:17:32.000000000 +0000
@@ -0,0 +1,185 @@
+# Copyright © 2017 Intel Corporation
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+c_args_for_egl = []
+link_for_egl = []
+deps_for_egl = []
+incs_for_egl = [
+ inc_include, inc_src, inc_loader, inc_gbm, include_directories('main'),
+]
+files_egl = files(
+ 'main/eglapi.c',
+ 'main/eglapi.h',
+ 'main/eglarray.c',
+ 'main/eglarray.h',
+ 'main/eglconfig.c',
+ 'main/eglconfig.h',
+ 'main/eglcontext.c',
+ 'main/eglcontext.h',
+ 'main/eglcurrent.c',
+ 'main/eglcurrent.h',
+ 'main/egldefines.h',
+ 'main/egldisplay.c',
+ 'main/egldisplay.h',
+ 'main/egldriver.c',
+ 'main/egldriver.h',
+ 'main/eglfallbacks.c',
+ 'main/eglglobals.c',
+ 'main/eglglobals.h',
+ 'main/eglimage.c',
+ 'main/eglimage.h',
+ 'main/egllog.c',
+ 'main/egllog.h',
+ 'main/eglsurface.c',
+ 'main/eglsurface.h',
+ 'main/eglsync.c',
+ 'main/eglsync.h',
+ 'main/eglentrypoint.h',
+ 'main/egltypedefs.h',
+ 'drivers/dri2/egl_dri2.c',
+ 'drivers/dri2/egl_dri2.h',
+ 'drivers/dri2/egl_dri2_fallbacks.h',
+)
+
+linux_dmabuf_unstable_v1_protocol_c = custom_target(
+ 'linux-dmabuf-unstable-v1-protocol.c',
+ input : wayland_dmabuf_xml,
+ output : 'linux-dmabuf-unstable-v1-protocol.c',
+ command : [prog_wl_scanner, 'code', '@INPUT@', '@OUTPUT@'],
+)
+
+linux_dmabuf_unstable_v1_client_protocol_h = custom_target(
+ 'linux-dmabuf-unstable-v1-client-protocol.h',
+ input : wayland_dmabuf_xml,
+ output : 'linux-dmabuf-unstable-v1-client-protocol.h',
+ command : [prog_wl_scanner, 'client-header', '@INPUT@', '@OUTPUT@'],
+)
+
+g_egldispatchstubs_c = custom_target(
+ 'g_egldispatchstubs.c',
+ input : [
+ 'generate/gen_egl_dispatch.py', 'generate/eglFunctionList.py',
+ 'generate/egl.xml', 'generate/egl_other.xml'
+ ],
+ output : 'g_egldispatchstubs.c',
+ command : [
+ prog_python2, '@INPUT0@', 'source', '@INPUT1@', '@INPUT2@', '@INPUT3@'
+ ],
+ depend_files : files('generate/genCommon.py'),
+ capture : true,
+)
+
+g_egldispatchstubs_h = custom_target(
+ 'g_egldispatchstubs.h',
+ input : [
+ 'generate/gen_egl_dispatch.py', 'generate/eglFunctionList.py',
+ 'generate/egl.xml', 'generate/egl_other.xml'
+ ],
+ output : 'g_egldispatchstubs.h',
+ command : [
+ prog_python2, '@INPUT0@', 'header', '@INPUT1@', '@INPUT2@', '@INPUT3@'
+ ],
+ depend_files : files('generate/genCommon.py'),
+ capture : true,
+)
+
+if with_platform_x11
+ files_egl += files('drivers/dri2/platform_x11.c')
+ if with_dri3
+ files_egl += files('drivers/dri2/platform_x11_dri3.c')
+ link_for_egl += libloader_dri3_helper
+ endif
+ deps_for_egl += [dep_xcb_dri2, dep_xcb_xfixes]
+endif
+if with_platform_drm
+ files_egl += files('drivers/dri2/platform_drm.c')
+ link_for_egl += libgbm
+ incs_for_egl += include_directories('../gbm/main')
+endif
+if with_platform_surfaceless
+ files_egl += files('drivers/dri2/platform_surfaceless.c')
+endif
+if with_platform_wayland
+ deps_for_egl += [dep_wayland_client, dep_wayland_server]
+ link_for_egl += libwayland_drm
+ files_egl += files('drivers/dri2/platform_wayland.c')
+ files_egl += [
+ linux_dmabuf_unstable_v1_protocol_c,
+ linux_dmabuf_unstable_v1_client_protocol_h,
+ wayland_drm_client_protocol_h,
+ ]
+ incs_for_egl += include_directories(
+ 'wayland/wayland-egl', 'wayland/wayland-drm',
+ )
+endif
+# TODO: android
+
+# TODO: glvnd
+
+if cc.has_function('mincore')
+ c_args_for_egl += '-DHAVE_MINCORE'
+endif
+
+if not with_glvnd
+ egl_lib_name = 'EGL'
+ egl_lib_version = '1.0.0'
+else
+ egl_lib_name = 'EGL_mesa'
+ egl_lib_version = '0'
+ files_egl += [g_egldispatchstubs_h, g_egldispatchstubs_c]
+ files_egl += files('main/eglglvnd.c', 'main/egldispatchstubs.c')
+ install_data(
+ 'main/50_mesa.json',
+ install_dir : join_paths(get_option('datadir'), 'glvnd', 'egl_vendor.d')
+ )
+endif
+
+libegl = shared_library(
+ egl_lib_name,
+ files_egl,
+ c_args : [
+ c_vis_args,
+ c_args_for_egl,
+ '-DDEFAULT_DRIVER_DIR="@0@"'.format(dri_driver_dir),
+ '-D_EGL_BUILT_IN_DRIVER_DRI2',
+ '-D_EGL_NATIVE_PLATFORM=_EGL_PLATFORM_@0@'.format(egl_native_platform.to_upper()),
+ ],
+ include_directories : incs_for_egl,
+ link_with : [link_for_egl, libloader, libxmlconfig, libglapi, libmesa_util],
+ link_args : [ld_args_bsymbolic, ld_args_gc_sections],
+ dependencies : [deps_for_egl, dep_dl, dep_libdrm, dep_clock, dep_thread],
+ install : true,
+ version : egl_lib_version,
+)
+
+pkg.generate(
+ name : 'egl',
+ description : 'Mesa EGL Library',
+ version : meson.project_version(),
+ libraries : libegl,
+ libraries_private: gl_priv_libs,
+ requires_private : gl_priv_reqs,
+ extra_cflags : gl_pkgconfig_c_flags,
+)
+
+if with_tests
+ test('egl-symbols-check', find_program('egl-symbols-check'))
+ test('egl-entrypoint-check', find_program('egl-entrypoint-check'))
+endif
diff -Nru mesa-17.2.4/src/egl/SConscript mesa-17.3.3/src/egl/SConscript
--- mesa-17.2.4/src/egl/SConscript 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/egl/SConscript 2018-01-18 21:30:28.000000000 +0000
@@ -24,6 +24,8 @@
])
egl_sources.append('drivers/haiku/egl_haiku.cpp')
+env.Prepend(LIBS = [mesautil])
+
egl = env.SharedLibrary(
target = 'EGL',
source = egl_sources,
diff -Nru mesa-17.2.4/src/egl/wayland/wayland-drm/Makefile.am mesa-17.3.3/src/egl/wayland/wayland-drm/Makefile.am
--- mesa-17.2.4/src/egl/wayland/wayland-drm/Makefile.am 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/egl/wayland/wayland-drm/Makefile.am 2018-01-18 21:30:28.000000000 +0000
@@ -2,7 +2,7 @@
-I$(top_srcdir)/include \
$(DEFINES) \
$(VISIBILITY_CFLAGS) \
- $(WAYLAND_CFLAGS)
+ $(WAYLAND_SERVER_CFLAGS)
noinst_LTLIBRARIES = libwayland-drm.la
libwayland_drm_la_SOURCES = \
diff -Nru mesa-17.2.4/src/egl/wayland/wayland-drm/Makefile.in mesa-17.3.3/src/egl/wayland/wayland-drm/Makefile.in
--- mesa-17.2.4/src/egl/wayland/wayland-drm/Makefile.in 2017-10-30 14:49:59.000000000 +0000
+++ mesa-17.3.3/src/egl/wayland/wayland-drm/Makefile.in 2018-01-18 21:30:39.000000000 +0000
@@ -92,7 +92,8 @@
target_triplet = @target@
subdir = src/egl/wayland/wayland-drm
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_gnu_make.m4 \
+am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_compile_flag.m4 \
+ $(top_srcdir)/m4/ax_check_gnu_make.m4 \
$(top_srcdir)/m4/ax_check_python_mako_module.m4 \
$(top_srcdir)/m4/ax_gcc_builtin.m4 \
$(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
@@ -318,9 +319,9 @@
NVVIEUX_LIBS = @NVVIEUX_LIBS@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
-OMX_CFLAGS = @OMX_CFLAGS@
-OMX_LIBS = @OMX_LIBS@
-OMX_LIB_INSTALL_DIR = @OMX_LIB_INSTALL_DIR@
+OMX_BELLAGIO_CFLAGS = @OMX_BELLAGIO_CFLAGS@
+OMX_BELLAGIO_LIBS = @OMX_BELLAGIO_LIBS@
+OMX_BELLAGIO_LIB_INSTALL_DIR = @OMX_BELLAGIO_LIB_INSTALL_DIR@
OPENCL_LIBNAME = @OPENCL_LIBNAME@
OPENCL_VERSION = @OPENCL_VERSION@
OSMESA_LIB = @OSMESA_LIB@
@@ -374,6 +375,8 @@
VA_LIB_INSTALL_DIR = @VA_LIB_INSTALL_DIR@
VA_MAJOR = @VA_MAJOR@
VA_MINOR = @VA_MINOR@
+VC5_SIMULATOR_CFLAGS = @VC5_SIMULATOR_CFLAGS@
+VC5_SIMULATOR_LIBS = @VC5_SIMULATOR_LIBS@
VDPAU_CFLAGS = @VDPAU_CFLAGS@
VDPAU_LIBS = @VDPAU_LIBS@
VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@
@@ -385,12 +388,15 @@
VL_CFLAGS = @VL_CFLAGS@
VL_LIBS = @VL_LIBS@
VULKAN_ICD_INSTALL_DIR = @VULKAN_ICD_INSTALL_DIR@
-WAYLAND_CFLAGS = @WAYLAND_CFLAGS@
-WAYLAND_LIBS = @WAYLAND_LIBS@
+WAYLAND_CLIENT_CFLAGS = @WAYLAND_CLIENT_CFLAGS@
+WAYLAND_CLIENT_LIBS = @WAYLAND_CLIENT_LIBS@
WAYLAND_PROTOCOLS_DATADIR = @WAYLAND_PROTOCOLS_DATADIR@
WAYLAND_SCANNER = @WAYLAND_SCANNER@
WAYLAND_SCANNER_CFLAGS = @WAYLAND_SCANNER_CFLAGS@
WAYLAND_SCANNER_LIBS = @WAYLAND_SCANNER_LIBS@
+WAYLAND_SERVER_CFLAGS = @WAYLAND_SERVER_CFLAGS@
+WAYLAND_SERVER_LIBS = @WAYLAND_SERVER_LIBS@
+WNO_OVERRIDE_INIT = @WNO_OVERRIDE_INIT@
X11_INCLUDES = @X11_INCLUDES@
XA_MAJOR = @XA_MAJOR@
XA_MINOR = @XA_MINOR@
@@ -478,7 +484,7 @@
-I$(top_srcdir)/include \
$(DEFINES) \
$(VISIBILITY_CFLAGS) \
- $(WAYLAND_CFLAGS)
+ $(WAYLAND_SERVER_CFLAGS)
noinst_LTLIBRARIES = libwayland-drm.la
libwayland_drm_la_SOURCES = \
diff -Nru mesa-17.2.4/src/egl/wayland/wayland-drm/meson.build mesa-17.3.3/src/egl/wayland/wayland-drm/meson.build
--- mesa-17.2.4/src/egl/wayland/wayland-drm/meson.build 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/egl/wayland/wayland-drm/meson.build 2018-02-01 16:17:32.000000000 +0000
@@ -0,0 +1,47 @@
+# Copyright © 2017 Intel Corporation
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+wayland_drm_protocol_c = custom_target(
+ 'wayland-drm-protocol.c',
+ input : 'wayland-drm.xml',
+ output : 'wayland-drm-protocol.c',
+ command : [prog_wl_scanner, 'code', '@INPUT@', '@OUTPUT@'],
+)
+
+wayland_drm_client_protocol_h = custom_target(
+ 'wayland-drm-client-protocol.h',
+ input : 'wayland-drm.xml',
+ output : 'wayland-drm-client-protocol.h',
+ command : [prog_wl_scanner, 'client-header', '@INPUT@', '@OUTPUT@'],
+)
+
+wayland_drm_server_protocol_h = custom_target(
+ 'wayland-drm-server-protocol.h',
+ input : 'wayland-drm.xml',
+ output : 'wayland-drm-server-protocol.h',
+ command : [prog_wl_scanner, 'server-header', '@INPUT@', '@OUTPUT@'],
+)
+
+libwayland_drm = static_library(
+ 'wayland_drm',
+ ['wayland-drm.c', wayland_drm_protocol_c, wayland_drm_server_protocol_h],
+ dependencies : [dep_wayland_server],
+ build_by_default : false,
+)
diff -Nru mesa-17.2.4/src/egl/wayland/wayland-drm/wayland-drm.c mesa-17.3.3/src/egl/wayland/wayland-drm/wayland-drm.c
--- mesa-17.2.4/src/egl/wayland/wayland-drm/wayland-drm.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/egl/wayland/wayland-drm/wayland-drm.c 2018-01-18 21:30:28.000000000 +0000
@@ -55,7 +55,7 @@
static void
destroy_buffer(struct wl_resource *resource)
{
- struct wl_drm_buffer *buffer = resource->data;
+ struct wl_drm_buffer *buffer = wl_resource_get_user_data(resource);
struct wl_drm *drm = buffer->drm;
drm->callbacks.release_buffer(drm->user_data, buffer);
@@ -77,7 +77,7 @@
int32_t offset1, int32_t stride1,
int32_t offset2, int32_t stride2)
{
- struct wl_drm *drm = resource->data;
+ struct wl_drm *drm = wl_resource_get_user_data(resource);
struct wl_drm_buffer *buffer;
buffer = calloc(1, sizeof *buffer);
@@ -187,7 +187,7 @@
drm_authenticate(struct wl_client *client,
struct wl_resource *resource, uint32_t id)
{
- struct wl_drm *drm = resource->data;
+ struct wl_drm *drm = wl_resource_get_user_data(resource);
if (drm->callbacks.authenticate(drm->user_data, id) < 0)
wl_resource_post_error(resource,
@@ -259,7 +259,7 @@
struct wl_drm *
wayland_drm_init(struct wl_display *display, char *device_name,
- struct wayland_drm_callbacks *callbacks, void *user_data,
+ const struct wayland_drm_callbacks *callbacks, void *user_data,
uint32_t flags)
{
struct wl_drm *drm;
@@ -292,15 +292,3 @@
free(drm);
}
-
-uint32_t
-wayland_drm_buffer_get_format(struct wl_drm_buffer *buffer)
-{
- return buffer->format;
-}
-
-void *
-wayland_drm_buffer_get_buffer(struct wl_drm_buffer *buffer)
-{
- return buffer->driver_buffer;
-}
diff -Nru mesa-17.2.4/src/egl/wayland/wayland-drm/wayland-drm.h mesa-17.3.3/src/egl/wayland/wayland-drm/wayland-drm.h
--- mesa-17.2.4/src/egl/wayland/wayland-drm/wayland-drm.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/egl/wayland/wayland-drm/wayland-drm.h 2018-01-18 21:30:28.000000000 +0000
@@ -3,71 +3,9 @@
#include
-#ifndef WL_DRM_FORMAT_ENUM
-#define WL_DRM_FORMAT_ENUM
-enum wl_drm_format {
- WL_DRM_FORMAT_C8 = 0x20203843,
- WL_DRM_FORMAT_RGB332 = 0x38424752,
- WL_DRM_FORMAT_BGR233 = 0x38524742,
- WL_DRM_FORMAT_XRGB4444 = 0x32315258,
- WL_DRM_FORMAT_XBGR4444 = 0x32314258,
- WL_DRM_FORMAT_RGBX4444 = 0x32315852,
- WL_DRM_FORMAT_BGRX4444 = 0x32315842,
- WL_DRM_FORMAT_ARGB4444 = 0x32315241,
- WL_DRM_FORMAT_ABGR4444 = 0x32314241,
- WL_DRM_FORMAT_RGBA4444 = 0x32314152,
- WL_DRM_FORMAT_BGRA4444 = 0x32314142,
- WL_DRM_FORMAT_XRGB1555 = 0x35315258,
- WL_DRM_FORMAT_XBGR1555 = 0x35314258,
- WL_DRM_FORMAT_RGBX5551 = 0x35315852,
- WL_DRM_FORMAT_BGRX5551 = 0x35315842,
- WL_DRM_FORMAT_ARGB1555 = 0x35315241,
- WL_DRM_FORMAT_ABGR1555 = 0x35314241,
- WL_DRM_FORMAT_RGBA5551 = 0x35314152,
- WL_DRM_FORMAT_BGRA5551 = 0x35314142,
- WL_DRM_FORMAT_RGB565 = 0x36314752,
- WL_DRM_FORMAT_BGR565 = 0x36314742,
- WL_DRM_FORMAT_RGB888 = 0x34324752,
- WL_DRM_FORMAT_BGR888 = 0x34324742,
- WL_DRM_FORMAT_XRGB8888 = 0x34325258,
- WL_DRM_FORMAT_XBGR8888 = 0x34324258,
- WL_DRM_FORMAT_RGBX8888 = 0x34325852,
- WL_DRM_FORMAT_BGRX8888 = 0x34325842,
- WL_DRM_FORMAT_ARGB8888 = 0x34325241,
- WL_DRM_FORMAT_ABGR8888 = 0x34324241,
- WL_DRM_FORMAT_RGBA8888 = 0x34324152,
- WL_DRM_FORMAT_BGRA8888 = 0x34324142,
- WL_DRM_FORMAT_XRGB2101010 = 0x30335258,
- WL_DRM_FORMAT_XBGR2101010 = 0x30334258,
- WL_DRM_FORMAT_RGBX1010102 = 0x30335852,
- WL_DRM_FORMAT_BGRX1010102 = 0x30335842,
- WL_DRM_FORMAT_ARGB2101010 = 0x30335241,
- WL_DRM_FORMAT_ABGR2101010 = 0x30334241,
- WL_DRM_FORMAT_RGBA1010102 = 0x30334152,
- WL_DRM_FORMAT_BGRA1010102 = 0x30334142,
- WL_DRM_FORMAT_YUYV = 0x56595559,
- WL_DRM_FORMAT_YVYU = 0x55595659,
- WL_DRM_FORMAT_UYVY = 0x59565955,
- WL_DRM_FORMAT_VYUY = 0x59555956,
- WL_DRM_FORMAT_AYUV = 0x56555941,
- WL_DRM_FORMAT_NV12 = 0x3231564e,
- WL_DRM_FORMAT_NV21 = 0x3132564e,
- WL_DRM_FORMAT_NV16 = 0x3631564e,
- WL_DRM_FORMAT_NV61 = 0x3136564e,
- WL_DRM_FORMAT_YUV410 = 0x39565559,
- WL_DRM_FORMAT_YVU410 = 0x39555659,
- WL_DRM_FORMAT_YUV411 = 0x31315559,
- WL_DRM_FORMAT_YVU411 = 0x31315659,
- WL_DRM_FORMAT_YUV420 = 0x32315559,
- WL_DRM_FORMAT_YVU420 = 0x32315659,
- WL_DRM_FORMAT_YUV422 = 0x36315559,
- WL_DRM_FORMAT_YVU422 = 0x36315659,
- WL_DRM_FORMAT_YUV444 = 0x34325559,
- WL_DRM_FORMAT_YVU444 = 0x34325659,
-};
-#endif /* WL_DRM_FORMAT_ENUM */
-
+struct wl_display;
struct wl_drm;
+struct wl_resource;
struct wl_drm_buffer {
struct wl_resource *resource;
@@ -96,16 +34,10 @@
struct wl_drm *
wayland_drm_init(struct wl_display *display, char *device_name,
- struct wayland_drm_callbacks *callbacks, void *user_data,
+ const struct wayland_drm_callbacks *callbacks, void *user_data,
uint32_t flags);
void
wayland_drm_uninit(struct wl_drm *drm);
-uint32_t
-wayland_drm_buffer_get_format(struct wl_drm_buffer *buffer);
-
-void *
-wayland_drm_buffer_get_buffer(struct wl_drm_buffer *buffer);
-
#endif
diff -Nru mesa-17.2.4/src/egl/wayland/wayland-drm/wayland-drm-protocol.c mesa-17.3.3/src/egl/wayland/wayland-drm/wayland-drm-protocol.c
--- mesa-17.2.4/src/egl/wayland/wayland-drm/wayland-drm-protocol.c 2017-10-30 14:50:48.000000000 +0000
+++ mesa-17.3.3/src/egl/wayland/wayland-drm/wayland-drm-protocol.c 2018-01-18 21:31:05.000000000 +0000
@@ -1,4 +1,4 @@
-/* Generated by wayland-scanner 1.14.0 */
+/* Generated by wayland-scanner 1.14.90 */
/*
* Copyright © 2008-2011 Kristian Høgsberg
diff -Nru mesa-17.2.4/src/egl/wayland/wayland-egl/Makefile.am mesa-17.3.3/src/egl/wayland/wayland-egl/Makefile.am
--- mesa-17.2.4/src/egl/wayland/wayland-egl/Makefile.am 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/egl/wayland/wayland-egl/Makefile.am 2018-01-18 21:30:28.000000000 +0000
@@ -3,10 +3,10 @@
AM_CFLAGS = $(DEFINES) \
$(VISIBILITY_CFLAGS) \
- $(WAYLAND_CFLAGS)
+ $(WAYLAND_CLIENT_CFLAGS)
lib_LTLIBRARIES = libwayland-egl.la
-noinst_HEADERS = wayland-egl-priv.h
+noinst_HEADERS = wayland-egl-backend.h
libwayland_egl_la_SOURCES = wayland-egl.c
libwayland_egl_la_LDFLAGS = \
-no-undefined \
diff -Nru mesa-17.2.4/src/egl/wayland/wayland-egl/Makefile.in mesa-17.3.3/src/egl/wayland/wayland-egl/Makefile.in
--- mesa-17.2.4/src/egl/wayland/wayland-egl/Makefile.in 2017-10-30 14:49:59.000000000 +0000
+++ mesa-17.3.3/src/egl/wayland/wayland-egl/Makefile.in 2018-01-18 21:30:39.000000000 +0000
@@ -98,7 +98,8 @@
check_PROGRAMS = wayland-egl-abi-check$(EXEEXT)
subdir = src/egl/wayland/wayland-egl
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_gnu_make.m4 \
+am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_compile_flag.m4 \
+ $(top_srcdir)/m4/ax_check_gnu_make.m4 \
$(top_srcdir)/m4/ax_check_python_mako_module.m4 \
$(top_srcdir)/m4/ax_gcc_builtin.m4 \
$(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
@@ -539,9 +540,9 @@
NVVIEUX_LIBS = @NVVIEUX_LIBS@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
-OMX_CFLAGS = @OMX_CFLAGS@
-OMX_LIBS = @OMX_LIBS@
-OMX_LIB_INSTALL_DIR = @OMX_LIB_INSTALL_DIR@
+OMX_BELLAGIO_CFLAGS = @OMX_BELLAGIO_CFLAGS@
+OMX_BELLAGIO_LIBS = @OMX_BELLAGIO_LIBS@
+OMX_BELLAGIO_LIB_INSTALL_DIR = @OMX_BELLAGIO_LIB_INSTALL_DIR@
OPENCL_LIBNAME = @OPENCL_LIBNAME@
OPENCL_VERSION = @OPENCL_VERSION@
OSMESA_LIB = @OSMESA_LIB@
@@ -595,6 +596,8 @@
VA_LIB_INSTALL_DIR = @VA_LIB_INSTALL_DIR@
VA_MAJOR = @VA_MAJOR@
VA_MINOR = @VA_MINOR@
+VC5_SIMULATOR_CFLAGS = @VC5_SIMULATOR_CFLAGS@
+VC5_SIMULATOR_LIBS = @VC5_SIMULATOR_LIBS@
VDPAU_CFLAGS = @VDPAU_CFLAGS@
VDPAU_LIBS = @VDPAU_LIBS@
VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@
@@ -606,12 +609,15 @@
VL_CFLAGS = @VL_CFLAGS@
VL_LIBS = @VL_LIBS@
VULKAN_ICD_INSTALL_DIR = @VULKAN_ICD_INSTALL_DIR@
-WAYLAND_CFLAGS = @WAYLAND_CFLAGS@
-WAYLAND_LIBS = @WAYLAND_LIBS@
+WAYLAND_CLIENT_CFLAGS = @WAYLAND_CLIENT_CFLAGS@
+WAYLAND_CLIENT_LIBS = @WAYLAND_CLIENT_LIBS@
WAYLAND_PROTOCOLS_DATADIR = @WAYLAND_PROTOCOLS_DATADIR@
WAYLAND_SCANNER = @WAYLAND_SCANNER@
WAYLAND_SCANNER_CFLAGS = @WAYLAND_SCANNER_CFLAGS@
WAYLAND_SCANNER_LIBS = @WAYLAND_SCANNER_LIBS@
+WAYLAND_SERVER_CFLAGS = @WAYLAND_SERVER_CFLAGS@
+WAYLAND_SERVER_LIBS = @WAYLAND_SERVER_LIBS@
+WNO_OVERRIDE_INIT = @WNO_OVERRIDE_INIT@
X11_INCLUDES = @X11_INCLUDES@
XA_MAJOR = @XA_MAJOR@
XA_MINOR = @XA_MINOR@
@@ -699,10 +705,10 @@
pkgconfig_DATA = wayland-egl.pc
AM_CFLAGS = $(DEFINES) \
$(VISIBILITY_CFLAGS) \
- $(WAYLAND_CFLAGS)
+ $(WAYLAND_CLIENT_CFLAGS)
lib_LTLIBRARIES = libwayland-egl.la
-noinst_HEADERS = wayland-egl-priv.h
+noinst_HEADERS = wayland-egl-backend.h
libwayland_egl_la_SOURCES = wayland-egl.c
libwayland_egl_la_LDFLAGS = \
-no-undefined \
diff -Nru mesa-17.2.4/src/egl/wayland/wayland-egl/meson.build mesa-17.3.3/src/egl/wayland/wayland-egl/meson.build
--- mesa-17.2.4/src/egl/wayland/wayland-egl/meson.build 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/egl/wayland/wayland-egl/meson.build 2018-02-01 16:17:32.000000000 +0000
@@ -0,0 +1,43 @@
+# Copyright © 2017 Intel Corporation
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+
+libwayland_egl = shared_library(
+ 'wayland-egl',
+ 'wayland-egl.c',
+ c_args : [c_vis_args],
+ link_args : ld_args_gc_sections,
+ dependencies : dep_wayland_client,
+ version : '1.0.0',
+ install : true,
+)
+
+pkg.generate(
+ name : 'wayland-egl',
+ description : 'Mesa wayland-egl library',
+ libraries : libwayland_egl,
+ version : meson.project_version(),
+ requires : 'wayland-client',
+)
+
+if with_tests
+ test('wayland-egl-symbols-check', find_program('wayland-egl-symbols-check'))
+ test('wayland-egl-abi-check', executable('wayland-egl-abi-check', 'wayland-egl-abi-check.c'))
+endif
diff -Nru mesa-17.2.4/src/egl/wayland/wayland-egl/wayland-egl-abi-check.c mesa-17.3.3/src/egl/wayland/wayland-egl/wayland-egl-abi-check.c
--- mesa-17.2.4/src/egl/wayland/wayland-egl/wayland-egl-abi-check.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/egl/wayland/wayland-egl/wayland-egl-abi-check.c 2018-01-18 21:30:28.000000000 +0000
@@ -20,10 +20,10 @@
* DEALINGS IN THE SOFTWARE.
*/
-#include <stddef.h> // offsetof
-#include <stdio.h> // printf
+#include <stddef.h> /* offsetof */
+#include <stdio.h> /* printf */
-#include "wayland-egl-priv.h" // Current struct wl_egl_window implementation
+#include "wayland-egl-backend.h" /* Current struct wl_egl_window implementation */
/*
* Following are previous implementations of wl_egl_window.
@@ -100,7 +100,7 @@
/* This program checks we keep a backwards-compatible struct wl_egl_window
- * definition whenever it is modified in wayland-egl-priv.h.
+ * definition whenever it is modified in wayland-egl-backend.h.
*
* The previous definition should be added above as a new struct
* wl_egl_window_vN, and the appropriate checks should be added below
diff -Nru mesa-17.2.4/src/egl/wayland/wayland-egl/wayland-egl-backend.h mesa-17.3.3/src/egl/wayland/wayland-egl/wayland-egl-backend.h
--- mesa-17.2.4/src/egl/wayland/wayland-egl/wayland-egl-backend.h 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/egl/wayland/wayland-egl/wayland-egl-backend.h 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,63 @@
+/*
+ * Copyright © 2011 Benjamin Franzke
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Benjamin Franzke
+ */
+
+#ifndef _WAYLAND_EGL_PRIV_H
+#define _WAYLAND_EGL_PRIV_H
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define WL_EGL_WINDOW_VERSION 3
+
+struct wl_surface;
+
+struct wl_egl_window {
+ const intptr_t version;
+
+ int width;
+ int height;
+ int dx;
+ int dy;
+
+ int attached_width;
+ int attached_height;
+
+ void *private;
+ void (*resize_callback)(struct wl_egl_window *, void *);
+ void (*destroy_window_callback)(void *);
+
+ struct wl_surface *surface;
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff -Nru mesa-17.2.4/src/egl/wayland/wayland-egl/wayland-egl.c mesa-17.3.3/src/egl/wayland/wayland-egl/wayland-egl.c
--- mesa-17.2.4/src/egl/wayland/wayland-egl/wayland-egl.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/egl/wayland/wayland-egl/wayland-egl.c 2018-01-18 21:30:28.000000000 +0000
@@ -30,9 +30,15 @@
#include
#include
-#include
#include "wayland-egl.h"
-#include "wayland-egl-priv.h"
+#include "wayland-egl-backend.h"
+
+/* GCC visibility */
+#if defined(__GNUC__)
+#define WL_EGL_EXPORT __attribute__ ((visibility("default")))
+#else
+#define WL_EGL_EXPORT
+#endif
WL_EGL_EXPORT void
wl_egl_window_resize(struct wl_egl_window *egl_window,
@@ -55,26 +61,32 @@
wl_egl_window_create(struct wl_surface *surface,
int width, int height)
{
- struct wl_egl_window _INIT_ = { .version = WL_EGL_WINDOW_VERSION };
struct wl_egl_window *egl_window;
if (width <= 0 || height <= 0)
return NULL;
- egl_window = malloc(sizeof *egl_window);
+ egl_window = calloc(1, sizeof *egl_window);
if (!egl_window)
return NULL;
- memcpy(egl_window, &_INIT_, sizeof *egl_window);
+ /* Cast away the constness to set the version number.
+ *
+ * We want the const notation since it gives an explicit
+ * feedback to the backend implementation, should it try to
+ * change it.
+ *
+ * A backend attempting that is not too surprising, as APIs these
+ * days tend to provide a bidirectional version field.
+ */
+ intptr_t *version = (intptr_t *)&egl_window->version;
+ *version = WL_EGL_WINDOW_VERSION;
egl_window->surface = surface;
- egl_window->private = NULL;
- egl_window->resize_callback = NULL;
- egl_window->destroy_window_callback = NULL;
- wl_egl_window_resize(egl_window, width, height, 0, 0);
- egl_window->attached_width = 0;
- egl_window->attached_height = 0;
-
+
+ egl_window->width = width;
+ egl_window->height = height;
+
return egl_window;
}
diff -Nru mesa-17.2.4/src/egl/wayland/wayland-egl/wayland-egl-priv.h mesa-17.3.3/src/egl/wayland/wayland-egl/wayland-egl-priv.h
--- mesa-17.2.4/src/egl/wayland/wayland-egl/wayland-egl-priv.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/egl/wayland/wayland-egl/wayland-egl-priv.h 1970-01-01 00:00:00.000000000 +0000
@@ -1,68 +0,0 @@
-/*
- * Copyright © 2011 Benjamin Franzke
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Benjamin Franzke
- */
-
-#ifndef _WAYLAND_EGL_PRIV_H
-#define _WAYLAND_EGL_PRIV_H
-
-/* GCC visibility */
-#if defined(__GNUC__)
-#define WL_EGL_EXPORT __attribute__ ((visibility("default")))
-#else
-#define WL_EGL_EXPORT
-#endif
-
-#include
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define WL_EGL_WINDOW_VERSION 3
-
-struct wl_egl_window {
- const intptr_t version;
-
- int width;
- int height;
- int dx;
- int dy;
-
- int attached_width;
- int attached_height;
-
- void *private;
- void (*resize_callback)(struct wl_egl_window *, void *);
- void (*destroy_window_callback)(void *);
-
- struct wl_surface *surface;
-};
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff -Nru mesa-17.2.4/src/gallium/Automake.inc mesa-17.3.3/src/gallium/Automake.inc
--- mesa-17.2.4/src/gallium/Automake.inc 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/Automake.inc 2018-01-18 21:30:28.000000000 +0000
@@ -39,6 +39,8 @@
-I$(top_srcdir)/src/gallium/auxiliary \
-I$(top_srcdir)/src/gallium/drivers \
-I$(top_srcdir)/src/gallium/winsys \
+ -I$(top_builddir)/src/util/ \
+ -I$(top_builddir)/src/gallium/drivers/ \
$(DEFINES) \
$(PTHREAD_CFLAGS) \
$(LIBDRM_CFLAGS) \
diff -Nru mesa-17.2.4/src/gallium/auxiliary/draw/draw_llvm.c mesa-17.3.3/src/gallium/auxiliary/draw/draw_llvm.c
--- mesa-17.2.4/src/gallium/auxiliary/draw/draw_llvm.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/draw/draw_llvm.c 2018-01-18 21:30:28.000000000 +0000
@@ -2156,6 +2156,11 @@
{
struct draw_llvm *llvm = variant->llvm;
+ if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
+ debug_printf("Deleting VS variant: %u vs variants,\t%u total variants\n",
+ variant->shader->variants_cached, llvm->nr_variants);
+ }
+
gallivm_destroy(variant->gallivm);
remove_from_list(&variant->list_item_local);
@@ -2418,6 +2423,11 @@
{
struct draw_llvm *llvm = variant->llvm;
+ if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
+ debug_printf("Deleting GS variant: %u gs variants,\t%u total variants\n",
+ variant->shader->variants_cached, llvm->nr_gs_variants);
+ }
+
gallivm_destroy(variant->gallivm);
remove_from_list(&variant->list_item_local);
diff -Nru mesa-17.2.4/src/gallium/auxiliary/draw/draw_private.h mesa-17.3.3/src/gallium/auxiliary/draw/draw_private.h
--- mesa-17.2.4/src/gallium/auxiliary/draw/draw_private.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/draw/draw_private.h 2018-01-18 21:30:28.000000000 +0000
@@ -103,7 +103,7 @@
/* maximum number of shader variants we can cache */
-#define DRAW_MAX_SHADER_VARIANTS 128
+#define DRAW_MAX_SHADER_VARIANTS 512
/**
* Private context for the drawing module.
diff -Nru mesa-17.2.4/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c mesa-17.3.3/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
--- mesa-17.2.4/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c 2018-01-18 21:30:28.000000000 +0000
@@ -37,6 +37,7 @@
#include "draw/draw_vs.h"
#include "draw/draw_llvm.h"
#include "gallivm/lp_bld_init.h"
+#include "gallivm/lp_bld_debug.h"
struct llvm_middle_end {
@@ -71,6 +72,7 @@
llvm_middle_end_prepare_gs(struct llvm_middle_end *fpme)
{
struct draw_context *draw = fpme->draw;
+ struct draw_llvm *llvm = fpme->llvm;
struct draw_geometry_shader *gs = draw->gs.geometry_shader;
struct draw_gs_llvm_variant_key *key;
struct draw_gs_llvm_variant *variant = NULL;
@@ -79,7 +81,7 @@
char store[DRAW_GS_LLVM_MAX_VARIANT_KEY_SIZE];
unsigned i;
- key = draw_gs_llvm_make_variant_key(fpme->llvm, store);
+ key = draw_gs_llvm_make_variant_key(llvm, store);
/* Search shader's list of variants for the key */
li = first_elem(&shader->variants);
@@ -93,38 +95,42 @@
if (variant) {
/* found the variant, move to head of global list (for LRU) */
- move_to_head(&fpme->llvm->gs_variants_list,
- &variant->list_item_global);
+ move_to_head(&llvm->gs_variants_list, &variant->list_item_global);
}
else {
/* Need to create new variant */
/* First check if we've created too many variants. If so, free
- * 25% of the LRU to avoid using too much memory.
+ * 3.125% of the LRU to avoid using too much memory.
*/
- if (fpme->llvm->nr_gs_variants >= DRAW_MAX_SHADER_VARIANTS) {
+ if (llvm->nr_gs_variants >= DRAW_MAX_SHADER_VARIANTS) {
+ if (gallivm_debug & GALLIVM_DEBUG_PERF) {
+ debug_printf("Evicting GS: %u gs variants,\t%u total variants\n",
+ shader->variants_cached, llvm->nr_gs_variants);
+ }
+
/*
* XXX: should we flush here ?
*/
- for (i = 0; i < DRAW_MAX_SHADER_VARIANTS / 4; i++) {
+ for (i = 0; i < DRAW_MAX_SHADER_VARIANTS / 32; i++) {
struct draw_gs_llvm_variant_list_item *item;
- if (is_empty_list(&fpme->llvm->gs_variants_list)) {
+ if (is_empty_list(&llvm->gs_variants_list)) {
break;
}
- item = last_elem(&fpme->llvm->gs_variants_list);
+ item = last_elem(&llvm->gs_variants_list);
assert(item);
assert(item->base);
draw_gs_llvm_destroy_variant(item->base);
}
}
- variant = draw_gs_llvm_create_variant(fpme->llvm, gs->info.num_outputs, key);
+ variant = draw_gs_llvm_create_variant(llvm, gs->info.num_outputs, key);
if (variant) {
insert_at_head(&shader->variants, &variant->list_item_local);
- insert_at_head(&fpme->llvm->gs_variants_list,
+ insert_at_head(&llvm->gs_variants_list,
&variant->list_item_global);
- fpme->llvm->nr_gs_variants++;
+ llvm->nr_gs_variants++;
shader->variants_cached++;
}
}
@@ -145,6 +151,7 @@
{
struct llvm_middle_end *fpme = llvm_middle_end(middle);
struct draw_context *draw = fpme->draw;
+ struct draw_llvm *llvm = fpme->llvm;
struct draw_vertex_shader *vs = draw->vs.vertex_shader;
struct draw_geometry_shader *gs = draw->gs.geometry_shader;
const unsigned out_prim = gs ? gs->output_primitive :
@@ -203,7 +210,7 @@
char store[DRAW_LLVM_MAX_VARIANT_KEY_SIZE];
unsigned i;
- key = draw_llvm_make_variant_key(fpme->llvm, store);
+ key = draw_llvm_make_variant_key(llvm, store);
/* Search shader's list of variants for the key */
li = first_elem(&shader->variants);
@@ -217,38 +224,42 @@
if (variant) {
/* found the variant, move to head of global list (for LRU) */
- move_to_head(&fpme->llvm->vs_variants_list,
- &variant->list_item_global);
+ move_to_head(&llvm->vs_variants_list, &variant->list_item_global);
}
else {
/* Need to create new variant */
/* First check if we've created too many variants. If so, free
- * 25% of the LRU to avoid using too much memory.
+ * 3.125% of the LRU to avoid using too much memory.
*/
- if (fpme->llvm->nr_variants >= DRAW_MAX_SHADER_VARIANTS) {
+ if (llvm->nr_variants >= DRAW_MAX_SHADER_VARIANTS) {
+ if (gallivm_debug & GALLIVM_DEBUG_PERF) {
+ debug_printf("Evicting VS: %u vs variants,\t%u total variants\n",
+ shader->variants_cached, llvm->nr_variants);
+ }
+
/*
* XXX: should we flush here ?
*/
- for (i = 0; i < DRAW_MAX_SHADER_VARIANTS / 4; i++) {
+ for (i = 0; i < DRAW_MAX_SHADER_VARIANTS / 32; i++) {
struct draw_llvm_variant_list_item *item;
- if (is_empty_list(&fpme->llvm->vs_variants_list)) {
+ if (is_empty_list(&llvm->vs_variants_list)) {
break;
}
- item = last_elem(&fpme->llvm->vs_variants_list);
+ item = last_elem(&llvm->vs_variants_list);
assert(item);
assert(item->base);
draw_llvm_destroy_variant(item->base);
}
}
- variant = draw_llvm_create_variant(fpme->llvm, nr, key);
+ variant = draw_llvm_create_variant(llvm, nr, key);
if (variant) {
insert_at_head(&shader->variants, &variant->list_item_local);
- insert_at_head(&fpme->llvm->vs_variants_list,
+ insert_at_head(&llvm->vs_variants_list,
&variant->list_item_global);
- fpme->llvm->nr_variants++;
+ llvm->nr_variants++;
shader->variants_cached++;
}
}
diff -Nru mesa-17.2.4/src/gallium/auxiliary/gallivm/lp_bld_arit.c mesa-17.3.3/src/gallium/auxiliary/gallivm/lp_bld_arit.c
--- mesa-17.2.4/src/gallium/auxiliary/gallivm/lp_bld_arit.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/gallivm/lp_bld_arit.c 2018-01-18 21:30:28.000000000 +0000
@@ -1799,7 +1799,7 @@
}
}
- if(type.width*type.length == 128 && util_cpu_caps.has_ssse3) {
+ if(type.width*type.length == 128 && util_cpu_caps.has_ssse3 && HAVE_LLVM < 0x0600) {
switch(type.width) {
case 8:
return lp_build_intrinsic_unary(builder, "llvm.x86.ssse3.pabs.b.128", vec_type, a);
@@ -1809,7 +1809,7 @@
return lp_build_intrinsic_unary(builder, "llvm.x86.ssse3.pabs.d.128", vec_type, a);
}
}
- else if (type.width*type.length == 256 && util_cpu_caps.has_avx2) {
+ else if (type.width*type.length == 256 && util_cpu_caps.has_avx2 && HAVE_LLVM < 0x0600) {
switch(type.width) {
case 8:
return lp_build_intrinsic_unary(builder, "llvm.x86.avx2.pabs.b", vec_type, a);
@@ -1819,14 +1819,9 @@
return lp_build_intrinsic_unary(builder, "llvm.x86.avx2.pabs.d", vec_type, a);
}
}
- else if (type.width*type.length == 256 && util_cpu_caps.has_ssse3 &&
- (gallivm_debug & GALLIVM_DEBUG_PERF) &&
- (type.width == 8 || type.width == 16 || type.width == 32)) {
- debug_printf("%s: inefficient code, should split vectors manually\n",
- __FUNCTION__);
- }
- return lp_build_max(bld, a, LLVMBuildNeg(builder, a, ""));
+ return lp_build_select(bld, lp_build_cmp(bld, PIPE_FUNC_GREATER, a, bld->zero),
+ a, LLVMBuildNeg(builder, a, ""));
}
diff -Nru mesa-17.2.4/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c mesa-17.3.3/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
--- mesa-17.2.4/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c 2018-01-18 21:30:28.000000000 +0000
@@ -532,7 +532,7 @@
util_format_fits_8unorm(format_desc) &&
type.width == 8 && type.norm == 1 && type.sign == 0 &&
type.fixed == 0 && type.floating == 0) {
- LLVMValueRef packed, res, chans[4], rgba[4];
+ LLVMValueRef packed, res = NULL, chans[4], rgba[4];
LLVMTypeRef dst_vec_type, conv_vec_type;
struct lp_type fetch_type, conv_type;
struct lp_build_context bld_conv;
diff -Nru mesa-17.2.4/src/gallium/auxiliary/gallivm/lp_bld_limits.h mesa-17.3.3/src/gallium/auxiliary/gallivm/lp_bld_limits.h
--- mesa-17.2.4/src/gallium/auxiliary/gallivm/lp_bld_limits.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/gallivm/lp_bld_limits.h 2018-01-18 21:30:28.000000000 +0000
@@ -118,6 +118,9 @@
return 1;
case PIPE_SHADER_CAP_INTEGERS:
return 1;
+ case PIPE_SHADER_CAP_INT64_ATOMICS:
+ case PIPE_SHADER_CAP_FP16:
+ return 0;
case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
return PIPE_MAX_SAMPLERS;
case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
@@ -131,6 +134,7 @@
return 1;
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
diff -Nru mesa-17.2.4/src/gallium/auxiliary/gallivm/lp_bld_sample.c mesa-17.3.3/src/gallium/auxiliary/gallivm/lp_bld_sample.c
--- mesa-17.2.4/src/gallium/auxiliary/gallivm/lp_bld_sample.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/gallivm/lp_bld_sample.c 2018-01-18 21:30:28.000000000 +0000
@@ -156,19 +156,19 @@
state->wrap_r = sampler->wrap_r;
state->min_img_filter = sampler->min_img_filter;
state->mag_img_filter = sampler->mag_img_filter;
+ state->min_mip_filter = sampler->min_mip_filter;
state->seamless_cube_map = sampler->seamless_cube_map;
if (sampler->max_lod > 0.0f) {
- state->min_mip_filter = sampler->min_mip_filter;
- } else {
- state->min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
+ state->max_lod_pos = 1;
+ }
+
+ if (sampler->lod_bias != 0.0f) {
+ state->lod_bias_non_zero = 1;
}
if (state->min_mip_filter != PIPE_TEX_MIPFILTER_NONE ||
state->min_img_filter != state->mag_img_filter) {
- if (sampler->lod_bias != 0.0f) {
- state->lod_bias_non_zero = 1;
- }
/* If min_lod == max_lod we can greatly simplify mipmap selection.
* This is a case that occurs during automatic mipmap generation.
@@ -234,7 +234,7 @@
unsigned length = coord_bld->type.length;
unsigned num_quads = length / 4;
boolean rho_per_quad = rho_bld->type.length != length;
- boolean no_rho_opt = (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) && (dims > 1);
+ boolean no_rho_opt = bld->no_rho_approx && (dims > 1);
unsigned i;
LLVMValueRef i32undef = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
LLVMValueRef rho_xvec, rho_yvec;
@@ -694,6 +694,7 @@
*/
void
lp_build_lod_selector(struct lp_build_sample_context *bld,
+ boolean is_lodq,
unsigned texture_unit,
unsigned sampler_unit,
LLVMValueRef s,
@@ -704,6 +705,7 @@
LLVMValueRef lod_bias, /* optional */
LLVMValueRef explicit_lod, /* optional */
unsigned mip_filter,
+ LLVMValueRef *out_lod,
LLVMValueRef *out_lod_ipart,
LLVMValueRef *out_lod_fpart,
LLVMValueRef *out_lod_positive)
@@ -736,7 +738,7 @@
* I have no clue about the (undocumented) wishes of d3d9/d3d10 here!
*/
- if (bld->static_sampler_state->min_max_lod_equal) {
+ if (bld->static_sampler_state->min_max_lod_equal && !is_lodq) {
/* User is forcing sampling from a particular mipmap level.
* This is hit during mipmap generation.
*/
@@ -756,7 +758,7 @@
}
else {
LLVMValueRef rho;
- boolean rho_squared = ((gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) &&
+ boolean rho_squared = (bld->no_rho_approx &&
(bld->dims > 1)) || cube_rho;
rho = lp_build_rho(bld, texture_unit, s, t, r, cube_rho, derivs);
@@ -765,7 +767,7 @@
* Compute lod = log2(rho)
*/
- if (!lod_bias &&
+ if (!lod_bias && !is_lodq &&
!bld->static_sampler_state->lod_bias_non_zero &&
!bld->static_sampler_state->apply_max_lod &&
!bld->static_sampler_state->apply_min_lod) {
@@ -792,8 +794,7 @@
return;
}
if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR &&
- !(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR) &&
- !rho_squared) {
+ !bld->no_brilinear && !rho_squared) {
/*
* This can't work if rho is squared. Not sure if it could be
* fixed while keeping it worthwile, could also do sqrt here
@@ -839,6 +840,10 @@
lod = LLVMBuildFAdd(builder, lod, sampler_lod_bias, "sampler_lod_bias");
}
+ if (is_lodq) {
+ *out_lod = lod;
+ }
+
/* clamp lod */
if (bld->static_sampler_state->apply_max_lod) {
LLVMValueRef max_lod =
@@ -856,13 +861,18 @@
lod = lp_build_max(lodf_bld, lod, min_lod);
}
+
+ if (is_lodq) {
+ *out_lod_fpart = lod;
+ return;
+ }
}
*out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER,
lod, lodf_bld->zero);
if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
- if (!(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) {
+ if (!bld->no_brilinear) {
lp_build_brilinear_lod(lodf_bld, lod, BRILINEAR_FACTOR,
out_lod_ipart, out_lod_fpart);
}
@@ -1679,9 +1689,7 @@
maxasat = lp_build_max(coord_bld, as, at);
ar_ge_as_at = lp_build_cmp(coord_bld, PIPE_FUNC_GEQUAL, ar, maxasat);
- if (need_derivs && (derivs_in ||
- ((gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) &&
- (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX)))) {
+ if (need_derivs && (derivs_in || (bld->no_quad_lod && bld->no_rho_approx))) {
/*
* XXX: This is really really complex.
* It is a bit overkill to use this for implicit derivatives as well,
diff -Nru mesa-17.2.4/src/gallium/auxiliary/gallivm/lp_bld_sample.h mesa-17.3.3/src/gallium/auxiliary/gallivm/lp_bld_sample.h
--- mesa-17.2.4/src/gallium/auxiliary/gallivm/lp_bld_sample.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/gallivm/lp_bld_sample.h 2018-01-18 21:30:28.000000000 +0000
@@ -82,7 +82,8 @@
enum lp_sampler_op_type {
LP_SAMPLER_OP_TEXTURE,
LP_SAMPLER_OP_FETCH,
- LP_SAMPLER_OP_GATHER
+ LP_SAMPLER_OP_GATHER,
+ LP_SAMPLER_OP_LODQ
};
@@ -165,6 +166,7 @@
unsigned normalized_coords:1;
unsigned min_max_lod_equal:1; /**< min_lod == max_lod ? */
unsigned lod_bias_non_zero:1;
+ unsigned max_lod_pos:1;
unsigned apply_min_lod:1; /**< min_lod > 0 ? */
unsigned apply_max_lod:1; /**< max_lod < last_level ? */
unsigned seamless_cube_map:1;
@@ -321,6 +323,10 @@
/** number of lod values (valid are 1, length/4, length) */
unsigned num_lods;
+ boolean no_quad_lod;
+ boolean no_brilinear;
+ boolean no_rho_approx;
+
/** regular scalar float type */
struct lp_type float_type;
struct lp_build_context float_bld;
@@ -486,6 +492,7 @@
void
lp_build_lod_selector(struct lp_build_sample_context *bld,
+ boolean is_lodq,
unsigned texture_index,
unsigned sampler_index,
LLVMValueRef s,
@@ -496,6 +503,7 @@
LLVMValueRef lod_bias, /* optional */
LLVMValueRef explicit_lod, /* optional */
unsigned mip_filter,
+ LLVMValueRef *out_lod,
LLVMValueRef *out_lod_ipart,
LLVMValueRef *out_lod_fpart,
LLVMValueRef *out_lod_positive);
diff -Nru mesa-17.2.4/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c mesa-17.3.3/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
--- mesa-17.2.4/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c 2018-01-18 21:30:28.000000000 +0000
@@ -299,6 +299,7 @@
*/
static void
lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
+ boolean is_gather,
LLVMValueRef coord,
LLVMValueRef length,
LLVMValueRef length_f,
@@ -388,13 +389,29 @@
/* clamp to length max */
coord = lp_build_min_ext(coord_bld, coord, length_f,
GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
- /* subtract 0.5 */
- coord = lp_build_sub(coord_bld, coord, half);
- /* clamp to [0, length - 0.5] */
- coord = lp_build_max(coord_bld, coord, coord_bld->zero);
- /* convert to int, compute lerp weight */
- lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
- coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
+ if (!is_gather) {
+ /* subtract 0.5 */
+ coord = lp_build_sub(coord_bld, coord, half);
+ /* clamp to [0, length - 0.5] */
+ coord = lp_build_max(coord_bld, coord, coord_bld->zero);
+ /* convert to int, compute lerp weight */
+ lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
+ coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
+ } else {
+ /*
+ * The non-gather path will end up with coords 0, 1 if coord was
+ * smaller than 0.5 (with corresponding weight 0.0 so it doesn't
+ * really matter what the second coord is). But for gather, we
+ * really need to end up with coords 0, 0.
+ */
+ coord = lp_build_max(coord_bld, coord, coord_bld->zero);
+ coord0 = lp_build_sub(coord_bld, coord, half);
+ coord1 = lp_build_add(coord_bld, coord, half);
+ /* Values range ([-0.5, length_f - 0.5], [0.5, length_f + 0.5]) */
+ coord0 = lp_build_itrunc(coord_bld, coord0);
+ coord1 = lp_build_itrunc(coord_bld, coord1);
+ weight = coord_bld->undef;
+ }
/* coord1 = min(coord1, length-1) */
coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
break;
@@ -424,6 +441,13 @@
coord = lp_build_add(coord_bld, coord, offset);
}
/* compute mirror function */
+ /*
+ * XXX: This looks incorrect wrt gather. Due to wrap specification,
+ * it is possible the first coord ends up larger than the second one.
+ * However, with our simplifications the coordinates will be swapped
+ * in this case. (Albeit some other api tests don't like it even
+ * with this fixed...)
+ */
coord = lp_build_coord_mirror(bld, coord);
/* scale coord to length */
@@ -474,6 +498,20 @@
offset = lp_build_int_to_float(coord_bld, offset);
coord = lp_build_add(coord_bld, coord, offset);
}
+ /*
+ * XXX: This looks incorrect wrt gather. Due to wrap specification,
+ * the first and second texel actually end up with "different order"
+ * for negative coords. For example, if the scaled coord would
+ * be -0.6, then the first coord should end up as 1
+ * (floor(-0.6 - 0.5) == -2, mirror makes that 1), the second as 0
+ * (floor(-0.6 - 0.5) + 1 == -1, mirror makes that 0).
+ * But with our simplifications the second coord will always be the
+ * larger one. The other two mirror_clamp modes have the same problem.
+ * Moreover, for coords close to zero we should end up with both
+ * coords being 0, but we will end up with coord1 being 1 instead
+ * (with bilinear filtering this is ok as the weight is 0.0) (this
+ * problem is specific to mirror_clamp_to_edge).
+ */
coord = lp_build_abs(coord_bld, coord);
/* clamp to length max */
@@ -929,7 +967,7 @@
*/
if (!seamless_cube_filter) {
- lp_build_sample_wrap_linear(bld, coords[0], width_vec,
+ lp_build_sample_wrap_linear(bld, is_gather, coords[0], width_vec,
flt_width_vec, offsets[0],
bld->static_texture_state->pot_width,
bld->static_sampler_state->wrap_s,
@@ -940,7 +978,7 @@
x11 = x01;
if (dims >= 2) {
- lp_build_sample_wrap_linear(bld, coords[1], height_vec,
+ lp_build_sample_wrap_linear(bld, is_gather, coords[1], height_vec,
flt_height_vec, offsets[1],
bld->static_texture_state->pot_height,
bld->static_sampler_state->wrap_t,
@@ -951,7 +989,7 @@
y11 = y10;
if (dims == 3) {
- lp_build_sample_wrap_linear(bld, coords[2], depth_vec,
+ lp_build_sample_wrap_linear(bld, is_gather, coords[2], depth_vec,
flt_depth_vec, offsets[2],
bld->static_texture_state->pot_depth,
bld->static_sampler_state->wrap_r,
@@ -1791,6 +1829,7 @@
*/
static void
lp_build_sample_common(struct lp_build_sample_context *bld,
+ boolean is_lodq,
unsigned texture_index,
unsigned sampler_index,
LLVMValueRef *coords,
@@ -1798,6 +1837,7 @@
LLVMValueRef lod_bias, /* optional */
LLVMValueRef explicit_lod, /* optional */
LLVMValueRef *lod_pos_or_zero,
+ LLVMValueRef *lod,
LLVMValueRef *lod_fpart,
LLVMValueRef *ilevel0,
LLVMValueRef *ilevel1)
@@ -1870,15 +1910,44 @@
* Compute the level of detail (float).
*/
if (min_filter != mag_filter ||
- mip_filter != PIPE_TEX_MIPFILTER_NONE) {
+ mip_filter != PIPE_TEX_MIPFILTER_NONE || is_lodq) {
/* Need to compute lod either to choose mipmap levels or to
* distinguish between minification/magnification with one mipmap level.
*/
- lp_build_lod_selector(bld, texture_index, sampler_index,
+ lp_build_lod_selector(bld, is_lodq, texture_index, sampler_index,
coords[0], coords[1], coords[2], cube_rho,
derivs, lod_bias, explicit_lod,
- mip_filter,
+ mip_filter, lod,
&lod_ipart, lod_fpart, lod_pos_or_zero);
+ if (is_lodq) {
+ LLVMValueRef last_level;
+ last_level = bld->dynamic_state->last_level(bld->dynamic_state,
+ bld->gallivm,
+ bld->context_ptr,
+ texture_index);
+ first_level = bld->dynamic_state->first_level(bld->dynamic_state,
+ bld->gallivm,
+ bld->context_ptr,
+ texture_index);
+ last_level = lp_build_sub(&bld->int_bld, last_level, first_level);
+ last_level = lp_build_int_to_float(&bld->float_bld, last_level);
+ last_level = lp_build_broadcast_scalar(&bld->lodf_bld, last_level);
+
+ switch (mip_filter) {
+ case PIPE_TEX_MIPFILTER_NONE:
+ *lod_fpart = bld->lodf_bld.zero;
+ break;
+ case PIPE_TEX_MIPFILTER_NEAREST:
+ *lod_fpart = lp_build_round(&bld->lodf_bld, *lod_fpart);
+ /* fallthrough */
+ case PIPE_TEX_MIPFILTER_LINEAR:
+ *lod_fpart = lp_build_clamp(&bld->lodf_bld, *lod_fpart,
+ bld->lodf_bld.zero, last_level);
+ break;
+ }
+ return;
+ }
+
} else {
lod_ipart = bld->lodi_bld.zero;
*lod_pos_or_zero = bld->lodi_bld.zero;
@@ -2484,7 +2553,7 @@
enum lp_sampler_op_type op_type;
LLVMValueRef lod_bias = NULL;
LLVMValueRef explicit_lod = NULL;
- boolean op_is_tex;
+ boolean op_is_tex, op_is_lodq, op_is_gather;
if (0) {
enum pipe_format fmt = static_texture_state->format;
@@ -2499,6 +2568,8 @@
LP_SAMPLER_OP_TYPE_SHIFT;
op_is_tex = op_type == LP_SAMPLER_OP_TEXTURE;
+ op_is_lodq = op_type == LP_SAMPLER_OP_LODQ;
+ op_is_gather = op_type == LP_SAMPLER_OP_GATHER;
if (lod_control == LP_SAMPLER_LOD_BIAS) {
lod_bias = lod;
@@ -2544,6 +2615,16 @@
bld.format_desc = util_format_description(static_texture_state->format);
bld.dims = dims;
+ if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD || op_is_lodq) {
+ bld.no_quad_lod = TRUE;
+ }
+ if (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX || op_is_lodq) {
+ bld.no_rho_approx = TRUE;
+ }
+ if (gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR || op_is_lodq) {
+ bld.no_brilinear = TRUE;
+ }
+
bld.vector_width = lp_type_width(type);
bld.float_type = lp_type_float(32);
@@ -2573,12 +2654,13 @@
bld.texel_type = lp_type_int_vec(type.width, type.width * type.length);
}
- if (!static_texture_state->level_zero_only) {
+ if (!static_texture_state->level_zero_only ||
+ !static_sampler_state->max_lod_pos || op_is_lodq) {
derived_sampler_state.min_mip_filter = static_sampler_state->min_mip_filter;
} else {
derived_sampler_state.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
}
- if (op_type == LP_SAMPLER_OP_GATHER) {
+ if (op_is_gather) {
/*
* gather4 is exactly like GL_LINEAR filtering but in the end skipping
* the actual filtering. Using mostly the same paths, so cube face
@@ -2639,11 +2721,11 @@
*/
bld.num_mips = bld.num_lods = 1;
- if ((gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) &&
- (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) &&
- (static_texture_state->target == PIPE_TEXTURE_CUBE ||
- static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) &&
- (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
+ if (bld.no_quad_lod && bld.no_rho_approx &&
+ ((mip_filter != PIPE_TEX_MIPFILTER_NONE && op_is_tex &&
+ (static_texture_state->target == PIPE_TEXTURE_CUBE ||
+ static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY)) ||
+ op_is_lodq)) {
/*
* special case for using per-pixel lod even for implicit lod,
* which is generally never required (ok by APIs) except to please
@@ -2651,6 +2733,8 @@
* can cause derivatives to be different for pixels outside the primitive
* due to the major axis division even if pre-project derivatives are
* looking normal).
+ * For lodq, we do it to simply avoid scalar pack / unpack (albeit for
+ * cube maps we do indeed get per-pixel lod values).
*/
bld.num_mips = type.length;
bld.num_lods = type.length;
@@ -2764,6 +2848,32 @@
newcoords[i] = coords[i];
}
+ if (util_format_is_pure_integer(static_texture_state->format) &&
+ !util_format_has_depth(bld.format_desc) && op_is_tex &&
+ (static_sampler_state->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR ||
+ static_sampler_state->min_img_filter == PIPE_TEX_FILTER_LINEAR ||
+ static_sampler_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR)) {
+ /*
+ * Bail if impossible filtering is specified (the awkward additional
+ * depth check is because it is legal in gallium to have things like S8Z24
+ * here which would say it's pure int even though such formats should sample
+ * the depth component).
+ * In GL such filters make the texture incomplete, this makes it robust
+ * against state trackers which set this up regardless (we'd crash in the
+ * lerp later otherwise).
+ * At least in some apis it may be legal to use such filters with lod
+ * queries and/or gather (at least for gather d3d10 says only the wrap
+ * bits are really used hence filter bits are likely simply ignored).
+ * For fetch, we don't get valid samplers either way here.
+ */
+ unsigned chan;
+ LLVMValueRef zero = lp_build_zero(gallivm, type);
+ for (chan = 0; chan < 4; chan++) {
+ texel_out[chan] = zero;
+ }
+ return;
+ }
+
if (0) {
/* For debug: no-op texture sampling */
lp_build_sample_nop(gallivm,
@@ -2780,33 +2890,9 @@
else {
LLVMValueRef lod_fpart = NULL, lod_positive = NULL;
- LLVMValueRef ilevel0 = NULL, ilevel1 = NULL;
+ LLVMValueRef ilevel0 = NULL, ilevel1 = NULL, lod = NULL;
boolean use_aos;
- if (util_format_is_pure_integer(static_texture_state->format) &&
- !util_format_has_depth(bld.format_desc) &&
- (static_sampler_state->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR ||
- static_sampler_state->min_img_filter == PIPE_TEX_FILTER_LINEAR ||
- static_sampler_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR)) {
- /*
- * Bail if impossible filtering is specified (the awkard additional
- * depth check is because it is legal in gallium to have things like S8Z24
- * here which would say it's pure int despite such formats should sample
- * the depth component).
- * In GL such filters make the texture incomplete, this makes it robust
- * against state trackers which set this up regardless (we'd crash in the
- * lerp later (except for gather)).
- * Must do this after fetch_texel code since with GL state tracker we'll
- * get some junk sampler for buffer textures.
- */
- unsigned chan;
- LLVMValueRef zero = lp_build_zero(gallivm, type);
- for (chan = 0; chan < 4; chan++) {
- texel_out[chan] = zero;
- }
- return;
- }
-
use_aos = util_format_fits_8unorm(bld.format_desc) &&
op_is_tex &&
/* not sure this is strictly needed or simply impossible */
@@ -2847,12 +2933,19 @@
derived_sampler_state.wrap_r);
}
- lp_build_sample_common(&bld, texture_index, sampler_index,
+ lp_build_sample_common(&bld, op_is_lodq, texture_index, sampler_index,
newcoords,
derivs, lod_bias, explicit_lod,
- &lod_positive, &lod_fpart,
+ &lod_positive, &lod, &lod_fpart,
&ilevel0, &ilevel1);
+ if (op_is_lodq) {
+ texel_out[0] = lod_fpart;
+ texel_out[1] = lod;
+ texel_out[2] = texel_out[3] = bld.coord_bld.zero;
+ return;
+ }
+
if (use_aos && static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
/* The aos path doesn't do seamless filtering so simply add cube layer
* to face now.
@@ -2899,6 +2992,9 @@
/* Setup our build context */
memset(&bld4, 0, sizeof bld4);
+ bld4.no_quad_lod = bld.no_quad_lod;
+ bld4.no_rho_approx = bld.no_rho_approx;
+ bld4.no_brilinear = bld.no_brilinear;
bld4.gallivm = bld.gallivm;
bld4.context_ptr = bld.context_ptr;
bld4.static_texture_state = bld.static_texture_state;
@@ -2926,8 +3022,7 @@
bld4.texel_type.length = 4;
bld4.num_mips = bld4.num_lods = 1;
- if ((gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) &&
- (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) &&
+ if (bld4.no_quad_lod && bld4.no_rho_approx &&
(static_texture_state->target == PIPE_TEXTURE_CUBE ||
static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) &&
(op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
diff -Nru mesa-17.2.4/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c mesa-17.3.3/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
--- mesa-17.2.4/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c 2018-01-18 21:30:28.000000000 +0000
@@ -159,34 +159,6 @@
dp2_emit /* emit */
};
-/* TGSI_OPCODE_DP2A */
-static void
-dp2a_fetch_args(
- struct lp_build_tgsi_context * bld_base,
- struct lp_build_emit_data * emit_data)
-{
- dp_fetch_args(bld_base, emit_data, 2);
- emit_data->args[5] = lp_build_emit_fetch(bld_base, emit_data->inst,
- 2, TGSI_CHAN_X);
-}
-
-static void
-dp2a_emit(
- const struct lp_build_tgsi_action * action,
- struct lp_build_tgsi_context * bld_base,
- struct lp_build_emit_data * emit_data)
-{
- LLVMValueRef tmp;
- tmp = lp_build_emit_llvm(bld_base, TGSI_OPCODE_DP2, emit_data);
- emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD,
- emit_data->args[5], tmp);
-}
-
-static struct lp_build_tgsi_action dp2a_action = {
- dp2a_fetch_args, /* fetch_args */
- dp2a_emit /* emit */
-};
-
/* TGSI_OPCODE_DP3 */
static void
dp3_fetch_args(
@@ -262,22 +234,6 @@
dp4_emit /* emit */
};
-/* TGSI_OPCODE_DPH */
-static void
-dph_fetch_args(
- struct lp_build_tgsi_context * bld_base,
- struct lp_build_emit_data * emit_data)
-{
- dp_fetch_args(bld_base, emit_data, 4);
- /* src0.w */
- emit_data->args[3] = bld_base->base.one;
-}
-
-const struct lp_build_tgsi_action dph_action = {
- dph_fetch_args, /* fetch_args */
- dp4_emit /* emit */
-};
-
/* TGSI_OPCODE_DST */
static void
dst_fetch_args(
@@ -730,31 +686,6 @@
sqrt_emit /* emit */
};
-/* TGSI_OPCODE_SCS */
-static void
-scs_emit(
- const struct lp_build_tgsi_action * action,
- struct lp_build_tgsi_context * bld_base,
- struct lp_build_emit_data * emit_data)
-{
- /* dst.x */
- emit_data->output[TGSI_CHAN_X] = lp_build_emit_llvm_unary(bld_base,
- TGSI_OPCODE_COS, emit_data->args[0]);
- /* dst.y */
- emit_data->output[TGSI_CHAN_Y] = lp_build_emit_llvm_unary(bld_base,
- TGSI_OPCODE_SIN, emit_data->args[0]);
- /* dst.z */
- emit_data->output[TGSI_CHAN_Z] = bld_base->base.zero;
-
- /* dst.w */
- emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
-}
-
-const struct lp_build_tgsi_action scs_action = {
- scalar_unary_fetch_args, /* fetch_args */
- scs_emit /* emit */
-};
-
/* TGSI_OPCODE_F2U */
static void
f2u_emit(
@@ -902,61 +833,6 @@
emit_data->args[1], emit_data->args[0], "");
}
-/* TGSI_OPCODE_XPD */
-
-static void
-xpd_fetch_args(
- struct lp_build_tgsi_context * bld_base,
- struct lp_build_emit_data * emit_data)
-{
- dp_fetch_args(bld_base, emit_data, 3);
-}
-
-/**
- * (a * b) - (c * d)
- */
-static LLVMValueRef
-xpd_helper(
- struct lp_build_tgsi_context * bld_base,
- LLVMValueRef a,
- LLVMValueRef b,
- LLVMValueRef c,
- LLVMValueRef d)
-{
- LLVMValueRef tmp0, tmp1;
-
- tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL, a, b);
- tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL, c, d);
-
- return lp_build_sub(&bld_base->base, tmp0, tmp1);
-}
-
-static void
-xpd_emit(
- const struct lp_build_tgsi_action * action,
- struct lp_build_tgsi_context * bld_base,
- struct lp_build_emit_data * emit_data)
-{
- emit_data->output[TGSI_CHAN_X] = xpd_helper(bld_base,
- emit_data->args[1] /* src0.y */, emit_data->args[5] /* src1.z */,
- emit_data->args[4] /* src1.y */, emit_data->args[2] /* src0.z */);
-
- emit_data->output[TGSI_CHAN_Y] = xpd_helper(bld_base,
- emit_data->args[2] /* src0.z */, emit_data->args[3] /* src1.x */,
- emit_data->args[5] /* src1.z */, emit_data->args[0] /* src0.x */);
-
- emit_data->output[TGSI_CHAN_Z] = xpd_helper(bld_base,
- emit_data->args[0] /* src0.x */, emit_data->args[4] /* src1.y */,
- emit_data->args[3] /* src1.x */, emit_data->args[1] /* src0.y */);
-
- emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
-}
-
-const struct lp_build_tgsi_action xpd_action = {
- xpd_fetch_args, /* fetch_args */
- xpd_emit /* emit */
-};
-
/* TGSI_OPCODE_D2F */
static void
d2f_emit(
@@ -1286,8 +1162,6 @@
bld_base->op_actions[TGSI_OPCODE_DP2] = dp2_action;
bld_base->op_actions[TGSI_OPCODE_DP3] = dp3_action;
bld_base->op_actions[TGSI_OPCODE_DP4] = dp4_action;
- bld_base->op_actions[TGSI_OPCODE_DP2A] = dp2a_action;
- bld_base->op_actions[TGSI_OPCODE_DPH] = dph_action;
bld_base->op_actions[TGSI_OPCODE_DST] = dst_action;
bld_base->op_actions[TGSI_OPCODE_EXP] = exp_action;
bld_base->op_actions[TGSI_OPCODE_LIT] = lit_action;
@@ -1296,11 +1170,8 @@
bld_base->op_actions[TGSI_OPCODE_RSQ] = rsq_action;
bld_base->op_actions[TGSI_OPCODE_SQRT] = sqrt_action;
bld_base->op_actions[TGSI_OPCODE_POW] = pow_action;
- bld_base->op_actions[TGSI_OPCODE_SCS] = scs_action;
bld_base->op_actions[TGSI_OPCODE_UP2H] = up2h_action;
- bld_base->op_actions[TGSI_OPCODE_XPD] = xpd_action;
- bld_base->op_actions[TGSI_OPCODE_BREAKC].fetch_args = scalar_unary_fetch_args;
bld_base->op_actions[TGSI_OPCODE_SWITCH].fetch_args = scalar_unary_fetch_args;
bld_base->op_actions[TGSI_OPCODE_CASE].fetch_args = scalar_unary_fetch_args;
bld_base->op_actions[TGSI_OPCODE_COS].fetch_args = scalar_unary_fetch_args;
diff -Nru mesa-17.2.4/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h mesa-17.3.3/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h
--- mesa-17.2.4/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h 2018-01-18 21:30:28.000000000 +0000
@@ -82,6 +82,11 @@
LLVMValueRef output[4];
/**
+ * Secondary output for instruction that have a second destination register.
+ */
+ LLVMValueRef output1[4];
+
+ /**
* The current instruction that is being 'executed'.
*/
const struct tgsi_full_instruction * inst;
diff -Nru mesa-17.2.4/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c mesa-17.3.3/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
--- mesa-17.2.4/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c 2018-01-18 21:30:28.000000000 +0000
@@ -554,9 +554,6 @@
dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
break;
- case TGSI_OPCODE_DP2A:
- return FALSE;
-
case TGSI_OPCODE_FRC:
src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
tmp0 = lp_build_floor(&bld->bld_base.base, src0);
@@ -593,12 +590,6 @@
dst0 = lp_build_pow(&bld->bld_base.base, src0, src1);
break;
- case TGSI_OPCODE_XPD:
- return FALSE;
-
- case TGSI_OPCODE_DPH:
- return FALSE;
-
case TGSI_OPCODE_COS:
src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
@@ -728,9 +719,6 @@
dst0 = lp_build_select(&bld->bld_base.base, tmp0, src1, src2);
break;
- case TGSI_OPCODE_SCS:
- return FALSE;
-
case TGSI_OPCODE_TXB:
dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS);
break;
@@ -776,18 +764,6 @@
case TGSI_OPCODE_ENDSUB:
return FALSE;
- case TGSI_OPCODE_PUSHA:
- /* deprecated? */
- assert(0);
- return FALSE;
- break;
-
- case TGSI_OPCODE_POPA:
- /* deprecated? */
- assert(0);
- return FALSE;
- break;
-
case TGSI_OPCODE_CEIL:
src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
dst0 = lp_build_ceil(&bld->bld_base.base, src0);
@@ -838,11 +814,6 @@
return FALSE;
break;
- case TGSI_OPCODE_SAD:
- assert(0);
- return FALSE;
- break;
-
case TGSI_OPCODE_TXF:
assert(0);
return FALSE;
@@ -985,7 +956,7 @@
tgsi_get_opcode_info(instr->Instruction.Opcode);
if (!lp_emit_instruction_aos(&bld, instr, opcode_info, &pc))
_debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
- opcode_info->mnemonic);
+ tgsi_get_opcode_name(instr->Instruction.Opcode));
}
if (0) {
diff -Nru mesa-17.2.4/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c mesa-17.3.3/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
--- mesa-17.2.4/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c 2018-01-18 21:30:28.000000000 +0000
@@ -201,8 +201,8 @@
static int get_src_chan_idx(unsigned opcode,
int dst_chan_index)
{
- enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(opcode);
- enum tgsi_opcode_type stype = tgsi_opcode_infer_src_type(opcode);
+ enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(opcode, 0);
+ enum tgsi_opcode_type stype = tgsi_opcode_infer_src_type(opcode, 0);
if (!tgsi_type_is_64bit(dtype) && !tgsi_type_is_64bit(stype))
return dst_chan_index;
@@ -251,9 +251,6 @@
case TGSI_OPCODE_UP2US:
case TGSI_OPCODE_UP4B:
case TGSI_OPCODE_UP4UB:
- case TGSI_OPCODE_PUSHA:
- case TGSI_OPCODE_POPA:
- case TGSI_OPCODE_SAD:
/* deprecated? */
assert(0);
return FALSE;
@@ -267,11 +264,17 @@
memset(&emit_data, 0, sizeof(emit_data));
- assert(info->num_dst <= 1);
+ assert(info->num_dst <= 2);
if (info->num_dst) {
TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
emit_data.output[chan_index] = bld_base->base.undef;
}
+
+ if (info->num_dst >= 2) {
+ TGSI_FOR_EACH_DST1_ENABLED_CHANNEL( inst, chan_index ) {
+ emit_data.output1[chan_index] = bld_base->base.undef;
+ }
+ }
}
emit_data.inst = inst;
@@ -312,11 +315,21 @@
TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) {
emit_data.output[chan_index] = val;
}
+
+ if (info->num_dst >= 2) {
+ val = emit_data.output1[0];
+ memset(emit_data.output1, 0, sizeof(emit_data.output1));
+ TGSI_FOR_EACH_DST1_ENABLED_CHANNEL(inst, chan_index) {
+ emit_data.output1[chan_index] = val;
+ }
+ }
}
}
if (info->num_dst > 0 && info->opcode != TGSI_OPCODE_STORE) {
- bld_base->emit_store(bld_base, inst, info, emit_data.output);
+ bld_base->emit_store(bld_base, inst, info, 0, emit_data.output);
+ if (info->num_dst >= 2)
+ bld_base->emit_store(bld_base, inst, info, 1, emit_data.output1);
}
return TRUE;
}
@@ -423,7 +436,7 @@
{
const struct tgsi_full_src_register *reg = &inst->Src[src_op];
enum tgsi_opcode_type stype =
- tgsi_opcode_infer_src_type(inst->Instruction.Opcode);
+ tgsi_opcode_infer_src_type(inst->Instruction.Opcode, src_op);
return lp_build_emit_fetch_src(bld_base, reg, stype, chan_index);
}
@@ -530,11 +543,9 @@
while (bld_base->pc != -1) {
const struct tgsi_full_instruction *instr =
bld_base->instructions + bld_base->pc;
- const struct tgsi_opcode_info *opcode_info =
- tgsi_get_opcode_info(instr->Instruction.Opcode);
if (!lp_build_tgsi_inst_llvm(bld_base, instr)) {
_debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
- opcode_info->mnemonic);
+ tgsi_get_opcode_name(instr->Instruction.Opcode));
return FALSE;
}
}
diff -Nru mesa-17.2.4/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h mesa-17.3.3/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
--- mesa-17.2.4/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h 2018-01-18 21:30:28.000000000 +0000
@@ -370,6 +370,7 @@
void (*emit_store)(struct lp_build_tgsi_context *,
const struct tgsi_full_instruction *,
const struct tgsi_opcode_info *,
+ unsigned index,
LLVMValueRef dst[4]);
void (*emit_declaration)(struct lp_build_tgsi_context *,
diff -Nru mesa-17.2.4/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c mesa-17.3.3/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c
--- mesa-17.2.4/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c 2018-01-18 21:30:28.000000000 +0000
@@ -438,10 +438,8 @@
case TGSI_OPCODE_ENDIF:
case TGSI_OPCODE_BGNLOOP:
case TGSI_OPCODE_BRK:
- case TGSI_OPCODE_BREAKC:
case TGSI_OPCODE_CONT:
case TGSI_OPCODE_ENDLOOP:
- case TGSI_OPCODE_CALLNZ:
case TGSI_OPCODE_CAL:
case TGSI_OPCODE_BGNSUB:
case TGSI_OPCODE_ENDSUB:
diff -Nru mesa-17.2.4/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c mesa-17.3.3/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
--- mesa-17.2.4/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c 2018-01-18 21:30:28.000000000 +0000
@@ -402,30 +402,6 @@
lp_exec_mask_update(mask);
}
-static void lp_exec_break_condition(struct lp_exec_mask *mask,
- LLVMValueRef cond)
-{
- LLVMBuilderRef builder = mask->bld->gallivm->builder;
- struct function_ctx *ctx = func_ctx(mask);
- LLVMValueRef cond_mask = LLVMBuildAnd(builder,
- mask->exec_mask,
- cond, "cond_mask");
- cond_mask = LLVMBuildNot(builder, cond_mask, "break_cond");
-
- if (ctx->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
- mask->break_mask = LLVMBuildAnd(builder,
- mask->break_mask,
- cond_mask, "breakc_full");
- }
- else {
- mask->switch_mask = LLVMBuildAnd(builder,
- mask->switch_mask,
- cond_mask, "breakc_switch");
- }
-
- lp_exec_mask_update(mask);
-}
-
static void lp_exec_continue(struct lp_exec_mask *mask)
{
LLVMBuilderRef builder = mask->bld->gallivm->builder;
@@ -1296,9 +1272,9 @@
/**
* Fetch 64-bit values from two separate channels.
* 64-bit values are stored split across two channels, like xy and zw.
- * This function creates a set of 16 floats,
+ * This function creates a set of vec_length*2 floats,
* extracts the values from the two channels,
- * puts them in the correct place, then casts to 8 64-bits.
+ * puts them in the correct place, then casts to vec_length 64-bits.
*/
static LLVMValueRef
emit_fetch_64bit(
@@ -1313,9 +1289,9 @@
LLVMValueRef res;
struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
int i;
- LLVMValueRef shuffles[16];
+ LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
int len = bld_base->base.type.length * 2;
- assert(len <= 16);
+ assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));
for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
shuffles[i] = lp_build_const_int32(gallivm, i / 2);
@@ -1715,7 +1691,7 @@
}
/**
- * store an array of 8 64-bit into two arrays of 8 floats
+ * store an array of vec-length 64-bit into two arrays of vec_length floats
* i.e.
* value is d0, d1, d2, d3 etc.
* each 64-bit has high and low pieces x, y
@@ -1734,8 +1710,8 @@
struct lp_build_context *float_bld = &bld_base->base;
unsigned i;
LLVMValueRef temp, temp2;
- LLVMValueRef shuffles[8];
- LLVMValueRef shuffles2[8];
+ LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
+ LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
for (i = 0; i < bld_base->base.type.length; i++) {
shuffles[i] = lp_build_const_int32(gallivm, i * 2);
@@ -1775,7 +1751,7 @@
struct lp_build_context *float_bld = &bld_base->base;
struct lp_build_context *int_bld = &bld_base->int_bld;
LLVMValueRef indirect_index = NULL;
- enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
+ enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
/*
* Apply saturation.
@@ -1937,19 +1913,18 @@
struct lp_build_tgsi_context * bld_base,
const struct tgsi_full_instruction * inst,
const struct tgsi_opcode_info * info,
+ unsigned index,
LLVMValueRef dst[4])
{
- unsigned chan_index;
- enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
-
- if(info->num_dst) {
- TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
- if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
- continue;
- emit_store_chan(bld_base, inst, 0, chan_index, dst[chan_index]);
- }
+ unsigned writemask = inst->Dst[index].Register.WriteMask;
+ while (writemask) {
+ unsigned chan_index = u_bit_scan(&writemask);
+ if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
+ continue;
+ emit_store_chan(bld_base, inst, index, chan_index, dst[chan_index]);
}
}
@@ -2256,6 +2231,7 @@
const struct tgsi_full_instruction *inst,
enum lp_build_tex_modifier modifier,
boolean compare,
+ enum lp_sampler_op_type sample_type,
LLVMValueRef *texel)
{
struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
@@ -2269,7 +2245,7 @@
unsigned num_offsets, num_derivs, i;
unsigned layer_coord = 0;
- unsigned sample_key = LP_SAMPLER_OP_TEXTURE << LP_SAMPLER_OP_TYPE_SHIFT;
+ unsigned sample_key = sample_type << LP_SAMPLER_OP_TYPE_SHIFT;
memset(¶ms, 0, sizeof(params));
@@ -2664,7 +2640,6 @@
opcode == TGSI_OPCODE_SAMPLE_L ||
opcode == TGSI_OPCODE_SVIEWINFO ||
opcode == TGSI_OPCODE_CAL ||
- opcode == TGSI_OPCODE_CALLNZ ||
opcode == TGSI_OPCODE_IF ||
opcode == TGSI_OPCODE_UIF ||
opcode == TGSI_OPCODE_BGNLOOP ||
@@ -3170,6 +3145,18 @@
}
static void
+lodq_emit(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+
+ emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
+ emit_data->output, 1, LP_SAMPLER_OP_LODQ);
+}
+
+static void
txq_emit(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
@@ -3211,7 +3198,7 @@
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
- FALSE, emit_data->output);
+ FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
}
static void
@@ -3223,7 +3210,7 @@
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
- FALSE, emit_data->output);
+ FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
}
static void
@@ -3235,7 +3222,7 @@
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
- TRUE, emit_data->output);
+ TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
}
static void
@@ -3247,7 +3234,7 @@
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_ZERO,
- TRUE, emit_data->output);
+ TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
}
static void
@@ -3259,7 +3246,7 @@
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
- FALSE, emit_data->output);
+ FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
}
static void
@@ -3271,7 +3258,19 @@
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
- FALSE, emit_data->output);
+ FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
+}
+
+static void
+gather4_emit(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+
+ emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
+ FALSE, LP_SAMPLER_OP_GATHER, emit_data->output);
}
static void
@@ -3285,6 +3284,18 @@
emit_size_query(bld, emit_data->inst, emit_data->output, TRUE);
}
+static void
+lod_emit(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+
+ emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
+ FALSE, LP_SAMPLER_OP_LODQ, emit_data->output);
+}
+
static LLVMValueRef
mask_vec(struct lp_build_tgsi_context *bld_base)
{
@@ -3479,24 +3490,6 @@
}
static void
-breakc_emit(
- const struct lp_build_tgsi_action * action,
- struct lp_build_tgsi_context * bld_base,
- struct lp_build_emit_data * emit_data)
-{
- struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
- LLVMBuilderRef builder = bld_base->base.gallivm->builder;
- struct lp_build_context *uint_bld = &bld_base->uint_bld;
- LLVMValueRef unsigned_cond =
- LLVMBuildBitCast(builder, emit_data->args[0], uint_bld->vec_type, "");
- LLVMValueRef cond = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
- unsigned_cond,
- uint_bld->zero);
-
- lp_exec_break_condition(&bld->exec_mask, cond);
-}
-
-static void
if_emit(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
@@ -3877,7 +3870,6 @@
bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
- bld.bld_base.op_actions[TGSI_OPCODE_BREAKC].emit = breakc_emit;
bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
bld.bld_base.op_actions[TGSI_OPCODE_CASE].emit = case_emit;
bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
@@ -3906,6 +3898,7 @@
bld.bld_base.op_actions[TGSI_OPCODE_TXB2].emit = txb2_emit;
bld.bld_base.op_actions[TGSI_OPCODE_TXL2].emit = txl2_emit;
bld.bld_base.op_actions[TGSI_OPCODE_TG4].emit = tg4_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_LODQ].emit = lodq_emit;
/* DX10 sampling ops */
bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE].emit = sample_emit;
bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_B].emit = sample_b_emit;
@@ -3915,7 +3908,10 @@
bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit;
bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I_MS].emit = sample_i_emit;
bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_GATHER4].emit = gather4_emit;
bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_LOD].emit = lod_emit;
+
if (gs_iface) {
/* There's no specific value for this because it should always
diff -Nru mesa-17.2.4/src/gallium/auxiliary/gallivm/lp_bld_type.h mesa-17.3.3/src/gallium/auxiliary/gallivm/lp_bld_type.h
--- mesa-17.2.4/src/gallium/auxiliary/gallivm/lp_bld_type.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/gallivm/lp_bld_type.h 2018-01-18 21:30:28.000000000 +0000
@@ -59,7 +59,7 @@
* Should only be used when lp_native_vector_width isn't available,
* i.e. sizing/alignment of non-malloced variables.
*/
-#define LP_MAX_VECTOR_WIDTH 256
+#define LP_MAX_VECTOR_WIDTH 512
/**
* Minimum vector alignment for static variable alignment
@@ -67,7 +67,7 @@
* It should always be a constant equal to LP_MAX_VECTOR_WIDTH/8. An
* expression is non-portable.
*/
-#define LP_MIN_VECTOR_ALIGN 32
+#define LP_MIN_VECTOR_ALIGN 64
/**
* Several functions can only cope with vectors of length up to this value.
diff -Nru mesa-17.2.4/src/gallium/auxiliary/hud/hud_context.c mesa-17.3.3/src/gallium/auxiliary/hud/hud_context.c
--- mesa-17.2.4/src/gallium/auxiliary/hud/hud_context.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/hud/hud_context.c 2018-01-18 21:30:28.000000000 +0000
@@ -1618,17 +1618,17 @@
/* [0] = color,
* [1] = (2/fb_width, 2/fb_height, xoffset, yoffset)
* [2] = (xscale, yscale, 0, 0) */
- "DCL CONST[0..2]\n"
+ "DCL CONST[0][0..2]\n"
"DCL TEMP[0]\n"
"IMM[0] FLT32 { -1, 0, 0, 1 }\n"
/* v = in * (xscale, yscale) + (xoffset, yoffset) */
- "MAD TEMP[0].xy, IN[0], CONST[2].xyyy, CONST[1].zwww\n"
+ "MAD TEMP[0].xy, IN[0], CONST[0][2].xyyy, CONST[0][1].zwww\n"
/* pos = v * (2 / fb_width, 2 / fb_height) - (1, 1) */
- "MAD OUT[0].xy, TEMP[0], CONST[1].xyyy, IMM[0].xxxx\n"
+ "MAD OUT[0].xy, TEMP[0], CONST[0][1].xyyy, IMM[0].xxxx\n"
"MOV OUT[0].zw, IMM[0]\n"
- "MOV OUT[1], CONST[0]\n"
+ "MOV OUT[1], CONST[0][0]\n"
"MOV OUT[2], IN[1]\n"
"END\n"
};
diff -Nru mesa-17.2.4/src/gallium/auxiliary/Makefile.in mesa-17.3.3/src/gallium/auxiliary/Makefile.in
--- mesa-17.2.4/src/gallium/auxiliary/Makefile.in 2017-10-30 14:49:59.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/Makefile.in 2018-01-18 21:30:39.000000000 +0000
@@ -122,7 +122,8 @@
@HAVE_PLATFORM_DRM_TRUE@@NEED_GALLIUM_VL_TRUE@@NEED_GALLIUM_VL_WINSYS_TRUE@am__append_13 = $(VL_WINSYS_DRM_SOURCES)
subdir = src/gallium/auxiliary
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_gnu_make.m4 \
+am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_compile_flag.m4 \
+ $(top_srcdir)/m4/ax_check_gnu_make.m4 \
$(top_srcdir)/m4/ax_check_python_mako_module.m4 \
$(top_srcdir)/m4/ax_gcc_builtin.m4 \
$(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
@@ -203,8 +204,8 @@
tgsi/tgsi_dump.c tgsi/tgsi_dump.h tgsi/tgsi_exec.c \
tgsi/tgsi_exec.h tgsi/tgsi_emulate.c tgsi/tgsi_emulate.h \
tgsi/tgsi_from_mesa.c tgsi/tgsi_from_mesa.h tgsi/tgsi_info.c \
- tgsi/tgsi_info.h tgsi/tgsi_iterate.c tgsi/tgsi_iterate.h \
- tgsi/tgsi_lowering.c tgsi/tgsi_lowering.h \
+ tgsi/tgsi_info.h tgsi/tgsi_info_opcodes.h tgsi/tgsi_iterate.c \
+ tgsi/tgsi_iterate.h tgsi/tgsi_lowering.c tgsi/tgsi_lowering.h \
tgsi/tgsi_opcode_tmp.h tgsi/tgsi_parse.c tgsi/tgsi_parse.h \
tgsi/tgsi_point_sprite.c tgsi/tgsi_point_sprite.h \
tgsi/tgsi_sanity.c tgsi/tgsi_sanity.h tgsi/tgsi_scan.c \
@@ -240,22 +241,23 @@
util/u_framebuffer.h util/u_gen_mipmap.c util/u_gen_mipmap.h \
util/u_half.h util/u_handle_table.c util/u_handle_table.h \
util/u_hash_table.c util/u_hash_table.h util/u_helpers.c \
- util/u_helpers.h util/u_index_modify.c util/u_index_modify.h \
- util/u_inlines.h util/u_keymap.c util/u_keymap.h \
- util/u_linear.c util/u_linear.h util/u_math.c util/u_math.h \
- util/u_memory.h util/u_mm.c util/u_mm.h util/u_network.c \
- util/u_network.h util/u_pack_color.h util/u_pointer.h \
- util/u_prim.h util/u_prim_restart.c util/u_prim_restart.h \
- util/u_pstipple.c util/u_pstipple.h util/u_pwr8.h \
- util/u_range.h util/u_rect.h util/u_resource.c \
- util/u_resource.h util/u_ringbuffer.c util/u_ringbuffer.h \
- util/u_sampler.c util/u_sampler.h util/u_simple_shaders.c \
- util/u_simple_shaders.h util/u_split_prim.h util/u_sse.h \
- util/u_suballoc.c util/u_suballoc.h util/u_surface.c \
- util/u_surface.h util/u_surfaces.c util/u_surfaces.h \
- util/u_tests.c util/u_tests.h util/u_texture.c \
- util/u_texture.h util/u_tile.c util/u_tile.h util/u_time.h \
- util/u_transfer.c util/u_transfer.h util/u_threaded_context.c \
+ util/u_helpers.h util/u_idalloc.c util/u_idalloc.h \
+ util/u_index_modify.c util/u_index_modify.h util/u_inlines.h \
+ util/u_linear.c util/u_linear.h util/u_log.c util/u_log.h \
+ util/u_math.c util/u_math.h util/u_memory.h util/u_mm.c \
+ util/u_mm.h util/u_network.c util/u_network.h \
+ util/u_pack_color.h util/u_pointer.h util/u_prim.h \
+ util/u_prim_restart.c util/u_prim_restart.h util/u_pstipple.c \
+ util/u_pstipple.h util/u_pwr8.h util/u_range.h util/u_rect.h \
+ util/u_resource.c util/u_resource.h util/u_ringbuffer.c \
+ util/u_ringbuffer.h util/u_sampler.c util/u_sampler.h \
+ util/u_simple_shaders.c util/u_simple_shaders.h \
+ util/u_split_prim.h util/u_sse.h util/u_suballoc.c \
+ util/u_suballoc.h util/u_surface.c util/u_surface.h \
+ util/u_surfaces.c util/u_surfaces.h util/u_tests.c \
+ util/u_tests.h util/u_texture.c util/u_texture.h util/u_tile.c \
+ util/u_tile.h util/u_time.h util/u_transfer.c \
+ util/u_transfer.h util/u_threaded_context.c \
util/u_threaded_context.h util/u_threaded_context_calls.h \
util/u_upload_mgr.c util/u_upload_mgr.h util/u_vbuf.c \
util/u_vbuf.h util/u_video.h util/u_viewport.h \
@@ -350,8 +352,8 @@
util/u_format_tests.lo util/u_format_yuv.lo \
util/u_format_zs.lo util/u_framebuffer.lo util/u_gen_mipmap.lo \
util/u_handle_table.lo util/u_hash_table.lo util/u_helpers.lo \
- util/u_index_modify.lo util/u_keymap.lo util/u_linear.lo \
- util/u_math.lo util/u_mm.lo util/u_network.lo \
+ util/u_idalloc.lo util/u_index_modify.lo util/u_linear.lo \
+ util/u_log.lo util/u_math.lo util/u_mm.lo util/u_network.lo \
util/u_prim_restart.lo util/u_pstipple.lo util/u_resource.lo \
util/u_ringbuffer.lo util/u_sampler.lo \
util/u_simple_shaders.lo util/u_suballoc.lo util/u_surface.lo \
@@ -673,9 +675,9 @@
NVVIEUX_LIBS = @NVVIEUX_LIBS@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
-OMX_CFLAGS = @OMX_CFLAGS@
-OMX_LIBS = @OMX_LIBS@
-OMX_LIB_INSTALL_DIR = @OMX_LIB_INSTALL_DIR@
+OMX_BELLAGIO_CFLAGS = @OMX_BELLAGIO_CFLAGS@
+OMX_BELLAGIO_LIBS = @OMX_BELLAGIO_LIBS@
+OMX_BELLAGIO_LIB_INSTALL_DIR = @OMX_BELLAGIO_LIB_INSTALL_DIR@
OPENCL_LIBNAME = @OPENCL_LIBNAME@
OPENCL_VERSION = @OPENCL_VERSION@
OSMESA_LIB = @OSMESA_LIB@
@@ -729,6 +731,8 @@
VA_LIB_INSTALL_DIR = @VA_LIB_INSTALL_DIR@
VA_MAJOR = @VA_MAJOR@
VA_MINOR = @VA_MINOR@
+VC5_SIMULATOR_CFLAGS = @VC5_SIMULATOR_CFLAGS@
+VC5_SIMULATOR_LIBS = @VC5_SIMULATOR_LIBS@
VDPAU_CFLAGS = @VDPAU_CFLAGS@
VDPAU_LIBS = @VDPAU_LIBS@
VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@
@@ -740,12 +744,15 @@
VL_CFLAGS = @VL_CFLAGS@
VL_LIBS = @VL_LIBS@
VULKAN_ICD_INSTALL_DIR = @VULKAN_ICD_INSTALL_DIR@
-WAYLAND_CFLAGS = @WAYLAND_CFLAGS@
-WAYLAND_LIBS = @WAYLAND_LIBS@
+WAYLAND_CLIENT_CFLAGS = @WAYLAND_CLIENT_CFLAGS@
+WAYLAND_CLIENT_LIBS = @WAYLAND_CLIENT_LIBS@
WAYLAND_PROTOCOLS_DATADIR = @WAYLAND_PROTOCOLS_DATADIR@
WAYLAND_SCANNER = @WAYLAND_SCANNER@
WAYLAND_SCANNER_CFLAGS = @WAYLAND_SCANNER_CFLAGS@
WAYLAND_SCANNER_LIBS = @WAYLAND_SCANNER_LIBS@
+WAYLAND_SERVER_CFLAGS = @WAYLAND_SERVER_CFLAGS@
+WAYLAND_SERVER_LIBS = @WAYLAND_SERVER_LIBS@
+WNO_OVERRIDE_INIT = @WNO_OVERRIDE_INIT@
X11_INCLUDES = @X11_INCLUDES@
XA_MAJOR = @XA_MAJOR@
XA_MINOR = @XA_MINOR@
@@ -982,6 +989,7 @@
tgsi/tgsi_from_mesa.h \
tgsi/tgsi_info.c \
tgsi/tgsi_info.h \
+ tgsi/tgsi_info_opcodes.h \
tgsi/tgsi_iterate.c \
tgsi/tgsi_iterate.h \
tgsi/tgsi_lowering.c \
@@ -1084,13 +1092,15 @@
util/u_hash_table.h \
util/u_helpers.c \
util/u_helpers.h \
+ util/u_idalloc.c \
+ util/u_idalloc.h \
util/u_index_modify.c \
util/u_index_modify.h \
util/u_inlines.h \
- util/u_keymap.c \
- util/u_keymap.h \
util/u_linear.c \
util/u_linear.h \
+ util/u_log.c \
+ util/u_log.h \
util/u_math.c \
util/u_math.h \
util/u_memory.h \
@@ -1317,6 +1327,8 @@
-I$(top_srcdir)/src/gallium/auxiliary \
-I$(top_srcdir)/src/gallium/drivers \
-I$(top_srcdir)/src/gallium/winsys \
+ -I$(top_builddir)/src/util/ \
+ -I$(top_builddir)/src/gallium/drivers/ \
$(DEFINES) \
$(PTHREAD_CFLAGS) \
$(LIBDRM_CFLAGS) \
@@ -1733,10 +1745,11 @@
util/u_hash_table.lo: util/$(am__dirstamp) \
util/$(DEPDIR)/$(am__dirstamp)
util/u_helpers.lo: util/$(am__dirstamp) util/$(DEPDIR)/$(am__dirstamp)
+util/u_idalloc.lo: util/$(am__dirstamp) util/$(DEPDIR)/$(am__dirstamp)
util/u_index_modify.lo: util/$(am__dirstamp) \
util/$(DEPDIR)/$(am__dirstamp)
-util/u_keymap.lo: util/$(am__dirstamp) util/$(DEPDIR)/$(am__dirstamp)
util/u_linear.lo: util/$(am__dirstamp) util/$(DEPDIR)/$(am__dirstamp)
+util/u_log.lo: util/$(am__dirstamp) util/$(DEPDIR)/$(am__dirstamp)
util/u_math.lo: util/$(am__dirstamp) util/$(DEPDIR)/$(am__dirstamp)
util/u_mm.lo: util/$(am__dirstamp) util/$(DEPDIR)/$(am__dirstamp)
util/u_network.lo: util/$(am__dirstamp) util/$(DEPDIR)/$(am__dirstamp)
@@ -2138,9 +2151,10 @@
@AMDEP_TRUE@@am__include@ @am__quote@util/$(DEPDIR)/u_handle_table.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@util/$(DEPDIR)/u_hash_table.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@util/$(DEPDIR)/u_helpers.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@util/$(DEPDIR)/u_idalloc.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@util/$(DEPDIR)/u_index_modify.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@util/$(DEPDIR)/u_keymap.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@util/$(DEPDIR)/u_linear.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@util/$(DEPDIR)/u_log.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@util/$(DEPDIR)/u_math.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@util/$(DEPDIR)/u_mm.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@util/$(DEPDIR)/u_network.Plo@am__quote@
diff -Nru mesa-17.2.4/src/gallium/auxiliary/Makefile.sources mesa-17.3.3/src/gallium/auxiliary/Makefile.sources
--- mesa-17.2.4/src/gallium/auxiliary/Makefile.sources 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/Makefile.sources 2018-01-18 21:30:28.000000000 +0000
@@ -151,6 +151,7 @@
tgsi/tgsi_from_mesa.h \
tgsi/tgsi_info.c \
tgsi/tgsi_info.h \
+ tgsi/tgsi_info_opcodes.h \
tgsi/tgsi_iterate.c \
tgsi/tgsi_iterate.h \
tgsi/tgsi_lowering.c \
@@ -253,13 +254,15 @@
util/u_hash_table.h \
util/u_helpers.c \
util/u_helpers.h \
+ util/u_idalloc.c \
+ util/u_idalloc.h \
util/u_index_modify.c \
util/u_index_modify.h \
util/u_inlines.h \
- util/u_keymap.c \
- util/u_keymap.h \
util/u_linear.c \
util/u_linear.h \
+ util/u_log.c \
+ util/u_log.h \
util/u_math.c \
util/u_math.h \
util/u_memory.h \
diff -Nru mesa-17.2.4/src/gallium/auxiliary/meson.build mesa-17.3.3/src/gallium/auxiliary/meson.build
--- mesa-17.2.4/src/gallium/auxiliary/meson.build 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/meson.build 2018-02-01 16:17:32.000000000 +0000
@@ -0,0 +1,530 @@
+# Copyright © 2017 Dylan Baker
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+inc_gallium_aux = include_directories('.')
+
+files_libgallium = files(
+ 'cso_cache/cso_cache.c',
+ 'cso_cache/cso_cache.h',
+ 'cso_cache/cso_context.c',
+ 'cso_cache/cso_context.h',
+ 'cso_cache/cso_hash.c',
+ 'cso_cache/cso_hash.h',
+ 'draw/draw_cliptest_tmp.h',
+ 'draw/draw_context.c',
+ 'draw/draw_context.h',
+ 'draw/draw_decompose_tmp.h',
+ 'draw/draw_fs.c',
+ 'draw/draw_fs.h',
+ 'draw/draw_gs.c',
+ 'draw/draw_gs.h',
+ 'draw/draw_gs_tmp.h',
+ 'draw/draw_pipe_aaline.c',
+ 'draw/draw_pipe_aapoint.c',
+ 'draw/draw_pipe.c',
+ 'draw/draw_pipe.h',
+ 'draw/draw_pipe_clip.c',
+ 'draw/draw_pipe_cull.c',
+ 'draw/draw_pipe_flatshade.c',
+ 'draw/draw_pipe_offset.c',
+ 'draw/draw_pipe_pstipple.c',
+ 'draw/draw_pipe_stipple.c',
+ 'draw/draw_pipe_twoside.c',
+ 'draw/draw_pipe_unfilled.c',
+ 'draw/draw_pipe_util.c',
+ 'draw/draw_pipe_validate.c',
+ 'draw/draw_pipe_vbuf.c',
+ 'draw/draw_pipe_wide_line.c',
+ 'draw/draw_pipe_wide_point.c',
+ 'draw/draw_prim_assembler.c',
+ 'draw/draw_prim_assembler.h',
+ 'draw/draw_prim_assembler_tmp.h',
+ 'draw/draw_private.h',
+ 'draw/draw_pt.c',
+ 'draw/draw_pt_decompose.h',
+ 'draw/draw_pt_emit.c',
+ 'draw/draw_pt_fetch.c',
+ 'draw/draw_pt_fetch_emit.c',
+ 'draw/draw_pt_fetch_shade_emit.c',
+ 'draw/draw_pt_fetch_shade_pipeline.c',
+ 'draw/draw_pt.h',
+ 'draw/draw_pt_post_vs.c',
+ 'draw/draw_pt_so_emit.c',
+ 'draw/draw_pt_util.c',
+ 'draw/draw_pt_vsplit.c',
+ 'draw/draw_pt_vsplit_tmp.h',
+ 'draw/draw_so_emit_tmp.h',
+ 'draw/draw_split_tmp.h',
+ 'draw/draw_vbuf.h',
+ 'draw/draw_vertex.c',
+ 'draw/draw_vertex.h',
+ 'draw/draw_vs.c',
+ 'draw/draw_vs_exec.c',
+ 'draw/draw_vs.h',
+ 'draw/draw_vs_variant.c',
+ 'hud/font.c',
+ 'hud/font.h',
+ 'hud/hud_context.c',
+ 'hud/hud_context.h',
+ 'hud/hud_cpu.c',
+ 'hud/hud_nic.c',
+ 'hud/hud_cpufreq.c',
+ 'hud/hud_diskstat.c',
+ 'hud/hud_sensors_temp.c',
+ 'hud/hud_driver_query.c',
+ 'hud/hud_fps.c',
+ 'hud/hud_private.h',
+ 'indices/u_indices.h',
+ 'indices/u_indices_priv.h',
+ 'indices/u_primconvert.c',
+ 'indices/u_primconvert.h',
+ 'os/os_memory_aligned.h',
+ 'os/os_memory_debug.h',
+ 'os/os_memory_stdc.h',
+ 'os/os_memory.h',
+ 'os/os_misc.c',
+ 'os/os_misc.h',
+ 'os/os_mman.h',
+ 'os/os_process.c',
+ 'os/os_process.h',
+ 'os/os_thread.h',
+ 'os/os_time.c',
+ 'os/os_time.h',
+ 'pipebuffer/pb_buffer_fenced.c',
+ 'pipebuffer/pb_buffer_fenced.h',
+ 'pipebuffer/pb_buffer.h',
+ 'pipebuffer/pb_buffer_malloc.c',
+ 'pipebuffer/pb_bufmgr_alt.c',
+ 'pipebuffer/pb_bufmgr_cache.c',
+ 'pipebuffer/pb_bufmgr_debug.c',
+ 'pipebuffer/pb_bufmgr.h',
+ 'pipebuffer/pb_bufmgr_mm.c',
+ 'pipebuffer/pb_bufmgr_ondemand.c',
+ 'pipebuffer/pb_bufmgr_pool.c',
+ 'pipebuffer/pb_bufmgr_slab.c',
+ 'pipebuffer/pb_cache.c',
+ 'pipebuffer/pb_cache.h',
+ 'pipebuffer/pb_slab.c',
+ 'pipebuffer/pb_slab.h',
+ 'pipebuffer/pb_validate.c',
+ 'pipebuffer/pb_validate.h',
+ 'postprocess/filters.h',
+ 'postprocess/postprocess.h',
+ 'postprocess/pp_celshade.c',
+ 'postprocess/pp_celshade.h',
+ 'postprocess/pp_colors.c',
+ 'postprocess/pp_colors.h',
+ 'postprocess/pp_filters.h',
+ 'postprocess/pp_init.c',
+ 'postprocess/pp_mlaa_areamap.h',
+ 'postprocess/pp_mlaa.c',
+ 'postprocess/pp_mlaa.h',
+ 'postprocess/pp_private.h',
+ 'postprocess/pp_program.c',
+ 'postprocess/pp_run.c',
+ 'rbug/rbug_connection.c',
+ 'rbug/rbug_connection.h',
+ 'rbug/rbug_context.c',
+ 'rbug/rbug_context.h',
+ 'rbug/rbug_core.c',
+ 'rbug/rbug_core.h',
+ 'rbug/rbug_demarshal.c',
+ 'rbug/rbug.h',
+ 'rbug/rbug_internal.h',
+ 'rbug/rbug_proto.h',
+ 'rbug/rbug_shader.c',
+ 'rbug/rbug_shader.h',
+ 'rbug/rbug_texture.c',
+ 'rbug/rbug_texture.h',
+ 'rtasm/rtasm_cpu.c',
+ 'rtasm/rtasm_cpu.h',
+ 'rtasm/rtasm_execmem.c',
+ 'rtasm/rtasm_execmem.h',
+ 'rtasm/rtasm_x86sse.c',
+ 'rtasm/rtasm_x86sse.h',
+ 'tgsi/tgsi_aa_point.c',
+ 'tgsi/tgsi_aa_point.h',
+ 'tgsi/tgsi_build.c',
+ 'tgsi/tgsi_build.h',
+ 'tgsi/tgsi_dump.c',
+ 'tgsi/tgsi_dump.h',
+ 'tgsi/tgsi_exec.c',
+ 'tgsi/tgsi_exec.h',
+ 'tgsi/tgsi_emulate.c',
+ 'tgsi/tgsi_emulate.h',
+ 'tgsi/tgsi_from_mesa.c',
+ 'tgsi/tgsi_from_mesa.h',
+ 'tgsi/tgsi_info.c',
+ 'tgsi/tgsi_info.h',
+ 'tgsi/tgsi_info_opcodes.h',
+ 'tgsi/tgsi_iterate.c',
+ 'tgsi/tgsi_iterate.h',
+ 'tgsi/tgsi_lowering.c',
+ 'tgsi/tgsi_lowering.h',
+ 'tgsi/tgsi_opcode_tmp.h',
+ 'tgsi/tgsi_parse.c',
+ 'tgsi/tgsi_parse.h',
+ 'tgsi/tgsi_point_sprite.c',
+ 'tgsi/tgsi_point_sprite.h',
+ 'tgsi/tgsi_sanity.c',
+ 'tgsi/tgsi_sanity.h',
+ 'tgsi/tgsi_scan.c',
+ 'tgsi/tgsi_scan.h',
+ 'tgsi/tgsi_strings.c',
+ 'tgsi/tgsi_strings.h',
+ 'tgsi/tgsi_text.c',
+ 'tgsi/tgsi_text.h',
+ 'tgsi/tgsi_transform.c',
+ 'tgsi/tgsi_transform.h',
+ 'tgsi/tgsi_two_side.c',
+ 'tgsi/tgsi_two_side.h',
+ 'tgsi/tgsi_ureg.c',
+ 'tgsi/tgsi_ureg.h',
+ 'tgsi/tgsi_util.c',
+ 'tgsi/tgsi_util.h',
+ 'translate/translate.c',
+ 'translate/translate.h',
+ 'translate/translate_cache.c',
+ 'translate/translate_cache.h',
+ 'translate/translate_generic.c',
+ 'translate/translate_sse.c',
+ 'util/dbghelp.h',
+ 'util/u_bitcast.h',
+ 'util/u_bitmask.c',
+ 'util/u_bitmask.h',
+ 'util/u_blend.h',
+ 'util/u_blit.c',
+ 'util/u_blit.h',
+ 'util/u_blitter.c',
+ 'util/u_blitter.h',
+ 'util/u_box.h',
+ 'util/u_cache.c',
+ 'util/u_cache.h',
+ 'util/u_cpu_detect.c',
+ 'util/u_cpu_detect.h',
+ 'util/u_debug.c',
+ 'util/u_debug.h',
+ 'util/u_debug_describe.c',
+ 'util/u_debug_describe.h',
+ 'util/u_debug_flush.c',
+ 'util/u_debug_flush.h',
+ 'util/u_debug_image.c',
+ 'util/u_debug_image.h',
+ 'util/u_debug_memory.c',
+ 'util/u_debug_refcnt.c',
+ 'util/u_debug_refcnt.h',
+ 'util/u_debug_stack.c',
+ 'util/u_debug_stack.h',
+ 'util/u_debug_symbol.c',
+ 'util/u_debug_symbol.h',
+ 'util/u_dirty_flags.h',
+ 'util/u_dirty_surfaces.h',
+ 'util/u_dl.c',
+ 'util/u_dl.h',
+ 'util/u_draw.c',
+ 'util/u_draw.h',
+ 'util/u_draw_quad.c',
+ 'util/u_draw_quad.h',
+ 'util/u_dual_blend.h',
+ 'util/u_dump_defines.c',
+ 'util/u_dump.h',
+ 'util/u_dump_state.c',
+ 'util/u_fifo.h',
+ 'util/u_format.c',
+ 'util/u_format.h',
+ 'util/u_format_etc.c',
+ 'util/u_format_etc.h',
+ 'util/u_format_latc.c',
+ 'util/u_format_latc.h',
+ 'util/u_format_other.c',
+ 'util/u_format_other.h',
+ 'util/u_format_rgtc.c',
+ 'util/u_format_rgtc.h',
+ 'util/u_format_s3tc.c',
+ 'util/u_format_s3tc.h',
+ 'util/u_format_tests.c',
+ 'util/u_format_tests.h',
+ 'util/u_format_yuv.c',
+ 'util/u_format_yuv.h',
+ 'util/u_format_zs.c',
+ 'util/u_format_zs.h',
+ 'util/u_framebuffer.c',
+ 'util/u_framebuffer.h',
+ 'util/u_gen_mipmap.c',
+ 'util/u_gen_mipmap.h',
+ 'util/u_half.h',
+ 'util/u_handle_table.c',
+ 'util/u_handle_table.h',
+ 'util/u_hash_table.c',
+ 'util/u_hash_table.h',
+ 'util/u_helpers.c',
+ 'util/u_helpers.h',
+ 'util/u_idalloc.c',
+ 'util/u_idalloc.h',
+ 'util/u_index_modify.c',
+ 'util/u_index_modify.h',
+ 'util/u_inlines.h',
+ 'util/u_linear.c',
+ 'util/u_linear.h',
+ 'util/u_log.c',
+ 'util/u_log.h',
+ 'util/u_math.c',
+ 'util/u_math.h',
+ 'util/u_memory.h',
+ 'util/u_mm.c',
+ 'util/u_mm.h',
+ 'util/u_network.c',
+ 'util/u_network.h',
+ 'util/u_pack_color.h',
+ 'util/u_pointer.h',
+ 'util/u_prim.h',
+ 'util/u_prim_restart.c',
+ 'util/u_prim_restart.h',
+ 'util/u_pstipple.c',
+ 'util/u_pstipple.h',
+ 'util/u_pwr8.h',
+ 'util/u_range.h',
+ 'util/u_rect.h',
+ 'util/u_resource.c',
+ 'util/u_resource.h',
+ 'util/u_ringbuffer.c',
+ 'util/u_ringbuffer.h',
+ 'util/u_sampler.c',
+ 'util/u_sampler.h',
+ 'util/u_simple_shaders.c',
+ 'util/u_simple_shaders.h',
+ 'util/u_split_prim.h',
+ 'util/u_sse.h',
+ 'util/u_suballoc.c',
+ 'util/u_suballoc.h',
+ 'util/u_surface.c',
+ 'util/u_surface.h',
+ 'util/u_surfaces.c',
+ 'util/u_surfaces.h',
+ 'util/u_tests.c',
+ 'util/u_tests.h',
+ 'util/u_texture.c',
+ 'util/u_texture.h',
+ 'util/u_tile.c',
+ 'util/u_tile.h',
+ 'util/u_time.h',
+ 'util/u_transfer.c',
+ 'util/u_transfer.h',
+ 'util/u_threaded_context.c',
+ 'util/u_threaded_context.h',
+ 'util/u_threaded_context_calls.h',
+ 'util/u_upload_mgr.c',
+ 'util/u_upload_mgr.h',
+ 'util/u_vbuf.c',
+ 'util/u_vbuf.h',
+ 'util/u_video.h',
+ 'util/u_viewport.h',
+ 'nir/tgsi_to_nir.c',
+ 'nir/tgsi_to_nir.h',
+)
+
+if dep_libdrm != [] and dep_libdrm.found()
+ files_libgallium += files(
+ 'renderonly/renderonly.c',
+ 'renderonly/renderonly.h',
+ )
+endif
+
+if with_llvm
+ files_libgallium += files(
+ 'gallivm/lp_bld_arit.c',
+ 'gallivm/lp_bld_arit.h',
+ 'gallivm/lp_bld_arit_overflow.c',
+ 'gallivm/lp_bld_arit_overflow.h',
+ 'gallivm/lp_bld_assert.c',
+ 'gallivm/lp_bld_assert.h',
+ 'gallivm/lp_bld_bitarit.c',
+ 'gallivm/lp_bld_bitarit.h',
+ 'gallivm/lp_bld_const.c',
+ 'gallivm/lp_bld_const.h',
+ 'gallivm/lp_bld_conv.c',
+ 'gallivm/lp_bld_conv.h',
+ 'gallivm/lp_bld_debug.cpp',
+ 'gallivm/lp_bld_debug.h',
+ 'gallivm/lp_bld_flow.c',
+ 'gallivm/lp_bld_flow.h',
+ 'gallivm/lp_bld_format_aos_array.c',
+ 'gallivm/lp_bld_format_aos.c',
+ 'gallivm/lp_bld_format_cached.c',
+ 'gallivm/lp_bld_format_float.c',
+ 'gallivm/lp_bld_format.c',
+ 'gallivm/lp_bld_format.h',
+ 'gallivm/lp_bld_format_soa.c',
+ 'gallivm/lp_bld_format_srgb.c',
+ 'gallivm/lp_bld_format_yuv.c',
+ 'gallivm/lp_bld_gather.c',
+ 'gallivm/lp_bld_gather.h',
+ 'gallivm/lp_bld.h',
+ 'gallivm/lp_bld_init.c',
+ 'gallivm/lp_bld_init.h',
+ 'gallivm/lp_bld_intr.c',
+ 'gallivm/lp_bld_intr.h',
+ 'gallivm/lp_bld_limits.h',
+ 'gallivm/lp_bld_logic.c',
+ 'gallivm/lp_bld_logic.h',
+ 'gallivm/lp_bld_misc.cpp',
+ 'gallivm/lp_bld_misc.h',
+ 'gallivm/lp_bld_pack.c',
+ 'gallivm/lp_bld_pack.h',
+ 'gallivm/lp_bld_printf.c',
+ 'gallivm/lp_bld_printf.h',
+ 'gallivm/lp_bld_quad.c',
+ 'gallivm/lp_bld_quad.h',
+ 'gallivm/lp_bld_sample_aos.c',
+ 'gallivm/lp_bld_sample_aos.h',
+ 'gallivm/lp_bld_sample.c',
+ 'gallivm/lp_bld_sample.h',
+ 'gallivm/lp_bld_sample_soa.c',
+ 'gallivm/lp_bld_struct.c',
+ 'gallivm/lp_bld_struct.h',
+ 'gallivm/lp_bld_swizzle.c',
+ 'gallivm/lp_bld_swizzle.h',
+ 'gallivm/lp_bld_tgsi_action.c',
+ 'gallivm/lp_bld_tgsi_action.h',
+ 'gallivm/lp_bld_tgsi_aos.c',
+ 'gallivm/lp_bld_tgsi.c',
+ 'gallivm/lp_bld_tgsi.h',
+ 'gallivm/lp_bld_tgsi_info.c',
+ 'gallivm/lp_bld_tgsi_soa.c',
+ 'gallivm/lp_bld_type.c',
+ 'gallivm/lp_bld_type.h',
+ 'draw/draw_llvm.c',
+ 'draw/draw_llvm.h',
+ 'draw/draw_llvm_sample.c',
+ 'draw/draw_pt_fetch_shade_pipeline_llvm.c',
+ 'draw/draw_vs_llvm.c',
+ )
+endif
+
+files_libgalliumvl = files(
+ 'vl/vl_bicubic_filter.c',
+ 'vl/vl_bicubic_filter.h',
+ 'vl/vl_compositor.c',
+ 'vl/vl_compositor.h',
+ 'vl/vl_csc.c',
+ 'vl/vl_csc.h',
+ 'vl/vl_decoder.c',
+ 'vl/vl_decoder.h',
+ 'vl/vl_defines.h',
+ 'vl/vl_deint_filter.c',
+ 'vl/vl_deint_filter.h',
+ 'vl/vl_idct.c',
+ 'vl/vl_idct.h',
+ 'vl/vl_matrix_filter.c',
+ 'vl/vl_matrix_filter.h',
+ 'vl/vl_mc.c',
+ 'vl/vl_mc.h',
+ 'vl/vl_median_filter.c',
+ 'vl/vl_median_filter.h',
+ 'vl/vl_mpeg12_bitstream.c',
+ 'vl/vl_mpeg12_bitstream.h',
+ 'vl/vl_mpeg12_decoder.c',
+ 'vl/vl_mpeg12_decoder.h',
+ 'vl/vl_rbsp.h',
+ 'vl/vl_types.h',
+ 'vl/vl_vertex_buffers.c',
+ 'vl/vl_vertex_buffers.h',
+ 'vl/vl_video_buffer.c',
+ 'vl/vl_video_buffer.h',
+ 'vl/vl_vlc.h',
+ 'vl/vl_zscan.c',
+ 'vl/vl_zscan.h',
+)
+
+files_libgalliumvlwinsys = files('vl/vl_winsys.h')
+if with_dri2
+ files_libgalliumvlwinsys += files('vl/vl_winsys_dri.c')
+ if with_dri3
+ files_libgalliumvlwinsys += files('vl/vl_winsys_dri3.c')
+ endif
+endif
+if with_platform_drm
+ files_libgalliumvlwinsys += files('vl/vl_winsys_drm.c')
+endif
+
+u_indices_gen_c = custom_target(
+ 'u_indices_gen.c',
+ input : 'indices/u_indices_gen.py',
+ output : 'u_indices_gen.c',
+ command : [prog_python2, '@INPUT@'],
+ capture : true,
+)
+
+u_unfilled_gen_c = custom_target(
+ 'u_unfilled_gen.c',
+ input : 'indices/u_unfilled_gen.py',
+ output : 'u_unfilled_gen.c',
+ command : [prog_python2, '@INPUT@'],
+ capture : true,
+)
+
+u_format_table_c = custom_target(
+ 'u_format_table.c',
+ input : ['util/u_format_table.py', 'util/u_format.csv'],
+ output : 'u_format_table.c',
+ command : [prog_python2, '@INPUT@'],
+ depend_files : files('util/u_format_pack.py', 'util/u_format_parse.py'),
+ capture : true,
+)
+
+libgallium = static_library(
+ 'gallium',
+ [files_libgallium, u_indices_gen_c, u_unfilled_gen_c, u_format_table_c,
+ nir_opcodes_h],
+ include_directories : [
+ inc_loader, inc_gallium, inc_src, inc_include, include_directories('util')
+ ],
+ c_args : [c_vis_args, c_msvc_compat_args],
+ cpp_args : [cpp_vis_args, cpp_msvc_compat_args],
+ dependencies : [dep_libdrm, dep_llvm, dep_unwind, dep_dl],
+ build_by_default : false,
+)
+
+libgalliumvl_stub = static_library(
+ 'galliumvl_stub',
+ 'vl/vl_stubs.c',
+ c_args : [c_vis_args, c_msvc_compat_args],
+ cpp_args : [cpp_vis_args, cpp_msvc_compat_args],
+ include_directories: [inc_gallium, inc_include, inc_src],
+ build_by_default : false,
+)
+
+libgalliumvl = static_library(
+ 'galliumvl',
+ files_libgalliumvl,
+ c_args : [c_vis_args, c_msvc_compat_args],
+ cpp_args : [cpp_vis_args, cpp_msvc_compat_args],
+ include_directories : [inc_gallium, inc_include, inc_src],
+ build_by_default : false,
+)
+
+# XXX: The dependencies here may be off...
+libgalliumvlwinsys = static_library(
+ 'galliumvlwinsys',
+ files_libgalliumvlwinsys,
+ include_directories : [inc_gallium, inc_include, inc_loader, inc_src],
+ dependencies : [dep_libdrm],
+ build_by_default : false,
+)
diff -Nru mesa-17.2.4/src/gallium/auxiliary/nir/tgsi_to_nir.c mesa-17.3.3/src/gallium/auxiliary/nir/tgsi_to_nir.c
--- mesa-17.2.4/src/gallium/auxiliary/nir/tgsi_to_nir.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/nir/tgsi_to_nir.c 2018-01-18 21:30:28.000000000 +0000
@@ -314,7 +314,8 @@
file == TGSI_FILE_CONSTANT);
/* nothing to do for UBOs: */
- if ((file == TGSI_FILE_CONSTANT) && decl->Declaration.Dimension) {
+ if ((file == TGSI_FILE_CONSTANT) && decl->Declaration.Dimension &&
+ decl->Dim.Index2D != 0) {
b->shader->info.num_ubos =
MAX2(b->shader->info.num_ubos, decl->Dim.Index2D);
return;
@@ -624,7 +625,7 @@
assert(!dim);
break;
case TGSI_FILE_CONSTANT:
- if (dim) {
+ if (dim && (dim->Index > 0 || dim->Indirect)) {
op = nir_intrinsic_load_ubo;
} else {
op = nir_intrinsic_load_uniform;
@@ -638,7 +639,7 @@
load = nir_intrinsic_instr_create(b->shader, op);
load->num_components = 4;
- if (dim) {
+ if (dim && (dim->Index > 0 || dim->Indirect)) {
if (dimind) {
load->src[srcn] =
ttn_src_for_file_and_index(c, dimind->File, dimind->Index,
@@ -766,12 +767,13 @@
}
static nir_ssa_def *
-ttn_get_src(struct ttn_compile *c, struct tgsi_full_src_register *tgsi_fsrc)
+ttn_get_src(struct ttn_compile *c, struct tgsi_full_src_register *tgsi_fsrc,
+ int src_idx)
{
nir_builder *b = &c->build;
struct tgsi_src_register *tgsi_src = &tgsi_fsrc->Register;
unsigned tgsi_opcode = c->token->FullInstruction.Instruction.Opcode;
- unsigned tgsi_src_type = tgsi_opcode_infer_src_type(tgsi_opcode);
+ unsigned tgsi_src_type = tgsi_opcode_infer_src_type(tgsi_opcode, src_idx);
bool src_is_float = !(tgsi_src_type == TGSI_TYPE_SIGNED ||
tgsi_src_type == TGSI_TYPE_UNSIGNED);
nir_alu_src src;
@@ -956,23 +958,6 @@
}
}
-/* SCS - Sine Cosine
- * dst.x = \cos{src.x}
- * dst.y = \sin{src.x}
- * dst.z = 0.0
- * dst.w = 1.0
- */
-static void
-ttn_scs(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
-{
- ttn_move_dest_masked(b, dest, nir_fcos(b, ttn_channel(b, src[0], X)),
- TGSI_WRITEMASK_X);
- ttn_move_dest_masked(b, dest, nir_fsin(b, ttn_channel(b, src[0], X)),
- TGSI_WRITEMASK_Y);
- ttn_move_dest_masked(b, dest, nir_imm_float(b, 0.0), TGSI_WRITEMASK_Z);
- ttn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), TGSI_WRITEMASK_W);
-}
-
static void
ttn_sle(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
{
@@ -986,30 +971,6 @@
}
static void
-ttn_xpd(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
-{
- ttn_move_dest_masked(b, dest,
- nir_fsub(b,
- nir_fmul(b,
- ttn_swizzle(b, src[0], Y, Z, X, X),
- ttn_swizzle(b, src[1], Z, X, Y, X)),
- nir_fmul(b,
- ttn_swizzle(b, src[1], Y, Z, X, X),
- ttn_swizzle(b, src[0], Z, X, Y, X))),
- TGSI_WRITEMASK_XYZ);
- ttn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), TGSI_WRITEMASK_W);
-}
-
-static void
-ttn_dp2a(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
-{
- ttn_move_dest(b, dest,
- ttn_channel(b, nir_fadd(b, nir_fdot2(b, src[0], src[1]),
- src[2]),
- X));
-}
-
-static void
ttn_dp2(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
{
ttn_move_dest(b, dest, nir_fdot2(b, src[0], src[1]));
@@ -1028,13 +989,6 @@
}
static void
-ttn_dph(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
-{
- ttn_move_dest(b, dest, nir_fadd(b, nir_fdot3(b, src[0], src[1]),
- ttn_channel(b, src[1], W)));
-}
-
-static void
ttn_umad(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
{
ttn_move_dest(b, dest, nir_iadd(b, nir_imul(b, src[0], src[1]), src[2]));
@@ -1536,15 +1490,12 @@
[TGSI_OPCODE_MAD] = nir_op_ffma,
[TGSI_OPCODE_LRP] = 0,
[TGSI_OPCODE_SQRT] = nir_op_fsqrt,
- [TGSI_OPCODE_DP2A] = 0,
[TGSI_OPCODE_FRC] = nir_op_ffract,
[TGSI_OPCODE_FLR] = nir_op_ffloor,
[TGSI_OPCODE_ROUND] = nir_op_fround_even,
[TGSI_OPCODE_EX2] = nir_op_fexp2,
[TGSI_OPCODE_LG2] = nir_op_flog2,
[TGSI_OPCODE_POW] = nir_op_fpow,
- [TGSI_OPCODE_XPD] = 0,
- [TGSI_OPCODE_DPH] = 0,
[TGSI_OPCODE_COS] = nir_op_fcos,
[TGSI_OPCODE_DDX] = nir_op_fddx,
[TGSI_OPCODE_DDY] = nir_op_fddy,
@@ -1573,7 +1524,6 @@
[TGSI_OPCODE_SSG] = nir_op_fsign,
[TGSI_OPCODE_CMP] = 0,
- [TGSI_OPCODE_SCS] = 0,
[TGSI_OPCODE_TXB] = 0,
[TGSI_OPCODE_DIV] = nir_op_fdiv,
[TGSI_OPCODE_DP2] = 0,
@@ -1588,9 +1538,6 @@
[TGSI_OPCODE_DDX_FINE] = nir_op_fddx_fine,
[TGSI_OPCODE_DDY_FINE] = nir_op_fddy_fine,
- [TGSI_OPCODE_PUSHA] = 0, /* XXX */
- [TGSI_OPCODE_POPA] = 0, /* XXX */
-
[TGSI_OPCODE_CEIL] = nir_op_fceil,
[TGSI_OPCODE_I2F] = nir_op_i2f32,
[TGSI_OPCODE_NOT] = nir_op_inot,
@@ -1600,7 +1547,6 @@
[TGSI_OPCODE_OR] = nir_op_ior,
[TGSI_OPCODE_MOD] = nir_op_umod,
[TGSI_OPCODE_XOR] = nir_op_ixor,
- [TGSI_OPCODE_SAD] = 0, /* XXX */
[TGSI_OPCODE_TXF] = 0,
[TGSI_OPCODE_TXQ] = 0,
@@ -1614,17 +1560,12 @@
[TGSI_OPCODE_ENDLOOP] = 0,
[TGSI_OPCODE_ENDSUB] = 0, /* XXX: no function calls */
- [TGSI_OPCODE_TXQ_LZ] = 0,
[TGSI_OPCODE_NOP] = 0,
[TGSI_OPCODE_FSEQ] = nir_op_feq,
[TGSI_OPCODE_FSGE] = nir_op_fge,
[TGSI_OPCODE_FSLT] = nir_op_flt,
[TGSI_OPCODE_FSNE] = nir_op_fne,
- /* No control flow yet */
- [TGSI_OPCODE_CALLNZ] = 0, /* XXX */
- [TGSI_OPCODE_BREAKC] = 0, /* not emitted by glsl_to_tgsi.cpp */
-
[TGSI_OPCODE_KILL_IF] = 0,
[TGSI_OPCODE_END] = 0,
@@ -1704,7 +1645,7 @@
nir_ssa_def *src[TGSI_FULL_MAX_SRC_REGISTERS];
for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) {
- src[i] = ttn_get_src(c, &tgsi_inst->Src[i]);
+ src[i] = ttn_get_src(c, &tgsi_inst->Src[i], i);
}
nir_alu_dest dest = ttn_get_dest(c, tgsi_dst);
@@ -1763,10 +1704,6 @@
ttn_lit(b, op_trans[tgsi_op], dest, src);
break;
- case TGSI_OPCODE_XPD:
- ttn_xpd(b, op_trans[tgsi_op], dest, src);
- break;
-
case TGSI_OPCODE_DP2:
ttn_dp2(b, op_trans[tgsi_op], dest, src);
break;
@@ -1779,14 +1716,6 @@
ttn_dp4(b, op_trans[tgsi_op], dest, src);
break;
- case TGSI_OPCODE_DP2A:
- ttn_dp2a(b, op_trans[tgsi_op], dest, src);
- break;
-
- case TGSI_OPCODE_DPH:
- ttn_dph(b, op_trans[tgsi_op], dest, src);
- break;
-
case TGSI_OPCODE_UMAD:
ttn_umad(b, op_trans[tgsi_op], dest, src);
break;
@@ -1811,10 +1740,6 @@
ttn_ucmp(b, op_trans[tgsi_op], dest, src);
break;
- case TGSI_OPCODE_SCS:
- ttn_scs(b, op_trans[tgsi_op], dest, src);
- break;
-
case TGSI_OPCODE_SGT:
ttn_sgt(b, op_trans[tgsi_op], dest, src);
break;
@@ -1835,7 +1760,6 @@
case TGSI_OPCODE_TEX2:
case TGSI_OPCODE_TXL2:
case TGSI_OPCODE_TXB2:
- case TGSI_OPCODE_TXQ_LZ:
case TGSI_OPCODE_TXF:
case TGSI_OPCODE_TG4:
case TGSI_OPCODE_LODQ:
@@ -1943,7 +1867,7 @@
nir_src src = nir_src_for_reg(c->output_regs[loc].reg);
src.reg.base_offset = c->output_regs[loc].offset;
- if (c->build.shader->stage == MESA_SHADER_FRAGMENT &&
+ if (c->build.shader->info.stage == MESA_SHADER_FRAGMENT &&
var->data.location == FRAG_RESULT_DEPTH) {
/* TGSI uses TGSI_SEMANTIC_POSITION.z for the depth output, while
* NIR uses a single float FRAG_RESULT_DEPTH.
diff -Nru mesa-17.2.4/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c mesa-17.3.3/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
--- mesa-17.2.4/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c 2018-01-18 21:30:28.000000000 +0000
@@ -30,7 +30,7 @@
* Buffer cache.
*
* \author Jose Fonseca
- * \author Thomas Hellström
+ * \author Thomas Hellström
*/
diff -Nru mesa-17.2.4/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c mesa-17.3.3/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c
--- mesa-17.2.4/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c 2018-01-18 21:30:28.000000000 +0000
@@ -31,7 +31,7 @@
* Batch buffer pool management.
*
* \author Jose Fonseca
- * \author Thomas Hellström
+ * \author Thomas Hellström
*/
diff -Nru mesa-17.2.4/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c mesa-17.3.3/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c
--- mesa-17.2.4/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c 2018-01-18 21:30:28.000000000 +0000
@@ -32,7 +32,7 @@
*
* @sa http://en.wikipedia.org/wiki/Slab_allocation
*
- * @author Thomas Hellstrom
+ * @author Thomas Hellstrom
* @author Jose Fonseca
*/
diff -Nru mesa-17.2.4/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h mesa-17.3.3/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h
--- mesa-17.2.4/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,35 @@
+// DriConf options supported by all Gallium DRI drivers.
+DRI_CONF_SECTION_PERFORMANCE
+ DRI_CONF_MESA_GLTHREAD("false")
+ DRI_CONF_MESA_NO_ERROR("false")
+ DRI_CONF_DISABLE_EXT_BUFFER_AGE("false")
+ DRI_CONF_DISABLE_OML_SYNC_CONTROL("false")
+DRI_CONF_SECTION_END
+
+DRI_CONF_SECTION_QUALITY
+ DRI_CONF_PP_CELSHADE(0)
+ DRI_CONF_PP_NORED(0)
+ DRI_CONF_PP_NOGREEN(0)
+ DRI_CONF_PP_NOBLUE(0)
+ DRI_CONF_PP_JIMENEZMLAA(0, 0, 32)
+ DRI_CONF_PP_JIMENEZMLAA_COLOR(0, 0, 32)
+DRI_CONF_SECTION_END
+
+DRI_CONF_SECTION_DEBUG
+ DRI_CONF_FORCE_GLSL_EXTENSIONS_WARN("false")
+ DRI_CONF_DISABLE_GLSL_LINE_CONTINUATIONS("false")
+ DRI_CONF_DISABLE_BLEND_FUNC_EXTENDED("false")
+ DRI_CONF_DISABLE_SHADER_BIT_ENCODING("false")
+ DRI_CONF_FORCE_GLSL_VERSION(0)
+ DRI_CONF_ALLOW_GLSL_EXTENSION_DIRECTIVE_MIDSHADER("false")
+ DRI_CONF_ALLOW_GLSL_BUILTIN_VARIABLE_REDECLARATION("false")
+ DRI_CONF_ALLOW_GLSL_CROSS_STAGE_INTERPOLATION_MISMATCH("false")
+ DRI_CONF_ALLOW_HIGHER_COMPAT_VERSION("false")
+ DRI_CONF_FORCE_GLSL_ABS_SQRT("false")
+ DRI_CONF_GLSL_CORRECT_DERIVATIVES_AFTER_DISCARD("false")
+DRI_CONF_SECTION_END
+
+DRI_CONF_SECTION_MISCELLANEOUS
+ DRI_CONF_ALWAYS_HAVE_DEPTH_BUFFER("false")
+ DRI_CONF_GLSL_ZERO_INIT("false")
+DRI_CONF_SECTION_END
diff -Nru mesa-17.2.4/src/gallium/auxiliary/pipe-loader/Makefile.am mesa-17.3.3/src/gallium/auxiliary/pipe-loader/Makefile.am
--- mesa-17.2.4/src/gallium/auxiliary/pipe-loader/Makefile.am 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/pipe-loader/Makefile.am 2018-01-18 21:30:28.000000000 +0000
@@ -5,6 +5,7 @@
AM_CFLAGS = \
-I$(top_srcdir)/src/loader \
-I$(top_srcdir)/src/gallium/winsys \
+ -I$(top_builddir)/src/util \
$(GALLIUM_PIPE_LOADER_DEFINES) \
$(GALLIUM_CFLAGS) \
$(VISIBILITY_CFLAGS)
@@ -40,9 +41,11 @@
endif
libpipe_loader_static_la_LIBADD = \
- $(top_builddir)/src/loader/libloader.la
+ $(top_builddir)/src/loader/libloader.la \
+ $(top_builddir)/src/util/libxmlconfig.la
libpipe_loader_dynamic_la_LIBADD = \
- $(top_builddir)/src/loader/libloader.la
+ $(top_builddir)/src/loader/libloader.la \
+ $(top_builddir)/src/util/libxmlconfig.la
EXTRA_DIST = SConscript
diff -Nru mesa-17.2.4/src/gallium/auxiliary/pipe-loader/Makefile.in mesa-17.3.3/src/gallium/auxiliary/pipe-loader/Makefile.in
--- mesa-17.2.4/src/gallium/auxiliary/pipe-loader/Makefile.in 2017-10-30 14:49:59.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/pipe-loader/Makefile.in 2018-01-18 21:30:39.000000000 +0000
@@ -110,7 +110,8 @@
subdir = src/gallium/auxiliary/pipe-loader
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_gnu_make.m4 \
+am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_compile_flag.m4 \
+ $(top_srcdir)/m4/ax_check_gnu_make.m4 \
$(top_srcdir)/m4/ax_check_python_mako_module.m4 \
$(top_srcdir)/m4/ax_gcc_builtin.m4 \
$(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
@@ -128,10 +129,11 @@
CONFIG_CLEAN_VPATH_FILES =
LTLIBRARIES = $(noinst_LTLIBRARIES)
libpipe_loader_dynamic_la_DEPENDENCIES = \
- $(top_builddir)/src/loader/libloader.la
+ $(top_builddir)/src/loader/libloader.la \
+ $(top_builddir)/src/util/libxmlconfig.la
am__libpipe_loader_dynamic_la_SOURCES_DIST = pipe_loader.c \
pipe_loader.h pipe_loader_priv.h pipe_loader_sw.c \
- pipe_loader_drm.c
+ driinfo_gallium.h pipe_loader_drm.c
am__objects_1 = libpipe_loader_dynamic_la-pipe_loader.lo \
libpipe_loader_dynamic_la-pipe_loader_sw.lo
am__objects_2 = libpipe_loader_dynamic_la-pipe_loader_drm.lo
@@ -149,10 +151,11 @@
$(libpipe_loader_dynamic_la_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
$(LDFLAGS) -o $@
libpipe_loader_static_la_DEPENDENCIES = \
- $(top_builddir)/src/loader/libloader.la
+ $(top_builddir)/src/loader/libloader.la \
+ $(top_builddir)/src/util/libxmlconfig.la
am__libpipe_loader_static_la_SOURCES_DIST = pipe_loader.c \
pipe_loader.h pipe_loader_priv.h pipe_loader_sw.c \
- pipe_loader_drm.c
+ driinfo_gallium.h pipe_loader_drm.c
am__objects_4 = libpipe_loader_static_la-pipe_loader.lo \
libpipe_loader_static_la-pipe_loader_sw.lo
am__objects_5 = libpipe_loader_static_la-pipe_loader_drm.lo
@@ -369,9 +372,9 @@
NVVIEUX_LIBS = @NVVIEUX_LIBS@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
-OMX_CFLAGS = @OMX_CFLAGS@
-OMX_LIBS = @OMX_LIBS@
-OMX_LIB_INSTALL_DIR = @OMX_LIB_INSTALL_DIR@
+OMX_BELLAGIO_CFLAGS = @OMX_BELLAGIO_CFLAGS@
+OMX_BELLAGIO_LIBS = @OMX_BELLAGIO_LIBS@
+OMX_BELLAGIO_LIB_INSTALL_DIR = @OMX_BELLAGIO_LIB_INSTALL_DIR@
OPENCL_LIBNAME = @OPENCL_LIBNAME@
OPENCL_VERSION = @OPENCL_VERSION@
OSMESA_LIB = @OSMESA_LIB@
@@ -425,6 +428,8 @@
VA_LIB_INSTALL_DIR = @VA_LIB_INSTALL_DIR@
VA_MAJOR = @VA_MAJOR@
VA_MINOR = @VA_MINOR@
+VC5_SIMULATOR_CFLAGS = @VC5_SIMULATOR_CFLAGS@
+VC5_SIMULATOR_LIBS = @VC5_SIMULATOR_LIBS@
VDPAU_CFLAGS = @VDPAU_CFLAGS@
VDPAU_LIBS = @VDPAU_LIBS@
VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@
@@ -436,12 +441,15 @@
VL_CFLAGS = @VL_CFLAGS@
VL_LIBS = @VL_LIBS@
VULKAN_ICD_INSTALL_DIR = @VULKAN_ICD_INSTALL_DIR@
-WAYLAND_CFLAGS = @WAYLAND_CFLAGS@
-WAYLAND_LIBS = @WAYLAND_LIBS@
+WAYLAND_CLIENT_CFLAGS = @WAYLAND_CLIENT_CFLAGS@
+WAYLAND_CLIENT_LIBS = @WAYLAND_CLIENT_LIBS@
WAYLAND_PROTOCOLS_DATADIR = @WAYLAND_PROTOCOLS_DATADIR@
WAYLAND_SCANNER = @WAYLAND_SCANNER@
WAYLAND_SCANNER_CFLAGS = @WAYLAND_SCANNER_CFLAGS@
WAYLAND_SCANNER_LIBS = @WAYLAND_SCANNER_LIBS@
+WAYLAND_SERVER_CFLAGS = @WAYLAND_SERVER_CFLAGS@
+WAYLAND_SERVER_LIBS = @WAYLAND_SERVER_LIBS@
+WNO_OVERRIDE_INIT = @WNO_OVERRIDE_INIT@
X11_INCLUDES = @X11_INCLUDES@
XA_MAJOR = @XA_MAJOR@
XA_MINOR = @XA_MINOR@
@@ -529,7 +537,8 @@
pipe_loader.c \
pipe_loader.h \
pipe_loader_priv.h \
- pipe_loader_sw.c
+ pipe_loader_sw.c \
+ driinfo_gallium.h
DRM_SOURCES := \
pipe_loader_drm.c
@@ -576,6 +585,8 @@
-I$(top_srcdir)/src/gallium/auxiliary \
-I$(top_srcdir)/src/gallium/drivers \
-I$(top_srcdir)/src/gallium/winsys \
+ -I$(top_builddir)/src/util/ \
+ -I$(top_builddir)/src/gallium/drivers/ \
$(DEFINES) \
$(PTHREAD_CFLAGS) \
$(LIBDRM_CFLAGS) \
@@ -598,7 +609,7 @@
# XXX: check if we need the gallium/winsys include
AM_CFLAGS = -I$(top_srcdir)/src/loader \
- -I$(top_srcdir)/src/gallium/winsys \
+ -I$(top_srcdir)/src/gallium/winsys -I$(top_builddir)/src/util \
$(GALLIUM_PIPE_LOADER_DEFINES) $(GALLIUM_CFLAGS) \
$(VISIBILITY_CFLAGS) $(am__append_4)
noinst_LTLIBRARIES = \
@@ -616,10 +627,12 @@
libpipe_loader_static_la_SOURCES = $(COMMON_SOURCES) $(am__append_5)
libpipe_loader_dynamic_la_SOURCES = $(COMMON_SOURCES) $(am__append_6)
libpipe_loader_static_la_LIBADD = \
- $(top_builddir)/src/loader/libloader.la
+ $(top_builddir)/src/loader/libloader.la \
+ $(top_builddir)/src/util/libxmlconfig.la
libpipe_loader_dynamic_la_LIBADD = \
- $(top_builddir)/src/loader/libloader.la
+ $(top_builddir)/src/loader/libloader.la \
+ $(top_builddir)/src/util/libxmlconfig.la
EXTRA_DIST = SConscript
all: all-am
diff -Nru mesa-17.2.4/src/gallium/auxiliary/pipe-loader/Makefile.sources mesa-17.3.3/src/gallium/auxiliary/pipe-loader/Makefile.sources
--- mesa-17.2.4/src/gallium/auxiliary/pipe-loader/Makefile.sources 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/pipe-loader/Makefile.sources 2018-01-18 21:30:28.000000000 +0000
@@ -2,7 +2,8 @@
pipe_loader.c \
pipe_loader.h \
pipe_loader_priv.h \
- pipe_loader_sw.c
+ pipe_loader_sw.c \
+ driinfo_gallium.h
DRM_SOURCES := \
pipe_loader_drm.c
diff -Nru mesa-17.2.4/src/gallium/auxiliary/pipe-loader/meson.build mesa-17.3.3/src/gallium/auxiliary/pipe-loader/meson.build
--- mesa-17.2.4/src/gallium/auxiliary/pipe-loader/meson.build 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/pipe-loader/meson.build 2018-02-01 16:17:32.000000000 +0000
@@ -0,0 +1,69 @@
+# Copyright © 2017 Dylan Baker
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+files_pipe_loader = files(
+ 'pipe_loader.c',
+ 'pipe_loader.h',
+ 'pipe_loader_priv.h',
+ 'pipe_loader_sw.c',
+ 'driinfo_gallium.h',
+)
+
+libpipe_loader_defines = []
+
+if dep_libdrm.found()
+ files_pipe_loader += files('pipe_loader_drm.c')
+endif
+if with_gallium_drisw_kms
+ libpipe_loader_defines += '-DHAVE_PIPE_LOADER_KMS'
+endif
+
+libpipe_loader_static = static_library(
+ 'pipe_loader_static',
+ files_pipe_loader,
+ include_directories : [
+ inc_util, inc_loader, inc_gallium, inc_include, inc_src, inc_gallium_aux,
+ inc_gallium_winsys,
+ ],
+ c_args : [
+ c_vis_args, '-DHAVE_PIPE_LOADER_DRI', '-DGALLIUM_STATIC_TARGETS=1',
+ libpipe_loader_defines,
+ ],
+ link_with : [libloader, libxmlconfig],
+ dependencies : [dep_libdrm],
+ build_by_default : false,
+)
+
+libpipe_loader_dynamic = static_library(
+ 'pipe_loader_dynamic',
+ files_pipe_loader,
+ include_directories : [
+ inc_util, inc_loader, inc_gallium, inc_include, inc_src, inc_gallium_aux,
+ inc_gallium_winsys,
+ ],
+ c_args : [
+ c_vis_args, libpipe_loader_defines, '-DHAVE_PIPE_LOADER_DRI',
+ '-DPIPE_SEARCH_DIR="@0@"'.format(join_paths(get_option('libdir'), 'gallium-pipe')
+ )
+ ],
+ link_with : [libloader, libxmlconfig],
+ dependencies : [dep_libdrm],
+ build_by_default : false,
+)
diff -Nru mesa-17.2.4/src/gallium/auxiliary/pipe-loader/pipe_loader.c mesa-17.3.3/src/gallium/auxiliary/pipe-loader/pipe_loader.c
--- mesa-17.2.4/src/gallium/auxiliary/pipe-loader/pipe_loader.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/pipe-loader/pipe_loader.c 2018-01-18 21:30:28.000000000 +0000
@@ -31,6 +31,10 @@
#include "util/u_memory.h"
#include "util/u_string.h"
#include "util/u_dl.h"
+#include "util/xmlconfig.h"
+#include "util/xmlpool.h"
+
+#include
#ifdef _MSC_VER
#include
@@ -46,6 +50,12 @@
&pipe_loader_sw_probe
};
+/* Generic driinfo XML: the contents of driinfo_gallium.h wrapped in
+ * DRI_CONF_BEGIN / DRI_CONF_END.  Used when a driver supplies no XML of
+ * its own. */
+const char gallium_driinfo_xml[] =
+ DRI_CONF_BEGIN
+#include "driinfo_gallium.h"
+ DRI_CONF_END
+;
+
int
pipe_loader_probe(struct pipe_loader_device **devs, int ndev)
{
@@ -66,6 +76,16 @@
devs[i]->ops->release(&devs[i]);
}
+/**
+ * Tear down the state shared by every loader device: the option cache and
+ * option info are destroyed, the device is freed and the caller's pointer
+ * is reset to NULL.
+ */
+void
+pipe_loader_base_release(struct pipe_loader_device **dev)
+{
+   struct pipe_loader_device *base = *dev;
+
+   driDestroyOptionCache(&base->option_cache);
+   driDestroyOptionInfo(&base->option_info);
+   FREE(base);
+
+   *dev = NULL;
+}
+
const struct drm_conf_ret *
pipe_loader_configuration(struct pipe_loader_device *dev,
enum drm_conf conf)
@@ -73,14 +93,52 @@
return dev->ops->configuration(dev, conf);
}
+/**
+ * Parse the driinfo XML and the configuration files for \p dev.
+ *
+ * Nothing is done when dev->option_info has already been populated, so
+ * repeated calls are harmless.
+ */
+void
+pipe_loader_load_options(struct pipe_loader_device *dev)
+{
+   const struct drm_conf_ret *driver_xml;
+   const char *xml;
+
+   if (dev->option_info.info)
+      return;
+
+   /* Prefer the driver's own option description; otherwise fall back to
+    * the generic gallium one. */
+   driver_xml = pipe_loader_configuration(dev, DRM_CONF_XML_OPTIONS);
+   if (driver_xml)
+      xml = driver_xml->val.val_pointer;
+   else
+      xml = gallium_driinfo_xml;
+
+   driParseOptionInfo(&dev->option_info, xml);
+   driParseConfigFiles(&dev->option_cache, &dev->option_info, 0,
+                       dev->driver_name);
+}
+
+/**
+ * Return a heap-allocated copy of the driinfo XML for \p driver_name.
+ *
+ * With HAVE_LIBDRM the DRM driver is asked first; when that yields nothing
+ * the generic gallium XML is duplicated instead.
+ */
+char *
+pipe_loader_get_driinfo_xml(const char *driver_name)
+{
+   char *result = NULL;
+
+#ifdef HAVE_LIBDRM
+   result = pipe_loader_drm_get_driinfo_xml(driver_name);
+#endif
+
+   return result ? result : strdup(gallium_driinfo_xml);
+}
+
struct pipe_screen *
-pipe_loader_create_screen(struct pipe_loader_device *dev, unsigned flags)
+pipe_loader_create_screen(struct pipe_loader_device *dev)
{
- return dev->ops->create_screen(dev, flags);
+ struct pipe_screen_config config;
+
+ /* Parse the driver options (a no-op once they are loaded) and expose the
+ * resulting cache to the driver through the screen config. */
+ pipe_loader_load_options(dev);
+ config.options = &dev->option_cache;
+
+ return dev->ops->create_screen(dev, &config);
}
struct util_dl_library *
-pipe_loader_find_module(struct pipe_loader_device *dev,
+pipe_loader_find_module(const char *driver_name,
const char *library_paths)
{
struct util_dl_library *lib;
@@ -95,10 +153,10 @@
if (len)
ret = util_snprintf(path, sizeof(path), "%.*s/%s%s%s",
len, library_paths,
- MODULE_PREFIX, dev->driver_name, UTIL_DL_EXT);
+ MODULE_PREFIX, driver_name, UTIL_DL_EXT);
else
ret = util_snprintf(path, sizeof(path), "%s%s%s",
- MODULE_PREFIX, dev->driver_name, UTIL_DL_EXT);
+ MODULE_PREFIX, driver_name, UTIL_DL_EXT);
if (ret > 0 && ret < sizeof(path)) {
lib = util_dl_open(path);
diff -Nru mesa-17.2.4/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c mesa-17.3.3/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
--- mesa-17.2.4/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c 2018-01-18 21:30:28.000000000 +0000
@@ -32,6 +32,7 @@
#include
#include
+#include
#include
#include
@@ -63,99 +64,105 @@
static const struct pipe_loader_ops pipe_loader_drm_ops;
#ifdef GALLIUM_STATIC_TARGETS
-static const struct drm_conf_ret throttle_ret = {
- .type = DRM_CONF_INT,
- .val.val_int = 2,
-};
-
-static const struct drm_conf_ret share_fd_ret = {
- .type = DRM_CONF_BOOL,
- .val.val_bool = true,
-};
-
-static inline const struct drm_conf_ret *
-configuration_query(enum drm_conf conf)
-{
- switch (conf) {
- case DRM_CONF_THROTTLE:
- return &throttle_ret;
- case DRM_CONF_SHARE_FD:
- return &share_fd_ret;
- default:
- break;
- }
- return NULL;
-}
-
+/* Descriptors of the drivers available with GALLIUM_STATIC_TARGETS;
+ * get_driver_descriptor() searches this table by driver name. */
static const struct drm_driver_descriptor driver_descriptors[] = {
{
.driver_name = "i915",
.create_screen = pipe_i915_create_screen,
- .configuration = configuration_query,
+ .configuration = pipe_default_configuration_query,
},
{
.driver_name = "nouveau",
.create_screen = pipe_nouveau_create_screen,
- .configuration = configuration_query,
+ .configuration = pipe_default_configuration_query,
},
{
.driver_name = "r300",
.create_screen = pipe_r300_create_screen,
- .configuration = configuration_query,
+ .configuration = pipe_default_configuration_query,
},
{
.driver_name = "r600",
.create_screen = pipe_r600_create_screen,
- .configuration = configuration_query,
+ .configuration = pipe_default_configuration_query,
},
{
.driver_name = "radeonsi",
.create_screen = pipe_radeonsi_create_screen,
- .configuration = configuration_query,
+ .configuration = pipe_radeonsi_configuration_query,
},
{
.driver_name = "vmwgfx",
.create_screen = pipe_vmwgfx_create_screen,
- .configuration = configuration_query,
+ .configuration = pipe_default_configuration_query,
},
{
.driver_name = "kgsl",
.create_screen = pipe_freedreno_create_screen,
- .configuration = configuration_query,
+ .configuration = pipe_default_configuration_query,
},
{
.driver_name = "msm",
.create_screen = pipe_freedreno_create_screen,
- .configuration = configuration_query,
+ .configuration = pipe_default_configuration_query,
},
{
.driver_name = "pl111",
.create_screen = pipe_pl111_create_screen,
- .configuration = configuration_query,
+ .configuration = pipe_default_configuration_query,
},
{
.driver_name = "virtio_gpu",
.create_screen = pipe_virgl_create_screen,
- .configuration = configuration_query,
+ .configuration = pipe_default_configuration_query,
},
{
.driver_name = "vc4",
.create_screen = pipe_vc4_create_screen,
- .configuration = configuration_query,
+ .configuration = pipe_default_configuration_query,
+ },
+ {
+ .driver_name = "vc5",
+ .create_screen = pipe_vc5_create_screen,
+ .configuration = pipe_default_configuration_query,
},
{
.driver_name = "etnaviv",
.create_screen = pipe_etna_create_screen,
- .configuration = configuration_query,
+ .configuration = pipe_default_configuration_query,
},
{
.driver_name = "imx-drm",
.create_screen = pipe_imx_drm_create_screen,
- .configuration = configuration_query,
+ .configuration = pipe_default_configuration_query,
}
};
#endif
+/**
+ * Look up the descriptor for \p driver_name.
+ *
+ * With GALLIUM_STATIC_TARGETS the built-in table is searched and \p plib is
+ * never touched.  Otherwise the driver module is opened and its handle is
+ * stored in \p plib; the handle stays there even when the descriptor is
+ * missing or names a different driver, so the caller remains responsible
+ * for closing it.
+ *
+ * \return the matching descriptor, or NULL.
+ */
+static const struct drm_driver_descriptor *
+get_driver_descriptor(const char *driver_name, struct util_dl_library **plib)
+{
+#ifdef GALLIUM_STATIC_TARGETS
+   const struct drm_driver_descriptor *entry = driver_descriptors;
+   const struct drm_driver_descriptor *const end =
+      driver_descriptors + ARRAY_SIZE(driver_descriptors);
+
+   for (; entry != end; entry++) {
+      if (!strcmp(entry->driver_name, driver_name))
+         return entry;
+   }
+
+   return NULL;
+#else
+   struct util_dl_library *module =
+      pipe_loader_find_module(driver_name, PIPE_SEARCH_DIR);
+   const struct drm_driver_descriptor *found;
+
+   *plib = module;
+   if (!module)
+      return NULL;
+
+   found = (const struct drm_driver_descriptor *)
+      util_dl_get_proc_address(module, "driver_descriptor");
+
+   /* Reject a module whose descriptor belongs to some other driver. */
+   if (!found || strcmp(found->driver_name, driver_name) != 0)
+      return NULL;
+
+   return found;
+#endif
+}
+
bool
pipe_loader_drm_probe_fd(struct pipe_loader_device **dev, int fd)
{
@@ -179,27 +186,13 @@
if (!ddev->base.driver_name)
goto fail;
-#ifdef GALLIUM_STATIC_TARGETS
- for (int i = 0; i < ARRAY_SIZE(driver_descriptors); i++) {
- if (strcmp(driver_descriptors[i].driver_name, ddev->base.driver_name) == 0) {
- ddev->dd = &driver_descriptors[i];
- break;
- }
- }
+ struct util_dl_library **plib = NULL;
+#ifndef GALLIUM_STATIC_TARGETS
+ plib = &ddev->lib;
+#endif
+ ddev->dd = get_driver_descriptor(ddev->base.driver_name, plib);
if (!ddev->dd)
goto fail;
-#else
- ddev->lib = pipe_loader_find_module(&ddev->base, PIPE_SEARCH_DIR);
- if (!ddev->lib)
- goto fail;
-
- ddev->dd = (const struct drm_driver_descriptor *)
- util_dl_get_proc_address(ddev->lib, "driver_descriptor");
-
- /* sanity check on the driver name */
- if (!ddev->dd || strcmp(ddev->dd->driver_name, ddev->base.driver_name) != 0)
- goto fail;
-#endif
*dev = &ddev->base;
return true;
@@ -264,8 +257,7 @@
close(ddev->fd);
FREE(ddev->base.driver_name);
- FREE(ddev);
- *dev = NULL;
+ pipe_loader_base_release(dev);
}
static const struct drm_conf_ret *
@@ -281,11 +273,34 @@
}
static struct pipe_screen *
-pipe_loader_drm_create_screen(struct pipe_loader_device *dev, unsigned flags)
+pipe_loader_drm_create_screen(struct pipe_loader_device *dev,
+ const struct pipe_screen_config *config)
{
struct pipe_loader_drm_device *ddev = pipe_loader_drm_device(dev);
- return ddev->dd->create_screen(ddev->fd, flags);
+ /* Hand the device's DRM fd and the caller's config to the driver. */
+ return ddev->dd->create_screen(ddev->fd, config);
+}
+
+/**
+ * Return a heap-allocated copy of the driinfo XML that the DRM driver
+ * \p driver_name reports for DRM_CONF_XML_OPTIONS.
+ *
+ * NULL is returned when the driver cannot be found or reports no XML.  A
+ * module opened for the lookup is closed again before returning.
+ */
+char *
+pipe_loader_drm_get_driinfo_xml(const char *driver_name)
+{
+   char *xml = NULL;
+   struct util_dl_library *lib = NULL;
+   const struct drm_conf_ret *conf;
+   const struct drm_driver_descriptor *dd =
+      get_driver_descriptor(driver_name, &lib);
+
+   /* NOTE(review): descriptors can come from separately built modules;
+    * do not assume the configuration callback is set. */
+   if (!dd || !dd->configuration)
+      goto out;
+
+   conf = dd->configuration(DRM_CONF_XML_OPTIONS);
+
+   /* strdup(NULL) is undefined, so a NULL pointer counts as "no XML". */
+   if (!conf || !conf->val.val_pointer)
+      goto out;
+
+   xml = strdup((const char *)conf->val.val_pointer);
+
+out:
+   if (lib)
+      util_dl_close(lib);
+   return xml;
}
static const struct pipe_loader_ops pipe_loader_drm_ops = {
diff -Nru mesa-17.2.4/src/gallium/auxiliary/pipe-loader/pipe_loader.h mesa-17.3.3/src/gallium/auxiliary/pipe-loader/pipe_loader.h
--- mesa-17.2.4/src/gallium/auxiliary/pipe-loader/pipe_loader.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/pipe-loader/pipe_loader.h 2018-01-18 21:30:28.000000000 +0000
@@ -35,6 +35,7 @@
#include "pipe/p_compiler.h"
#include "state_tracker/drm_driver.h"
+#include "util/xmlconfig.h"
#ifdef __cplusplus
extern "C" {
@@ -65,6 +66,9 @@
char *driver_name;
const struct pipe_loader_ops *ops;
+
+ driOptionCache option_cache;
+ driOptionCache option_info;
};
/**
@@ -84,7 +88,7 @@
* \param dev Device the screen will be created for.
*/
struct pipe_screen *
-pipe_loader_create_screen(struct pipe_loader_device *dev, unsigned flags);
+pipe_loader_create_screen(struct pipe_loader_device *dev);
/**
* Query the configuration parameters for the specified device.
@@ -97,6 +101,24 @@
enum drm_conf conf);
/**
+ * Ensure that dev->option_cache is initialized appropriately for the driver.
+ *
+ * This function can be called multiple times; once the options have been
+ * loaded, further calls return without doing anything.
+ *
+ * \param dev Device for which options should be loaded.
+ */
+void
+pipe_loader_load_options(struct pipe_loader_device *dev);
+
+/**
+ * Get the driinfo XML string used by the given driver.
+ *
+ * When the driver provides no XML of its own, a copy of the generic
+ * gallium_driinfo_xml is returned instead.
+ *
+ * The returned string is heap-allocated (strdup); the caller owns it and
+ * releases it with free().
+ */
+char *
+pipe_loader_get_driinfo_xml(const char *driver_name);
+
+/**
* Release resources allocated for a list of devices.
*
* Should be called when the specified devices are no longer in use to
@@ -180,6 +202,16 @@
bool
pipe_loader_drm_probe_fd(struct pipe_loader_device **dev, int fd);
+/**
+ * Get the driinfo XML used for the DRM driver of the given name, if any.
+ *
+ * Returns NULL when no descriptor is found for the name or the driver
+ * reports no XML.
+ *
+ * The returned string is heap-allocated (strdup); the caller owns it and
+ * releases it with free().
+ */
+char *
+pipe_loader_drm_get_driinfo_xml(const char *driver_name);
+
+/** Generic gallium driinfo XML, defined in pipe_loader.c. */
+extern const char gallium_driinfo_xml[];
+
#ifdef __cplusplus
}
#endif
diff -Nru mesa-17.2.4/src/gallium/auxiliary/pipe-loader/pipe_loader_priv.h mesa-17.3.3/src/gallium/auxiliary/pipe-loader/pipe_loader_priv.h
--- mesa-17.2.4/src/gallium/auxiliary/pipe-loader/pipe_loader_priv.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/pipe-loader/pipe_loader_priv.h 2018-01-18 21:30:28.000000000 +0000
@@ -32,7 +32,7 @@
struct pipe_loader_ops {
struct pipe_screen *(*create_screen)(struct pipe_loader_device *dev,
- unsigned flags);
+ const struct pipe_screen_config *config);
const struct drm_conf_ret *(*configuration)(struct pipe_loader_device *dev,
enum drm_conf conf);
@@ -41,10 +41,21 @@
};
/**
- * Open the pipe driver module that handles a specified device.
+ * Open the pipe driver module that contains the specified driver.
*/
struct util_dl_library *
-pipe_loader_find_module(struct pipe_loader_device *dev,
+pipe_loader_find_module(const char *driver_name,
const char *library_paths);
+/**
+ * Free the base device structure.
+ *
+ * Destroys the device's option cache and option info, frees the structure
+ * and sets *dev to NULL.
+ *
+ * Implementations of pipe_loader_ops::release must call this.
+ *
+ * (*dev)->driver_name must be freed by the caller if it was allocated on the
+ * heap.
+ */
+void
+pipe_loader_base_release(struct pipe_loader_device **dev);
+
#endif /* PIPE_LOADER_PRIV_H */
diff -Nru mesa-17.2.4/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c mesa-17.3.3/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
--- mesa-17.2.4/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c 2018-01-18 21:30:28.000000000 +0000
@@ -100,7 +100,7 @@
if (!sdev->dd)
return false;
#else
- sdev->lib = pipe_loader_find_module(&sdev->base, PIPE_SEARCH_DIR);
+ sdev->lib = pipe_loader_find_module("swrast", PIPE_SEARCH_DIR);
if (!sdev->lib)
return false;
@@ -270,7 +270,8 @@
static void
pipe_loader_sw_release(struct pipe_loader_device **dev)
{
- struct pipe_loader_sw_device *sdev = pipe_loader_sw_device(*dev);
+ MAYBE_UNUSED struct pipe_loader_sw_device *sdev =
+ pipe_loader_sw_device(*dev);
#ifndef GALLIUM_STATIC_TARGETS
if (sdev->lib)
@@ -282,8 +283,7 @@
close(sdev->fd);
#endif
- FREE(sdev);
- *dev = NULL;
+ pipe_loader_base_release(dev);
}
static const struct drm_conf_ret *
@@ -295,7 +295,7 @@
static struct pipe_screen *
pipe_loader_sw_create_screen(struct pipe_loader_device *dev,
- unsigned flags)
+ const struct pipe_screen_config *config)
{
struct pipe_loader_sw_device *sdev = pipe_loader_sw_device(dev);
struct pipe_screen *screen;
diff -Nru mesa-17.2.4/src/gallium/auxiliary/pipe-loader/SConscript mesa-17.3.3/src/gallium/auxiliary/pipe-loader/SConscript
--- mesa-17.2.4/src/gallium/auxiliary/pipe-loader/SConscript 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/pipe-loader/SConscript 2018-01-18 21:30:28.000000000 +0000
@@ -7,6 +7,7 @@
env.Append(CPPPATH = [
'#/src/loader',
'#/src/gallium/winsys',
+ xmlpool_options.dir.dir,
])
env.Append(CPPDEFINES = [
diff -Nru mesa-17.2.4/src/gallium/auxiliary/postprocess/pp_mlaa.h mesa-17.3.3/src/gallium/auxiliary/postprocess/pp_mlaa.h
--- mesa-17.2.4/src/gallium/auxiliary/postprocess/pp_mlaa.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/postprocess/pp_mlaa.h 2018-01-18 21:30:28.000000000 +0000
@@ -164,12 +164,12 @@
"DCL OUT[1], GENERIC[0]\n"
"DCL OUT[2], GENERIC[10]\n"
"DCL OUT[3], GENERIC[11]\n"
- "DCL CONST[0]\n"
+ "DCL CONST[0][0]\n"
"IMM FLT32 { 1.0000, 0.0000, -1.0000, 0.0000}\n"
" 0: MOV OUT[0], IN[0]\n"
" 1: MOV OUT[1], IN[1]\n"
- " 2: MAD OUT[2], CONST[0].xyxy, IMM[0].zyyz, IN[1].xyxy\n"
- " 3: MAD OUT[3], CONST[0].xyxy, IMM[0].xyyx, IN[1].xyxy\n"
+ " 2: MAD OUT[2], CONST[0][0].xyxy, IMM[0].zyyz, IN[1].xyxy\n"
+ " 3: MAD OUT[3], CONST[0][0].xyxy, IMM[0].xyyx, IN[1].xyxy\n"
" 4: END\n";
@@ -183,7 +183,7 @@
"DCL SVIEW[1], 2D, FLOAT\n"
"DCL SAMP[2]\n"
"DCL SVIEW[2], 2D, FLOAT\n"
- "DCL CONST[0]\n"
+ "DCL CONST[0][0]\n"
"DCL TEMP[0..6]\n"
"IMM FLT32 { 0.0000, -0.2500, 0.00609756, 0.5000}\n"
"IMM FLT32 { -1.5000, -2.0000, 0.9000, 1.5000}\n"
@@ -204,7 +204,7 @@
" 11: BRK\n"
" 12: ENDIF\n"
" 13: MOV TEMP[4].y, IMM[0].xxxx\n"
- " 14: MAD TEMP[3].xyz, CONST[0].xyyy, TEMP[4].xyyy, TEMP[1].xyyy\n"
+ " 14: MAD TEMP[3].xyz, CONST[0][0].xyyy, TEMP[4].xyyy, TEMP[1].xyyy\n"
" 15: MOV TEMP[3].w, IMM[0].xxxx\n"
" 16: TXL TEMP[5], TEMP[3], SAMP[2], 2D\n"
" 17: MOV TEMP[3].x, TEMP[5].yyyy\n"
@@ -229,7 +229,7 @@
" 36: BRK\n"
" 37: ENDIF\n"
" 38: MOV TEMP[5].y, IMM[0].xxxx\n"
- " 39: MAD TEMP[4].xyz, CONST[0].xyyy, TEMP[5].xyyy, TEMP[3].xyyy\n"
+ " 39: MAD TEMP[4].xyz, CONST[0][0].xyyy, TEMP[5].xyyy, TEMP[3].xyyy\n"
" 40: MOV TEMP[4].w, IMM[0].xxxx\n"
" 41: TXL TEMP[6].xy, TEMP[4], SAMP[2], 2D\n"
" 42: MOV TEMP[4].x, TEMP[6].yyyy\n"
@@ -250,7 +250,7 @@
" 57: MOV TEMP[5].x, TEMP[1].xxxx\n"
" 58: ADD TEMP[1].x, TEMP[4].xxxx, IMM[2].yyyy\n"
" 59: MOV TEMP[5].z, TEMP[1].xxxx\n"
- " 60: MAD TEMP[1], TEMP[5], CONST[0].xyxy, IN[0].xyxy\n"
+ " 60: MAD TEMP[1], TEMP[5], CONST[0][0].xyxy, IN[0].xyxy\n"
" 61: MOV TEMP[4], TEMP[1].xyyy\n"
" 62: MOV TEMP[4].w, IMM[0].xxxx\n"
" 63: TXL TEMP[5].x, TEMP[4], SAMP[2], 2D\n"
@@ -278,7 +278,7 @@
" 85: BRK\n"
" 86: ENDIF\n"
" 87: MOV TEMP[3].y, IMM[0].xxxx\n"
- " 88: MAD TEMP[5].xyz, CONST[0].xyyy, TEMP[3].yxxx, TEMP[1].xyyy\n"
+ " 88: MAD TEMP[5].xyz, CONST[0][0].xyyy, TEMP[3].yxxx, TEMP[1].xyyy\n"
" 89: MOV TEMP[5].w, IMM[0].xxxx\n"
" 90: TXL TEMP[4], TEMP[5], SAMP[2], 2D\n"
" 91: MOV TEMP[2].x, TEMP[4].xxxx\n"
@@ -303,7 +303,7 @@
"110: BRK\n"
"111: ENDIF\n"
"112: MOV TEMP[4].y, IMM[0].xxxx\n"
- "113: MAD TEMP[5].xyz, CONST[0].xyyy, TEMP[4].yxxx, TEMP[2].xyyy\n"
+ "113: MAD TEMP[5].xyz, CONST[0][0].xyyy, TEMP[4].yxxx, TEMP[2].xyyy\n"
"114: MOV TEMP[5].w, IMM[0].xxxx\n"
"115: TXL TEMP[6], TEMP[5], SAMP[2], 2D\n"
"116: MOV TEMP[3].x, TEMP[6].xxxx\n"
@@ -324,7 +324,7 @@
"131: MOV TEMP[4].y, TEMP[1].xxxx\n"
"132: ADD TEMP[1].x, TEMP[3].xxxx, IMM[2].yyyy\n"
"133: MOV TEMP[4].w, TEMP[1].xxxx\n"
- "134: MAD TEMP[1], TEMP[4], CONST[0].xyxy, IN[0].xyxy\n"
+ "134: MAD TEMP[1], TEMP[4], CONST[0][0].xyxy, IN[0].xyxy\n"
"135: MOV TEMP[3], TEMP[1].xyyy\n"
"136: MOV TEMP[3].w, IMM[0].xxxx\n"
"137: TXL TEMP[4].y, TEMP[3], SAMP[2], 2D\n"
diff -Nru mesa-17.2.4/src/gallium/auxiliary/target-helpers/drm_helper.h mesa-17.3.3/src/gallium/auxiliary/target-helpers/drm_helper.h
--- mesa-17.2.4/src/gallium/auxiliary/target-helpers/drm_helper.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/target-helpers/drm_helper.h 2018-01-18 21:30:28.000000000 +0000
@@ -4,13 +4,39 @@
#include
#include "target-helpers/inline_debug_helper.h"
#include "target-helpers/drm_helper_public.h"
+#include "state_tracker/drm_driver.h"
+#include "util/xmlpool.h"
+
+/* Answer given to DRM_CONF_THROTTLE by pipe_default_configuration_query(). */
+static const struct drm_conf_ret throttle_ret = {
+ .type = DRM_CONF_INT,
+ .val.val_int = 2,
+};
+
+/* Answer given to DRM_CONF_SHARE_FD by pipe_default_configuration_query(). */
+static const struct drm_conf_ret share_fd_ret = {
+ .type = DRM_CONF_BOOL,
+ .val.val_bool = true,
+};
+
+/**
+ * Default configuration callback: answers the throttle and share-fd
+ * queries and returns NULL for every other one.
+ */
+const struct drm_conf_ret *
+pipe_default_configuration_query(enum drm_conf conf)
+{
+   if (conf == DRM_CONF_THROTTLE)
+      return &throttle_ret;
+
+   if (conf == DRM_CONF_SHARE_FD)
+      return &share_fd_ret;
+
+   return NULL;
+}
#ifdef GALLIUM_I915
#include "i915/drm/i915_drm_public.h"
#include "i915/i915_public.h"
struct pipe_screen *
-pipe_i915_create_screen(int fd, unsigned flags)
+pipe_i915_create_screen(int fd, const struct pipe_screen_config *config)
{
struct i915_winsys *iws;
struct pipe_screen *screen;
@@ -26,7 +52,7 @@
#else
struct pipe_screen *
-pipe_i915_create_screen(int fd, unsigned flags)
+pipe_i915_create_screen(int fd, const struct pipe_screen_config *config)
{
fprintf(stderr, "i915g: driver missing\n");
return NULL;
@@ -38,7 +64,7 @@
#include "nouveau/drm/nouveau_drm_public.h"
struct pipe_screen *
-pipe_nouveau_create_screen(int fd, unsigned flags)
+pipe_nouveau_create_screen(int fd, const struct pipe_screen_config *config)
{
struct pipe_screen *screen;
@@ -49,7 +75,7 @@
#else
struct pipe_screen *
-pipe_nouveau_create_screen(int fd, unsigned flags)
+pipe_nouveau_create_screen(int fd, const struct pipe_screen_config *config)
{
fprintf(stderr, "nouveau: driver missing\n");
return NULL;
@@ -61,7 +87,7 @@
#include "pl111/drm/pl111_drm_public.h"
struct pipe_screen *
-pipe_pl111_create_screen(int fd, unsigned flags)
+pipe_pl111_create_screen(int fd, const struct pipe_screen_config *config)
{
struct pipe_screen *screen;
@@ -72,7 +98,7 @@
#else
struct pipe_screen *
-pipe_pl111_create_screen(int fd, unsigned flags)
+pipe_pl111_create_screen(int fd, const struct pipe_screen_config *config)
{
fprintf(stderr, "pl111: driver missing\n");
return NULL;
@@ -86,18 +112,18 @@
#include "r300/r300_public.h"
struct pipe_screen *
-pipe_r300_create_screen(int fd, unsigned flags)
+pipe_r300_create_screen(int fd, const struct pipe_screen_config *config)
{
struct radeon_winsys *rw;
- rw = radeon_drm_winsys_create(fd, flags, r300_screen_create);
+ rw = radeon_drm_winsys_create(fd, config, r300_screen_create);
return rw ? debug_screen_wrap(rw->screen) : NULL;
}
#else
struct pipe_screen *
-pipe_r300_create_screen(int fd, unsigned flags)
+pipe_r300_create_screen(int fd, const struct pipe_screen_config *config)
{
fprintf(stderr, "r300: driver missing\n");
return NULL;
@@ -111,18 +137,18 @@
#include "r600/r600_public.h"
struct pipe_screen *
-pipe_r600_create_screen(int fd, unsigned flags)
+pipe_r600_create_screen(int fd, const struct pipe_screen_config *config)
{
struct radeon_winsys *rw;
- rw = radeon_drm_winsys_create(fd, flags, r600_screen_create);
+ rw = radeon_drm_winsys_create(fd, config, r600_screen_create);
return rw ? debug_screen_wrap(rw->screen) : NULL;
}
#else
struct pipe_screen *
-pipe_r600_create_screen(int fd, unsigned flags)
+pipe_r600_create_screen(int fd, const struct pipe_screen_config *config)
{
fprintf(stderr, "r600: driver missing\n");
return NULL;
@@ -137,28 +163,52 @@
#include "radeonsi/si_public.h"
struct pipe_screen *
-pipe_radeonsi_create_screen(int fd, unsigned flags)
+pipe_radeonsi_create_screen(int fd, const struct pipe_screen_config *config)
{
struct radeon_winsys *rw;
/* First, try amdgpu. */
- rw = amdgpu_winsys_create(fd, flags, radeonsi_screen_create);
+ rw = amdgpu_winsys_create(fd, config, radeonsi_screen_create);
if (!rw)
- rw = radeon_drm_winsys_create(fd, flags, radeonsi_screen_create);
+ rw = radeon_drm_winsys_create(fd, config, radeonsi_screen_create);
return rw ? debug_screen_wrap(rw->screen) : NULL;
}
+/**
+ * radeonsi configuration callback: DRM_CONF_XML_OPTIONS is answered with
+ * the XML pulled in from radeonsi/si_driinfo.h; every other query is
+ * passed on to pipe_default_configuration_query().
+ */
+const struct drm_conf_ret *
+pipe_radeonsi_configuration_query(enum drm_conf conf)
+{
+   static const struct drm_conf_ret xml_options_ret = {
+      .type = DRM_CONF_POINTER,
+      .val.val_pointer =
+#include "radeonsi/si_driinfo.h"
+   };
+
+   if (conf != DRM_CONF_XML_OPTIONS)
+      return pipe_default_configuration_query(conf);
+
+   return &xml_options_ret;
+}
+
#else
struct pipe_screen *
-pipe_radeonsi_create_screen(int fd, unsigned flags)
+pipe_radeonsi_create_screen(int fd, const struct pipe_screen_config *config)
{
fprintf(stderr, "radeonsi: driver missing\n");
return NULL;
}
+/* Stub paired with the "radeonsi: driver missing" screen constructor:
+ * no configuration value is available, so every query yields NULL. */
+const struct drm_conf_ret *
+pipe_radeonsi_configuration_query(enum drm_conf conf)
+{
+ return NULL;
+}
+
#endif
#ifdef GALLIUM_VMWGFX
@@ -166,7 +216,7 @@
#include "svga/svga_public.h"
struct pipe_screen *
-pipe_vmwgfx_create_screen(int fd, unsigned flags)
+pipe_vmwgfx_create_screen(int fd, const struct pipe_screen_config *config)
{
struct svga_winsys_screen *sws;
struct pipe_screen *screen;
@@ -182,7 +232,7 @@
#else
struct pipe_screen *
-pipe_vmwgfx_create_screen(int fd, unsigned flags)
+pipe_vmwgfx_create_screen(int fd, const struct pipe_screen_config *config)
{
fprintf(stderr, "svga: driver missing\n");
return NULL;
@@ -194,7 +244,7 @@
#include "freedreno/drm/freedreno_drm_public.h"
struct pipe_screen *
-pipe_freedreno_create_screen(int fd, unsigned flags)
+pipe_freedreno_create_screen(int fd, const struct pipe_screen_config *config)
{
struct pipe_screen *screen;
@@ -205,7 +255,7 @@
#else
struct pipe_screen *
-pipe_freedreno_create_screen(int fd, unsigned flags)
+pipe_freedreno_create_screen(int fd, const struct pipe_screen_config *config)
{
fprintf(stderr, "freedreno: driver missing\n");
return NULL;
@@ -218,7 +268,7 @@
#include "virgl/virgl_public.h"
struct pipe_screen *
-pipe_virgl_create_screen(int fd, unsigned flags)
+pipe_virgl_create_screen(int fd, const struct pipe_screen_config *config)
{
struct pipe_screen *screen;
@@ -229,7 +279,7 @@
#else
struct pipe_screen *
-pipe_virgl_create_screen(int fd, unsigned flags)
+pipe_virgl_create_screen(int fd, const struct pipe_screen_config *config)
{
fprintf(stderr, "virgl: driver missing\n");
return NULL;
@@ -241,7 +291,7 @@
#include "vc4/drm/vc4_drm_public.h"
struct pipe_screen *
-pipe_vc4_create_screen(int fd, unsigned flags)
+pipe_vc4_create_screen(int fd, const struct pipe_screen_config *config)
{
struct pipe_screen *screen;
@@ -252,7 +302,7 @@
#else
struct pipe_screen *
-pipe_vc4_create_screen(int fd, unsigned flags)
+pipe_vc4_create_screen(int fd, const struct pipe_screen_config *config)
{
fprintf(stderr, "vc4: driver missing\n");
return NULL;
@@ -260,11 +310,34 @@
#endif
+#ifdef GALLIUM_VC5
+#include "vc5/drm/vc5_drm_public.h"
+
+/**
+ * Create a vc5 screen for \p fd and wrap it with debug_screen_wrap().
+ * Returns NULL when screen creation fails; \p config is not used.
+ */
+struct pipe_screen *
+pipe_vc5_create_screen(int fd, const struct pipe_screen_config *config)
+{
+   struct pipe_screen *vc5_screen = vc5_drm_screen_create(fd);
+
+   if (!vc5_screen)
+      return NULL;
+
+   return debug_screen_wrap(vc5_screen);
+}
+
+#else
+
+/* Stub used in the #else branch of GALLIUM_VC5: reports the missing driver
+ * on stderr and creates no screen. */
+struct pipe_screen *
+pipe_vc5_create_screen(int fd, const struct pipe_screen_config *config)
+{
+ fprintf(stderr, "vc5: driver missing\n");
+ return NULL;
+}
+
+#endif
+
#ifdef GALLIUM_ETNAVIV
#include "etnaviv/drm/etnaviv_drm_public.h"
struct pipe_screen *
-pipe_etna_create_screen(int fd, unsigned flags)
+pipe_etna_create_screen(int fd, const struct pipe_screen_config *config)
{
struct pipe_screen *screen;
@@ -275,7 +348,7 @@
#else
struct pipe_screen *
-pipe_etna_create_screen(int fd, unsigned flags)
+pipe_etna_create_screen(int fd, const struct pipe_screen_config *config)
{
fprintf(stderr, "etnaviv: driver missing\n");
return NULL;
@@ -287,7 +360,7 @@
#include "imx/drm/imx_drm_public.h"
struct pipe_screen *
-pipe_imx_drm_create_screen(int fd, unsigned flags)
+pipe_imx_drm_create_screen(int fd, const struct pipe_screen_config *config)
{
struct pipe_screen *screen;
@@ -298,7 +371,7 @@
#else
struct pipe_screen *
-pipe_imx_drm_create_screen(int fd, unsigned flags)
+pipe_imx_drm_create_screen(int fd, const struct pipe_screen_config *config)
{
fprintf(stderr, "imx-drm: driver missing\n");
return NULL;
diff -Nru mesa-17.2.4/src/gallium/auxiliary/target-helpers/drm_helper_public.h mesa-17.3.3/src/gallium/auxiliary/target-helpers/drm_helper_public.h
--- mesa-17.2.4/src/gallium/auxiliary/target-helpers/drm_helper_public.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/target-helpers/drm_helper_public.h 2018-01-18 21:30:28.000000000 +0000
@@ -1,46 +1,57 @@
#ifndef _DRM_HELPER_PUBLIC_H
#define _DRM_HELPER_PUBLIC_H
+enum drm_conf;
+struct drm_conf_ret;
struct pipe_screen;
+struct pipe_screen_config;
struct pipe_screen *
-pipe_i915_create_screen(int fd, unsigned flags);
+pipe_i915_create_screen(int fd, const struct pipe_screen_config *config);
struct pipe_screen *
-pipe_ilo_create_screen(int fd, unsigned flags);
+pipe_ilo_create_screen(int fd, const struct pipe_screen_config *config);
struct pipe_screen *
-pipe_nouveau_create_screen(int fd, unsigned flags);
+pipe_nouveau_create_screen(int fd, const struct pipe_screen_config *config);
struct pipe_screen *
-pipe_r300_create_screen(int fd, unsigned flags);
+pipe_r300_create_screen(int fd, const struct pipe_screen_config *config);
struct pipe_screen *
-pipe_r600_create_screen(int fd, unsigned flags);
+pipe_r600_create_screen(int fd, const struct pipe_screen_config *config);
struct pipe_screen *
-pipe_radeonsi_create_screen(int fd, unsigned flags);
+pipe_radeonsi_create_screen(int fd, const struct pipe_screen_config *config);
+/* Configuration callback used by the radeonsi driver descriptor. */
+const struct drm_conf_ret *
+pipe_radeonsi_configuration_query(enum drm_conf conf);
struct pipe_screen *
-pipe_vmwgfx_create_screen(int fd, unsigned flags);
+pipe_vmwgfx_create_screen(int fd, const struct pipe_screen_config *config);
struct pipe_screen *
-pipe_freedreno_create_screen(int fd, unsigned flags);
+pipe_freedreno_create_screen(int fd, const struct pipe_screen_config *config);
struct pipe_screen *
-pipe_virgl_create_screen(int fd, unsigned flags);
+pipe_virgl_create_screen(int fd, const struct pipe_screen_config *config);
struct pipe_screen *
-pipe_vc4_create_screen(int fd, unsigned flags);
+pipe_vc4_create_screen(int fd, const struct pipe_screen_config *config);
struct pipe_screen *
-pipe_pl111_create_screen(int fd, unsigned flags);
+pipe_vc5_create_screen(int fd, const struct pipe_screen_config *config);
struct pipe_screen *
-pipe_etna_create_screen(int fd, unsigned flags);
+pipe_pl111_create_screen(int fd, const struct pipe_screen_config *config);
struct pipe_screen *
-pipe_imx_drm_create_screen(int fd, unsigned flags);
+pipe_etna_create_screen(int fd, const struct pipe_screen_config *config);
+
+struct pipe_screen *
+pipe_imx_drm_create_screen(int fd, const struct pipe_screen_config *config);
+
+/* Configuration callback for drivers that need no driver-specific answers. */
+const struct drm_conf_ret *
+pipe_default_configuration_query(enum drm_conf conf);
#endif /* _DRM_HELPER_PUBLIC_H */
diff -Nru mesa-17.2.4/src/gallium/auxiliary/tgsi/tgsi_build.c mesa-17.3.3/src/gallium/auxiliary/tgsi/tgsi_build.c
--- mesa-17.2.4/src/gallium/auxiliary/tgsi/tgsi_build.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/tgsi/tgsi_build.c 2018-01-18 21:30:28.000000000 +0000
@@ -163,6 +163,16 @@
return dr;
}
+/**
+ * Default declaration dimension: Index2D is 0.
+ *
+ * The struct is zero-initialized as a whole rather than field by field so
+ * that no member of the value returned to the caller is left indeterminate.
+ * NOTE(review): presumably the struct carries padding bits next to
+ * Index2D - confirm against its declaration.
+ */
+static struct tgsi_declaration_dimension
+tgsi_default_declaration_dimension(void)
+{
+   struct tgsi_declaration_dimension dim = { 0 };
+
+   return dim;
+}
+
static struct tgsi_declaration_range
tgsi_build_declaration_range(
unsigned first,
@@ -381,6 +391,7 @@
full_declaration.Declaration = tgsi_default_declaration();
full_declaration.Range = tgsi_default_declaration_range();
+ full_declaration.Dim = tgsi_default_declaration_dimension();
full_declaration.Semantic = tgsi_default_declaration_semantic();
full_declaration.Interp = tgsi_default_declaration_interp();
full_declaration.Image = tgsi_default_declaration_image();
diff -Nru mesa-17.2.4/src/gallium/auxiliary/tgsi/tgsi_dump.c mesa-17.3.3/src/gallium/auxiliary/tgsi/tgsi_dump.c
--- mesa-17.2.4/src/gallium/auxiliary/tgsi/tgsi_dump.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/tgsi/tgsi_dump.c 2018-01-18 21:30:28.000000000 +0000
@@ -578,7 +578,7 @@
TXT( " " );
ctx->indent += info->post_indent;
- TXT( info->mnemonic );
+ TXT( tgsi_get_opcode_name(inst->Instruction.Opcode) );
if (inst->Instruction.Saturate) {
TXT( "_SAT" );
diff -Nru mesa-17.2.4/src/gallium/auxiliary/tgsi/tgsi_exec.c mesa-17.3.3/src/gallium/auxiliary/tgsi/tgsi_exec.c
--- mesa-17.2.4/src/gallium/auxiliary/tgsi/tgsi_exec.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/tgsi/tgsi_exec.c 2018-01-18 21:30:28.000000000 +0000
@@ -1454,6 +1454,17 @@
}
+/* dst = ldexpf(src0, src1) for each of the four channel slots; the exponent is
+ * read from the integer view of src1. */
static void
+micro_ldexp(union tgsi_exec_channel *dst,
+            const union tgsi_exec_channel *src0,
+            const union tgsi_exec_channel *src1)
+{
+   unsigned chan;
+
+   for (chan = 0; chan < 4; chan++)
+      dst->f[chan] = ldexpf(src0->f[chan], src1->i[chan]);
+}
+
+static void
micro_sub(union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src0,
const union tgsi_exec_channel *src1)
@@ -2340,15 +2351,22 @@
exec_lodq(struct tgsi_exec_machine *mach,
const struct tgsi_full_instruction *inst)
{
- uint unit;
+ uint resource_unit, sampler_unit;
int dim;
int i;
union tgsi_exec_channel coords[4];
const union tgsi_exec_channel *args[ARRAY_SIZE(coords)];
union tgsi_exec_channel r[2];
- unit = fetch_sampler_unit(mach, inst, 1);
- dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture);
+ resource_unit = fetch_sampler_unit(mach, inst, 1);
+ if (inst->Instruction.Opcode == TGSI_OPCODE_LOD) {
+ uint target = mach->SamplerViews[resource_unit].Resource;
+ dim = tgsi_util_get_texture_coord_dim(target);
+ sampler_unit = fetch_sampler_unit(mach, inst, 2);
+ } else {
+ dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture);
+ sampler_unit = resource_unit;
+ }
assert(dim <= ARRAY_SIZE(coords));
/* fetch coordinates */
for (i = 0; i < dim; i++) {
@@ -2358,7 +2376,7 @@
for (i = dim; i < ARRAY_SIZE(coords); i++) {
args[i] = &ZeroVec;
}
- mach->Sampler->query_lod(mach->Sampler, unit, unit,
+ mach->Sampler->query_lod(mach->Sampler, resource_unit, sampler_unit,
args[0]->f,
args[1]->f,
args[2]->f,
@@ -2375,6 +2393,35 @@
store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y,
TGSI_EXEC_DATA_FLOAT);
}
+ if (inst->Instruction.Opcode == TGSI_OPCODE_LOD) {
+ unsigned char swizzles[4];
+ unsigned chan;
+ swizzles[0] = inst->Src[1].Register.SwizzleX;
+ swizzles[1] = inst->Src[1].Register.SwizzleY;
+ swizzles[2] = inst->Src[1].Register.SwizzleZ;
+ swizzles[3] = inst->Src[1].Register.SwizzleW;
+
+ for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+ if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+ if (swizzles[chan] >= 2) {
+ store_dest(mach, &ZeroVec,
+ &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
+ } else {
+ store_dest(mach, &r[swizzles[chan]],
+ &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
+ }
+ }
+ }
+ } else {
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
+ store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X,
+ TGSI_EXEC_DATA_FLOAT);
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
+ store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y,
+ TGSI_EXEC_DATA_FLOAT);
+ }
+ }
}
static void
@@ -2631,6 +2678,9 @@
lod = &c1;
control = TGSI_SAMPLER_LOD_EXPLICIT;
}
+ else if (modifier == TEX_MODIFIER_GATHER) {
+ control = TGSI_SAMPLER_GATHER;
+ }
else {
assert(modifier == TEX_MODIFIER_LEVEL_ZERO);
control = TGSI_SAMPLER_LOD_ZERO;
@@ -3185,60 +3235,6 @@
}
static void
-exec_dp2a(struct tgsi_exec_machine *mach,
- const struct tgsi_full_instruction *inst)
-{
- unsigned int chan;
- union tgsi_exec_channel arg[3];
-
- fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
- fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
- micro_mul(&arg[2], &arg[0], &arg[1]);
-
- fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
- fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
- micro_mad(&arg[0], &arg[0], &arg[1], &arg[2]);
-
- fetch_source(mach, &arg[1], &inst->Src[2], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
- micro_add(&arg[0], &arg[0], &arg[1]);
-
- for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
- if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
- store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
- }
- }
-}
-
-static void
-exec_dph(struct tgsi_exec_machine *mach,
- const struct tgsi_full_instruction *inst)
-{
- unsigned int chan;
- union tgsi_exec_channel arg[3];
-
- fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
- fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
- micro_mul(&arg[2], &arg[0], &arg[1]);
-
- fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
- fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
- micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
-
- fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
- fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
- micro_mad(&arg[0], &arg[0], &arg[1], &arg[2]);
-
- fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
- micro_add(&arg[0], &arg[0], &arg[1]);
-
- for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
- if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
- store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
- }
- }
-}
-
-static void
exec_dp2(struct tgsi_exec_machine *mach,
const struct tgsi_full_instruction *inst)
{
@@ -3340,78 +3336,6 @@
}
static void
-exec_scs(struct tgsi_exec_machine *mach,
- const struct tgsi_full_instruction *inst)
-{
- if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) {
- union tgsi_exec_channel arg;
- union tgsi_exec_channel result;
-
- fetch_source(mach, &arg, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
-
- if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
- micro_cos(&result, &arg);
- store_dest(mach, &result, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
- }
- if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
- micro_sin(&result, &arg);
- store_dest(mach, &result, &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
- }
- }
- if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
- store_dest(mach, &ZeroVec, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
- }
- if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
- store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
- }
-}
-
-static void
-exec_xpd(struct tgsi_exec_machine *mach,
- const struct tgsi_full_instruction *inst)
-{
- union tgsi_exec_channel r[6];
- union tgsi_exec_channel d[3];
-
- fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
- fetch_source(mach, &r[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
-
- micro_mul(&r[2], &r[0], &r[1]);
-
- fetch_source(mach, &r[3], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
- fetch_source(mach, &r[4], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
-
- micro_mul(&r[5], &r[3], &r[4] );
- micro_sub(&d[TGSI_CHAN_X], &r[2], &r[5]);
-
- fetch_source(mach, &r[2], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
-
- micro_mul(&r[3], &r[3], &r[2]);
-
- fetch_source(mach, &r[5], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
-
- micro_mul(&r[1], &r[1], &r[5]);
- micro_sub(&d[TGSI_CHAN_Y], &r[3], &r[1]);
-
- micro_mul(&r[5], &r[5], &r[4]);
- micro_mul(&r[0], &r[0], &r[2]);
- micro_sub(&d[TGSI_CHAN_Z], &r[5], &r[0]);
-
- if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
- store_dest(mach, &d[TGSI_CHAN_X], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
- }
- if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
- store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
- }
- if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
- store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
- }
- if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
- store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
- }
-}
-
-static void
exec_dst(struct tgsi_exec_machine *mach,
const struct tgsi_full_instruction *inst)
{
@@ -3811,17 +3735,15 @@
union tgsi_double_channel dst;
union tgsi_exec_channel dst_exp;
- if (((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY)) {
- fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
- micro_dfracexp(&dst, &dst_exp, &src);
+ fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
+ micro_dfracexp(&dst, &dst_exp, &src);
+ if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY)
store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
- store_dest(mach, &dst_exp, &inst->Dst[1], inst, ffs(inst->Dst[1].Register.WriteMask) - 1, TGSI_EXEC_DATA_INT);
- }
- if (((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW)) {
- fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
- micro_dfracexp(&dst, &dst_exp, &src);
+ if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW)
store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
- store_dest(mach, &dst_exp, &inst->Dst[1], inst, ffs(inst->Dst[1].Register.WriteMask) - 1, TGSI_EXEC_DATA_INT);
+ for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+ if (inst->Dst[1].Register.WriteMask & (1 << chan))
+ store_dest(mach, &dst_exp, &inst->Dst[1], inst, chan, TGSI_EXEC_DATA_INT);
}
}
@@ -5183,10 +5105,6 @@
exec_scalar_unary(mach, inst, micro_sqrt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
- case TGSI_OPCODE_DP2A:
- exec_dp2a(mach, inst);
- break;
-
case TGSI_OPCODE_FRC:
exec_vector_unary(mach, inst, micro_frc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
@@ -5211,12 +5129,8 @@
exec_scalar_binary(mach, inst, micro_pow, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
- case TGSI_OPCODE_XPD:
- exec_xpd(mach, inst);
- break;
-
- case TGSI_OPCODE_DPH:
- exec_dph(mach, inst);
+ case TGSI_OPCODE_LDEXP:
+ exec_vector_binary(mach, inst, micro_ldexp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_COS:
@@ -5445,10 +5359,6 @@
exec_vector_trinary(mach, inst, micro_cmp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
- case TGSI_OPCODE_SCS:
- exec_scs(mach, inst);
- break;
-
case TGSI_OPCODE_DIV:
exec_vector_binary(mach, inst, micro_div, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
@@ -5528,14 +5438,6 @@
*pc = -1;
break;
- case TGSI_OPCODE_PUSHA:
- assert (0);
- break;
-
- case TGSI_OPCODE_POPA:
- assert (0);
- break;
-
case TGSI_OPCODE_CEIL:
exec_vector_unary(mach, inst, micro_ceil, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
@@ -5572,10 +5474,6 @@
exec_vector_binary(mach, inst, micro_xor, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
break;
- case TGSI_OPCODE_SAD:
- assert (0);
- break;
-
case TGSI_OPCODE_TXF:
exec_txf(mach, inst);
break;
@@ -5680,25 +5578,6 @@
case TGSI_OPCODE_NOP:
break;
- case TGSI_OPCODE_BREAKC:
- IFETCH(&r[0], 0, TGSI_CHAN_X);
- /* update CondMask */
- if (r[0].u[0] && (mach->ExecMask & 0x1)) {
- mach->LoopMask &= ~0x1;
- }
- if (r[0].u[1] && (mach->ExecMask & 0x2)) {
- mach->LoopMask &= ~0x2;
- }
- if (r[0].u[2] && (mach->ExecMask & 0x4)) {
- mach->LoopMask &= ~0x4;
- }
- if (r[0].u[3] && (mach->ExecMask & 0x8)) {
- mach->LoopMask &= ~0x8;
- }
- /* Todo: if mach->LoopMask == 0, jump to end of loop */
- UPDATE_EXEC_MASK(mach);
- break;
-
case TGSI_OPCODE_F2I:
exec_vector_unary(mach, inst, micro_f2i, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
break;
@@ -5860,7 +5739,7 @@
break;
case TGSI_OPCODE_GATHER4:
- assert(0);
+ exec_sample(mach, inst, TEX_MODIFIER_GATHER, FALSE);
break;
case TGSI_OPCODE_SVIEWINFO:
@@ -5875,6 +5754,10 @@
assert(0);
break;
+ case TGSI_OPCODE_LOD:
+ exec_lodq(mach, inst);
+ break;
+
case TGSI_OPCODE_UARL:
exec_vector_unary(mach, inst, micro_uarl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_UINT);
break;
diff -Nru mesa-17.2.4/src/gallium/auxiliary/tgsi/tgsi_exec.h mesa-17.3.3/src/gallium/auxiliary/tgsi/tgsi_exec.h
--- mesa-17.2.4/src/gallium/auxiliary/tgsi/tgsi_exec.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/tgsi/tgsi_exec.h 2018-01-18 21:30:28.000000000 +0000
@@ -58,6 +58,15 @@
TGSI_FOR_EACH_CHANNEL( CHAN )\
TGSI_IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )
+#define TGSI_IS_DST1_CHANNEL_ENABLED( INST, CHAN )\
+ ((INST)->Dst[1].Register.WriteMask & (1 << (CHAN)))
+
+#define TGSI_IF_IS_DST1_CHANNEL_ENABLED( INST, CHAN )\
+ if (TGSI_IS_DST1_CHANNEL_ENABLED( INST, CHAN ))
+
+#define TGSI_FOR_EACH_DST1_ENABLED_CHANNEL( INST, CHAN )\
+ TGSI_FOR_EACH_CHANNEL( CHAN )\
+ TGSI_IF_IS_DST1_CHANNEL_ENABLED( INST, CHAN )
/**
* Registers may be treated as float, signed int or unsigned int.
@@ -511,6 +520,9 @@
return 1;
case PIPE_SHADER_CAP_INTEGERS:
return 1;
+ case PIPE_SHADER_CAP_INT64_ATOMICS:
+ case PIPE_SHADER_CAP_FP16:
+ return 0;
case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
return PIPE_MAX_SAMPLERS;
case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
@@ -522,6 +534,7 @@
case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
return 1;
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
return 1;
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
diff -Nru mesa-17.2.4/src/gallium/auxiliary/tgsi/tgsi_info.c mesa-17.3.3/src/gallium/auxiliary/tgsi/tgsi_info.c
--- mesa-17.2.4/src/gallium/auxiliary/tgsi/tgsi_info.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/tgsi/tgsi_info.c 2018-01-18 21:30:28.000000000 +0000
@@ -35,261 +35,21 @@
#define CHAN TGSI_OUTPUT_CHAN_DEPENDENT
#define OTHR TGSI_OUTPUT_OTHER
+#define OPCODE(_num_dst, _num_src, _output_mode, name, ...) \
+ { .opcode = TGSI_OPCODE_ ## name, \
+ .output_mode = _output_mode, .num_dst = _num_dst, .num_src = _num_src, \
+ ##__VA_ARGS__ },
+
+#define OPCODE_GAP(opc) { .opcode = opc },
+
static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
{
- { 1, 1, 0, 0, 0, 0, 0, COMP, "ARL", TGSI_OPCODE_ARL },
- { 1, 1, 0, 0, 0, 0, 0, COMP, "MOV", TGSI_OPCODE_MOV },
- { 1, 1, 0, 0, 0, 0, 0, CHAN, "LIT", TGSI_OPCODE_LIT },
- { 1, 1, 0, 0, 0, 0, 0, REPL, "RCP", TGSI_OPCODE_RCP },
- { 1, 1, 0, 0, 0, 0, 0, REPL, "RSQ", TGSI_OPCODE_RSQ },
- { 1, 1, 0, 0, 0, 0, 0, CHAN, "EXP", TGSI_OPCODE_EXP },
- { 1, 1, 0, 0, 0, 0, 0, CHAN, "LOG", TGSI_OPCODE_LOG },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "MUL", TGSI_OPCODE_MUL },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "ADD", TGSI_OPCODE_ADD },
- { 1, 2, 0, 0, 0, 0, 0, REPL, "DP3", TGSI_OPCODE_DP3 },
- { 1, 2, 0, 0, 0, 0, 0, REPL, "DP4", TGSI_OPCODE_DP4 },
- { 1, 2, 0, 0, 0, 0, 0, CHAN, "DST", TGSI_OPCODE_DST },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "MIN", TGSI_OPCODE_MIN },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "MAX", TGSI_OPCODE_MAX },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "SLT", TGSI_OPCODE_SLT },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "SGE", TGSI_OPCODE_SGE },
- { 1, 3, 0, 0, 0, 0, 0, COMP, "MAD", TGSI_OPCODE_MAD },
- { 1, 2, 1, 0, 0, 0, 0, OTHR, "TEX_LZ", TGSI_OPCODE_TEX_LZ },
- { 1, 3, 0, 0, 0, 0, 0, COMP, "LRP", TGSI_OPCODE_LRP },
- { 1, 3, 0, 0, 0, 0, 0, COMP, "FMA", TGSI_OPCODE_FMA },
- { 1, 1, 0, 0, 0, 0, 0, REPL, "SQRT", TGSI_OPCODE_SQRT },
- { 1, 3, 0, 0, 0, 0, 0, REPL, "DP2A", TGSI_OPCODE_DP2A },
- { 1, 1, 0, 0, 0, 0, 0, COMP, "F2U64", TGSI_OPCODE_F2U64 },
- { 1, 1, 0, 0, 0, 0, 0, COMP, "F2I64", TGSI_OPCODE_F2I64 },
- { 1, 1, 0, 0, 0, 0, 0, COMP, "FRC", TGSI_OPCODE_FRC },
- { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXF_LZ", TGSI_OPCODE_TXF_LZ },
- { 1, 1, 0, 0, 0, 0, 0, COMP, "FLR", TGSI_OPCODE_FLR },
- { 1, 1, 0, 0, 0, 0, 0, COMP, "ROUND", TGSI_OPCODE_ROUND },
- { 1, 1, 0, 0, 0, 0, 0, REPL, "EX2", TGSI_OPCODE_EX2 },
- { 1, 1, 0, 0, 0, 0, 0, REPL, "LG2", TGSI_OPCODE_LG2 },
- { 1, 2, 0, 0, 0, 0, 0, REPL, "POW", TGSI_OPCODE_POW },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "XPD", TGSI_OPCODE_XPD },
- { 1, 1, 0, 0, 0, 0, 0, COMP, "U2I64", TGSI_OPCODE_U2I64 },
- { 1, 0, 0, 0, 0, 0, 0, OTHR, "CLOCK", TGSI_OPCODE_CLOCK },
- { 1, 1, 0, 0, 0, 0, 0, COMP, "I2I64", TGSI_OPCODE_I2I64 },
- { 1, 2, 0, 0, 0, 0, 0, REPL, "DPH", TGSI_OPCODE_DPH },
- { 1, 1, 0, 0, 0, 0, 0, REPL, "COS", TGSI_OPCODE_COS },
- { 1, 1, 0, 0, 0, 0, 0, COMP, "DDX", TGSI_OPCODE_DDX },
- { 1, 1, 0, 0, 0, 0, 0, COMP, "DDY", TGSI_OPCODE_DDY },
- { 0, 0, 0, 0, 0, 0, 0, NONE, "KILL", TGSI_OPCODE_KILL },
- { 1, 1, 0, 0, 0, 0, 0, REPL, "PK2H", TGSI_OPCODE_PK2H },
- { 1, 1, 0, 0, 0, 0, 0, REPL, "PK2US", TGSI_OPCODE_PK2US },
- { 1, 1, 0, 0, 0, 0, 0, REPL, "PK4B", TGSI_OPCODE_PK4B },
- { 1, 1, 0, 0, 0, 0, 0, REPL, "PK4UB", TGSI_OPCODE_PK4UB },
- { 1, 1, 0, 0, 0, 0, 1, COMP, "D2U64", TGSI_OPCODE_D2U64 },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "SEQ", TGSI_OPCODE_SEQ },
- { 1, 1, 0, 0, 0, 0, 1, COMP, "D2I64", TGSI_OPCODE_D2I64 },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "SGT", TGSI_OPCODE_SGT },
- { 1, 1, 0, 0, 0, 0, 0, REPL, "SIN", TGSI_OPCODE_SIN },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "SLE", TGSI_OPCODE_SLE },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "SNE", TGSI_OPCODE_SNE },
- { 1, 1, 0, 0, 0, 0, 1, COMP, "U642D", TGSI_OPCODE_U642D },
- { 1, 2, 1, 0, 0, 0, 0, OTHR, "TEX", TGSI_OPCODE_TEX },
- { 1, 4, 1, 0, 0, 0, 0, OTHR, "TXD", TGSI_OPCODE_TXD },
- { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXP", TGSI_OPCODE_TXP },
- { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP2H", TGSI_OPCODE_UP2H },
- { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP2US", TGSI_OPCODE_UP2US },
- { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP4B", TGSI_OPCODE_UP4B },
- { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP4UB", TGSI_OPCODE_UP4UB },
- { 1, 1, 0, 0, 0, 0, 1, COMP, "U642F", TGSI_OPCODE_U642F },
- { 1, 1, 0, 0, 0, 0, 1, COMP, "I642F", TGSI_OPCODE_I642F },
- { 1, 1, 0, 0, 0, 0, 0, COMP, "ARR", TGSI_OPCODE_ARR },
- { 1, 1, 0, 0, 0, 0, 1, COMP, "I642D", TGSI_OPCODE_I642D },
- { 0, 0, 0, 0, 1, 0, 0, NONE, "CAL", TGSI_OPCODE_CAL },
- { 0, 0, 0, 0, 0, 0, 0, NONE, "RET", TGSI_OPCODE_RET },
- { 1, 1, 0, 0, 0, 0, 0, COMP, "SSG", TGSI_OPCODE_SSG },
- { 1, 3, 0, 0, 0, 0, 0, COMP, "CMP", TGSI_OPCODE_CMP },
- { 1, 1, 0, 0, 0, 0, 0, CHAN, "SCS", TGSI_OPCODE_SCS },
- { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXB", TGSI_OPCODE_TXB },
- { 1, 1, 0, 0, 0, 0, 0, OTHR, "FBFETCH", TGSI_OPCODE_FBFETCH },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "DIV", TGSI_OPCODE_DIV },
- { 1, 2, 0, 0, 0, 0, 0, REPL, "DP2", TGSI_OPCODE_DP2 },
- { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXL", TGSI_OPCODE_TXL },
- { 0, 0, 0, 0, 0, 0, 0, NONE, "BRK", TGSI_OPCODE_BRK },
- { 0, 1, 0, 0, 1, 0, 1, NONE, "IF", TGSI_OPCODE_IF },
- { 0, 1, 0, 0, 1, 0, 1, NONE, "UIF", TGSI_OPCODE_UIF },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "READ_INVOC", TGSI_OPCODE_READ_INVOC },
- { 0, 0, 0, 0, 1, 1, 1, NONE, "ELSE", TGSI_OPCODE_ELSE },
- { 0, 0, 0, 0, 0, 1, 0, NONE, "ENDIF", TGSI_OPCODE_ENDIF },
- { 1, 1, 0, 0, 0, 0, 0, COMP, "DDX_FINE", TGSI_OPCODE_DDX_FINE },
- { 1, 1, 0, 0, 0, 0, 0, COMP, "DDY_FINE", TGSI_OPCODE_DDY_FINE },
- { 0, 1, 0, 0, 0, 0, 0, NONE, "PUSHA", TGSI_OPCODE_PUSHA },
- { 1, 0, 0, 0, 0, 0, 0, NONE, "POPA", TGSI_OPCODE_POPA },
- { 1, 1, 0, 0, 0, 0, 0, COMP, "CEIL", TGSI_OPCODE_CEIL },
- { 1, 1, 0, 0, 0, 0, 0, COMP, "I2F", TGSI_OPCODE_I2F },
- { 1, 1, 0, 0, 0, 0, 0, COMP, "NOT", TGSI_OPCODE_NOT },
- { 1, 1, 0, 0, 0, 0, 0, COMP, "TRUNC", TGSI_OPCODE_TRUNC },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "SHL", TGSI_OPCODE_SHL },
- { 1, 1, 0, 0, 0, 0, 0, OTHR, "BALLOT", TGSI_OPCODE_BALLOT },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "AND", TGSI_OPCODE_AND },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "OR", TGSI_OPCODE_OR },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "MOD", TGSI_OPCODE_MOD },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "XOR", TGSI_OPCODE_XOR },
- { 1, 3, 0, 0, 0, 0, 0, COMP, "SAD", TGSI_OPCODE_SAD },
- { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXF", TGSI_OPCODE_TXF },
- { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXQ", TGSI_OPCODE_TXQ },
- { 0, 0, 0, 0, 0, 0, 0, NONE, "CONT", TGSI_OPCODE_CONT },
- { 0, 1, 0, 0, 0, 0, 0, NONE, "EMIT", TGSI_OPCODE_EMIT },
- { 0, 1, 0, 0, 0, 0, 0, NONE, "ENDPRIM", TGSI_OPCODE_ENDPRIM },
- { 0, 0, 0, 0, 1, 0, 1, NONE, "BGNLOOP", TGSI_OPCODE_BGNLOOP },
- { 0, 0, 0, 0, 0, 0, 1, NONE, "BGNSUB", TGSI_OPCODE_BGNSUB },
- { 0, 0, 0, 0, 1, 1, 0, NONE, "ENDLOOP", TGSI_OPCODE_ENDLOOP },
- { 0, 0, 0, 0, 0, 1, 0, NONE, "ENDSUB", TGSI_OPCODE_ENDSUB },
- { 1, 1, 1, 0, 0, 0, 0, OTHR, "TXQ_LZ", TGSI_OPCODE_TXQ_LZ },
- { 1, 1, 1, 0, 0, 0, 0, OTHR, "TXQS", TGSI_OPCODE_TXQS },
- { 1, 1, 0, 0, 0, 0, 0, OTHR, "RESQ", TGSI_OPCODE_RESQ },
- { 1, 1, 0, 0, 0, 0, 0, COMP, "READ_FIRST", TGSI_OPCODE_READ_FIRST },
- { 0, 0, 0, 0, 0, 0, 0, NONE, "NOP", TGSI_OPCODE_NOP },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "FSEQ", TGSI_OPCODE_FSEQ },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "FSGE", TGSI_OPCODE_FSGE },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "FSLT", TGSI_OPCODE_FSLT },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "FSNE", TGSI_OPCODE_FSNE },
- { 0, 1, 0, 0, 0, 0, 0, OTHR, "MEMBAR", TGSI_OPCODE_MEMBAR },
- { 0, 1, 0, 0, 0, 0, 0, NONE, "CALLNZ", TGSI_OPCODE_CALLNZ },
- { 0, 1, 0, 0, 0, 0, 0, NONE, "", 114 }, /* removed */
- { 0, 1, 0, 0, 0, 0, 0, NONE, "BREAKC", TGSI_OPCODE_BREAKC },
- { 0, 1, 0, 0, 0, 0, 0, NONE, "KILL_IF", TGSI_OPCODE_KILL_IF },
- { 0, 0, 0, 0, 0, 0, 0, NONE, "END", TGSI_OPCODE_END },
- { 1, 3, 0, 0, 0, 0, 0, COMP, "DFMA", TGSI_OPCODE_DFMA },
- { 1, 1, 0, 0, 0, 0, 0, COMP, "F2I", TGSI_OPCODE_F2I },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "IDIV", TGSI_OPCODE_IDIV },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "IMAX", TGSI_OPCODE_IMAX },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "IMIN", TGSI_OPCODE_IMIN },
- { 1, 1, 0, 0, 0, 0, 0, COMP, "INEG", TGSI_OPCODE_INEG },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "ISGE", TGSI_OPCODE_ISGE },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "ISHR", TGSI_OPCODE_ISHR },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "ISLT", TGSI_OPCODE_ISLT },
- { 1, 1, 0, 0, 0, 0, 0, COMP, "F2U", TGSI_OPCODE_F2U },
- { 1, 1, 0, 0, 0, 0, 0, COMP, "U2F", TGSI_OPCODE_U2F },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "UADD", TGSI_OPCODE_UADD },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "UDIV", TGSI_OPCODE_UDIV },
- { 1, 3, 0, 0, 0, 0, 0, COMP, "UMAD", TGSI_OPCODE_UMAD },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "UMAX", TGSI_OPCODE_UMAX },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "UMIN", TGSI_OPCODE_UMIN },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "UMOD", TGSI_OPCODE_UMOD },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "UMUL", TGSI_OPCODE_UMUL },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "USEQ", TGSI_OPCODE_USEQ },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "USGE", TGSI_OPCODE_USGE },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "USHR", TGSI_OPCODE_USHR },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "USLT", TGSI_OPCODE_USLT },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "USNE", TGSI_OPCODE_USNE },
- { 0, 1, 0, 0, 0, 0, 0, NONE, "SWITCH", TGSI_OPCODE_SWITCH },
- { 0, 1, 0, 0, 0, 0, 0, NONE, "CASE", TGSI_OPCODE_CASE },
- { 0, 0, 0, 0, 0, 0, 0, NONE, "DEFAULT", TGSI_OPCODE_DEFAULT },
- { 0, 0, 0, 0, 0, 0, 0, NONE, "ENDSWITCH", TGSI_OPCODE_ENDSWITCH },
-
- { 1, 3, 0, 0, 0, 0, 0, OTHR, "SAMPLE", TGSI_OPCODE_SAMPLE },
- { 1, 2, 0, 0, 0, 0, 0, OTHR, "SAMPLE_I", TGSI_OPCODE_SAMPLE_I },
- { 1, 3, 0, 0, 0, 0, 0, OTHR, "SAMPLE_I_MS", TGSI_OPCODE_SAMPLE_I_MS },
- { 1, 4, 0, 0, 0, 0, 0, OTHR, "SAMPLE_B", TGSI_OPCODE_SAMPLE_B },
- { 1, 4, 0, 0, 0, 0, 0, OTHR, "SAMPLE_C", TGSI_OPCODE_SAMPLE_C },
- { 1, 4, 0, 0, 0, 0, 0, OTHR, "SAMPLE_C_LZ", TGSI_OPCODE_SAMPLE_C_LZ },
- { 1, 5, 0, 0, 0, 0, 0, OTHR, "SAMPLE_D", TGSI_OPCODE_SAMPLE_D },
- { 1, 4, 0, 0, 0, 0, 0, OTHR, "SAMPLE_L", TGSI_OPCODE_SAMPLE_L },
- { 1, 3, 0, 0, 0, 0, 0, OTHR, "GATHER4", TGSI_OPCODE_GATHER4 },
- { 1, 2, 0, 0, 0, 0, 0, OTHR, "SVIEWINFO", TGSI_OPCODE_SVIEWINFO },
- { 1, 2, 0, 0, 0, 0, 0, OTHR, "SAMPLE_POS", TGSI_OPCODE_SAMPLE_POS },
- { 1, 2, 0, 0, 0, 0, 0, OTHR, "SAMPLE_INFO", TGSI_OPCODE_SAMPLE_INFO },
- { 1, 1, 0, 0, 0, 0, 0, COMP, "UARL", TGSI_OPCODE_UARL },
- { 1, 3, 0, 0, 0, 0, 0, COMP, "UCMP", TGSI_OPCODE_UCMP },
- { 1, 1, 0, 0, 0, 0, 0, COMP, "IABS", TGSI_OPCODE_IABS },
- { 1, 1, 0, 0, 0, 0, 0, COMP, "ISSG", TGSI_OPCODE_ISSG },
- { 1, 2, 0, 0, 0, 0, 0, OTHR, "LOAD", TGSI_OPCODE_LOAD },
- { 1, 2, 0, 1, 0, 0, 0, OTHR, "STORE", TGSI_OPCODE_STORE },
- { 1, 0, 0, 0, 0, 0, 0, OTHR, "MFENCE", TGSI_OPCODE_MFENCE },
- { 1, 0, 0, 0, 0, 0, 0, OTHR, "LFENCE", TGSI_OPCODE_LFENCE },
- { 1, 0, 0, 0, 0, 0, 0, OTHR, "SFENCE", TGSI_OPCODE_SFENCE },
- { 0, 0, 0, 0, 0, 0, 0, OTHR, "BARRIER", TGSI_OPCODE_BARRIER },
-
- { 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMUADD", TGSI_OPCODE_ATOMUADD },
- { 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMXCHG", TGSI_OPCODE_ATOMXCHG },
- { 1, 4, 0, 1, 0, 0, 0, OTHR, "ATOMCAS", TGSI_OPCODE_ATOMCAS },
- { 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMAND", TGSI_OPCODE_ATOMAND },
- { 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMOR", TGSI_OPCODE_ATOMOR },
- { 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMXOR", TGSI_OPCODE_ATOMXOR },
- { 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMUMIN", TGSI_OPCODE_ATOMUMIN },
- { 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMUMAX", TGSI_OPCODE_ATOMUMAX },
- { 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMIMIN", TGSI_OPCODE_ATOMIMIN },
- { 1, 3, 0, 1, 0, 0, 0, OTHR, "ATOMIMAX", TGSI_OPCODE_ATOMIMAX },
- { 1, 3, 1, 0, 0, 0, 0, OTHR, "TEX2", TGSI_OPCODE_TEX2 },
- { 1, 3, 1, 0, 0, 0, 0, OTHR, "TXB2", TGSI_OPCODE_TXB2 },
- { 1, 3, 1, 0, 0, 0, 0, OTHR, "TXL2", TGSI_OPCODE_TXL2 },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "IMUL_HI", TGSI_OPCODE_IMUL_HI },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "UMUL_HI", TGSI_OPCODE_UMUL_HI },
- { 1, 3, 1, 0, 0, 0, 0, OTHR, "TG4", TGSI_OPCODE_TG4 },
- { 1, 2, 1, 0, 0, 0, 0, OTHR, "LODQ", TGSI_OPCODE_LODQ },
- { 1, 3, 0, 0, 0, 0, 0, COMP, "IBFE", TGSI_OPCODE_IBFE },
- { 1, 3, 0, 0, 0, 0, 0, COMP, "UBFE", TGSI_OPCODE_UBFE },
- { 1, 4, 0, 0, 0, 0, 0, COMP, "BFI", TGSI_OPCODE_BFI },
- { 1, 1, 0, 0, 0, 0, 0, COMP, "BREV", TGSI_OPCODE_BREV },
- { 1, 1, 0, 0, 0, 0, 0, COMP, "POPC", TGSI_OPCODE_POPC },
- { 1, 1, 0, 0, 0, 0, 0, COMP, "LSB", TGSI_OPCODE_LSB },
- { 1, 1, 0, 0, 0, 0, 0, COMP, "IMSB", TGSI_OPCODE_IMSB },
- { 1, 1, 0, 0, 0, 0, 0, COMP, "UMSB", TGSI_OPCODE_UMSB },
- { 1, 1, 0, 0, 0, 0, 0, OTHR, "INTERP_CENTROID", TGSI_OPCODE_INTERP_CENTROID },
- { 1, 2, 0, 0, 0, 0, 0, OTHR, "INTERP_SAMPLE", TGSI_OPCODE_INTERP_SAMPLE },
- { 1, 2, 0, 0, 0, 0, 0, OTHR, "INTERP_OFFSET", TGSI_OPCODE_INTERP_OFFSET },
- { 1, 1, 0, 0, 0, 0, 0, COMP, "F2D", TGSI_OPCODE_F2D },
- { 1, 1, 0, 0, 0, 0, 0, COMP, "D2F", TGSI_OPCODE_D2F },
- { 1, 1, 0, 0, 0, 0, 0, COMP, "DABS", TGSI_OPCODE_DABS },
- { 1, 1, 0, 0, 0, 0, 0, COMP, "DNEG", TGSI_OPCODE_DNEG },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "DADD", TGSI_OPCODE_DADD },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "DMUL", TGSI_OPCODE_DMUL },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "DMAX", TGSI_OPCODE_DMAX },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "DMIN", TGSI_OPCODE_DMIN },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "DSLT", TGSI_OPCODE_DSLT },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "DSGE", TGSI_OPCODE_DSGE },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "DSEQ", TGSI_OPCODE_DSEQ },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "DSNE", TGSI_OPCODE_DSNE },
- { 1, 1, 0, 0, 0, 0, 0, COMP, "DRCP", TGSI_OPCODE_DRCP },
- { 1, 1, 0, 0, 0, 0, 0, COMP, "DSQRT", TGSI_OPCODE_DSQRT },
- { 1, 3, 0, 0, 0, 0, 0, COMP, "DMAD", TGSI_OPCODE_DMAD },
- { 1, 1, 0, 0, 0, 0, 0, COMP, "DFRAC", TGSI_OPCODE_DFRAC},
- { 1, 2, 0, 0, 0, 0, 0, COMP, "DLDEXP", TGSI_OPCODE_DLDEXP},
- { 2, 1, 0, 0, 0, 0, 0, COMP, "DFRACEXP", TGSI_OPCODE_DFRACEXP},
- { 1, 1, 0, 0, 0, 0, 0, COMP, "D2I", TGSI_OPCODE_D2I },
- { 1, 1, 0, 0, 0, 0, 0, COMP, "I2D", TGSI_OPCODE_I2D },
- { 1, 1, 0, 0, 0, 0, 0, COMP, "D2U", TGSI_OPCODE_D2U },
- { 1, 1, 0, 0, 0, 0, 0, COMP, "U2D", TGSI_OPCODE_U2D },
- { 1, 1, 0, 0, 0, 0, 0, COMP, "DRSQ", TGSI_OPCODE_DRSQ },
- { 1, 1, 0, 0, 0, 0, 0, COMP, "DTRUNC", TGSI_OPCODE_DTRUNC },
- { 1, 1, 0, 0, 0, 0, 0, COMP, "DCEIL", TGSI_OPCODE_DCEIL },
- { 1, 1, 0, 0, 0, 0, 0, COMP, "DFLR", TGSI_OPCODE_DFLR },
- { 1, 1, 0, 0, 0, 0, 0, COMP, "DROUND", TGSI_OPCODE_DROUND },
- { 1, 1, 0, 0, 0, 0, 0, COMP, "DSSG", TGSI_OPCODE_DSSG },
- { 1, 1, 0, 0, 0, 0, 0, COMP, "VOTE_ANY", TGSI_OPCODE_VOTE_ANY },
- { 1, 1, 0, 0, 0, 0, 0, COMP, "VOTE_ALL", TGSI_OPCODE_VOTE_ALL },
- { 1, 1, 0, 0, 0, 0, 0, COMP, "VOTE_EQ", TGSI_OPCODE_VOTE_EQ },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SEQ", TGSI_OPCODE_U64SEQ },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SNE", TGSI_OPCODE_U64SNE },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "I64SLT", TGSI_OPCODE_I64SLT },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SLT", TGSI_OPCODE_U64SLT },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "I64SGE", TGSI_OPCODE_I64SGE },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SGE", TGSI_OPCODE_U64SGE },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "I64MIN", TGSI_OPCODE_I64MIN },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "U64MIN", TGSI_OPCODE_U64MIN },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "I64MAX", TGSI_OPCODE_I64MAX },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "U64MAX", TGSI_OPCODE_U64MAX },
- { 1, 1, 0, 0, 0, 0, 0, COMP, "I64ABS", TGSI_OPCODE_I64ABS },
- { 1, 1, 0, 0, 0, 0, 0, COMP, "I64SSG", TGSI_OPCODE_I64SSG },
- { 1, 1, 0, 0, 0, 0, 0, COMP, "I64NEG", TGSI_OPCODE_I64NEG },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "U64ADD", TGSI_OPCODE_U64ADD },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "U64MUL", TGSI_OPCODE_U64MUL },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SHL", TGSI_OPCODE_U64SHL },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "I64SHR", TGSI_OPCODE_I64SHR },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "U64SHR", TGSI_OPCODE_U64SHR },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "I64DIV", TGSI_OPCODE_I64DIV },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "U64DIV", TGSI_OPCODE_U64DIV },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "I64MOD", TGSI_OPCODE_I64MOD },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "U64MOD", TGSI_OPCODE_U64MOD },
- { 1, 2, 0, 0, 0, 0, 0, COMP, "DDIV", TGSI_OPCODE_DDIV },
+#include "tgsi_info_opcodes.h"
};
+#undef OPCODE
+#undef OPCODE_GAP
+
const struct tgsi_opcode_info *
tgsi_get_opcode_info( uint opcode )
{
@@ -309,12 +69,23 @@
return NULL;
}
+#define OPCODE(_num_dst, _num_src, _output_mode, name, ...) #name,
+#define OPCODE_GAP(opc) "UNK" #opc,
+
+static const char * const opcode_names[TGSI_OPCODE_LAST] =
+{
+#include "tgsi_info_opcodes.h"
+};
+
+#undef OPCODE
+#undef OPCODE_GAP
const char *
tgsi_get_opcode_name( uint opcode )
{
- const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode);
- return info->mnemonic;
+ if (opcode >= ARRAY_SIZE(opcode_names))
+ return "UNK_OOB";
+ return opcode_names[opcode];
}
@@ -356,9 +127,7 @@
case TGSI_OPCODE_AND:
case TGSI_OPCODE_OR:
case TGSI_OPCODE_XOR:
- case TGSI_OPCODE_SAD: /* XXX some src args may be signed for SAD ? */
case TGSI_OPCODE_TXQ:
- case TGSI_OPCODE_TXQ_LZ:
case TGSI_OPCODE_TXQS:
case TGSI_OPCODE_F2U:
case TGSI_OPCODE_UDIV:
@@ -473,13 +242,16 @@
* infer the source type of a TGSI opcode.
*/
enum tgsi_opcode_type
-tgsi_opcode_infer_src_type( uint opcode )
+tgsi_opcode_infer_src_type(uint opcode, uint src_idx)
{
+ if (src_idx == 1 &&
+ (opcode == TGSI_OPCODE_DLDEXP || opcode == TGSI_OPCODE_LDEXP))
+ return TGSI_TYPE_SIGNED;
+
switch (opcode) {
case TGSI_OPCODE_UIF:
case TGSI_OPCODE_TXF:
case TGSI_OPCODE_TXF_LZ:
- case TGSI_OPCODE_BREAKC:
case TGSI_OPCODE_U2F:
case TGSI_OPCODE_U2D:
case TGSI_OPCODE_UADD:
@@ -499,7 +271,6 @@
return TGSI_TYPE_SIGNED;
case TGSI_OPCODE_ARL:
case TGSI_OPCODE_ARR:
- case TGSI_OPCODE_TXQ_LZ:
case TGSI_OPCODE_F2D:
case TGSI_OPCODE_F2I:
case TGSI_OPCODE_F2U:
@@ -542,7 +313,10 @@
* infer the destination type of a TGSI opcode.
*/
enum tgsi_opcode_type
-tgsi_opcode_infer_dst_type( uint opcode )
+tgsi_opcode_infer_dst_type( uint opcode, uint dst_idx )
{
+ if (dst_idx == 1 && opcode == TGSI_OPCODE_DFRACEXP)
+ return TGSI_TYPE_SIGNED;
+
return tgsi_opcode_infer_type(opcode);
}
diff -Nru mesa-17.2.4/src/gallium/auxiliary/tgsi/tgsi_info.h mesa-17.3.3/src/gallium/auxiliary/tgsi/tgsi_info.h
--- mesa-17.2.4/src/gallium/auxiliary/tgsi/tgsi_info.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/tgsi/tgsi_info.h 2018-01-18 21:30:28.000000000 +0000
@@ -76,11 +76,10 @@
unsigned is_tex:1;
unsigned is_store:1;
unsigned is_branch:1;
- int pre_dedent:2;
- int post_indent:2;
+ unsigned pre_dedent:1;
+ unsigned post_indent:1;
enum tgsi_output_mode output_mode:3;
- const char *mnemonic;
- uint opcode;
+ unsigned opcode:8;
};
const struct tgsi_opcode_info *
@@ -112,10 +111,10 @@
}
enum tgsi_opcode_type
-tgsi_opcode_infer_src_type( uint opcode );
+tgsi_opcode_infer_src_type( uint opcode, uint src_idx );
enum tgsi_opcode_type
-tgsi_opcode_infer_dst_type( uint opcode );
+tgsi_opcode_infer_dst_type( uint opcode, uint dst_idx );
#if defined __cplusplus
}
diff -Nru mesa-17.2.4/src/gallium/auxiliary/tgsi/tgsi_info_opcodes.h mesa-17.3.3/src/gallium/auxiliary/tgsi/tgsi_info_opcodes.h
--- mesa-17.2.4/src/gallium/auxiliary/tgsi/tgsi_info_opcodes.h 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/tgsi/tgsi_info_opcodes.h 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,252 @@
+OPCODE(1, 1, COMP, ARL)
+OPCODE(1, 1, COMP, MOV)
+OPCODE(1, 1, CHAN, LIT)
+OPCODE(1, 1, REPL, RCP)
+OPCODE(1, 1, REPL, RSQ)
+OPCODE(1, 1, CHAN, EXP)
+OPCODE(1, 1, CHAN, LOG)
+OPCODE(1, 2, COMP, MUL)
+OPCODE(1, 2, COMP, ADD)
+OPCODE(1, 2, REPL, DP3)
+OPCODE(1, 2, REPL, DP4)
+OPCODE(1, 2, CHAN, DST)
+OPCODE(1, 2, COMP, MIN)
+OPCODE(1, 2, COMP, MAX)
+OPCODE(1, 2, COMP, SLT)
+OPCODE(1, 2, COMP, SGE)
+OPCODE(1, 3, COMP, MAD)
+OPCODE(1, 2, OTHR, TEX_LZ, .is_tex = 1)
+OPCODE(1, 3, COMP, LRP)
+OPCODE(1, 3, COMP, FMA)
+OPCODE(1, 1, REPL, SQRT)
+OPCODE(1, 2, COMP, LDEXP)
+OPCODE(1, 1, COMP, F2U64)
+OPCODE(1, 1, COMP, F2I64)
+OPCODE(1, 1, COMP, FRC)
+OPCODE(1, 2, OTHR, TXF_LZ, .is_tex = 1)
+OPCODE(1, 1, COMP, FLR)
+OPCODE(1, 1, COMP, ROUND)
+OPCODE(1, 1, REPL, EX2)
+OPCODE(1, 1, REPL, LG2)
+OPCODE(1, 2, REPL, POW)
+OPCODE_GAP(31) /* removed */
+OPCODE(1, 1, COMP, U2I64)
+OPCODE(1, 0, OTHR, CLOCK)
+OPCODE(1, 1, COMP, I2I64)
+OPCODE_GAP(35) /* removed */
+OPCODE(1, 1, REPL, COS)
+OPCODE(1, 1, COMP, DDX)
+OPCODE(1, 1, COMP, DDY)
+OPCODE(0, 0, NONE, KILL)
+OPCODE(1, 1, REPL, PK2H)
+OPCODE(1, 1, REPL, PK2US)
+OPCODE(1, 1, REPL, PK4B)
+OPCODE(1, 1, REPL, PK4UB)
+OPCODE(1, 1, COMP, D2U64)
+OPCODE(1, 2, COMP, SEQ)
+OPCODE(1, 1, COMP, D2I64)
+OPCODE(1, 2, COMP, SGT)
+OPCODE(1, 1, REPL, SIN)
+OPCODE(1, 2, COMP, SLE)
+OPCODE(1, 2, COMP, SNE)
+OPCODE(1, 1, COMP, U642D)
+OPCODE(1, 2, OTHR, TEX, .is_tex = 1)
+OPCODE(1, 4, OTHR, TXD, .is_tex = 1)
+OPCODE(1, 2, OTHR, TXP, .is_tex = 1)
+OPCODE(1, 1, CHAN, UP2H)
+OPCODE(1, 1, CHAN, UP2US)
+OPCODE(1, 1, CHAN, UP4B)
+OPCODE(1, 1, CHAN, UP4UB)
+OPCODE(1, 1, COMP, U642F)
+OPCODE(1, 1, COMP, I642F)
+OPCODE(1, 1, COMP, ARR)
+OPCODE(1, 1, COMP, I642D)
+OPCODE(0, 0, NONE, CAL, .is_branch = 1)
+OPCODE(0, 0, NONE, RET)
+OPCODE(1, 1, COMP, SSG)
+OPCODE(1, 3, COMP, CMP)
+OPCODE_GAP(67) /* removed */
+OPCODE(1, 2, OTHR, TXB, .is_tex = 1)
+OPCODE(1, 1, OTHR, FBFETCH)
+OPCODE(1, 2, COMP, DIV)
+OPCODE(1, 2, REPL, DP2)
+OPCODE(1, 2, OTHR, TXL, .is_tex = 1)
+OPCODE(0, 0, NONE, BRK)
+OPCODE(0, 1, NONE, IF, .is_branch = 1, .post_indent = 1)
+OPCODE(0, 1, NONE, UIF, .is_branch = 1, .post_indent = 1)
+OPCODE(1, 2, COMP, READ_INVOC)
+OPCODE(0, 0, NONE, ELSE, .is_branch = 1, .pre_dedent = 1, .post_indent = 1)
+OPCODE(0, 0, NONE, ENDIF, .pre_dedent = 1)
+OPCODE(1, 1, COMP, DDX_FINE)
+OPCODE(1, 1, COMP, DDY_FINE)
+OPCODE_GAP(81) /* removed */
+OPCODE_GAP(82) /* removed */
+OPCODE(1, 1, COMP, CEIL)
+OPCODE(1, 1, COMP, I2F)
+OPCODE(1, 1, COMP, NOT)
+OPCODE(1, 1, COMP, TRUNC)
+OPCODE(1, 2, COMP, SHL)
+OPCODE(1, 1, OTHR, BALLOT)
+OPCODE(1, 2, COMP, AND)
+OPCODE(1, 2, COMP, OR)
+OPCODE(1, 2, COMP, MOD)
+OPCODE(1, 2, COMP, XOR)
+OPCODE_GAP(93) /* removed */
+OPCODE(1, 2, OTHR, TXF, .is_tex = 1)
+OPCODE(1, 2, OTHR, TXQ, .is_tex = 1)
+OPCODE(0, 0, NONE, CONT)
+OPCODE(0, 1, NONE, EMIT)
+OPCODE(0, 1, NONE, ENDPRIM)
+OPCODE(0, 0, NONE, BGNLOOP, .is_branch = 1, .post_indent = 1)
+OPCODE(0, 0, NONE, BGNSUB, .post_indent = 1)
+OPCODE(0, 0, NONE, ENDLOOP, .is_branch = 1, .pre_dedent = 1)
+OPCODE(0, 0, NONE, ENDSUB, .pre_dedent = 1)
+OPCODE_GAP(103) /* removed */
+OPCODE(1, 1, OTHR, TXQS, .is_tex = 1)
+OPCODE(1, 1, OTHR, RESQ)
+OPCODE(1, 1, COMP, READ_FIRST)
+OPCODE(0, 0, NONE, NOP)
+OPCODE(1, 2, COMP, FSEQ)
+OPCODE(1, 2, COMP, FSGE)
+OPCODE(1, 2, COMP, FSLT)
+OPCODE(1, 2, COMP, FSNE)
+OPCODE(0, 1, OTHR, MEMBAR)
+OPCODE_GAP(113) /* removed */
+OPCODE_GAP(114) /* removed */
+OPCODE_GAP(115) /* removed */
+OPCODE(0, 1, NONE, KILL_IF)
+OPCODE(0, 0, NONE, END)
+OPCODE(1, 3, COMP, DFMA)
+OPCODE(1, 1, COMP, F2I)
+OPCODE(1, 2, COMP, IDIV)
+OPCODE(1, 2, COMP, IMAX)
+OPCODE(1, 2, COMP, IMIN)
+OPCODE(1, 1, COMP, INEG)
+OPCODE(1, 2, COMP, ISGE)
+OPCODE(1, 2, COMP, ISHR)
+OPCODE(1, 2, COMP, ISLT)
+OPCODE(1, 1, COMP, F2U)
+OPCODE(1, 1, COMP, U2F)
+OPCODE(1, 2, COMP, UADD)
+OPCODE(1, 2, COMP, UDIV)
+OPCODE(1, 3, COMP, UMAD)
+OPCODE(1, 2, COMP, UMAX)
+OPCODE(1, 2, COMP, UMIN)
+OPCODE(1, 2, COMP, UMOD)
+OPCODE(1, 2, COMP, UMUL)
+OPCODE(1, 2, COMP, USEQ)
+OPCODE(1, 2, COMP, USGE)
+OPCODE(1, 2, COMP, USHR)
+OPCODE(1, 2, COMP, USLT)
+OPCODE(1, 2, COMP, USNE)
+OPCODE(0, 1, NONE, SWITCH)
+OPCODE(0, 1, NONE, CASE)
+OPCODE(0, 0, NONE, DEFAULT)
+OPCODE(0, 0, NONE, ENDSWITCH)
+
+OPCODE(1, 3, OTHR, SAMPLE)
+OPCODE(1, 2, OTHR, SAMPLE_I)
+OPCODE(1, 3, OTHR, SAMPLE_I_MS)
+OPCODE(1, 4, OTHR, SAMPLE_B)
+OPCODE(1, 4, OTHR, SAMPLE_C)
+OPCODE(1, 4, OTHR, SAMPLE_C_LZ)
+OPCODE(1, 5, OTHR, SAMPLE_D)
+OPCODE(1, 4, OTHR, SAMPLE_L)
+OPCODE(1, 3, OTHR, GATHER4)
+OPCODE(1, 2, OTHR, SVIEWINFO)
+OPCODE(1, 2, OTHR, SAMPLE_POS)
+OPCODE(1, 2, OTHR, SAMPLE_INFO)
+OPCODE(1, 1, COMP, UARL)
+OPCODE(1, 3, COMP, UCMP)
+OPCODE(1, 1, COMP, IABS)
+OPCODE(1, 1, COMP, ISSG)
+OPCODE(1, 2, OTHR, LOAD)
+OPCODE(1, 2, OTHR, STORE, .is_store = 1)
+OPCODE_GAP(163) /* removed */
+OPCODE_GAP(164) /* removed */
+OPCODE_GAP(165) /* removed */
+OPCODE(0, 0, OTHR, BARRIER)
+
+OPCODE(1, 3, OTHR, ATOMUADD, .is_store = 1)
+OPCODE(1, 3, OTHR, ATOMXCHG, .is_store = 1)
+OPCODE(1, 4, OTHR, ATOMCAS, .is_store = 1)
+OPCODE(1, 3, OTHR, ATOMAND, .is_store = 1)
+OPCODE(1, 3, OTHR, ATOMOR, .is_store = 1)
+OPCODE(1, 3, OTHR, ATOMXOR, .is_store = 1)
+OPCODE(1, 3, OTHR, ATOMUMIN, .is_store = 1)
+OPCODE(1, 3, OTHR, ATOMUMAX, .is_store = 1)
+OPCODE(1, 3, OTHR, ATOMIMIN, .is_store = 1)
+OPCODE(1, 3, OTHR, ATOMIMAX, .is_store = 1)
+OPCODE(1, 3, OTHR, TEX2, .is_tex = 1)
+OPCODE(1, 3, OTHR, TXB2, .is_tex = 1)
+OPCODE(1, 3, OTHR, TXL2, .is_tex = 1)
+OPCODE(1, 2, COMP, IMUL_HI)
+OPCODE(1, 2, COMP, UMUL_HI)
+OPCODE(1, 3, OTHR, TG4, .is_tex = 1)
+OPCODE(1, 2, OTHR, LODQ, .is_tex = 1)
+OPCODE(1, 3, COMP, IBFE)
+OPCODE(1, 3, COMP, UBFE)
+OPCODE(1, 4, COMP, BFI)
+OPCODE(1, 1, COMP, BREV)
+OPCODE(1, 1, COMP, POPC)
+OPCODE(1, 1, COMP, LSB)
+OPCODE(1, 1, COMP, IMSB)
+OPCODE(1, 1, COMP, UMSB)
+OPCODE(1, 1, OTHR, INTERP_CENTROID)
+OPCODE(1, 2, OTHR, INTERP_SAMPLE)
+OPCODE(1, 2, OTHR, INTERP_OFFSET)
+OPCODE(1, 1, COMP, F2D)
+OPCODE(1, 1, COMP, D2F)
+OPCODE(1, 1, COMP, DABS)
+OPCODE(1, 1, COMP, DNEG)
+OPCODE(1, 2, COMP, DADD)
+OPCODE(1, 2, COMP, DMUL)
+OPCODE(1, 2, COMP, DMAX)
+OPCODE(1, 2, COMP, DMIN)
+OPCODE(1, 2, COMP, DSLT)
+OPCODE(1, 2, COMP, DSGE)
+OPCODE(1, 2, COMP, DSEQ)
+OPCODE(1, 2, COMP, DSNE)
+OPCODE(1, 1, COMP, DRCP)
+OPCODE(1, 1, COMP, DSQRT)
+OPCODE(1, 3, COMP, DMAD)
+OPCODE(1, 1, COMP, DFRAC)
+OPCODE(1, 2, COMP, DLDEXP)
+OPCODE(2, 1, REPL, DFRACEXP)
+OPCODE(1, 1, COMP, D2I)
+OPCODE(1, 1, COMP, I2D)
+OPCODE(1, 1, COMP, D2U)
+OPCODE(1, 1, COMP, U2D)
+OPCODE(1, 1, COMP, DRSQ)
+OPCODE(1, 1, COMP, DTRUNC)
+OPCODE(1, 1, COMP, DCEIL)
+OPCODE(1, 1, COMP, DFLR)
+OPCODE(1, 1, COMP, DROUND)
+OPCODE(1, 1, COMP, DSSG)
+OPCODE(1, 1, COMP, VOTE_ANY)
+OPCODE(1, 1, COMP, VOTE_ALL)
+OPCODE(1, 1, COMP, VOTE_EQ)
+OPCODE(1, 2, COMP, U64SEQ)
+OPCODE(1, 2, COMP, U64SNE)
+OPCODE(1, 2, COMP, I64SLT)
+OPCODE(1, 2, COMP, U64SLT)
+OPCODE(1, 2, COMP, I64SGE)
+OPCODE(1, 2, COMP, U64SGE)
+OPCODE(1, 2, COMP, I64MIN)
+OPCODE(1, 2, COMP, U64MIN)
+OPCODE(1, 2, COMP, I64MAX)
+OPCODE(1, 2, COMP, U64MAX)
+OPCODE(1, 1, COMP, I64ABS)
+OPCODE(1, 1, COMP, I64SSG)
+OPCODE(1, 1, COMP, I64NEG)
+OPCODE(1, 2, COMP, U64ADD)
+OPCODE(1, 2, COMP, U64MUL)
+OPCODE(1, 2, COMP, U64SHL)
+OPCODE(1, 2, COMP, I64SHR)
+OPCODE(1, 2, COMP, U64SHR)
+OPCODE(1, 2, COMP, I64DIV)
+OPCODE(1, 2, COMP, U64DIV)
+OPCODE(1, 2, COMP, I64MOD)
+OPCODE(1, 2, COMP, U64MOD)
+OPCODE(1, 2, COMP, DDIV)
+OPCODE(1, 3, OTHR, LOD)
diff -Nru mesa-17.2.4/src/gallium/auxiliary/tgsi/tgsi_lowering.c mesa-17.3.3/src/gallium/auxiliary/tgsi/tgsi_lowering.c
--- mesa-17.2.4/src/gallium/auxiliary/tgsi/tgsi_lowering.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/tgsi/tgsi_lowering.c 2018-01-18 21:30:28.000000000 +0000
@@ -258,130 +258,6 @@
}
}
-/* XPD - Cross Product
- * dst.x = src0.y \times src1.z - src1.y \times src0.z
- * dst.y = src0.z \times src1.x - src1.z \times src0.x
- * dst.z = src0.x \times src1.y - src1.x \times src0.y
- * dst.w = 1.0
- *
- * ; needs: 1 tmp, imm{1.0}
- * MUL tmpA.xyz, src1.yzx, src0.zxy
- * MAD dst.xyz, src0.yzx, src1.zxy, -tmpA.xyz
- * MOV dst.w, imm{1.0}
- */
-#define XPD_GROW (NINST(2) + NINST(3) + NINST(1) - OINST(2))
-#define XPD_TMP 1
-static void
-transform_xpd(struct tgsi_transform_context *tctx,
- struct tgsi_full_instruction *inst)
-{
- struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
- struct tgsi_full_dst_register *dst = &inst->Dst[0];
- struct tgsi_full_src_register *src0 = &inst->Src[0];
- struct tgsi_full_src_register *src1 = &inst->Src[1];
- struct tgsi_full_instruction new_inst;
-
- if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZ) {
- /* MUL tmpA.xyz, src1.yzx, src0.zxy */
- new_inst = tgsi_default_full_instruction();
- new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
- new_inst.Instruction.NumDstRegs = 1;
- reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZ);
- new_inst.Instruction.NumSrcRegs = 2;
- reg_src(&new_inst.Src[0], src1, SWIZ(Y, Z, X, _));
- reg_src(&new_inst.Src[1], src0, SWIZ(Z, X, Y, _));
- tctx->emit_instruction(tctx, &new_inst);
-
- /* MAD dst.xyz, src0.yzx, src1.zxy, -tmpA.xyz */
- new_inst = tgsi_default_full_instruction();
- new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
- new_inst.Instruction.NumDstRegs = 1;
- reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZ);
- new_inst.Instruction.NumSrcRegs = 3;
- reg_src(&new_inst.Src[0], src0, SWIZ(Y, Z, X, _));
- reg_src(&new_inst.Src[1], src1, SWIZ(Z, X, Y, _));
- reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, _));
- new_inst.Src[2].Register.Negate = true;
- tctx->emit_instruction(tctx, &new_inst);
- }
-
- if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
- /* MOV dst.w, imm{1.0} */
- new_inst = tgsi_default_full_instruction();
- new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
- new_inst.Instruction.NumDstRegs = 1;
- reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
- new_inst.Instruction.NumSrcRegs = 1;
- reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y));
- tctx->emit_instruction(tctx, &new_inst);
- }
-}
-
-/* SCS - Sine Cosine
- * dst.x = \cos{src.x}
- * dst.y = \sin{src.x}
- * dst.z = 0.0
- * dst.w = 1.0
- *
- * ; needs: 1 tmp, imm{0.0, 1.0}
- * if (dst.x aliases src.x) {
- * MOV tmpA.x, src.x
- * src = tmpA
- * }
- * COS dst.x, src.x
- * SIN dst.y, src.x
- * MOV dst.zw, imm{0.0, 1.0}
- */
-#define SCS_GROW (NINST(1) + NINST(1) + NINST(1) + NINST(1) - OINST(1))
-#define SCS_TMP 1
-static void
-transform_scs(struct tgsi_transform_context *tctx,
- struct tgsi_full_instruction *inst)
-{
- struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
- struct tgsi_full_dst_register *dst = &inst->Dst[0];
- struct tgsi_full_src_register *src = &inst->Src[0];
- struct tgsi_full_instruction new_inst;
-
- if (aliases(dst, TGSI_WRITEMASK_X, src, TGSI_WRITEMASK_X)) {
- create_mov(tctx, &ctx->tmp[A].dst, src, TGSI_WRITEMASK_X, 0);
- src = &ctx->tmp[A].src;
- }
-
- if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
- /* COS dst.x, src.x */
- new_inst = tgsi_default_full_instruction();
- new_inst.Instruction.Opcode = TGSI_OPCODE_COS;
- new_inst.Instruction.NumDstRegs = 1;
- reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
- new_inst.Instruction.NumSrcRegs = 1;
- reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
- tctx->emit_instruction(tctx, &new_inst);
- }
-
- if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
- /* SIN dst.y, src.x */
- new_inst = tgsi_default_full_instruction();
- new_inst.Instruction.Opcode = TGSI_OPCODE_SIN;
- new_inst.Instruction.NumDstRegs = 1;
- reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
- new_inst.Instruction.NumSrcRegs = 1;
- reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
- tctx->emit_instruction(tctx, &new_inst);
- }
-
- if (dst->Register.WriteMask & TGSI_WRITEMASK_ZW) {
- /* MOV dst.zw, imm{0.0, 1.0} */
- new_inst = tgsi_default_full_instruction();
- new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
- new_inst.Instruction.NumDstRegs = 1;
- reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_ZW);
- new_inst.Instruction.NumSrcRegs = 1;
- reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, X, Y));
- tctx->emit_instruction(tctx, &new_inst);
- }
-}
-
/* LRP - Linear Interpolate
* dst.x = src0.x \times src1.x + (1.0 - src0.x) \times src2.x
* dst.y = src0.y \times src1.y + (1.0 - src0.y) \times src2.y
@@ -914,41 +790,29 @@
* DP3 - 3-component Dot Product
* dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
*
- * DPH - Homogeneous Dot Product
- * dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w
- *
* DP2 - 2-component Dot Product
* dst = src0.x \times src1.x + src0.y \times src1.y
*
- * DP2A - 2-component Dot Product And Add
- * dst = src0.x \times src1.x + src0.y \times src1.y + src2.x
- *
* NOTE: these are translated into sequence of MUL/MAD(/ADD) scalar
* operations, which is what you'd prefer for a ISA that is natively
* scalar. Probably a native vector ISA would at least already have
* DP4/DP3 instructions, but perhaps there is room for an alternative
- * translation for DPH/DP2/DP2A using vector instructions.
+ * translation for DP2 using vector instructions.
*
* ; needs: 1 tmp
* MUL tmpA.x, src0.x, src1.x
* MAD tmpA.x, src0.y, src1.y, tmpA.x
- * if (DPH || DP3 || DP4) {
+ * if (DP3 || DP4) {
* MAD tmpA.x, src0.z, src1.z, tmpA.x
- * if (DPH) {
- * ADD tmpA.x, src1.w, tmpA.x
- * } else if (DP4) {
+ * if (DP4) {
* MAD tmpA.x, src0.w, src1.w, tmpA.x
* }
- * } else if (DP2A) {
- * ADD tmpA.x, src2.x, tmpA.x
* }
* ; fixup last instruction to replicate into dst
*/
#define DP4_GROW (NINST(2) + NINST(3) + NINST(3) + NINST(3) - OINST(2))
#define DP3_GROW (NINST(2) + NINST(3) + NINST(3) - OINST(2))
-#define DPH_GROW (NINST(2) + NINST(3) + NINST(3) + NINST(2) - OINST(2))
#define DP2_GROW (NINST(2) + NINST(3) - OINST(2))
-#define DP2A_GROW (NINST(2) + NINST(3) + NINST(2) - OINST(3))
#define DOTP_TMP 1
static void
transform_dotp(struct tgsi_transform_context *tctx,
@@ -958,7 +822,6 @@
struct tgsi_full_dst_register *dst = &inst->Dst[0];
struct tgsi_full_src_register *src0 = &inst->Src[0];
struct tgsi_full_src_register *src1 = &inst->Src[1];
- struct tgsi_full_src_register *src2 = &inst->Src[2]; /* only DP2A */
struct tgsi_full_instruction new_inst;
unsigned opcode = inst->Instruction.Opcode;
@@ -987,8 +850,7 @@
reg_src(&new_inst.Src[1], src1, SWIZ(Y, Y, Y, Y));
reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
- if ((opcode == TGSI_OPCODE_DPH) ||
- (opcode == TGSI_OPCODE_DP3) ||
+ if ((opcode == TGSI_OPCODE_DP3) ||
(opcode == TGSI_OPCODE_DP4)) {
tctx->emit_instruction(tctx, &new_inst);
@@ -1002,18 +864,7 @@
reg_src(&new_inst.Src[1], src1, SWIZ(Z, Z, Z, Z));
reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
- if (opcode == TGSI_OPCODE_DPH) {
- tctx->emit_instruction(tctx, &new_inst);
-
- /* ADD tmpA.x, src1.w, tmpA.x */
- new_inst = tgsi_default_full_instruction();
- new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
- new_inst.Instruction.NumDstRegs = 1;
- reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
- new_inst.Instruction.NumSrcRegs = 2;
- reg_src(&new_inst.Src[0], src1, SWIZ(W, W, W, W));
- reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, X, X, X));
- } else if (opcode == TGSI_OPCODE_DP4) {
+ if (opcode == TGSI_OPCODE_DP4) {
tctx->emit_instruction(tctx, &new_inst);
/* MAD tmpA.x, src0.w, src1.w, tmpA.x */
@@ -1026,17 +877,6 @@
reg_src(&new_inst.Src[1], src1, SWIZ(W, W, W, W));
reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
}
- } else if (opcode == TGSI_OPCODE_DP2A) {
- tctx->emit_instruction(tctx, &new_inst);
-
- /* ADD tmpA.x, src2.x, tmpA.x */
- new_inst = tgsi_default_full_instruction();
- new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
- new_inst.Instruction.NumDstRegs = 1;
- reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
- new_inst.Instruction.NumSrcRegs = 2;
- reg_src(&new_inst.Src[0], src2, SWIZ(X, X, X, X));
- reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, X, X, X));
}
/* fixup last instruction to write to dst: */
@@ -1502,16 +1342,6 @@
goto skip;
transform_dst(tctx, inst);
break;
- case TGSI_OPCODE_XPD:
- if (!ctx->config->lower_XPD)
- goto skip;
- transform_xpd(tctx, inst);
- break;
- case TGSI_OPCODE_SCS:
- if (!ctx->config->lower_SCS)
- goto skip;
- transform_scs(tctx, inst);
- break;
case TGSI_OPCODE_LRP:
if (!ctx->config->lower_LRP)
goto skip;
@@ -1552,21 +1382,11 @@
goto skip;
transform_dotp(tctx, inst);
break;
- case TGSI_OPCODE_DPH:
- if (!ctx->config->lower_DPH)
- goto skip;
- transform_dotp(tctx, inst);
- break;
case TGSI_OPCODE_DP2:
if (!ctx->config->lower_DP2)
goto skip;
transform_dotp(tctx, inst);
break;
- case TGSI_OPCODE_DP2A:
- if (!ctx->config->lower_DP2A)
- goto skip;
- transform_dotp(tctx, inst);
- break;
case TGSI_OPCODE_FLR:
if (!ctx->config->lower_FLR)
goto skip;
@@ -1645,8 +1465,6 @@
#define OPCS(x) ((config->lower_ ## x) ? info->opcode_count[TGSI_OPCODE_ ## x] : 0)
/* if there are no instructions to lower, then we are done: */
if (!(OPCS(DST) ||
- OPCS(XPD) ||
- OPCS(SCS) ||
OPCS(LRP) ||
OPCS(FRC) ||
OPCS(POW) ||
@@ -1655,9 +1473,7 @@
OPCS(LOG) ||
OPCS(DP4) ||
OPCS(DP3) ||
- OPCS(DPH) ||
OPCS(DP2) ||
- OPCS(DP2A) ||
OPCS(FLR) ||
OPCS(CEIL) ||
OPCS(TRUNC) ||
@@ -1677,14 +1493,6 @@
newlen += DST_GROW * OPCS(DST);
numtmp = MAX2(numtmp, DST_TMP);
}
- if (OPCS(XPD)) {
- newlen += XPD_GROW * OPCS(XPD);
- numtmp = MAX2(numtmp, XPD_TMP);
- }
- if (OPCS(SCS)) {
- newlen += SCS_GROW * OPCS(SCS);
- numtmp = MAX2(numtmp, SCS_TMP);
- }
if (OPCS(LRP)) {
newlen += LRP_GROW * OPCS(LRP);
numtmp = MAX2(numtmp, LRP_TMP);
@@ -1717,18 +1525,10 @@
newlen += DP3_GROW * OPCS(DP3);
numtmp = MAX2(numtmp, DOTP_TMP);
}
- if (OPCS(DPH)) {
- newlen += DPH_GROW * OPCS(DPH);
- numtmp = MAX2(numtmp, DOTP_TMP);
- }
if (OPCS(DP2)) {
newlen += DP2_GROW * OPCS(DP2);
numtmp = MAX2(numtmp, DOTP_TMP);
}
- if (OPCS(DP2A)) {
- newlen += DP2A_GROW * OPCS(DP2A);
- numtmp = MAX2(numtmp, DOTP_TMP);
- }
if (OPCS(FLR)) {
newlen += FLR_GROW * OPCS(FLR);
numtmp = MAX2(numtmp, FLR_TMP);
diff -Nru mesa-17.2.4/src/gallium/auxiliary/tgsi/tgsi_lowering.h mesa-17.3.3/src/gallium/auxiliary/tgsi/tgsi_lowering.h
--- mesa-17.2.4/src/gallium/auxiliary/tgsi/tgsi_lowering.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/tgsi/tgsi_lowering.h 2018-01-18 21:30:28.000000000 +0000
@@ -55,8 +55,6 @@
* enable lowering of TGSI_OPCODE_
*/
unsigned lower_DST:1;
- unsigned lower_XPD:1;
- unsigned lower_SCS:1;
unsigned lower_LRP:1;
unsigned lower_FRC:1;
unsigned lower_POW:1;
@@ -65,9 +63,7 @@
unsigned lower_LOG:1;
unsigned lower_DP4:1;
unsigned lower_DP3:1;
- unsigned lower_DPH:1;
unsigned lower_DP2:1;
- unsigned lower_DP2A:1;
unsigned lower_FLR:1;
unsigned lower_CEIL:1;
unsigned lower_TRUNC:1;
diff -Nru mesa-17.2.4/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h mesa-17.3.3/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
--- mesa-17.2.4/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h 2018-01-18 21:30:28.000000000 +0000
@@ -56,7 +56,6 @@
OP12_TEX(TEX_LZ)
OP13(LRP)
OP11(SQRT)
-OP13(DP2A)
OP11(FRC)
OP12_TEX(TXF_LZ)
OP11(FLR)
@@ -64,8 +63,6 @@
OP11(EX2)
OP11(LG2)
OP12(POW)
-OP12(XPD)
-OP12(DPH)
OP11(COS)
OP11(DDX)
OP11(DDY)
@@ -93,7 +90,6 @@
OP00(RET)
OP11(SSG)
OP13(CMP)
-OP11(SCS)
OP12_TEX(TXB)
OP12(DIV)
OP12(DP2)
@@ -103,8 +99,6 @@
OP01_LBL(UIF)
OP00_LBL(ELSE)
OP00(ENDIF)
-OP01(PUSHA)
-OP10(POPA)
OP11(CEIL)
OP11(I2F)
OP11(NOT)
@@ -114,7 +108,6 @@
OP12(OR)
OP12(MOD)
OP12(XOR)
-OP13(SAD)
OP12_TEX(TXF)
OP12_TEX(TXQ)
OP00(CONT)
@@ -125,8 +118,6 @@
OP00_LBL(ENDLOOP)
OP00(ENDSUB)
OP00(NOP)
-OP01(CALLNZ)
-OP01(BREAKC)
OP01(KILL_IF)
OP00(END)
OP11(F2I)
diff -Nru mesa-17.2.4/src/gallium/auxiliary/tgsi/tgsi_sanity.c mesa-17.3.3/src/gallium/auxiliary/tgsi/tgsi_sanity.c
--- mesa-17.2.4/src/gallium/auxiliary/tgsi/tgsi_sanity.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/tgsi/tgsi_sanity.c 2018-01-18 21:30:28.000000000 +0000
@@ -326,10 +326,12 @@
}
if (info->num_dst != inst->Instruction.NumDstRegs) {
- report_error( ctx, "%s: Invalid number of destination operands, should be %u", info->mnemonic, info->num_dst );
+ report_error( ctx, "%s: Invalid number of destination operands, should be %u",
+ tgsi_get_opcode_name(inst->Instruction.Opcode), info->num_dst );
}
if (info->num_src != inst->Instruction.NumSrcRegs) {
- report_error( ctx, "%s: Invalid number of source operands, should be %u", info->mnemonic, info->num_src );
+ report_error( ctx, "%s: Invalid number of source operands, should be %u",
+ tgsi_get_opcode_name(inst->Instruction.Opcode), info->num_src );
}
/* Check destination and source registers' validity.
diff -Nru mesa-17.2.4/src/gallium/auxiliary/tgsi/tgsi_scan.c mesa-17.3.3/src/gallium/auxiliary/tgsi/tgsi_scan.c
--- mesa-17.2.4/src/gallium/auxiliary/tgsi/tgsi_scan.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/tgsi/tgsi_scan.c 2018-01-18 21:30:28.000000000 +0000
@@ -60,7 +60,6 @@
return opcode == TGSI_OPCODE_RESQ ||
opcode == TGSI_OPCODE_TXQ ||
opcode == TGSI_OPCODE_TXQS ||
- opcode == TGSI_OPCODE_TXQ_LZ ||
opcode == TGSI_OPCODE_LODQ;
}
@@ -92,7 +91,6 @@
opcode != TGSI_OPCODE_TXL &&
opcode != TGSI_OPCODE_TXL2 &&
opcode != TGSI_OPCODE_TXQ &&
- opcode != TGSI_OPCODE_TXQ_LZ &&
opcode != TGSI_OPCODE_TXQS;
}
@@ -109,7 +107,7 @@
const struct tgsi_full_instruction *fullinst,
const struct tgsi_full_src_register *src,
unsigned src_index,
- unsigned usage_mask,
+ unsigned usage_mask_after_swizzle,
bool is_interp_instruction,
bool *is_mem_inst)
{
@@ -117,24 +115,21 @@
if (info->processor == PIPE_SHADER_COMPUTE &&
src->Register.File == TGSI_FILE_SYSTEM_VALUE) {
- unsigned swizzle[4], i, name;
+ unsigned name, mask;
name = info->system_value_semantic_name[src->Register.Index];
- swizzle[0] = src->Register.SwizzleX;
- swizzle[1] = src->Register.SwizzleY;
- swizzle[2] = src->Register.SwizzleZ;
- swizzle[3] = src->Register.SwizzleW;
switch (name) {
case TGSI_SEMANTIC_THREAD_ID:
case TGSI_SEMANTIC_BLOCK_ID:
- for (i = 0; i < 4; i++) {
- if (swizzle[i] <= TGSI_SWIZZLE_Z) {
- if (name == TGSI_SEMANTIC_THREAD_ID)
- info->uses_thread_id[swizzle[i]] = true;
- else
- info->uses_block_id[swizzle[i]] = true;
- }
+ mask = usage_mask_after_swizzle & TGSI_WRITEMASK_XYZ;
+ while (mask) {
+ unsigned i = u_bit_scan(&mask);
+
+ if (name == TGSI_SEMANTIC_THREAD_ID)
+ info->uses_thread_id[i] = true;
+ else
+ info->uses_block_id[i] = true;
}
break;
case TGSI_SEMANTIC_BLOCK_SIZE:
@@ -152,12 +147,12 @@
if (src->Register.File == TGSI_FILE_INPUT) {
if (src->Register.Indirect) {
for (ind = 0; ind < info->num_inputs; ++ind) {
- info->input_usage_mask[ind] |= usage_mask;
+ info->input_usage_mask[ind] |= usage_mask_after_swizzle;
}
} else {
assert(ind >= 0);
assert(ind < PIPE_MAX_SHADER_INPUTS);
- info->input_usage_mask[ind] |= usage_mask;
+ info->input_usage_mask[ind] |= usage_mask_after_swizzle;
}
if (info->processor == PIPE_SHADER_FRAGMENT) {
@@ -172,21 +167,11 @@
index = info->input_semantic_index[input];
if (name == TGSI_SEMANTIC_POSITION &&
- (src->Register.SwizzleX == TGSI_SWIZZLE_Z ||
- src->Register.SwizzleY == TGSI_SWIZZLE_Z ||
- src->Register.SwizzleZ == TGSI_SWIZZLE_Z ||
- src->Register.SwizzleW == TGSI_SWIZZLE_Z))
- info->reads_z = TRUE;
-
- if (name == TGSI_SEMANTIC_COLOR) {
- unsigned mask =
- (1 << src->Register.SwizzleX) |
- (1 << src->Register.SwizzleY) |
- (1 << src->Register.SwizzleZ) |
- (1 << src->Register.SwizzleW);
+ usage_mask_after_swizzle & TGSI_WRITEMASK_Z)
+ info->reads_z = true;
- info->colors_read |= mask << (index * 4);
- }
+ if (name == TGSI_SEMANTIC_COLOR)
+ info->colors_read |= usage_mask_after_swizzle << (index * 4);
/* Process only interpolated varyings. Don't include POSITION.
* Don't include integer varyings, because they are not
@@ -459,14 +444,43 @@
}
}
- if (fullinst->Instruction.Opcode >= TGSI_OPCODE_F2D &&
- fullinst->Instruction.Opcode <= TGSI_OPCODE_DSSG)
+ if ((fullinst->Instruction.Opcode >= TGSI_OPCODE_F2D &&
+ fullinst->Instruction.Opcode <= TGSI_OPCODE_DSSG) ||
+ fullinst->Instruction.Opcode == TGSI_OPCODE_DFMA ||
+ fullinst->Instruction.Opcode == TGSI_OPCODE_DDIV ||
+ fullinst->Instruction.Opcode == TGSI_OPCODE_D2U64 ||
+ fullinst->Instruction.Opcode == TGSI_OPCODE_D2I64 ||
+ fullinst->Instruction.Opcode == TGSI_OPCODE_U642D ||
+ fullinst->Instruction.Opcode == TGSI_OPCODE_I642D)
info->uses_doubles = TRUE;
for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) {
scan_src_operand(info, fullinst, &fullinst->Src[i], i,
tgsi_util_get_inst_usage_mask(fullinst, i),
is_interp_instruction, &is_mem_inst);
+
+ if (fullinst->Src[i].Register.Indirect) {
+ struct tgsi_full_src_register src = {{0}};
+
+ src.Register.File = fullinst->Src[i].Indirect.File;
+ src.Register.Index = fullinst->Src[i].Indirect.Index;
+
+ scan_src_operand(info, fullinst, &src, -1,
+ 1 << fullinst->Src[i].Indirect.Swizzle,
+ false, NULL);
+ }
+
+ if (fullinst->Src[i].Register.Dimension &&
+ fullinst->Src[i].Dimension.Indirect) {
+ struct tgsi_full_src_register src = {{0}};
+
+ src.Register.File = fullinst->Src[i].DimIndirect.File;
+ src.Register.Index = fullinst->Src[i].DimIndirect.Index;
+
+ scan_src_operand(info, fullinst, &src, -1,
+ 1 << fullinst->Src[i].DimIndirect.Swizzle,
+ false, NULL);
+ }
}
if (fullinst->Instruction.Texture) {
@@ -475,12 +489,12 @@
src.Register.File = fullinst->TexOffsets[i].File;
src.Register.Index = fullinst->TexOffsets[i].Index;
- src.Register.SwizzleX = fullinst->TexOffsets[i].SwizzleX;
- src.Register.SwizzleY = fullinst->TexOffsets[i].SwizzleY;
- src.Register.SwizzleZ = fullinst->TexOffsets[i].SwizzleZ;
/* The usage mask is suboptimal but should be safe. */
- scan_src_operand(info, fullinst, &src, 0, TGSI_WRITEMASK_XYZ,
+ scan_src_operand(info, fullinst, &src, -1,
+ (1 << fullinst->TexOffsets[i].SwizzleX) |
+ (1 << fullinst->TexOffsets[i].SwizzleY) |
+ (1 << fullinst->TexOffsets[i].SwizzleZ),
false, &is_mem_inst);
}
}
@@ -488,13 +502,31 @@
/* check for indirect register writes */
for (i = 0; i < fullinst->Instruction.NumDstRegs; i++) {
const struct tgsi_full_dst_register *dst = &fullinst->Dst[i];
+
if (dst->Register.Indirect) {
+ struct tgsi_full_src_register src = {{0}};
+
+ src.Register.File = dst->Indirect.File;
+ src.Register.Index = dst->Indirect.Index;
+
+ scan_src_operand(info, fullinst, &src, -1,
+ 1 << dst->Indirect.Swizzle, false, NULL);
+
info->indirect_files |= (1 << dst->Register.File);
info->indirect_files_written |= (1 << dst->Register.File);
}
- if (dst->Register.Dimension && dst->Dimension.Indirect)
+ if (dst->Register.Dimension && dst->Dimension.Indirect) {
+ struct tgsi_full_src_register src = {{0}};
+
+ src.Register.File = dst->DimIndirect.File;
+ src.Register.Index = dst->DimIndirect.Index;
+
+ scan_src_operand(info, fullinst, &src, -1,
+ 1 << dst->DimIndirect.Swizzle, false, NULL);
+
info->dim_indirect_files |= 1u << dst->Register.File;
+ }
if (is_memory_file(dst->Register.File)) {
assert(fullinst->Instruction.Opcode == TGSI_OPCODE_STORE);
@@ -933,3 +965,225 @@
return;
}
+
+/* Trap the subroutine opcodes (BGNSUB, ENDSUB, CAL), which are unhandled. */
+static void
+check_no_subroutines(const struct tgsi_full_instruction *inst)
+{
+   const unsigned opcode = inst->Instruction.Opcode;
+
+   if (opcode == TGSI_OPCODE_BGNSUB || opcode == TGSI_OPCODE_ENDSUB ||
+       opcode == TGSI_OPCODE_CAL)
+      unreachable("subroutines unhandled");
+}
+
+/* Tess factor writemask of inst: inner in the low bits, outer shifted by 4. */
+static unsigned
+get_inst_tessfactor_writemask(const struct tgsi_shader_info *info,
+                              const struct tgsi_full_instruction *inst)
+{
+   unsigned mask = 0;
+
+   for (unsigned d = 0; d < inst->Instruction.NumDstRegs; d++) {
+      const struct tgsi_full_dst_register *reg = &inst->Dst[d];
+
+      /* Only directly addressed outputs are considered. */
+      if (reg->Register.File != TGSI_FILE_OUTPUT || reg->Register.Indirect)
+         continue;
+      const unsigned name = info->output_semantic_name[reg->Register.Index];
+      if (name == TGSI_SEMANTIC_TESSINNER)
+         mask |= reg->Register.WriteMask;
+      else if (name == TGSI_SEMANTIC_TESSOUTER)
+         mask |= reg->Register.WriteMask << 4;
+   }
+   return mask;
+}
+
+/* Return the tess factor writemask of one block, nested blocks included. */
+static unsigned
+get_block_tessfactor_writemask(const struct tgsi_shader_info *info,
+                               struct tgsi_parse_context *parse,
+                               unsigned end_opcode)
+{
+   struct tgsi_full_instruction *inst;
+   unsigned writemask = 0;
+
+   for (;;) {
+      tgsi_parse_token(parse);
+      assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION);
+      inst = &parse->FullToken.FullInstruction;
+      check_no_subroutines(inst);
+
+      /* Recursively process nested blocks. "continue" skips the end check
+       * below, because inst now holds the nested block's END token. */
+      switch (inst->Instruction.Opcode) {
+      case TGSI_OPCODE_IF:
+      case TGSI_OPCODE_UIF:
+         writemask |=
+            get_block_tessfactor_writemask(info, parse, TGSI_OPCODE_ENDIF);
+         continue;
+      case TGSI_OPCODE_BGNLOOP:
+         writemask |=
+            get_block_tessfactor_writemask(info, parse, TGSI_OPCODE_ENDLOOP);
+         continue;
+      case TGSI_OPCODE_BARRIER:
+         unreachable("nested BARRIER is illegal");
+         continue;
+      }
+
+      writemask |= get_inst_tessfactor_writemask(info, inst);
+      if (inst->Instruction.Opcode == end_opcode)
+         return writemask;
+   }
+}
+
+/* Collect tess factor writes of an IF..ENDIF block into the caller's masks. */
+static void
+get_if_block_tessfactor_writemask(const struct tgsi_shader_info *info,
+                                  struct tgsi_parse_context *parse,
+                                  unsigned *upper_block_tf_writemask,
+                                  unsigned *cond_block_tf_writemask)
+{
+   struct tgsi_full_instruction *inst;
+   unsigned then_tessfactor_writemask = 0;
+   unsigned else_tessfactor_writemask = 0;
+   bool is_then = true;
+
+   for (;;) {
+      tgsi_parse_token(parse);
+      assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION);
+      inst = &parse->FullToken.FullInstruction;
+      check_no_subroutines(inst);
+
+      /* "continue" must bypass the ENDIF check at the bottom: inst aliases
+       * the parser, so after a nested IF it holds that block's ENDIF. */
+      switch (inst->Instruction.Opcode) {
+      case TGSI_OPCODE_ELSE:
+         is_then = false;
+         continue;
+      /* Recursively process nested blocks. */
+      case TGSI_OPCODE_IF:
+      case TGSI_OPCODE_UIF:
+         get_if_block_tessfactor_writemask(info, parse,
+                                           is_then ? &then_tessfactor_writemask :
+                                                     &else_tessfactor_writemask,
+                                           cond_block_tf_writemask);
+         continue;
+      case TGSI_OPCODE_BGNLOOP:
+         *cond_block_tf_writemask |=
+            get_block_tessfactor_writemask(info, parse, TGSI_OPCODE_ENDLOOP);
+         continue;
+      case TGSI_OPCODE_BARRIER:
+         unreachable("nested BARRIER is illegal");
+         continue;
+      }
+
+      /* Process an instruction in the current block. */
+      unsigned writemask = get_inst_tessfactor_writemask(info, inst);
+      if (is_then)
+         then_tessfactor_writemask |= writemask;
+      else
+         else_tessfactor_writemask |= writemask;
+
+      if (inst->Instruction.Opcode == TGSI_OPCODE_ENDIF)
+         break;
+   }
+
+   if (then_tessfactor_writemask || else_tessfactor_writemask) {
+      /* If both statements write the same tess factor channels,
+       * we can say that the upper block writes them too. */
+      *upper_block_tf_writemask |= then_tessfactor_writemask &
+                                   else_tessfactor_writemask;
+      *cond_block_tf_writemask |= then_tessfactor_writemask |
+                                  else_tessfactor_writemask;
+   }
+}
+
+/* Scan a tess control shader and report in "out" whether all codepaths
+ * write the tess factors ("out" is zeroed for any other shader type). */
+void
+tgsi_scan_tess_ctrl(const struct tgsi_token *tokens,
+                    const struct tgsi_shader_info *info,
+                    struct tgsi_tessctrl_info *out)
+{
+   memset(out, 0, sizeof(*out));
+
+   if (info->processor != PIPE_SHADER_TESS_CTRL)
+      return;
+
+   struct tgsi_parse_context parse;
+   if (tgsi_parse_init(&parse, tokens) != TGSI_PARSE_OK) {
+      debug_printf("tgsi_parse_init() failed in tgsi_scan_tess_ctrl()!\n");
+      return;
+   }
+
+   /* The pass works as follows:
+    * If all codepaths write tess factors, we can say that all invocations
+    * define tess factors. Each tess factor channel is tracked separately.
+    */
+   unsigned main_block_tf_writemask = 0; /* if main block writes tess factors */
+   unsigned cond_block_tf_writemask = 0; /* if cond block writes tess factors */
+
+   /* Initial value = true. Here the pass will accumulate results from multiple
+    * segments surrounded by barriers. If tess factors aren't written at all,
+    * it's a shader bug and we don't care if this will be true.
+    */
+   out->tessfactors_are_def_in_all_invocs = true;
+
+   while (!tgsi_parse_end_of_tokens(&parse)) {
+      tgsi_parse_token(&parse);
+
+      if (parse.FullToken.Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
+         continue;
+
+      struct tgsi_full_instruction *inst = &parse.FullToken.FullInstruction;
+      check_no_subroutines(inst);
+
+      /* Process nested blocks. */
+      switch (inst->Instruction.Opcode) {
+      case TGSI_OPCODE_IF:
+      case TGSI_OPCODE_UIF:
+         get_if_block_tessfactor_writemask(info, &parse,
+                                           &main_block_tf_writemask,
+                                           &cond_block_tf_writemask);
+         continue;
+
+      case TGSI_OPCODE_BGNLOOP:
+         cond_block_tf_writemask |=
+            get_block_tessfactor_writemask(info, &parse, TGSI_OPCODE_ENDLOOP);
+         continue;
+
+      case TGSI_OPCODE_BARRIER:
+         /* The following case must be prevented:
+          *    gl_TessLevelInner = ...;
+          *    barrier();
+          *    if (gl_InvocationID == 1)
+          *       gl_TessLevelInner = ...;
+          *
+          * If you consider disjoint code segments separated by barriers, each
+          * such segment that writes tess factor channels should write the same
+          * channels in all codepaths within that segment.
+          */
+         if (main_block_tf_writemask || cond_block_tf_writemask) {
+            /* Accumulate the result: */
+            out->tessfactors_are_def_in_all_invocs &=
+               !(cond_block_tf_writemask & ~main_block_tf_writemask);
+
+            /* Analyze the next code segment from scratch. */
+            main_block_tf_writemask = 0;
+            cond_block_tf_writemask = 0;
+         }
+         continue;
+      }
+
+      main_block_tf_writemask |= get_inst_tessfactor_writemask(info, inst);
+   }
+
+   /* Accumulate the result for the last code segment separated by a barrier. */
+   if (main_block_tf_writemask || cond_block_tf_writemask) {
+      out->tessfactors_are_def_in_all_invocs &=
+         !(cond_block_tf_writemask & ~main_block_tf_writemask);
+   }
+
+   tgsi_parse_free(&parse);
+}
diff -Nru mesa-17.2.4/src/gallium/auxiliary/tgsi/tgsi_scan.h mesa-17.3.3/src/gallium/auxiliary/tgsi/tgsi_scan.h
--- mesa-17.2.4/src/gallium/auxiliary/tgsi/tgsi_scan.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/tgsi/tgsi_scan.h 2018-01-18 21:30:28.000000000 +0000
@@ -188,6 +188,12 @@
struct tgsi_declaration_range range;
};
+struct tgsi_tessctrl_info
+{
+ /** Whether all codepaths write tess factors in all invocations; tgsi_scan_tess_ctrl clears it when a barrier-separated code segment writes a tess factor channel only in conditional code. */
+ bool tessfactors_are_def_in_all_invocs;
+};
+
extern void
tgsi_scan_shader(const struct tgsi_token *tokens,
struct tgsi_shader_info *info);
@@ -198,6 +204,11 @@
unsigned max_array_id,
struct tgsi_array_info *arrays);
+void
+tgsi_scan_tess_ctrl(const struct tgsi_token *tokens,
+ const struct tgsi_shader_info *info,
+ struct tgsi_tessctrl_info *out);
+
static inline bool
tgsi_is_bindless_image_file(unsigned file)
{
diff -Nru mesa-17.2.4/src/gallium/auxiliary/tgsi/tgsi_strings.c mesa-17.3.3/src/gallium/auxiliary/tgsi/tgsi_strings.c
--- mesa-17.2.4/src/gallium/auxiliary/tgsi/tgsi_strings.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/tgsi/tgsi_strings.c 2018-01-18 21:30:28.000000000 +0000
@@ -57,6 +57,7 @@
"SVIEW",
"BUFFER",
"MEMORY",
+ "CONSTBUF",
};
const char *tgsi_semantic_names[TGSI_SEMANTIC_COUNT] =
diff -Nru mesa-17.2.4/src/gallium/auxiliary/tgsi/tgsi_text.c mesa-17.3.3/src/gallium/auxiliary/tgsi/tgsi_text.c
--- mesa-17.2.4/src/gallium/auxiliary/tgsi/tgsi_text.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/tgsi/tgsi_text.c 2018-01-18 21:30:28.000000000 +0000
@@ -1003,16 +1003,17 @@
const struct tgsi_opcode_info *info)
{
const char *cur = *pcur;
+ const char *mnemonic = tgsi_get_opcode_name(info->opcode);
/* simple case: the whole string matches the instruction name */
- if (str_match_nocase_whole(&cur, info->mnemonic)) {
+ if (str_match_nocase_whole(&cur, mnemonic)) {
*pcur = cur;
*saturate = 0;
*precise = 0;
return TRUE;
}
- if (str_match_no_case(&cur, info->mnemonic)) {
+ if (str_match_no_case(&cur, mnemonic)) {
/* the instruction has a suffix, figure it out */
if (str_match_no_case(&cur, "_SAT")) {
*pcur = cur;
diff -Nru mesa-17.2.4/src/gallium/auxiliary/tgsi/tgsi_transform.h mesa-17.3.3/src/gallium/auxiliary/tgsi/tgsi_transform.h
--- mesa-17.2.4/src/gallium/auxiliary/tgsi/tgsi_transform.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/tgsi/tgsi_transform.h 2018-01-18 21:30:28.000000000 +0000
@@ -124,6 +124,8 @@
decl.Declaration.File = TGSI_FILE_CONSTANT;
decl.Range.First = firstIdx;
decl.Range.Last = lastIdx;
+ decl.Declaration.Dimension = 1;
+ /* Dim.Index2D is already 0 */
ctx->emit_declaration(ctx, &decl);
}
@@ -231,6 +233,18 @@
}
static inline void
+tgsi_transform_src_reg_xyzw(struct tgsi_full_src_register *reg,
+ unsigned file, unsigned index)
+{ /* Sets only the file/index of 'reg'; swizzle and other fields are left as the caller initialized them. */
+ reg->Register.File = file;
+ reg->Register.Index = index;
+ if (file == TGSI_FILE_CONSTANT) { /* constant operands also get an explicit dimension, index 0 */
+ reg->Register.Dimension = 1;
+ reg->Dimension.Index = 0;
+ }
+}
+
+static inline void
tgsi_transform_src_reg(struct tgsi_full_src_register *reg,
unsigned file, unsigned index,
unsigned swizzleX, unsigned swizzleY,
@@ -238,7 +252,11 @@
{
reg->Register.File = file;
reg->Register.Index = index;
- reg->Register.SwizzleX = swizzleX;
+ if (file == TGSI_FILE_CONSTANT) {
+ reg->Register.Dimension = 1;
+ reg->Dimension.Index = 0;
+ }
+ reg->Register.SwizzleX = swizzleX;
reg->Register.SwizzleY = swizzleY;
reg->Register.SwizzleZ = swizzleZ;
reg->Register.SwizzleW = swizzleW;
@@ -265,8 +283,7 @@
inst.Dst[0].Register.Index = dst_index;
inst.Dst[0].Register.WriteMask = dst_writemask;
inst.Instruction.NumSrcRegs = 1;
- inst.Src[0].Register.File = src0_file;
- inst.Src[0].Register.Index = src0_index;
+ tgsi_transform_src_reg_xyzw(&inst.Src[0], src0_file, src0_index);
ctx->emit_instruction(ctx, &inst);
}
@@ -293,10 +310,8 @@
inst.Dst[0].Register.Index = dst_index;
inst.Dst[0].Register.WriteMask = dst_writemask;
inst.Instruction.NumSrcRegs = 2;
- inst.Src[0].Register.File = src0_file;
- inst.Src[0].Register.Index = src0_index;
- inst.Src[1].Register.File = src1_file;
- inst.Src[1].Register.Index = src1_index;
+ tgsi_transform_src_reg_xyzw(&inst.Src[0], src0_file, src0_index);
+ tgsi_transform_src_reg_xyzw(&inst.Src[1], src1_file, src1_index);
inst.Src[1].Register.Negate = src1_negate;
ctx->emit_instruction(ctx, &inst);
@@ -325,12 +340,9 @@
inst.Dst[0].Register.Index = dst_index;
inst.Dst[0].Register.WriteMask = dst_writemask;
inst.Instruction.NumSrcRegs = 3;
- inst.Src[0].Register.File = src0_file;
- inst.Src[0].Register.Index = src0_index;
- inst.Src[1].Register.File = src1_file;
- inst.Src[1].Register.Index = src1_index;
- inst.Src[2].Register.File = src2_file;
- inst.Src[2].Register.Index = src2_index;
+ tgsi_transform_src_reg_xyzw(&inst.Src[0], src0_file, src0_index);
+ tgsi_transform_src_reg_xyzw(&inst.Src[1], src1_file, src1_index);
+ tgsi_transform_src_reg_xyzw(&inst.Src[2], src2_file, src2_index);
ctx->emit_instruction(ctx, &inst);
}
@@ -356,8 +368,7 @@
inst.Dst[0].Register.Index = dst_index;
inst.Dst[0].Register.WriteMask = dst_writemask;
inst.Instruction.NumSrcRegs = 1;
- inst.Src[0].Register.File = src0_file;
- inst.Src[0].Register.Index = src0_index;
+ tgsi_transform_src_reg_xyzw(&inst.Src[0], src0_file, src0_index);
switch (dst_writemask) {
case TGSI_WRITEMASK_X:
inst.Src[0].Register.SwizzleX = src0_swizzle;
@@ -402,10 +413,8 @@
inst.Dst[0].Register.Index = dst_index;
inst.Dst[0].Register.WriteMask = dst_writemask;
inst.Instruction.NumSrcRegs = 2;
- inst.Src[0].Register.File = src0_file;
- inst.Src[0].Register.Index = src0_index;
- inst.Src[1].Register.File = src1_file;
- inst.Src[1].Register.Index = src1_index;
+ tgsi_transform_src_reg_xyzw(&inst.Src[0], src0_file, src0_index);
+ tgsi_transform_src_reg_xyzw(&inst.Src[1], src1_file, src1_index);
inst.Src[1].Register.Negate = src1_negate;
switch (dst_writemask) {
case TGSI_WRITEMASK_X:
@@ -458,13 +467,10 @@
inst.Dst[0].Register.Index = dst_index;
inst.Dst[0].Register.WriteMask = dst_writemask;
inst.Instruction.NumSrcRegs = 3;
- inst.Src[0].Register.File = src0_file;
- inst.Src[0].Register.Index = src0_index;
+ tgsi_transform_src_reg_xyzw(&inst.Src[0], src0_file, src0_index);
inst.Src[0].Register.Negate = src0_negate;
- inst.Src[1].Register.File = src1_file;
- inst.Src[1].Register.Index = src1_index;
- inst.Src[2].Register.File = src2_file;
- inst.Src[2].Register.Index = src2_index;
+ tgsi_transform_src_reg_xyzw(&inst.Src[1], src1_file, src1_index);
+ tgsi_transform_src_reg_xyzw(&inst.Src[2], src2_file, src2_index);
switch (dst_writemask) {
case TGSI_WRITEMASK_X:
inst.Src[0].Register.SwizzleX = src0_swizzle;
@@ -507,8 +513,7 @@
inst.Instruction.Opcode = TGSI_OPCODE_KILL_IF;
inst.Instruction.NumDstRegs = 0;
inst.Instruction.NumSrcRegs = 1;
- inst.Src[0].Register.File = src_file;
- inst.Src[0].Register.Index = src_index;
+ tgsi_transform_src_reg_xyzw(&inst.Src[0], src_file, src_index);
inst.Src[0].Register.SwizzleX =
inst.Src[0].Register.SwizzleY =
inst.Src[0].Register.SwizzleZ =
@@ -540,10 +545,8 @@
inst.Instruction.NumSrcRegs = 2;
inst.Instruction.Texture = TRUE;
inst.Texture.Texture = tex_target;
- inst.Src[0].Register.File = src_file;
- inst.Src[0].Register.Index = src_index;
- inst.Src[1].Register.File = TGSI_FILE_SAMPLER;
- inst.Src[1].Register.Index = sampler_index;
+ tgsi_transform_src_reg_xyzw(&inst.Src[0], src_file, src_index);
+ tgsi_transform_src_reg_xyzw(&inst.Src[1], TGSI_FILE_SAMPLER, sampler_index);
ctx->emit_instruction(ctx, &inst);
}
diff -Nru mesa-17.2.4/src/gallium/auxiliary/tgsi/tgsi_ureg.c mesa-17.3.3/src/gallium/auxiliary/tgsi/tgsi_ureg.c
--- mesa-17.2.4/src/gallium/auxiliary/tgsi/tgsi_ureg.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/tgsi/tgsi_ureg.c 2018-01-18 21:30:28.000000000 +0000
@@ -180,8 +180,7 @@
unsigned array_temps[UREG_MAX_ARRAY_TEMPS];
unsigned nr_array_temps;
- struct const_decl const_decls;
- struct const_decl const_decls2D[PIPE_MAX_CONSTANT_BUFFERS];
+ struct const_decl const_decls[PIPE_MAX_CONSTANT_BUFFERS];
unsigned properties[TGSI_PROPERTY_COUNT];
@@ -507,7 +506,7 @@
unsigned last,
unsigned index2D)
{
- struct const_decl *decl = &ureg->const_decls2D[index2D];
+ struct const_decl *decl = &ureg->const_decls[index2D];
assert(index2D < PIPE_MAX_CONSTANT_BUFFERS);
@@ -529,7 +528,7 @@
ureg_DECL_constant(struct ureg_program *ureg,
unsigned index)
{
- struct const_decl *decl = &ureg->const_decls;
+ struct const_decl *decl = &ureg->const_decls[0];
unsigned minconst = index, maxconst = index;
unsigned i;
@@ -579,7 +578,9 @@
assert(i < decl->nr_constant_ranges);
assert(decl->constant_range[i].first <= index);
assert(decl->constant_range[i].last >= index);
- return ureg_src_register(TGSI_FILE_CONSTANT, index);
+
+ struct ureg_src src = ureg_src_register(TGSI_FILE_CONSTANT, index);
+ return ureg_src_dimension(src, 0);
}
static struct ureg_dst alloc_temporary( struct ureg_program *ureg,
@@ -1891,17 +1892,8 @@
emit_decl_memory(ureg, i);
}
- if (ureg->const_decls.nr_constant_ranges) {
- for (i = 0; i < ureg->const_decls.nr_constant_ranges; i++) {
- emit_decl_range(ureg,
- TGSI_FILE_CONSTANT,
- ureg->const_decls.constant_range[i].first,
- ureg->const_decls.constant_range[i].last - ureg->const_decls.constant_range[i].first + 1);
- }
- }
-
for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) {
- struct const_decl *decl = &ureg->const_decls2D[i];
+ struct const_decl *decl = &ureg->const_decls[i];
if (decl->nr_constant_ranges) {
uint j;
diff -Nru mesa-17.2.4/src/gallium/auxiliary/tgsi/tgsi_ureg.h mesa-17.3.3/src/gallium/auxiliary/tgsi/tgsi_ureg.h
--- mesa-17.2.4/src/gallium/auxiliary/tgsi/tgsi_ureg.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/tgsi/tgsi_ureg.h 2018-01-18 21:30:28.000000000 +0000
@@ -905,7 +905,6 @@
ureg_dst_indirect( struct ureg_dst reg, struct ureg_src addr )
{
assert(reg.File != TGSI_FILE_NULL);
- assert(addr.File == TGSI_FILE_ADDRESS || addr.File == TGSI_FILE_TEMPORARY);
reg.Indirect = 1;
reg.IndirectFile = addr.File;
reg.IndirectIndex = addr.Index;
@@ -917,7 +916,6 @@
ureg_src_indirect( struct ureg_src reg, struct ureg_src addr )
{
assert(reg.File != TGSI_FILE_NULL);
- assert(addr.File == TGSI_FILE_ADDRESS || addr.File == TGSI_FILE_TEMPORARY);
reg.Indirect = 1;
reg.IndirectFile = addr.File;
reg.IndirectIndex = addr.Index;
@@ -1025,10 +1023,6 @@
{
struct ureg_dst dst;
- assert(!src.Indirect ||
- (src.IndirectFile == TGSI_FILE_ADDRESS ||
- src.IndirectFile == TGSI_FILE_TEMPORARY));
-
dst.File = src.File;
dst.WriteMask = TGSI_WRITEMASK_XYZW;
dst.IndirectFile = src.IndirectFile;
diff -Nru mesa-17.2.4/src/gallium/auxiliary/tgsi/tgsi_util.c mesa-17.3.3/src/gallium/auxiliary/tgsi/tgsi_util.c
--- mesa-17.2.4/src/gallium/auxiliary/tgsi/tgsi_util.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/tgsi/tgsi_util.c 2018-01-18 21:30:28.000000000 +0000
@@ -27,9 +27,11 @@
#include "util/u_debug.h"
#include "pipe/p_shader_tokens.h"
+#include "tgsi_info.h"
#include "tgsi_parse.h"
#include "tgsi_util.h"
#include "tgsi_exec.h"
+#include "util/bitscan.h"
union pointer_hack
{
@@ -180,82 +182,80 @@
unsigned chan;
switch (inst->Instruction.Opcode) {
- case TGSI_OPCODE_MOV:
- case TGSI_OPCODE_ARL:
- case TGSI_OPCODE_ARR:
+ case TGSI_OPCODE_IF:
+ case TGSI_OPCODE_UIF:
+ case TGSI_OPCODE_EMIT:
+ case TGSI_OPCODE_ENDPRIM:
case TGSI_OPCODE_RCP:
- case TGSI_OPCODE_MUL:
- case TGSI_OPCODE_DIV:
- case TGSI_OPCODE_ADD:
- case TGSI_OPCODE_MIN:
- case TGSI_OPCODE_MAX:
- case TGSI_OPCODE_SLT:
- case TGSI_OPCODE_SGE:
- case TGSI_OPCODE_MAD:
- case TGSI_OPCODE_LRP:
- case TGSI_OPCODE_FMA:
- case TGSI_OPCODE_FRC:
- case TGSI_OPCODE_CEIL:
- case TGSI_OPCODE_FLR:
- case TGSI_OPCODE_ROUND:
- case TGSI_OPCODE_POW:
- case TGSI_OPCODE_COS:
- case TGSI_OPCODE_SIN:
- case TGSI_OPCODE_DDX:
- case TGSI_OPCODE_DDY:
- case TGSI_OPCODE_SEQ:
- case TGSI_OPCODE_SGT:
- case TGSI_OPCODE_SLE:
- case TGSI_OPCODE_SNE:
- case TGSI_OPCODE_SSG:
- case TGSI_OPCODE_CMP:
- case TGSI_OPCODE_TRUNC:
- case TGSI_OPCODE_NOT:
- case TGSI_OPCODE_AND:
- case TGSI_OPCODE_OR:
- case TGSI_OPCODE_XOR:
- case TGSI_OPCODE_SAD:
- case TGSI_OPCODE_FSEQ:
- case TGSI_OPCODE_FSGE:
- case TGSI_OPCODE_FSLT:
- case TGSI_OPCODE_FSNE:
- case TGSI_OPCODE_F2I:
- case TGSI_OPCODE_IDIV:
- case TGSI_OPCODE_IMAX:
- case TGSI_OPCODE_IMIN:
- case TGSI_OPCODE_INEG:
- case TGSI_OPCODE_ISGE:
- case TGSI_OPCODE_ISHR:
- case TGSI_OPCODE_ISLT:
- case TGSI_OPCODE_F2U:
- case TGSI_OPCODE_U2F:
- case TGSI_OPCODE_UADD:
- case TGSI_OPCODE_UDIV:
- case TGSI_OPCODE_UMAD:
- case TGSI_OPCODE_UMAX:
- case TGSI_OPCODE_UMIN:
- case TGSI_OPCODE_UMOD:
- case TGSI_OPCODE_UMUL:
- case TGSI_OPCODE_USEQ:
- case TGSI_OPCODE_USGE:
- case TGSI_OPCODE_USHR:
- case TGSI_OPCODE_USLT:
- case TGSI_OPCODE_USNE:
- case TGSI_OPCODE_IMUL_HI:
- case TGSI_OPCODE_UMUL_HI:
- case TGSI_OPCODE_DDX_FINE:
- case TGSI_OPCODE_DDY_FINE:
- /* Channel-wise operations */
- read_mask = write_mask;
- break;
-
+ case TGSI_OPCODE_RSQ:
+ case TGSI_OPCODE_SQRT:
case TGSI_OPCODE_EX2:
case TGSI_OPCODE_LG2:
+ case TGSI_OPCODE_SIN:
+ case TGSI_OPCODE_COS:
+ case TGSI_OPCODE_POW: /* reads src0.x and src1.x */
+ case TGSI_OPCODE_UP2H:
+ case TGSI_OPCODE_UP2US:
+ case TGSI_OPCODE_UP4B:
+ case TGSI_OPCODE_UP4UB:
+ case TGSI_OPCODE_MEMBAR:
+ case TGSI_OPCODE_BALLOT:
read_mask = TGSI_WRITEMASK_X;
break;
- case TGSI_OPCODE_SCS:
- read_mask = write_mask & TGSI_WRITEMASK_XY ? TGSI_WRITEMASK_X : 0;
+ case TGSI_OPCODE_DP2:
+ case TGSI_OPCODE_PK2H:
+ case TGSI_OPCODE_PK2US:
+ case TGSI_OPCODE_DFRACEXP:
+ case TGSI_OPCODE_F2D:
+ case TGSI_OPCODE_I2D:
+ case TGSI_OPCODE_U2D:
+ case TGSI_OPCODE_F2U64:
+ case TGSI_OPCODE_F2I64:
+ case TGSI_OPCODE_U2I64:
+ case TGSI_OPCODE_I2I64:
+ case TGSI_OPCODE_TXQS: /* bindless handle possible */
+ case TGSI_OPCODE_RESQ: /* bindless handle possible */
+ read_mask = TGSI_WRITEMASK_XY;
+ break;
+
+ case TGSI_OPCODE_TXQ:
+ if (src_idx == 0)
+ read_mask = TGSI_WRITEMASK_X;
+ else
+ read_mask = TGSI_WRITEMASK_XY; /* bindless handle possible */
+ break;
+
+ case TGSI_OPCODE_DP3:
+ read_mask = TGSI_WRITEMASK_XYZ;
+ break;
+
+ case TGSI_OPCODE_DSEQ:
+ case TGSI_OPCODE_DSNE:
+ case TGSI_OPCODE_DSLT:
+ case TGSI_OPCODE_DSGE:
+ case TGSI_OPCODE_DP4:
+ case TGSI_OPCODE_PK4B:
+ case TGSI_OPCODE_PK4UB:
+ case TGSI_OPCODE_D2F:
+ case TGSI_OPCODE_D2I:
+ case TGSI_OPCODE_D2U:
+ case TGSI_OPCODE_I2F:
+ case TGSI_OPCODE_U2F:
+ case TGSI_OPCODE_U64SEQ:
+ case TGSI_OPCODE_U64SNE:
+ case TGSI_OPCODE_U64SLT:
+ case TGSI_OPCODE_U64SGE:
+ case TGSI_OPCODE_U642F:
+ case TGSI_OPCODE_I64SLT:
+ case TGSI_OPCODE_I64SGE:
+ case TGSI_OPCODE_I642F:
+ read_mask = TGSI_WRITEMASK_XYZW;
+ break;
+
+ case TGSI_OPCODE_LIT:
+ read_mask = write_mask & TGSI_WRITEMASK_YZ ?
+ TGSI_WRITEMASK_XY | TGSI_WRITEMASK_W : 0;
break;
case TGSI_OPCODE_EXP:
@@ -263,78 +263,158 @@
read_mask = write_mask & TGSI_WRITEMASK_XYZ ? TGSI_WRITEMASK_X : 0;
break;
- case TGSI_OPCODE_DP2A:
- read_mask = src_idx == 2 ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_XY;
+ case TGSI_OPCODE_DST:
+ if (src_idx == 0)
+ read_mask = TGSI_WRITEMASK_YZ;
+ else
+ read_mask = TGSI_WRITEMASK_YW;
break;
- case TGSI_OPCODE_DP2:
- read_mask = TGSI_WRITEMASK_XY;
+ case TGSI_OPCODE_DLDEXP:
+ if (src_idx == 0) {
+ read_mask = write_mask;
+ } else {
+ read_mask =
+ (write_mask & TGSI_WRITEMASK_XY ? TGSI_WRITEMASK_X : 0) |
+ (write_mask & TGSI_WRITEMASK_ZW ? TGSI_WRITEMASK_Z : 0);
+ }
break;
- case TGSI_OPCODE_DP3:
- read_mask = TGSI_WRITEMASK_XYZ;
+ case TGSI_OPCODE_READ_INVOC:
+ if (src_idx == 0)
+ read_mask = write_mask;
+ else
+ read_mask = TGSI_WRITEMASK_X;
break;
- case TGSI_OPCODE_DP4:
- read_mask = TGSI_WRITEMASK_XYZW;
- break;
-
- case TGSI_OPCODE_DPH:
- read_mask = src_idx == 0 ? TGSI_WRITEMASK_XYZ : TGSI_WRITEMASK_XYZW;
+ case TGSI_OPCODE_FBFETCH:
+ read_mask = 0; /* not a real register read */
break;
case TGSI_OPCODE_TEX:
- case TGSI_OPCODE_TXD:
+ case TGSI_OPCODE_TEX_LZ:
+ case TGSI_OPCODE_TXF_LZ:
+ case TGSI_OPCODE_TXF:
case TGSI_OPCODE_TXB:
case TGSI_OPCODE_TXL:
case TGSI_OPCODE_TXP:
- if (src_idx == 0) {
- /* Note that the SHADOW variants use the Z component too */
- switch (inst->Texture.Texture) {
- case TGSI_TEXTURE_1D:
- read_mask = TGSI_WRITEMASK_X;
- break;
- case TGSI_TEXTURE_SHADOW1D:
- read_mask = TGSI_WRITEMASK_XZ;
- break;
- case TGSI_TEXTURE_1D_ARRAY:
- case TGSI_TEXTURE_2D:
- case TGSI_TEXTURE_RECT:
- read_mask = TGSI_WRITEMASK_XY;
- break;
- case TGSI_TEXTURE_SHADOW1D_ARRAY:
- case TGSI_TEXTURE_SHADOW2D:
- case TGSI_TEXTURE_SHADOWRECT:
- case TGSI_TEXTURE_2D_ARRAY:
- case TGSI_TEXTURE_3D:
- case TGSI_TEXTURE_CUBE:
- case TGSI_TEXTURE_2D_MSAA:
- read_mask = TGSI_WRITEMASK_XYZ;
- break;
- case TGSI_TEXTURE_SHADOW2D_ARRAY:
- case TGSI_TEXTURE_CUBE_ARRAY:
- case TGSI_TEXTURE_SHADOWCUBE:
- case TGSI_TEXTURE_2D_ARRAY_MSAA:
- case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
- read_mask = TGSI_WRITEMASK_XYZW;
- break;
- default:
- assert(0);
- read_mask = 0;
- }
+ case TGSI_OPCODE_TXD:
+ case TGSI_OPCODE_TEX2:
+ case TGSI_OPCODE_TXB2:
+ case TGSI_OPCODE_TXL2:
+ case TGSI_OPCODE_LODQ:
+ case TGSI_OPCODE_TG4: {
+ unsigned dim_layer =
+ tgsi_util_get_texture_coord_dim(inst->Texture.Texture);
+ unsigned dim_layer_shadow, dim;
+
+ /* Add shadow. */
+ if (tgsi_is_shadow_target(inst->Texture.Texture)) {
+ dim_layer_shadow = dim_layer + 1;
+ if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D)
+ dim_layer_shadow = 3;
+ } else {
+ dim_layer_shadow = dim_layer;
+ }
- if (inst->Instruction.Opcode != TGSI_OPCODE_TEX) {
+ /* Remove layer. */
+ if (tgsi_is_array_sampler(inst->Texture.Texture))
+ dim = dim_layer - 1;
+ else
+ dim = dim_layer;
+
+ read_mask = TGSI_WRITEMASK_XY; /* bindless handle in the last operand */
+
+ switch (src_idx) {
+ case 0:
+ if (inst->Instruction.Opcode == TGSI_OPCODE_LODQ)
+ read_mask = u_bit_consecutive(0, dim);
+ else
+ read_mask = u_bit_consecutive(0, dim_layer_shadow) & 0xf;
+
+ if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D)
+ read_mask &= ~TGSI_WRITEMASK_Y;
+
+ if (inst->Instruction.Opcode == TGSI_OPCODE_TXF ||
+ inst->Instruction.Opcode == TGSI_OPCODE_TXB ||
+ inst->Instruction.Opcode == TGSI_OPCODE_TXL ||
+ inst->Instruction.Opcode == TGSI_OPCODE_TXP)
read_mask |= TGSI_WRITEMASK_W;
- }
+ break;
+
+ case 1:
+ if (inst->Instruction.Opcode == TGSI_OPCODE_TXD)
+ read_mask = u_bit_consecutive(0, dim);
+ else if (inst->Instruction.Opcode == TGSI_OPCODE_TEX2 ||
+ inst->Instruction.Opcode == TGSI_OPCODE_TXB2 ||
+ inst->Instruction.Opcode == TGSI_OPCODE_TXL2 ||
+ inst->Instruction.Opcode == TGSI_OPCODE_TG4)
+ read_mask = TGSI_WRITEMASK_X;
+ break;
+
+ case 2:
+ if (inst->Instruction.Opcode == TGSI_OPCODE_TXD)
+ read_mask = u_bit_consecutive(0, dim);
+ break;
+ }
+ break;
+ }
+
+ case TGSI_OPCODE_LOAD:
+ if (src_idx == 0) {
+ read_mask = TGSI_WRITEMASK_XY; /* bindless handle possible */
+ } else {
+ unsigned dim = tgsi_util_get_texture_coord_dim(inst->Memory.Texture);
+ read_mask = u_bit_consecutive(0, dim);
+ }
+ break;
+
+ case TGSI_OPCODE_STORE:
+ if (src_idx == 0) {
+ unsigned dim = tgsi_util_get_texture_coord_dim(inst->Memory.Texture);
+ read_mask = u_bit_consecutive(0, dim);
} else {
- /* A safe approximation */
read_mask = TGSI_WRITEMASK_XYZW;
}
break;
+ case TGSI_OPCODE_ATOMUADD:
+ case TGSI_OPCODE_ATOMXCHG:
+ case TGSI_OPCODE_ATOMCAS:
+ case TGSI_OPCODE_ATOMAND:
+ case TGSI_OPCODE_ATOMOR:
+ case TGSI_OPCODE_ATOMXOR:
+ case TGSI_OPCODE_ATOMUMIN:
+ case TGSI_OPCODE_ATOMUMAX:
+ case TGSI_OPCODE_ATOMIMIN:
+ case TGSI_OPCODE_ATOMIMAX:
+ if (src_idx == 0) {
+ read_mask = TGSI_WRITEMASK_XY; /* bindless handle possible */
+ } else if (src_idx == 1) {
+ unsigned dim = tgsi_util_get_texture_coord_dim(inst->Memory.Texture);
+ read_mask = u_bit_consecutive(0, dim);
+ } else {
+ read_mask = TGSI_WRITEMASK_XYZW;
+ }
+ break;
+
+ case TGSI_OPCODE_INTERP_CENTROID:
+ case TGSI_OPCODE_INTERP_SAMPLE:
+ case TGSI_OPCODE_INTERP_OFFSET:
+ if (src_idx == 0)
+ read_mask = write_mask;
+ else if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET)
+ read_mask = TGSI_WRITEMASK_XY; /* offset */
+ else
+ read_mask = TGSI_WRITEMASK_X; /* sample */
+ break;
+
default:
- /* Assume all channels are read */
- read_mask = TGSI_WRITEMASK_XYZW;
+ if (tgsi_get_opcode_info(inst->Instruction.Opcode)->output_mode ==
+ TGSI_OUTPUT_COMPONENTWISE)
+ read_mask = write_mask;
+ else
+ read_mask = TGSI_WRITEMASK_XYZW; /* assume all channels are read */
break;
}
diff -Nru mesa-17.2.4/src/gallium/auxiliary/tgsi/tgsi_util.h mesa-17.3.3/src/gallium/auxiliary/tgsi/tgsi_util.h
--- mesa-17.2.4/src/gallium/auxiliary/tgsi/tgsi_util.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/tgsi/tgsi_util.h 2018-01-18 21:30:28.000000000 +0000
@@ -98,6 +98,18 @@
target == TGSI_TEXTURE_2D_ARRAY_MSAA);
}
+static inline bool
+tgsi_is_array_sampler(unsigned target)
+{ /* True when 'target' is one of the *_ARRAY texture targets enumerated below (1D/2D/cube, shadow and MSAA variants). */
+ return target == TGSI_TEXTURE_1D_ARRAY ||
+ target == TGSI_TEXTURE_SHADOW1D_ARRAY ||
+ target == TGSI_TEXTURE_2D_ARRAY ||
+ target == TGSI_TEXTURE_SHADOW2D_ARRAY ||
+ target == TGSI_TEXTURE_CUBE_ARRAY ||
+ target == TGSI_TEXTURE_SHADOWCUBE_ARRAY ||
+ target == TGSI_TEXTURE_2D_ARRAY_MSAA;
+}
+
#if defined __cplusplus
}
#endif
diff -Nru mesa-17.2.4/src/gallium/auxiliary/util/u_blitter.c mesa-17.3.3/src/gallium/auxiliary/util/u_blitter.c
--- mesa-17.2.4/src/gallium/auxiliary/util/u_blitter.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/util/u_blitter.c 2018-01-18 21:30:28.000000000 +0000
@@ -68,7 +68,9 @@
/* Constant state objects. */
/* Vertex shaders. */
void *vs; /**< Vertex shader which passes {pos, generic} to the output.*/
- void *vs_pos_only[4]; /**< Vertex shader which passes pos to the output.*/
+ void *vs_nogeneric;
+ void *vs_pos_only[4]; /**< Vertex shader which passes pos to the output
+ for clear_buffer/copy_buffer.*/
void *vs_layered; /**< Vertex shader which sets LAYER = INSTANCEID. */
/* Fragment shaders. */
@@ -123,23 +125,20 @@
/* Rasterizer state. */
void *rs_state, *rs_state_scissor, *rs_discard_state;
- /* Viewport state. */
- struct pipe_viewport_state viewport;
-
/* Destination surface dimensions. */
unsigned dst_width;
unsigned dst_height;
- boolean has_geometry_shader;
- boolean has_tessellation;
- boolean has_layered;
- boolean has_stream_out;
- boolean has_stencil_export;
- boolean has_texture_multisample;
- boolean has_tex_lz;
- boolean has_txf;
- boolean cube_as_2darray;
- boolean cached_all_shaders;
+ bool has_geometry_shader;
+ bool has_tessellation;
+ bool has_layered;
+ bool has_stream_out;
+ bool has_stencil_export;
+ bool has_texture_multisample;
+ bool has_tex_lz;
+ bool has_txf;
+ bool cube_as_2darray;
+ bool cached_all_shaders;
/* The Draw module overrides these functions.
* Always create the blitter before Draw. */
@@ -147,10 +146,6 @@
void (*delete_fs_state)(struct pipe_context *, void *);
};
-static struct pipe_surface *
-util_blitter_get_next_surface_layer(struct pipe_context *pipe,
- struct pipe_surface *surf);
-
struct blitter_context *util_blitter_create(struct pipe_context *pipe)
{
struct blitter_context_priv *ctx;
@@ -167,7 +162,6 @@
ctx->base.pipe = pipe;
ctx->base.draw_rectangle = util_blitter_draw_rectangle;
- ctx->base.get_next_surface_layer = util_blitter_get_next_surface_layer;
ctx->bind_fs_state = pipe->bind_fs_state;
ctx->delete_fs_state = pipe->delete_fs_state;
@@ -349,15 +343,16 @@
ctx->vs_pos_only[index] =
util_make_vertex_passthrough_shader_with_so(pipe, 1, semantic_names,
- semantic_indices, FALSE,
- &so);
+ semantic_indices, false,
+ false, &so);
}
pipe->bind_vs_state(pipe, ctx->vs_pos_only[index]);
}
-static void bind_vs_passthrough(struct blitter_context_priv *ctx)
+static void *get_vs_passthrough_pos_generic(struct blitter_context *blitter)
{
+ struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
struct pipe_context *pipe = ctx->base.pipe;
if (!ctx->vs) {
@@ -366,21 +361,37 @@
const uint semantic_indices[] = { 0, 0 };
ctx->vs =
util_make_vertex_passthrough_shader(pipe, 2, semantic_names,
- semantic_indices, FALSE);
+ semantic_indices, false);
}
+ return ctx->vs;
+}
+
+static void *get_vs_passthrough_pos(struct blitter_context *blitter)
+{ /* Returns the position-only passthrough vertex shader; the caller is responsible for binding it. */
+ struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
+ struct pipe_context *pipe = ctx->base.pipe;
+
+ if (!ctx->vs_nogeneric) { /* created on first use, then cached in ctx */
+ const uint semantic_names[] = { TGSI_SEMANTIC_POSITION };
+ const uint semantic_indices[] = { 0 };
- pipe->bind_vs_state(pipe, ctx->vs);
+ ctx->vs_nogeneric =
+ util_make_vertex_passthrough_shader(pipe, 1,
+ semantic_names,
+ semantic_indices, false);
+ }
+ return ctx->vs_nogeneric;
}
-static void bind_vs_layered(struct blitter_context_priv *ctx)
+static void *get_vs_layered(struct blitter_context *blitter)
{
+ struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
struct pipe_context *pipe = ctx->base.pipe;
if (!ctx->vs_layered) {
ctx->vs_layered = util_make_layered_clear_vertex_shader(pipe);
}
-
- pipe->bind_vs_state(pipe, ctx->vs_layered);
+ return ctx->vs_layered;
}
static void bind_fs_empty(struct blitter_context_priv *ctx)
@@ -403,7 +414,7 @@
assert(!ctx->cached_all_shaders);
ctx->fs_write_one_cbuf =
util_make_fragment_passthrough_shader(pipe, TGSI_SEMANTIC_GENERIC,
- TGSI_INTERPOLATE_CONSTANT, FALSE);
+ TGSI_INTERPOLATE_CONSTANT, false);
}
ctx->bind_fs_state(pipe, ctx->fs_write_one_cbuf);
@@ -417,7 +428,7 @@
assert(!ctx->cached_all_shaders);
ctx->fs_write_all_cbufs =
util_make_fragment_passthrough_shader(pipe, TGSI_SEMANTIC_GENERIC,
- TGSI_INTERPOLATE_CONSTANT, TRUE);
+ TGSI_INTERPOLATE_CONSTANT, true);
}
ctx->bind_fs_state(pipe, ctx->fs_write_all_cbufs);
@@ -449,6 +460,8 @@
pipe->delete_rasterizer_state(pipe, ctx->rs_discard_state);
if (ctx->vs)
pipe->delete_vs_state(pipe, ctx->vs);
+ if (ctx->vs_nogeneric)
+ pipe->delete_vs_state(pipe, ctx->vs_nogeneric);
for (i = 0; i < 4; i++)
if (ctx->vs_pos_only[i])
pipe->delete_vs_state(pipe, ctx->vs_pos_only[i]);
@@ -508,7 +521,7 @@
}
void util_blitter_set_texture_multisample(struct blitter_context *blitter,
- boolean supported)
+ bool supported)
{
struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
@@ -521,7 +534,7 @@
_debug_printf("u_blitter:%i: Caught recursion. This is a driver bug.\n",
__LINE__);
}
- blitter->running = TRUE;
+ blitter->running = true;
blitter->pipe->set_active_query_state(blitter->pipe, false);
}
@@ -532,14 +545,13 @@
_debug_printf("u_blitter:%i: Caught recursion. This is a driver bug.\n",
__LINE__);
}
- blitter->running = FALSE;
+ blitter->running = false;
blitter->pipe->set_active_query_state(blitter->pipe, true);
}
static void blitter_check_saved_vertex_states(struct blitter_context_priv *ctx)
{
- assert(ctx->base.saved_velem_state != INVALID_PTR);
assert(ctx->base.saved_vs != INVALID_PTR);
assert(!ctx->has_geometry_shader || ctx->base.saved_gs != INVALID_PTR);
assert(!ctx->has_tessellation || ctx->base.saved_tcs != INVALID_PTR);
@@ -555,13 +567,17 @@
unsigned i;
/* Vertex buffer. */
- pipe->set_vertex_buffers(pipe, ctx->base.vb_slot, 1,
- &ctx->base.saved_vertex_buffer);
- pipe_vertex_buffer_unreference(&ctx->base.saved_vertex_buffer);
+ if (ctx->base.saved_vertex_buffer.buffer.resource) {
+ pipe->set_vertex_buffers(pipe, ctx->base.vb_slot, 1,
+ &ctx->base.saved_vertex_buffer);
+ pipe_vertex_buffer_unreference(&ctx->base.saved_vertex_buffer);
+ }
/* Vertex elements. */
- pipe->bind_vertex_elements_state(pipe, ctx->base.saved_velem_state);
- ctx->base.saved_velem_state = INVALID_PTR;
+ if (ctx->base.saved_velem_state != INVALID_PTR) {
+ pipe->bind_vertex_elements_state(pipe, ctx->base.saved_velem_state);
+ ctx->base.saved_velem_state = INVALID_PTR;
+ }
/* Vertex shader. */
pipe->bind_vs_state(pipe, ctx->base.saved_vs);
@@ -627,14 +643,16 @@
/* Sample mask. */
if (ctx->base.is_sample_mask_saved) {
pipe->set_sample_mask(pipe, ctx->base.saved_sample_mask);
- ctx->base.is_sample_mask_saved = FALSE;
+ ctx->base.is_sample_mask_saved = false;
}
/* Miscellaneous states. */
/* XXX check whether these are saved and whether they need to be restored
* (depending on the operation) */
pipe->set_stencil_ref(pipe, &ctx->base.saved_stencil_ref);
- pipe->set_viewport_states(pipe, 0, 1, &ctx->base.saved_viewport);
+
+ if (!blitter->skip_viewport_restore)
+ pipe->set_viewport_states(pipe, 0, 1, &ctx->base.saved_viewport);
}
static void blitter_check_saved_fb_state(struct blitter_context_priv *ctx)
@@ -647,7 +665,7 @@
struct pipe_context *pipe = ctx->base.pipe;
if (ctx->base.saved_render_cond_query) {
- pipe->render_condition(pipe, NULL, FALSE, 0);
+ pipe->render_condition(pipe, NULL, false, 0);
}
}
@@ -735,102 +753,55 @@
ctx->vertices[i][0][2] = depth; /*z*/
/* viewport */
- ctx->viewport.scale[0] = 0.5f * ctx->dst_width;
- ctx->viewport.scale[1] = 0.5f * ctx->dst_height;
- ctx->viewport.scale[2] = 1.0f;
- ctx->viewport.translate[0] = 0.5f * ctx->dst_width;
- ctx->viewport.translate[1] = 0.5f * ctx->dst_height;
- ctx->viewport.translate[2] = 0.0f;
- ctx->base.pipe->set_viewport_states(ctx->base.pipe, 0, 1, &ctx->viewport);
+ struct pipe_viewport_state viewport;
+ viewport.scale[0] = 0.5f * ctx->dst_width;
+ viewport.scale[1] = 0.5f * ctx->dst_height;
+ viewport.scale[2] = 1.0f;
+ viewport.translate[0] = 0.5f * ctx->dst_width;
+ viewport.translate[1] = 0.5f * ctx->dst_height;
+ viewport.translate[2] = 0.0f;
+ ctx->base.pipe->set_viewport_states(ctx->base.pipe, 0, 1, &viewport);
}
static void blitter_set_clear_color(struct blitter_context_priv *ctx,
- const union pipe_color_union *color)
+ const float color[4])
{
int i;
if (color) {
- for (i = 0; i < 4; i++) {
- uint32_t *uiverts = (uint32_t *)ctx->vertices[i][1];
- uiverts[0] = color->ui[0];
- uiverts[1] = color->ui[1];
- uiverts[2] = color->ui[2];
- uiverts[3] = color->ui[3];
- }
+ for (i = 0; i < 4; i++)
+ memcpy(&ctx->vertices[i][1][0], color, sizeof(uint32_t) * 4);
} else {
- for (i = 0; i < 4; i++) {
- ctx->vertices[i][1][0] = 0;
- ctx->vertices[i][1][1] = 0;
- ctx->vertices[i][1][2] = 0;
- ctx->vertices[i][1][3] = 0;
- }
+ for (i = 0; i < 4; i++)
+ memset(&ctx->vertices[i][1][0], 0, sizeof(uint32_t) * 4);
}
}
static void get_texcoords(struct pipe_sampler_view *src,
unsigned src_width0, unsigned src_height0,
- int x1, int y1, int x2, int y2, bool uses_txf,
- float out[4])
+ int x1, int y1, int x2, int y2,
+ float layer, unsigned sample,
+ bool uses_txf, union blitter_attrib *out)
{
unsigned level = src->u.tex.first_level;
- boolean normalized = !uses_txf &&
+ bool normalized = !uses_txf &&
src->target != PIPE_TEXTURE_RECT &&
src->texture->nr_samples <= 1;
if (normalized) {
- out[0] = x1 / (float)u_minify(src_width0, level);
- out[1] = y1 / (float)u_minify(src_height0, level);
- out[2] = x2 / (float)u_minify(src_width0, level);
- out[3] = y2 / (float)u_minify(src_height0, level);
+ out->texcoord.x1 = x1 / (float)u_minify(src_width0, level);
+ out->texcoord.y1 = y1 / (float)u_minify(src_height0, level);
+ out->texcoord.x2 = x2 / (float)u_minify(src_width0, level);
+ out->texcoord.y2 = y2 / (float)u_minify(src_height0, level);
} else {
- out[0] = (float) x1;
- out[1] = (float) y1;
- out[2] = (float) x2;
- out[3] = (float) y2;
+ out->texcoord.x1 = x1;
+ out->texcoord.y1 = y1;
+ out->texcoord.x2 = x2;
+ out->texcoord.y2 = y2;
}
-}
-
-static void set_texcoords_in_vertices(const float coord[4],
- float *out, unsigned stride)
-{
- out[0] = coord[0]; /*t0.s*/
- out[1] = coord[1]; /*t0.t*/
- out += stride;
- out[0] = coord[2]; /*t1.s*/
- out[1] = coord[1]; /*t1.t*/
- out += stride;
- out[0] = coord[2]; /*t2.s*/
- out[1] = coord[3]; /*t2.t*/
- out += stride;
- out[0] = coord[0]; /*t3.s*/
- out[1] = coord[3]; /*t3.t*/
-}
-static void blitter_set_texcoords(struct blitter_context_priv *ctx,
- struct pipe_sampler_view *src,
- unsigned src_width0, unsigned src_height0,
- float layer, unsigned sample,
- int x1, int y1, int x2, int y2,
- bool uses_txf)
-{
- unsigned i;
- float coord[4];
- float face_coord[4][2];
-
- get_texcoords(src, src_width0, src_height0, x1, y1, x2, y2, uses_txf,
- coord);
-
- if (src->target == PIPE_TEXTURE_CUBE ||
- src->target == PIPE_TEXTURE_CUBE_ARRAY) {
- set_texcoords_in_vertices(coord, &face_coord[0][0], 2);
- util_map_texcoords2d_onto_cubemap((unsigned)layer % 6,
- /* pointer, stride in floats */
- &face_coord[0][0], 2,
- &ctx->vertices[0][1][0], 8,
- FALSE);
- } else {
- set_texcoords_in_vertices(coord, &ctx->vertices[0][1][0], 8);
- }
+ out->texcoord.z = 0;
+ out->texcoord.w = 0;
/* Set the layer. */
switch (src->target) {
@@ -841,32 +812,25 @@
if (!uses_txf)
r /= u_minify(src->texture->depth0, src->u.tex.first_level);
- for (i = 0; i < 4; i++)
- ctx->vertices[i][1][2] = r; /*r*/
+ out->texcoord.z = r;
}
break;
case PIPE_TEXTURE_1D_ARRAY:
- for (i = 0; i < 4; i++)
- ctx->vertices[i][1][1] = (float) layer; /*t*/
+ out->texcoord.y1 = out->texcoord.y2 = layer;
break;
case PIPE_TEXTURE_2D_ARRAY:
- for (i = 0; i < 4; i++) {
- ctx->vertices[i][1][2] = (float) layer; /*r*/
- ctx->vertices[i][1][3] = (float) sample; /*q*/
- }
+ out->texcoord.z = layer;
+ out->texcoord.w = sample;
break;
case PIPE_TEXTURE_CUBE_ARRAY:
- for (i = 0; i < 4; i++)
- ctx->vertices[i][1][3] = (float) ((unsigned)layer / 6); /*w*/
+ out->texcoord.w = (unsigned)layer / 6;
break;
case PIPE_TEXTURE_2D:
- for (i = 0; i < 4; i++) {
- ctx->vertices[i][1][3] = (float) sample; /*r*/
- }
+ out->texcoord.w = sample;
break;
default:;
@@ -880,6 +844,22 @@
ctx->dst_height = height;
}
+static void set_texcoords_in_vertices(const union blitter_attrib *attrib,
+ float *out, unsigned stride)
+{
+ out[0] = attrib->texcoord.x1;
+ out[1] = attrib->texcoord.y1;
+ out += stride;
+ out[0] = attrib->texcoord.x2;
+ out[1] = attrib->texcoord.y1;
+ out += stride;
+ out[0] = attrib->texcoord.x2;
+ out[1] = attrib->texcoord.y2;
+ out += stride;
+ out[0] = attrib->texcoord.x1;
+ out[1] = attrib->texcoord.y2;
+}
+
static void *blitter_get_fs_texfetch_col(struct blitter_context_priv *ctx,
enum pipe_format src_format,
enum pipe_format dst_format,
@@ -890,7 +870,8 @@
bool use_txf)
{
struct pipe_context *pipe = ctx->base.pipe;
- unsigned tgsi_tex = util_pipe_tex_to_tgsi_tex(target, src_nr_samples);
+ enum tgsi_texture_type tgsi_tex =
+ util_pipe_tex_to_tgsi_tex(target, src_nr_samples);
enum tgsi_return_type stype;
enum tgsi_return_type dtype;
unsigned type;
@@ -1004,7 +985,7 @@
/* Create the fragment shader on-demand. */
if (!*shader) {
- unsigned tgsi_tex;
+ enum tgsi_texture_type tgsi_tex;
assert(!ctx->cached_all_shaders);
tgsi_tex = util_pipe_tex_to_tgsi_tex(target, nr_samples);
*shader = util_make_fs_blit_msaa_depth(pipe, tgsi_tex);
@@ -1021,7 +1002,7 @@
/* Create the fragment shader on-demand. */
if (!*shader) {
- unsigned tgsi_tex;
+ enum tgsi_texture_type tgsi_tex;
assert(!ctx->cached_all_shaders);
tgsi_tex = util_pipe_tex_to_tgsi_tex(target, 0);
*shader =
@@ -1049,7 +1030,7 @@
/* Create the fragment shader on-demand. */
if (!*shader) {
- unsigned tgsi_tex;
+ enum tgsi_texture_type tgsi_tex;
assert(!ctx->cached_all_shaders);
tgsi_tex = util_pipe_tex_to_tgsi_tex(target, nr_samples);
*shader = util_make_fs_blit_msaa_depthstencil(pipe, tgsi_tex);
@@ -1066,7 +1047,7 @@
/* Create the fragment shader on-demand. */
if (!*shader) {
- unsigned tgsi_tex;
+ enum tgsi_texture_type tgsi_tex;
assert(!ctx->cached_all_shaders);
tgsi_tex = util_pipe_tex_to_tgsi_tex(target, 0);
*shader =
@@ -1095,7 +1076,7 @@
/* Create the fragment shader on-demand. */
if (!*shader) {
- unsigned tgsi_tex;
+ enum tgsi_texture_type tgsi_tex;
assert(!ctx->cached_all_shaders);
tgsi_tex = util_pipe_tex_to_tgsi_tex(target, nr_samples);
*shader = util_make_fs_blit_msaa_stencil(pipe, tgsi_tex);
@@ -1112,7 +1093,7 @@
/* Create the fragment shader on-demand. */
if (!*shader) {
- unsigned tgsi_tex;
+ enum tgsi_texture_type tgsi_tex;
assert(!ctx->cached_all_shaders);
tgsi_tex = util_pipe_tex_to_tgsi_tex(target, 0);
*shader =
@@ -1138,7 +1119,7 @@
struct pipe_context *pipe = blitter->pipe;
struct pipe_screen *screen = pipe->screen;
unsigned samples, j, f, target, max_samples, use_txf;
- boolean has_arraytex, has_cubearraytex;
+ bool has_arraytex, has_cubearraytex;
max_samples = ctx->has_texture_multisample ? 2 : 1;
has_arraytex = screen->get_param(screen,
@@ -1225,28 +1206,22 @@
ctx->fs_write_one_cbuf =
util_make_fragment_passthrough_shader(pipe, TGSI_SEMANTIC_GENERIC,
- TGSI_INTERPOLATE_CONSTANT, FALSE);
+ TGSI_INTERPOLATE_CONSTANT, false);
ctx->fs_write_all_cbufs =
util_make_fragment_passthrough_shader(pipe, TGSI_SEMANTIC_GENERIC,
- TGSI_INTERPOLATE_CONSTANT, TRUE);
+ TGSI_INTERPOLATE_CONSTANT, true);
- ctx->cached_all_shaders = TRUE;
+ ctx->cached_all_shaders = true;
}
static void blitter_set_common_draw_rect_state(struct blitter_context_priv *ctx,
- boolean scissor,
- boolean vs_layered)
+ bool scissor)
{
struct pipe_context *pipe = ctx->base.pipe;
pipe->bind_rasterizer_state(pipe, scissor ? ctx->rs_state_scissor
: ctx->rs_state);
- if (vs_layered)
- bind_vs_layered(ctx);
- else
- bind_vs_passthrough(ctx);
-
if (ctx->has_geometry_shader)
pipe->bind_gs_state(pipe, NULL);
if (ctx->has_tessellation) {
@@ -1258,6 +1233,8 @@
}
static void blitter_draw(struct blitter_context_priv *ctx,
+ void *vertex_elements_cso,
+ blitter_get_vs_func get_vs,
int x1, int y1, int x2, int y2, float depth,
unsigned num_instances)
{
@@ -1275,31 +1252,44 @@
u_upload_unmap(pipe->stream_uploader);
pipe->set_vertex_buffers(pipe, ctx->base.vb_slot, 1, &vb);
+ pipe->bind_vertex_elements_state(pipe, vertex_elements_cso);
+ pipe->bind_vs_state(pipe, get_vs(&ctx->base));
util_draw_arrays_instanced(pipe, PIPE_PRIM_TRIANGLE_FAN, 0, 4,
0, num_instances);
pipe_resource_reference(&vb.buffer.resource, NULL);
}
void util_blitter_draw_rectangle(struct blitter_context *blitter,
- int x1, int y1, int x2, int y2, float depth,
+ void *vertex_elements_cso,
+ blitter_get_vs_func get_vs,
+ int x1, int y1, int x2, int y2,
+ float depth, unsigned num_instances,
enum blitter_attrib_type type,
- const union pipe_color_union *attrib)
+ const union blitter_attrib *attrib)
{
struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
+ unsigned i;
switch (type) {
case UTIL_BLITTER_ATTRIB_COLOR:
- blitter_set_clear_color(ctx, attrib);
+ blitter_set_clear_color(ctx, attrib->color);
break;
- case UTIL_BLITTER_ATTRIB_TEXCOORD:
- set_texcoords_in_vertices(attrib->f, &ctx->vertices[0][1][0], 8);
+ case UTIL_BLITTER_ATTRIB_TEXCOORD_XYZW:
+ for (i = 0; i < 4; i++) {
+ ctx->vertices[i][1][2] = attrib->texcoord.z;
+ ctx->vertices[i][1][3] = attrib->texcoord.w;
+ }
+ /* fall through */
+ case UTIL_BLITTER_ATTRIB_TEXCOORD_XY:
+ set_texcoords_in_vertices(attrib, &ctx->vertices[0][1][0], 8);
break;
default:;
}
- blitter_draw(ctx, x1, y1, x2, y2, depth, 1);
+ blitter_draw(ctx, vertex_elements_cso, get_vs, x1, y1, x2, y2, depth,
+ num_instances);
}
static void *get_clear_blend_state(struct blitter_context_priv *ctx,
@@ -1393,18 +1383,34 @@
sr.ref_value[0] = stencil & 0xff;
pipe->set_stencil_ref(pipe, &sr);
- pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
bind_fs_write_all_cbufs(ctx);
+ union blitter_attrib attrib;
+ memcpy(attrib.color, color->ui, sizeof(color->ui));
+
+ bool pass_generic = (clear_buffers & PIPE_CLEAR_COLOR) != 0;
+ enum blitter_attrib_type type = pass_generic ? UTIL_BLITTER_ATTRIB_COLOR :
+ UTIL_BLITTER_ATTRIB_NONE;
+
if (num_layers > 1 && ctx->has_layered) {
- blitter_set_common_draw_rect_state(ctx, FALSE, TRUE);
- blitter_set_clear_color(ctx, color);
- blitter_draw(ctx, 0, 0, width, height, depth, num_layers);
- }
- else {
- blitter_set_common_draw_rect_state(ctx, FALSE, FALSE);
- blitter->draw_rectangle(blitter, 0, 0, width, height, (float) depth,
- UTIL_BLITTER_ATTRIB_COLOR, color);
+ blitter_get_vs_func get_vs = get_vs_layered;
+
+ blitter_set_common_draw_rect_state(ctx, false);
+ blitter->draw_rectangle(blitter, ctx->velem_state, get_vs,
+ 0, 0, width, height,
+ (float) depth, num_layers, type, &attrib);
+ } else {
+ blitter_get_vs_func get_vs;
+
+ if (pass_generic)
+ get_vs = get_vs_passthrough_pos_generic;
+ else
+ get_vs = get_vs_passthrough_pos;
+
+ blitter_set_common_draw_rect_state(ctx, false);
+ blitter->draw_rectangle(blitter, ctx->velem_state, get_vs,
+ 0, 0, width, height,
+ (float) depth, 1, type, &attrib);
}
util_blitter_restore_vertex_states(blitter);
@@ -1489,12 +1495,12 @@
src_templ->swizzle_a = PIPE_SWIZZLE_W;
}
-static boolean is_blit_generic_supported(struct blitter_context *blitter,
- const struct pipe_resource *dst,
- enum pipe_format dst_format,
- const struct pipe_resource *src,
- enum pipe_format src_format,
- unsigned mask)
+static bool is_blit_generic_supported(struct blitter_context *blitter,
+ const struct pipe_resource *dst,
+ enum pipe_format dst_format,
+ const struct pipe_resource *src,
+ enum pipe_format src_format,
+ unsigned mask)
{
struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
struct pipe_screen *screen = ctx->base.pipe->screen;
@@ -1503,12 +1509,12 @@
unsigned bind;
const struct util_format_description *desc =
util_format_description(dst_format);
- boolean dst_has_stencil = util_format_has_stencil(desc);
+ bool dst_has_stencil = util_format_has_stencil(desc);
/* Stencil export must be supported for stencil copy. */
if ((mask & PIPE_MASK_S) && dst_has_stencil &&
!ctx->has_stencil_export) {
- return FALSE;
+ return false;
}
if (dst_has_stencil || util_format_has_depth(desc))
@@ -1518,18 +1524,18 @@
if (!screen->is_format_supported(screen, dst_format, dst->target,
dst->nr_samples, bind)) {
- return FALSE;
+ return false;
}
}
if (src) {
if (src->nr_samples > 1 && !ctx->has_texture_multisample) {
- return FALSE;
+ return false;
}
if (!screen->is_format_supported(screen, src_format, src->target,
src->nr_samples, PIPE_BIND_SAMPLER_VIEW)) {
- return FALSE;
+ return false;
}
/* Check stencil sampler support for stencil copy. */
@@ -1543,25 +1549,25 @@
!screen->is_format_supported(screen, stencil_format,
src->target, src->nr_samples,
PIPE_BIND_SAMPLER_VIEW)) {
- return FALSE;
+ return false;
}
}
}
}
- return TRUE;
+ return true;
}
-boolean util_blitter_is_copy_supported(struct blitter_context *blitter,
- const struct pipe_resource *dst,
- const struct pipe_resource *src)
+bool util_blitter_is_copy_supported(struct blitter_context *blitter,
+ const struct pipe_resource *dst,
+ const struct pipe_resource *src)
{
return is_blit_generic_supported(blitter, dst, dst->format,
src, src->format, PIPE_MASK_RGBAZS);
}
-boolean util_blitter_is_blit_supported(struct blitter_context *blitter,
- const struct pipe_blit_info *info)
+bool util_blitter_is_blit_supported(struct blitter_context *blitter,
+ const struct pipe_blit_info *info)
{
return is_blit_generic_supported(blitter,
info->dst.resource, info->dst.format,
@@ -1601,12 +1607,51 @@
util_blitter_blit_generic(blitter, dst_view, &dstbox,
src_view, srcbox, src->width0, src->height0,
PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL,
- FALSE);
+ false);
pipe_surface_reference(&dst_view, NULL);
pipe_sampler_view_reference(&src_view, NULL);
}
+static void
+blitter_draw_tex(struct blitter_context_priv *ctx,
+ int dst_x1, int dst_y1, int dst_x2, int dst_y2,
+ struct pipe_sampler_view *src,
+ unsigned src_width0, unsigned src_height0,
+ int src_x1, int src_y1, int src_x2, int src_y2,
+ float layer, unsigned sample,
+ bool uses_txf, enum blitter_attrib_type type)
+{
+ union blitter_attrib coord;
+ blitter_get_vs_func get_vs = get_vs_passthrough_pos_generic;
+
+ get_texcoords(src, src_width0, src_height0,
+ src_x1, src_y1, src_x2, src_y2, layer, sample,
+ uses_txf, &coord);
+
+ if (src->target == PIPE_TEXTURE_CUBE ||
+ src->target == PIPE_TEXTURE_CUBE_ARRAY) {
+ float face_coord[4][2];
+
+ set_texcoords_in_vertices(&coord, &face_coord[0][0], 2);
+ util_map_texcoords2d_onto_cubemap((unsigned)layer % 6,
+ /* pointer, stride in floats */
+ &face_coord[0][0], 2,
+ &ctx->vertices[0][1][0], 8,
+ false);
+ for (unsigned i = 0; i < 4; i++)
+ ctx->vertices[i][1][3] = coord.texcoord.w;
+
+ /* Cubemaps don't use draw_rectangle. */
+ blitter_draw(ctx, ctx->velem_state, get_vs,
+ dst_x1, dst_y1, dst_x2, dst_y2, 0, 1);
+ } else {
+ ctx->base.draw_rectangle(&ctx->base, ctx->velem_state, get_vs,
+ dst_x1, dst_y1, dst_x2, dst_y2,
+ 0, 1, type, &coord);
+ }
+}
+
static void do_blits(struct blitter_context_priv *ctx,
struct pipe_surface *dst,
const struct pipe_box *dstbox,
@@ -1634,18 +1679,6 @@
src_target == PIPE_TEXTURE_2D ||
src_target == PIPE_TEXTURE_RECT) &&
src_samples <= 1) {
- /* Draw the quad with the draw_rectangle callback. */
-
- /* Set texture coordinates. - use a pipe color union
- * for interface purposes.
- * XXX pipe_color_union is a wrong name since we use that to set
- * texture coordinates too.
- */
- union pipe_color_union coord;
- get_texcoords(src, src_width0, src_height0, srcbox->x, srcbox->y,
- srcbox->x+srcbox->width, srcbox->y+srcbox->height,
- uses_txf, coord.f);
-
/* Set framebuffer state. */
if (is_zsbuf) {
fb_state.zsbuf = dst;
@@ -1656,10 +1689,12 @@
/* Draw. */
pipe->set_sample_mask(pipe, ~0);
- ctx->base.draw_rectangle(&ctx->base, dstbox->x, dstbox->y,
- dstbox->x + dstbox->width,
- dstbox->y + dstbox->height, 0,
- UTIL_BLITTER_ATTRIB_TEXCOORD, &coord);
+ blitter_draw_tex(ctx, dstbox->x, dstbox->y,
+ dstbox->x + dstbox->width,
+ dstbox->y + dstbox->height,
+ src, src_width0, src_height0, srcbox->x, srcbox->y,
+ srcbox->x + srcbox->width, srcbox->y + srcbox->height,
+ 0, 0, uses_txf, UTIL_BLITTER_ATTRIB_TEXCOORD_XY);
} else {
/* Draw the quad with the generic codepath. */
int dst_z;
@@ -1703,33 +1738,35 @@
for (i = 0; i <= max_sample; i++) {
pipe->set_sample_mask(pipe, 1 << i);
- blitter_set_texcoords(ctx, src, src_width0, src_height0,
- srcbox->z + src_z,
- i, srcbox->x, srcbox->y,
- srcbox->x + srcbox->width,
- srcbox->y + srcbox->height, uses_txf);
- blitter_draw(ctx, dstbox->x, dstbox->y,
- dstbox->x + dstbox->width,
- dstbox->y + dstbox->height, 0, 1);
+ blitter_draw_tex(ctx, dstbox->x, dstbox->y,
+ dstbox->x + dstbox->width,
+ dstbox->y + dstbox->height,
+ src, src_width0, src_height0,
+ srcbox->x, srcbox->y,
+ srcbox->x + srcbox->width,
+ srcbox->y + srcbox->height,
+ srcbox->z + src_z, i, uses_txf,
+ UTIL_BLITTER_ATTRIB_TEXCOORD_XYZW);
}
} else {
/* Normal copy, MSAA upsampling, or MSAA resolve. */
pipe->set_sample_mask(pipe, ~0);
- blitter_set_texcoords(ctx, src, src_width0, src_height0,
- srcbox->z + src_z, 0,
- srcbox->x, srcbox->y,
- srcbox->x + srcbox->width,
- srcbox->y + srcbox->height, uses_txf);
- blitter_draw(ctx, dstbox->x, dstbox->y,
- dstbox->x + dstbox->width,
- dstbox->y + dstbox->height, 0, 1);
+ blitter_draw_tex(ctx, dstbox->x, dstbox->y,
+ dstbox->x + dstbox->width,
+ dstbox->y + dstbox->height,
+ src, src_width0, src_height0,
+ srcbox->x, srcbox->y,
+ srcbox->x + srcbox->width,
+ srcbox->y + srcbox->height,
+ srcbox->z + src_z, 0, uses_txf,
+ UTIL_BLITTER_ATTRIB_TEXCOORD_XYZW);
}
/* Get the next surface or (if this is the last iteration)
* just unreference the last one. */
old = dst;
if (dst_z < dstbox->depth-1) {
- dst = ctx->base.get_next_surface_layer(ctx->base.pipe, dst);
+ dst = util_blitter_get_next_surface_layer(ctx->base.pipe, dst);
}
if (dst_z) {
pipe_surface_reference(&old, NULL);
@@ -1746,15 +1783,15 @@
unsigned src_width0, unsigned src_height0,
unsigned mask, unsigned filter,
const struct pipe_scissor_state *scissor,
- boolean alpha_blend)
+ bool alpha_blend)
{
struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
struct pipe_context *pipe = ctx->base.pipe;
enum pipe_texture_target src_target = src->target;
unsigned src_samples = src->texture->nr_samples;
unsigned dst_samples = dst->texture->nr_samples;
- boolean has_depth, has_stencil, has_color;
- boolean blit_stencil, blit_depth, blit_color;
+ bool has_depth, has_stencil, has_color;
+ bool blit_stencil, blit_depth, blit_color;
void *sampler_state;
const struct util_format_description *src_desc =
util_format_description(src->format);
@@ -1917,12 +1954,11 @@
0, 1, &sampler_state);
}
- pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
if (scissor) {
pipe->set_scissor_states(pipe, 0, 1, scissor);
}
- blitter_set_common_draw_rect_state(ctx, scissor != NULL, FALSE);
+ blitter_set_common_draw_rect_state(ctx, scissor != NULL);
do_blits(ctx, dst, dstbox, src, src_width0, src_height0,
srcbox, blit_depth || blit_stencil, use_txf);
@@ -1981,7 +2017,7 @@
struct pipe_context *pipe = ctx->base.pipe;
struct pipe_surface dst_templ, *dst_view;
struct pipe_sampler_view src_templ, *src_view;
- boolean is_depth;
+ bool is_depth;
void *sampler_state;
const struct util_format_description *desc =
util_format_description(format);
@@ -2028,8 +2064,7 @@
pipe->bind_sampler_states(pipe, PIPE_SHADER_FRAGMENT,
0, 1, &sampler_state);
- pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
- blitter_set_common_draw_rect_state(ctx, FALSE, FALSE);
+ blitter_set_common_draw_rect_state(ctx, false);
for (src_level = base_level; src_level < last_level; src_level++) {
struct pipe_box dstbox = {0}, srcbox = {0};
@@ -2104,7 +2139,6 @@
pipe->bind_blend_state(pipe, ctx->blend[PIPE_MASK_RGBA][0]);
pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil);
bind_fs_write_one_cbuf(ctx);
- pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
/* set a framebuffer state */
fb_state.width = dstsurf->width;
@@ -2117,16 +2151,21 @@
blitter_set_dst_dimensions(ctx, dstsurf->width, dstsurf->height);
+ union blitter_attrib attrib;
+ memcpy(attrib.color, color->ui, sizeof(color->ui));
+
num_layers = dstsurf->u.tex.last_layer - dstsurf->u.tex.first_layer + 1;
if (num_layers > 1 && ctx->has_layered) {
- blitter_set_common_draw_rect_state(ctx, FALSE, TRUE);
- blitter_set_clear_color(ctx, color);
- blitter_draw(ctx, dstx, dsty, dstx+width, dsty+height, 0, num_layers);
- }
- else {
- blitter_set_common_draw_rect_state(ctx, FALSE, FALSE);
- blitter->draw_rectangle(blitter, dstx, dsty, dstx+width, dsty+height, 0,
- UTIL_BLITTER_ATTRIB_COLOR, color);
+ blitter_set_common_draw_rect_state(ctx, false);
+ blitter->draw_rectangle(blitter, ctx->velem_state, get_vs_layered,
+ dstx, dsty, dstx+width, dsty+height, 0,
+ num_layers, UTIL_BLITTER_ATTRIB_COLOR, &attrib);
+ } else {
+ blitter_set_common_draw_rect_state(ctx, false);
+ blitter->draw_rectangle(blitter, ctx->velem_state,
+ get_vs_passthrough_pos_generic,
+ dstx, dsty, dstx+width, dsty+height, 0,
+ 1, UTIL_BLITTER_ATTRIB_COLOR, &attrib);
}
util_blitter_restore_vertex_states(blitter);
@@ -2182,7 +2221,6 @@
pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil);
bind_fs_empty(ctx);
- pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
/* set a framebuffer state */
fb_state.width = dstsurf->width;
@@ -2197,13 +2235,15 @@
num_layers = dstsurf->u.tex.last_layer - dstsurf->u.tex.first_layer + 1;
if (num_layers > 1 && ctx->has_layered) {
- blitter_set_common_draw_rect_state(ctx, FALSE, TRUE);
- blitter_draw(ctx, dstx, dsty, dstx+width, dsty+height, (float) depth, num_layers);
- }
- else {
- blitter_set_common_draw_rect_state(ctx, FALSE, FALSE);
- blitter->draw_rectangle(blitter, dstx, dsty, dstx+width, dsty+height,
- (float) depth,
+ blitter_set_common_draw_rect_state(ctx, false);
+ blitter->draw_rectangle(blitter, ctx->velem_state, get_vs_layered,
+ dstx, dsty, dstx+width, dsty+height, depth,
+ num_layers, UTIL_BLITTER_ATTRIB_NONE, NULL);
+ } else {
+ blitter_set_common_draw_rect_state(ctx, false);
+ blitter->draw_rectangle(blitter, ctx->velem_state,
+ get_vs_passthrough_pos,
+ dstx, dsty, dstx+width, dsty+height, depth, 1,
UTIL_BLITTER_ATTRIB_NONE, NULL);
}
@@ -2244,7 +2284,6 @@
bind_fs_write_one_cbuf(ctx);
else
bind_fs_empty(ctx);
- pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
/* set a framebuffer state */
fb_state.width = zsurf->width;
@@ -2261,10 +2300,11 @@
pipe->set_framebuffer_state(pipe, &fb_state);
pipe->set_sample_mask(pipe, sample_mask);
- blitter_set_common_draw_rect_state(ctx, FALSE, FALSE);
+ blitter_set_common_draw_rect_state(ctx, false);
blitter_set_dst_dimensions(ctx, zsurf->width, zsurf->height);
- blitter->draw_rectangle(blitter, 0, 0, zsurf->width, zsurf->height, depth,
- UTIL_BLITTER_ATTRIB_NONE, NULL);
+ blitter->draw_rectangle(blitter, ctx->velem_state, get_vs_passthrough_pos,
+ 0, 0, zsurf->width, zsurf->height, depth,
+ 1, UTIL_BLITTER_ATTRIB_NONE, NULL);
util_blitter_restore_vertex_states(blitter);
util_blitter_restore_fragment_states(blitter);
@@ -2434,7 +2474,6 @@
/* bind states */
pipe->bind_blend_state(pipe, custom_blend);
pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil);
- pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
bind_fs_write_one_cbuf(ctx);
pipe->set_sample_mask(pipe, sample_mask);
@@ -2461,10 +2500,11 @@
fb_state.zsbuf = NULL;
pipe->set_framebuffer_state(pipe, &fb_state);
- blitter_set_common_draw_rect_state(ctx, FALSE, FALSE);
+ blitter_set_common_draw_rect_state(ctx, false);
blitter_set_dst_dimensions(ctx, src->width0, src->height0);
- blitter->draw_rectangle(blitter, 0, 0, src->width0, src->height0,
- 0, 0, NULL);
+ blitter->draw_rectangle(blitter, ctx->velem_state, get_vs_passthrough_pos,
+ 0, 0, src->width0, src->height0,
+ 0, 1, UTIL_BLITTER_ATTRIB_NONE, NULL);
util_blitter_restore_fb_state(blitter);
util_blitter_restore_vertex_states(blitter);
util_blitter_restore_fragment_states(blitter);
@@ -2499,7 +2539,6 @@
: ctx->blend[PIPE_MASK_RGBA][0]);
pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil);
bind_fs_write_one_cbuf(ctx);
- pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
pipe->set_sample_mask(pipe, (1ull << MAX2(1, dstsurf->texture->nr_samples)) - 1);
/* set a framebuffer state */
@@ -2511,10 +2550,11 @@
pipe->set_framebuffer_state(pipe, &fb_state);
pipe->set_sample_mask(pipe, ~0);
- blitter_set_common_draw_rect_state(ctx, FALSE, FALSE);
+ blitter_set_common_draw_rect_state(ctx, false);
blitter_set_dst_dimensions(ctx, dstsurf->width, dstsurf->height);
- blitter->draw_rectangle(blitter, 0, 0, dstsurf->width, dstsurf->height,
- 0, 0, NULL);
+ blitter->draw_rectangle(blitter, ctx->velem_state, get_vs_passthrough_pos,
+ 0, 0, dstsurf->width, dstsurf->height,
+ 0, 1, UTIL_BLITTER_ATTRIB_NONE, NULL);
util_blitter_restore_vertex_states(blitter);
util_blitter_restore_fragment_states(blitter);
diff -Nru mesa-17.2.4/src/gallium/auxiliary/util/u_blitter.h mesa-17.3.3/src/gallium/auxiliary/util/u_blitter.h
--- mesa-17.2.4/src/gallium/auxiliary/util/u_blitter.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/util/u_blitter.h 2018-01-18 21:30:28.000000000 +0000
@@ -41,14 +41,32 @@
enum blitter_attrib_type {
UTIL_BLITTER_ATTRIB_NONE,
UTIL_BLITTER_ATTRIB_COLOR,
- UTIL_BLITTER_ATTRIB_TEXCOORD
+ UTIL_BLITTER_ATTRIB_TEXCOORD_XY,
+ UTIL_BLITTER_ATTRIB_TEXCOORD_XYZW,
};
+union blitter_attrib {
+ float color[4];
+
+ struct {
+ float x1, y1, x2, y2, z, w;
+ } texcoord;
+};
+
+struct blitter_context;
+
+typedef void *(*blitter_get_vs_func)(struct blitter_context *blitter);
+
struct blitter_context
{
/**
* Draw a rectangle.
*
+ * \param get_vs Callback for obtaining the vertex shader for the draw call.
+ * It might invoke the shader compiler. The driver is
+ * responsible for setting the vertex shader, and the callback
+ * allows the driver to query the vertex shader CSO if it
+ * wants to use the default one.
* \param x1 An X coordinate of the top-left corner.
* \param y1 A Y coordinate of the top-left corner.
* \param x2 An X coordinate of the bottom-right corner.
@@ -72,22 +90,15 @@
* a rectangular point sprite.
*/
void (*draw_rectangle)(struct blitter_context *blitter,
+ void *vertex_elements_cso,
+ blitter_get_vs_func get_vs,
int x1, int y1, int x2, int y2,
- float depth,
+ float depth, unsigned num_instances,
enum blitter_attrib_type type,
- const union pipe_color_union *color);
-
- /**
- * Get the next surface layer for the pipe surface, i.e. make a copy
- * of the surface and increment the first and last layer by 1.
- *
- * This callback is exposed, so that drivers can override it if needed.
- */
- struct pipe_surface *(*get_next_surface_layer)(struct pipe_context *pipe,
- struct pipe_surface *surf);
+ const union blitter_attrib *attrib);
/* Whether the blitter is running. */
- boolean running;
+ bool running;
/* Private members, really. */
struct pipe_context *pipe; /**< pipe context */
@@ -102,7 +113,8 @@
struct pipe_stencil_ref saved_stencil_ref; /**< stencil ref */
struct pipe_viewport_state saved_viewport;
struct pipe_scissor_state saved_scissor;
- boolean is_sample_mask_saved;
+ bool skip_viewport_restore;
+ bool is_sample_mask_saved;
unsigned saved_sample_mask;
unsigned saved_num_sampler_states;
@@ -122,7 +134,7 @@
struct pipe_query *saved_render_cond_query;
uint saved_render_cond_mode;
- boolean saved_render_cond_cond;
+ bool saved_render_cond_cond;
};
/**
@@ -150,14 +162,17 @@
* Override PIPE_CAP_TEXTURE_MULTISAMPLE as reported by the driver.
*/
void util_blitter_set_texture_multisample(struct blitter_context *blitter,
- boolean supported);
+ bool supported);
/* The default function to draw a rectangle. This can only be used
* inside of the draw_rectangle callback if the driver overrides it. */
void util_blitter_draw_rectangle(struct blitter_context *blitter,
- int x1, int y1, int x2, int y2, float depth,
+ void *vertex_elements_cso,
+ blitter_get_vs_func get_vs,
+ int x1, int y1, int x2, int y2,
+ float depth, unsigned num_instances,
enum blitter_attrib_type type,
- const union pipe_color_union *attrib);
+ const union blitter_attrib *attrib);
/*
@@ -189,12 +204,12 @@
* Check if the blitter (with the help of the driver) can blit between
* the two resources.
*/
-boolean util_blitter_is_copy_supported(struct blitter_context *blitter,
- const struct pipe_resource *dst,
- const struct pipe_resource *src);
+bool util_blitter_is_copy_supported(struct blitter_context *blitter,
+ const struct pipe_resource *dst,
+ const struct pipe_resource *src);
-boolean util_blitter_is_blit_supported(struct blitter_context *blitter,
- const struct pipe_blit_info *info);
+bool util_blitter_is_blit_supported(struct blitter_context *blitter,
+ const struct pipe_blit_info *info);
/**
* Copy a block of pixels from one surface to another.
@@ -244,7 +259,7 @@
unsigned src_width0, unsigned src_height0,
unsigned mask, unsigned filter,
const struct pipe_scissor_state *scissor,
- boolean alpha_blend);
+ bool alpha_blend);
void util_blitter_blit(struct blitter_context *blitter,
const struct pipe_blit_info *info);
@@ -526,14 +541,14 @@
util_blitter_save_sample_mask(struct blitter_context *blitter,
unsigned sample_mask)
{
- blitter->is_sample_mask_saved = TRUE;
+ blitter->is_sample_mask_saved = true;
blitter->saved_sample_mask = sample_mask;
}
static inline void
util_blitter_save_render_condition(struct blitter_context *blitter,
struct pipe_query *query,
- boolean condition,
+ bool condition,
enum pipe_render_cond_flag mode)
{
blitter->saved_render_cond_query = query;
diff -Nru mesa-17.2.4/src/gallium/auxiliary/util/u_dump_defines.c mesa-17.3.3/src/gallium/auxiliary/util/u_dump_defines.c
--- mesa-17.2.4/src/gallium/auxiliary/util/u_dump_defines.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/util/u_dump_defines.c 2018-01-18 21:30:28.000000000 +0000
@@ -62,36 +62,36 @@
}
-#define DEFINE_UTIL_DUMP_CONTINUOUS(_name) \
+#define DEFINE_UTIL_STR_CONTINUOUS(_name) \
const char * \
- util_dump_##_name(unsigned value, boolean shortened) \
+ util_str_##_name(unsigned value, boolean shortened) \
{ \
if(shortened) \
- return util_dump_enum_continuous(value, ARRAY_SIZE(util_dump_##_name##_short_names), util_dump_##_name##_short_names); \
+ return util_dump_enum_continuous(value, ARRAY_SIZE(util_##_name##_short_names), util_##_name##_short_names); \
else \
- return util_dump_enum_continuous(value, ARRAY_SIZE(util_dump_##_name##_names), util_dump_##_name##_names); \
+ return util_dump_enum_continuous(value, ARRAY_SIZE(util_##_name##_names), util_##_name##_names); \
}
/**
- * Same as DEFINE_UTIL_DUMP_CONTINUOUS but with static assertions to detect
+ * Same as DEFINE_UTIL_STR_CONTINUOUS but with static assertions to detect
* failures to update lists.
*/
-#define DEFINE_UTIL_DUMP_CONTINUOUS_COUNT(_name, _count) \
+#define DEFINE_UTIL_STR_CONTINUOUS_COUNT(_name, _count) \
const char * \
- util_dump_##_name(unsigned value, boolean shortened) \
+ util_str_##_name(unsigned value, boolean shortened) \
{ \
- STATIC_ASSERT(ARRAY_SIZE(util_dump_##_name##_names) == _count); \
- STATIC_ASSERT(ARRAY_SIZE(util_dump_##_name##_short_names) == _count); \
+ STATIC_ASSERT(ARRAY_SIZE(util_##_name##_names) == _count); \
+ STATIC_ASSERT(ARRAY_SIZE(util_##_name##_short_names) == _count); \
if(shortened) \
- return util_dump_enum_continuous(value, ARRAY_SIZE(util_dump_##_name##_short_names), util_dump_##_name##_short_names); \
+ return util_dump_enum_continuous(value, ARRAY_SIZE(util_##_name##_short_names), util_##_name##_short_names); \
else \
- return util_dump_enum_continuous(value, ARRAY_SIZE(util_dump_##_name##_names), util_dump_##_name##_names); \
+ return util_dump_enum_continuous(value, ARRAY_SIZE(util_##_name##_names), util_##_name##_names); \
}
static const char *
-util_dump_blend_factor_names[] = {
+util_blend_factor_names[] = {
UTIL_DUMP_INVALID_NAME, /* 0x0 */
"PIPE_BLENDFACTOR_ONE",
"PIPE_BLENDFACTOR_SRC_COLOR",
@@ -122,7 +122,7 @@
};
static const char *
-util_dump_blend_factor_short_names[] = {
+util_blend_factor_short_names[] = {
UTIL_DUMP_INVALID_NAME, /* 0x0 */
"one",
"src_color",
@@ -152,11 +152,11 @@
"inv_src1_alpha"
};
-DEFINE_UTIL_DUMP_CONTINUOUS(blend_factor)
+DEFINE_UTIL_STR_CONTINUOUS(blend_factor)
static const char *
-util_dump_blend_func_names[] = {
+util_blend_func_names[] = {
"PIPE_BLEND_ADD",
"PIPE_BLEND_SUBTRACT",
"PIPE_BLEND_REVERSE_SUBTRACT",
@@ -165,7 +165,7 @@
};
static const char *
-util_dump_blend_func_short_names[] = {
+util_blend_func_short_names[] = {
"add",
"sub",
"rev_sub",
@@ -173,11 +173,11 @@
"max"
};
-DEFINE_UTIL_DUMP_CONTINUOUS(blend_func)
+DEFINE_UTIL_STR_CONTINUOUS(blend_func)
static const char *
-util_dump_logicop_names[] = {
+util_logicop_names[] = {
"PIPE_LOGICOP_CLEAR",
"PIPE_LOGICOP_NOR",
"PIPE_LOGICOP_AND_INVERTED",
@@ -197,7 +197,7 @@
};
static const char *
-util_dump_logicop_short_names[] = {
+util_logicop_short_names[] = {
"clear",
"nor",
"and_inverted",
@@ -216,11 +216,11 @@
"set"
};
-DEFINE_UTIL_DUMP_CONTINUOUS(logicop)
+DEFINE_UTIL_STR_CONTINUOUS(logicop)
static const char *
-util_dump_func_names[] = {
+util_func_names[] = {
"PIPE_FUNC_NEVER",
"PIPE_FUNC_LESS",
"PIPE_FUNC_EQUAL",
@@ -232,7 +232,7 @@
};
static const char *
-util_dump_func_short_names[] = {
+util_func_short_names[] = {
"never",
"less",
"equal",
@@ -243,11 +243,11 @@
"always"
};
-DEFINE_UTIL_DUMP_CONTINUOUS(func)
+DEFINE_UTIL_STR_CONTINUOUS(func)
static const char *
-util_dump_stencil_op_names[] = {
+util_stencil_op_names[] = {
"PIPE_STENCIL_OP_KEEP",
"PIPE_STENCIL_OP_ZERO",
"PIPE_STENCIL_OP_REPLACE",
@@ -259,7 +259,7 @@
};
static const char *
-util_dump_stencil_op_short_names[] = {
+util_stencil_op_short_names[] = {
"keep",
"zero",
"replace",
@@ -270,11 +270,11 @@
"invert"
};
-DEFINE_UTIL_DUMP_CONTINUOUS(stencil_op)
+DEFINE_UTIL_STR_CONTINUOUS(stencil_op)
static const char *
-util_dump_tex_target_names[] = {
+util_tex_target_names[] = {
"PIPE_BUFFER",
"PIPE_TEXTURE_1D",
"PIPE_TEXTURE_2D",
@@ -287,7 +287,7 @@
};
static const char *
-util_dump_tex_target_short_names[] = {
+util_tex_target_short_names[] = {
"buffer",
"1d",
"2d",
@@ -299,11 +299,11 @@
"cube_array",
};
-DEFINE_UTIL_DUMP_CONTINUOUS_COUNT(tex_target, PIPE_MAX_TEXTURE_TYPES)
+DEFINE_UTIL_STR_CONTINUOUS_COUNT(tex_target, PIPE_MAX_TEXTURE_TYPES)
static const char *
-util_dump_tex_wrap_names[] = {
+util_tex_wrap_names[] = {
"PIPE_TEX_WRAP_REPEAT",
"PIPE_TEX_WRAP_CLAMP",
"PIPE_TEX_WRAP_CLAMP_TO_EDGE",
@@ -315,7 +315,7 @@
};
static const char *
-util_dump_tex_wrap_short_names[] = {
+util_tex_wrap_short_names[] = {
"repeat",
"clamp",
"clamp_to_edge",
@@ -326,45 +326,46 @@
"mirror_clamp_to_border"
};
-DEFINE_UTIL_DUMP_CONTINUOUS(tex_wrap)
+DEFINE_UTIL_STR_CONTINUOUS(tex_wrap)
static const char *
-util_dump_tex_mipfilter_names[] = {
+util_tex_mipfilter_names[] = {
"PIPE_TEX_MIPFILTER_NEAREST",
"PIPE_TEX_MIPFILTER_LINEAR",
"PIPE_TEX_MIPFILTER_NONE"
};
static const char *
-util_dump_tex_mipfilter_short_names[] = {
+util_tex_mipfilter_short_names[] = {
"nearest",
"linear",
"none"
};
-DEFINE_UTIL_DUMP_CONTINUOUS(tex_mipfilter)
+DEFINE_UTIL_STR_CONTINUOUS(tex_mipfilter)
static const char *
-util_dump_tex_filter_names[] = {
+util_tex_filter_names[] = {
"PIPE_TEX_FILTER_NEAREST",
"PIPE_TEX_FILTER_LINEAR"
};
static const char *
-util_dump_tex_filter_short_names[] = {
+util_tex_filter_short_names[] = {
"nearest",
"linear"
};
-DEFINE_UTIL_DUMP_CONTINUOUS(tex_filter)
+DEFINE_UTIL_STR_CONTINUOUS(tex_filter)
static const char *
-util_dump_query_type_names[] = {
+util_query_type_names[] = {
"PIPE_QUERY_OCCLUSION_COUNTER",
"PIPE_QUERY_OCCLUSION_PREDICATE",
+ "PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE",
"PIPE_QUERY_TIMESTAMP",
"PIPE_QUERY_TIMESTAMP_DISJOINT",
"PIPE_QUERY_TIME_ELAPSED",
@@ -372,12 +373,13 @@
"PIPE_QUERY_PRIMITIVES_EMITTED",
"PIPE_QUERY_SO_STATISTICS",
"PIPE_QUERY_SO_OVERFLOW_PREDICATE",
+ "PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE",
"PIPE_QUERY_GPU_FINISHED",
"PIPE_QUERY_PIPELINE_STATISTICS",
};
static const char *
-util_dump_query_type_short_names[] = {
+util_query_type_short_names[] = {
"occlusion_counter",
"occlusion_predicate",
"timestamp",
@@ -391,11 +393,30 @@
"pipeline_statistics",
};
-DEFINE_UTIL_DUMP_CONTINUOUS(query_type)
+DEFINE_UTIL_STR_CONTINUOUS(query_type)
static const char *
-util_dump_prim_mode_names[] = {
+util_query_value_type_names[] = {
+ "PIPE_QUERY_TYPE_I32",
+ "PIPE_QUERY_TYPE_U32",
+ "PIPE_QUERY_TYPE_I64",
+ "PIPE_QUERY_TYPE_U64",
+};
+
+static const char *
+util_query_value_type_short_names[] = {
+ "i32",
+ "u32",
+ "i64",
+ "u64",
+};
+
+DEFINE_UTIL_STR_CONTINUOUS(query_value_type)
+
+
+static const char *
+util_prim_mode_names[] = {
"PIPE_PRIM_POINTS",
"PIPE_PRIM_LINES",
"PIPE_PRIM_LINE_LOOP",
@@ -414,7 +435,7 @@
};
static const char *
-util_dump_prim_mode_short_names[] = {
+util_prim_mode_short_names[] = {
"points",
"lines",
"line_loop",
@@ -432,4 +453,20 @@
"patches",
};
-DEFINE_UTIL_DUMP_CONTINUOUS(prim_mode)
+DEFINE_UTIL_STR_CONTINUOUS(prim_mode)
+
+void
+util_dump_query_type(FILE *stream, unsigned value)
+{
+ if (value >= PIPE_QUERY_DRIVER_SPECIFIC)
+ fprintf(stream, "PIPE_QUERY_DRIVER_SPECIFIC + %i",
+ value - PIPE_QUERY_DRIVER_SPECIFIC);
+ else
+ fprintf(stream, "%s", util_str_query_type(value, false));
+}
+
+void
+util_dump_query_value_type(FILE *stream, unsigned value)
+{
+ fprintf(stream, "%s", util_str_query_value_type(value, false));
+}
diff -Nru mesa-17.2.4/src/gallium/auxiliary/util/u_dump.h mesa-17.3.3/src/gallium/auxiliary/util/u_dump.h
--- mesa-17.2.4/src/gallium/auxiliary/util/u_dump.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/util/u_dump.h 2018-01-18 21:30:28.000000000 +0000
@@ -52,45 +52,49 @@
/*
* p_defines.h
- *
- * XXX: These functions don't really dump anything -- just translate into
- * strings so a verb better than "dump" should be used instead, in order to
- * free up the namespace to the true dumper functions.
*/
const char *
-util_dump_blend_factor(unsigned value, boolean shortened);
+util_str_blend_factor(unsigned value, boolean shortened);
const char *
-util_dump_blend_func(unsigned value, boolean shortened);
+util_str_blend_func(unsigned value, boolean shortened);
const char *
-util_dump_logicop(unsigned value, boolean shortened);
+util_str_logicop(unsigned value, boolean shortened);
const char *
-util_dump_func(unsigned value, boolean shortened);
+util_str_func(unsigned value, boolean shortened);
const char *
-util_dump_stencil_op(unsigned value, boolean shortened);
+util_str_stencil_op(unsigned value, boolean shortened);
const char *
-util_dump_tex_target(unsigned value, boolean shortened);
+util_str_tex_target(unsigned value, boolean shortened);
const char *
-util_dump_tex_wrap(unsigned value, boolean shortened);
+util_str_tex_wrap(unsigned value, boolean shortened);
const char *
-util_dump_tex_mipfilter(unsigned value, boolean shortened);
+util_str_tex_mipfilter(unsigned value, boolean shortened);
const char *
-util_dump_tex_filter(unsigned value, boolean shortened);
+util_str_tex_filter(unsigned value, boolean shortened);
const char *
-util_dump_query_type(unsigned value, boolean shortened);
+util_str_query_type(unsigned value, boolean shortened);
const char *
-util_dump_prim_mode(unsigned value, boolean shortened);
+util_str_query_value_type(unsigned value, boolean shortened);
+const char *
+util_str_prim_mode(unsigned value, boolean shortened);
+
+void
+util_dump_query_type(FILE *stream, unsigned value);
+
+void
+util_dump_query_value_type(FILE *stream, unsigned value);
/*
* p_state.h, through a FILE
diff -Nru mesa-17.2.4/src/gallium/auxiliary/util/u_dump_state.c mesa-17.3.3/src/gallium/auxiliary/util/u_dump_state.c
--- mesa-17.2.4/src/gallium/auxiliary/util/u_dump_state.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/util/u_dump_state.c 2018-01-18 21:30:28.000000000 +0000
@@ -232,55 +232,55 @@
static void
util_dump_enum_blend_factor(FILE *stream, unsigned value)
{
- util_dump_enum(stream, util_dump_blend_factor(value, TRUE));
+ util_dump_enum(stream, util_str_blend_factor(value, TRUE));
}
static void
util_dump_enum_blend_func(FILE *stream, unsigned value)
{
- util_dump_enum(stream, util_dump_blend_func(value, TRUE));
+ util_dump_enum(stream, util_str_blend_func(value, TRUE));
}
static void
util_dump_enum_func(FILE *stream, unsigned value)
{
- util_dump_enum(stream, util_dump_func(value, TRUE));
+ util_dump_enum(stream, util_str_func(value, TRUE));
}
static void
util_dump_enum_prim_mode(FILE *stream, unsigned value)
{
- util_dump_enum(stream, util_dump_prim_mode(value, TRUE));
+ util_dump_enum(stream, util_str_prim_mode(value, TRUE));
}
static void
util_dump_enum_tex_target(FILE *stream, unsigned value)
{
- util_dump_enum(stream, util_dump_tex_target(value, TRUE));
+ util_dump_enum(stream, util_str_tex_target(value, TRUE));
}
static void
util_dump_enum_tex_filter(FILE *stream, unsigned value)
{
- util_dump_enum(stream, util_dump_tex_filter(value, TRUE));
+ util_dump_enum(stream, util_str_tex_filter(value, TRUE));
}
static void
util_dump_enum_tex_mipfilter(FILE *stream, unsigned value)
{
- util_dump_enum(stream, util_dump_tex_mipfilter(value, TRUE));
+ util_dump_enum(stream, util_str_tex_mipfilter(value, TRUE));
}
static void
util_dump_enum_tex_wrap(FILE *stream, unsigned value)
{
- util_dump_enum(stream, util_dump_tex_wrap(value, TRUE));
+ util_dump_enum(stream, util_str_tex_wrap(value, TRUE));
}
static void
util_dump_enum_stencil_op(FILE *stream, unsigned value)
{
- util_dump_enum(stream, util_dump_stencil_op(value, TRUE));
+ util_dump_enum(stream, util_str_stencil_op(value, TRUE));
}
@@ -919,9 +919,15 @@
util_dump_member(stream, uint, state, max_index);
util_dump_member(stream, bool, state, primitive_restart);
- util_dump_member(stream, uint, state, restart_index);
+ if (state->primitive_restart)
+ util_dump_member(stream, uint, state, restart_index);
- util_dump_member(stream, ptr, state, index.resource);
+ if (state->index_size) {
+ if (state->has_user_indices)
+ util_dump_member(stream, ptr, state, index.user);
+ else
+ util_dump_member(stream, ptr, state, index.resource);
+ }
util_dump_member(stream, ptr, state, count_from_stream_output);
if (!state->indirect) {
diff -Nru mesa-17.2.4/src/gallium/auxiliary/util/u_format.c mesa-17.3.3/src/gallium/auxiliary/util/u_format.c
--- mesa-17.2.4/src/gallium/auxiliary/util/u_format.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/util/u_format.c 2018-01-18 21:30:28.000000000 +0000
@@ -238,7 +238,7 @@
boolean
util_format_is_supported(enum pipe_format format, unsigned bind)
{
- if (util_format_is_s3tc(format) && !util_format_s3tc_enabled) {
+ if (format >= PIPE_FORMAT_COUNT) {
return FALSE;
}
diff -Nru mesa-17.2.4/src/gallium/auxiliary/util/u_format.csv mesa-17.3.3/src/gallium/auxiliary/util/u_format.csv
--- mesa-17.2.4/src/gallium/auxiliary/util/u_format.csv 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/util/u_format.csv 2018-01-18 21:30:28.000000000 +0000
@@ -74,10 +74,13 @@
PIPE_FORMAT_R8G8B8X8_UNORM , plain, 1, 1, un8 , un8 , un8 , x8 , xyz1, rgb
PIPE_FORMAT_B5G5R5X1_UNORM , plain, 1, 1, un5 , un5 , un5 , x1 , zyx1, rgb, x1 , un5 , un5 , un5 , yzw1
PIPE_FORMAT_B5G5R5A1_UNORM , plain, 1, 1, un5 , un5 , un5 , un1 , zyxw, rgb, un1 , un5 , un5 , un5 , yzwx
+PIPE_FORMAT_X1B5G5R5_UNORM , plain, 1, 1, x1 , un5 , un5 , un5 , wzy1, rgb, un5 , un5 , un5 , x1 , xyz1
+PIPE_FORMAT_A1B5G5R5_UNORM , plain, 1, 1, un1 , un5 , un5 , un5 , wzyx, rgb, un5 , un5 , un5 , un1 , xyzw
PIPE_FORMAT_B4G4R4A4_UNORM , plain, 1, 1, un4 , un4 , un4 , un4 , zyxw, rgb, un4 , un4 , un4 , un4 , yzwx
PIPE_FORMAT_B4G4R4X4_UNORM , plain, 1, 1, un4 , un4 , un4 , x4 , zyx1, rgb, x4 , un4 , un4 , un4 , yzw1
PIPE_FORMAT_B5G6R5_UNORM , plain, 1, 1, un5 , un6 , un5 , , zyx1, rgb, un5 , un6 , un5 , , xyz1
PIPE_FORMAT_R10G10B10A2_UNORM , plain, 1, 1, un10, un10, un10, un2 , xyzw, rgb, un2 , un10, un10, un10, wzyx
+PIPE_FORMAT_R10G10B10X2_UNORM , plain, 1, 1, un10, un10, un10, x2, xyz1, rgb, x2 , un10, un10, un10, wzy1
PIPE_FORMAT_B10G10R10A2_UNORM , plain, 1, 1, un10, un10, un10, un2 , zyxw, rgb, un2 , un10, un10, un10, yzwx
PIPE_FORMAT_B2G3R3_UNORM , plain, 1, 1, un2 , un3 , un3 , , zyx1, rgb, un3 , un3 , un2 , , xyz1
diff -Nru mesa-17.2.4/src/gallium/auxiliary/util/u_format_s3tc.c mesa-17.3.3/src/gallium/auxiliary/util/u_format_s3tc.c
--- mesa-17.2.4/src/gallium/auxiliary/util/u_format_s3tc.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/util/u_format_s3tc.c 2018-01-18 21:30:28.000000000 +0000
@@ -28,136 +28,15 @@
#include "u_format.h"
#include "u_format_s3tc.h"
#include "util/format_srgb.h"
+#include "../../../mesa/main/texcompress_s3tc_tmp.h"
-#if defined(_WIN32) || defined(WIN32)
-#define DXTN_LIBNAME "dxtn.dll"
-#elif defined(__CYGWIN__)
-#define DXTN_LIBNAME "cygtxc_dxtn.dll"
-#elif defined(__APPLE__)
-#define DXTN_LIBNAME "libtxc_dxtn.dylib"
-#else
-#define DXTN_LIBNAME "libtxc_dxtn.so"
-#endif
+util_format_dxtn_fetch_t util_format_dxt1_rgb_fetch = (util_format_dxtn_fetch_t)fetch_2d_texel_rgb_dxt1;
+util_format_dxtn_fetch_t util_format_dxt1_rgba_fetch = (util_format_dxtn_fetch_t)fetch_2d_texel_rgba_dxt1;
+util_format_dxtn_fetch_t util_format_dxt3_rgba_fetch = (util_format_dxtn_fetch_t)fetch_2d_texel_rgba_dxt3;
+util_format_dxtn_fetch_t util_format_dxt5_rgba_fetch = (util_format_dxtn_fetch_t)fetch_2d_texel_rgba_dxt5;
-
-static void
-util_format_dxt1_rgb_fetch_stub(int src_stride,
- const uint8_t *src,
- int col, int row,
- uint8_t *dst)
-{
- assert(0);
-}
-
-
-static void
-util_format_dxt1_rgba_fetch_stub(int src_stride,
- const uint8_t *src,
- int col, int row,
- uint8_t *dst )
-{
- assert(0);
-}
-
-
-static void
-util_format_dxt3_rgba_fetch_stub(int src_stride,
- const uint8_t *src,
- int col, int row,
- uint8_t *dst )
-{
- assert(0);
-}
-
-
-static void
-util_format_dxt5_rgba_fetch_stub(int src_stride,
- const uint8_t *src,
- int col, int row,
- uint8_t *dst )
-{
- assert(0);
-}
-
-
-static void
-util_format_dxtn_pack_stub(int src_comps,
- int width, int height,
- const uint8_t *src,
- enum util_format_dxtn dst_format,
- uint8_t *dst,
- int dst_stride)
-{
- assert(0);
-}
-
-
-boolean util_format_s3tc_enabled = FALSE;
-
-util_format_dxtn_fetch_t util_format_dxt1_rgb_fetch = util_format_dxt1_rgb_fetch_stub;
-util_format_dxtn_fetch_t util_format_dxt1_rgba_fetch = util_format_dxt1_rgba_fetch_stub;
-util_format_dxtn_fetch_t util_format_dxt3_rgba_fetch = util_format_dxt3_rgba_fetch_stub;
-util_format_dxtn_fetch_t util_format_dxt5_rgba_fetch = util_format_dxt5_rgba_fetch_stub;
-
-util_format_dxtn_pack_t util_format_dxtn_pack = util_format_dxtn_pack_stub;
-
-
-void
-util_format_s3tc_init(void)
-{
- static boolean first_time = TRUE;
- struct util_dl_library *library = NULL;
- util_dl_proc fetch_2d_texel_rgb_dxt1;
- util_dl_proc fetch_2d_texel_rgba_dxt1;
- util_dl_proc fetch_2d_texel_rgba_dxt3;
- util_dl_proc fetch_2d_texel_rgba_dxt5;
- util_dl_proc tx_compress_dxtn;
-
- if (!first_time)
- return;
- first_time = FALSE;
-
- if (util_format_s3tc_enabled)
- return;
-
- library = util_dl_open(DXTN_LIBNAME);
- if (!library) {
- debug_printf("couldn't open " DXTN_LIBNAME ", software DXTn "
- "compression/decompression unavailable\n");
- return;
- }
-
- fetch_2d_texel_rgb_dxt1 =
- util_dl_get_proc_address(library, "fetch_2d_texel_rgb_dxt1");
- fetch_2d_texel_rgba_dxt1 =
- util_dl_get_proc_address(library, "fetch_2d_texel_rgba_dxt1");
- fetch_2d_texel_rgba_dxt3 =
- util_dl_get_proc_address(library, "fetch_2d_texel_rgba_dxt3");
- fetch_2d_texel_rgba_dxt5 =
- util_dl_get_proc_address(library, "fetch_2d_texel_rgba_dxt5");
- tx_compress_dxtn =
- util_dl_get_proc_address(library, "tx_compress_dxtn");
-
- if (!util_format_dxt1_rgb_fetch ||
- !util_format_dxt1_rgba_fetch ||
- !util_format_dxt3_rgba_fetch ||
- !util_format_dxt5_rgba_fetch ||
- !util_format_dxtn_pack) {
- debug_printf("couldn't reference all symbols in " DXTN_LIBNAME
- ", software DXTn compression/decompression "
- "unavailable\n");
- util_dl_close(library);
- return;
- }
-
- util_format_dxt1_rgb_fetch = (util_format_dxtn_fetch_t)fetch_2d_texel_rgb_dxt1;
- util_format_dxt1_rgba_fetch = (util_format_dxtn_fetch_t)fetch_2d_texel_rgba_dxt1;
- util_format_dxt3_rgba_fetch = (util_format_dxtn_fetch_t)fetch_2d_texel_rgba_dxt3;
- util_format_dxt5_rgba_fetch = (util_format_dxtn_fetch_t)fetch_2d_texel_rgba_dxt5;
- util_format_dxtn_pack = (util_format_dxtn_pack_t)tx_compress_dxtn;
- util_format_s3tc_enabled = TRUE;
-}
+util_format_dxtn_pack_t util_format_dxtn_pack = (util_format_dxtn_pack_t)tx_compress_dxtn;
/*
diff -Nru mesa-17.2.4/src/gallium/auxiliary/util/u_format_s3tc.h mesa-17.3.3/src/gallium/auxiliary/util/u_format_s3tc.h
--- mesa-17.2.4/src/gallium/auxiliary/util/u_format_s3tc.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/util/u_format_s3tc.h 2018-01-18 21:30:28.000000000 +0000
@@ -58,8 +58,6 @@
uint8_t *dst,
int dst_stride);
-extern boolean util_format_s3tc_enabled;
-
extern util_format_dxtn_fetch_t util_format_dxt1_rgb_fetch;
extern util_format_dxtn_fetch_t util_format_dxt1_rgba_fetch;
extern util_format_dxtn_fetch_t util_format_dxt3_rgba_fetch;
@@ -69,10 +67,6 @@
void
-util_format_s3tc_init(void);
-
-
-void
util_format_dxt1_rgb_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
void
diff -Nru mesa-17.2.4/src/gallium/auxiliary/util/u_format_table.c mesa-17.3.3/src/gallium/auxiliary/util/u_format_table.c
--- mesa-17.2.4/src/gallium/auxiliary/util/u_format_table.c 2017-10-30 14:50:59.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/util/u_format_table.c 2018-01-18 21:31:11.000000000 +0000
@@ -1893,6 +1893,396 @@
}
}
+union util_format_x1b5g5r5_unorm {
+ uint16_t value;
+ struct {
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ unsigned r:5;
+ unsigned g:5;
+ unsigned b:5;
+ unsigned x:1;
+#else
+ unsigned x:1;
+ unsigned b:5;
+ unsigned g:5;
+ unsigned r:5;
+#endif
+ } chan;
+};
+
+static inline void
+util_format_x1b5g5r5_unorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ unsigned x, y;
+ for(y = 0; y < height; y += 1) {
+ float *dst = dst_row;
+ const uint8_t *src = src_row;
+ for(x = 0; x < width; x += 1) {
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ uint16_t value = *(const uint16_t *)src;
+ uint16_t r;
+ uint16_t g;
+ uint16_t b;
+ r = value >> 11;
+ g = (value >> 6) & 0x1f;
+ b = (value >> 1) & 0x1f;
+ dst[0] = (float)(r * (1.0f/0x1f)); /* r */
+ dst[1] = (float)(g * (1.0f/0x1f)); /* g */
+ dst[2] = (float)(b * (1.0f/0x1f)); /* b */
+ dst[3] = 1; /* a */
+#else
+ uint16_t value = *(const uint16_t *)src;
+ uint16_t b;
+ uint16_t g;
+ uint16_t r;
+ b = (value >> 1) & 0x1f;
+ g = (value >> 6) & 0x1f;
+ r = value >> 11;
+ dst[0] = (float)(r * (1.0f/0x1f)); /* r */
+ dst[1] = (float)(g * (1.0f/0x1f)); /* g */
+ dst[2] = (float)(b * (1.0f/0x1f)); /* b */
+ dst[3] = 1; /* a */
+#endif
+ src += 2;
+ dst += 4;
+ }
+ src_row += src_stride;
+ dst_row += dst_stride/sizeof(*dst_row);
+ }
+}
+
+static inline void
+util_format_x1b5g5r5_unorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ unsigned x, y;
+ for(y = 0; y < height; y += 1) {
+ const float *src = src_row;
+ uint8_t *dst = dst_row;
+ for(x = 0; x < width; x += 1) {
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ uint16_t value = 0;
+ value |= ((uint16_t)util_iround(CLAMP(src[0], 0.0f, 1.0f) * 0x1f)) << 11;
+ value |= (((uint16_t)util_iround(CLAMP(src[1], 0.0f, 1.0f) * 0x1f)) & 0x1f) << 6;
+ value |= (((uint16_t)util_iround(CLAMP(src[2], 0.0f, 1.0f) * 0x1f)) & 0x1f) << 1;
+ *(uint16_t *)dst = value;
+#else
+ uint16_t value = 0;
+ value |= (((uint16_t)util_iround(CLAMP(src[2], 0.0f, 1.0f) * 0x1f)) & 0x1f) << 1;
+ value |= (((uint16_t)util_iround(CLAMP(src[1], 0.0f, 1.0f) * 0x1f)) & 0x1f) << 6;
+ value |= ((uint16_t)util_iround(CLAMP(src[0], 0.0f, 1.0f) * 0x1f)) << 11;
+ *(uint16_t *)dst = value;
+#endif
+ src += 4;
+ dst += 2;
+ }
+ dst_row += dst_stride;
+ src_row += src_stride/sizeof(*src_row);
+ }
+}
+
+static inline void
+util_format_x1b5g5r5_unorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j)
+{
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ uint16_t value = *(const uint16_t *)src;
+ uint16_t r;
+ uint16_t g;
+ uint16_t b;
+ r = value >> 11;
+ g = (value >> 6) & 0x1f;
+ b = (value >> 1) & 0x1f;
+ dst[0] = (float)(r * (1.0f/0x1f)); /* r */
+ dst[1] = (float)(g * (1.0f/0x1f)); /* g */
+ dst[2] = (float)(b * (1.0f/0x1f)); /* b */
+ dst[3] = 1; /* a */
+#else
+ uint16_t value = *(const uint16_t *)src;
+ uint16_t b;
+ uint16_t g;
+ uint16_t r;
+ b = (value >> 1) & 0x1f;
+ g = (value >> 6) & 0x1f;
+ r = value >> 11;
+ dst[0] = (float)(r * (1.0f/0x1f)); /* r */
+ dst[1] = (float)(g * (1.0f/0x1f)); /* g */
+ dst[2] = (float)(b * (1.0f/0x1f)); /* b */
+ dst[3] = 1; /* a */
+#endif
+}
+
+static inline void
+util_format_x1b5g5r5_unorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ unsigned x, y;
+ for(y = 0; y < height; y += 1) {
+ uint8_t *dst = dst_row;
+ const uint8_t *src = src_row;
+ for(x = 0; x < width; x += 1) {
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ uint16_t value = *(const uint16_t *)src;
+ uint16_t r;
+ uint16_t g;
+ uint16_t b;
+ r = value >> 11;
+ g = (value >> 6) & 0x1f;
+ b = (value >> 1) & 0x1f;
+ dst[0] = (uint8_t)(((uint32_t)r) * 0xff / 0x1f); /* r */
+ dst[1] = (uint8_t)(((uint32_t)g) * 0xff / 0x1f); /* g */
+ dst[2] = (uint8_t)(((uint32_t)b) * 0xff / 0x1f); /* b */
+ dst[3] = 255; /* a */
+#else
+ uint16_t value = *(const uint16_t *)src;
+ uint16_t b;
+ uint16_t g;
+ uint16_t r;
+ b = (value >> 1) & 0x1f;
+ g = (value >> 6) & 0x1f;
+ r = value >> 11;
+ dst[0] = (uint8_t)(((uint32_t)r) * 0xff / 0x1f); /* r */
+ dst[1] = (uint8_t)(((uint32_t)g) * 0xff / 0x1f); /* g */
+ dst[2] = (uint8_t)(((uint32_t)b) * 0xff / 0x1f); /* b */
+ dst[3] = 255; /* a */
+#endif
+ src += 2;
+ dst += 4;
+ }
+ src_row += src_stride;
+ dst_row += dst_stride/sizeof(*dst_row);
+ }
+}
+
+static inline void
+util_format_x1b5g5r5_unorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ unsigned x, y;
+ for(y = 0; y < height; y += 1) {
+ const uint8_t *src = src_row;
+ uint8_t *dst = dst_row;
+ for(x = 0; x < width; x += 1) {
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ uint16_t value = 0;
+ value |= ((uint16_t)(src[0] >> 3)) << 11;
+ value |= (((uint16_t)(src[1] >> 3)) & 0x1f) << 6;
+ value |= (((uint16_t)(src[2] >> 3)) & 0x1f) << 1;
+ *(uint16_t *)dst = value;
+#else
+ uint16_t value = 0;
+ value |= (((uint16_t)(src[2] >> 3)) & 0x1f) << 1;
+ value |= (((uint16_t)(src[1] >> 3)) & 0x1f) << 6;
+ value |= ((uint16_t)(src[0] >> 3)) << 11;
+ *(uint16_t *)dst = value;
+#endif
+ src += 4;
+ dst += 2;
+ }
+ dst_row += dst_stride;
+ src_row += src_stride/sizeof(*src_row);
+ }
+}
+
+union util_format_a1b5g5r5_unorm {
+ uint16_t value;
+ struct {
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ unsigned r:5;
+ unsigned g:5;
+ unsigned b:5;
+ unsigned a:1;
+#else
+ unsigned a:1;
+ unsigned b:5;
+ unsigned g:5;
+ unsigned r:5;
+#endif
+ } chan;
+};
+
+static inline void
+util_format_a1b5g5r5_unorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ unsigned x, y;
+ for(y = 0; y < height; y += 1) {
+ float *dst = dst_row;
+ const uint8_t *src = src_row;
+ for(x = 0; x < width; x += 1) {
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ uint16_t value = *(const uint16_t *)src;
+ uint16_t r;
+ uint16_t g;
+ uint16_t b;
+ uint16_t a;
+ r = value >> 11;
+ g = (value >> 6) & 0x1f;
+ b = (value >> 1) & 0x1f;
+ a = (value) & 0x1;
+ dst[0] = (float)(r * (1.0f/0x1f)); /* r */
+ dst[1] = (float)(g * (1.0f/0x1f)); /* g */
+ dst[2] = (float)(b * (1.0f/0x1f)); /* b */
+ dst[3] = (float)(a * (1.0f/0x1)); /* a */
+#else
+ uint16_t value = *(const uint16_t *)src;
+ uint16_t a;
+ uint16_t b;
+ uint16_t g;
+ uint16_t r;
+ a = (value) & 0x1;
+ b = (value >> 1) & 0x1f;
+ g = (value >> 6) & 0x1f;
+ r = value >> 11;
+ dst[0] = (float)(r * (1.0f/0x1f)); /* r */
+ dst[1] = (float)(g * (1.0f/0x1f)); /* g */
+ dst[2] = (float)(b * (1.0f/0x1f)); /* b */
+ dst[3] = (float)(a * (1.0f/0x1)); /* a */
+#endif
+ src += 2;
+ dst += 4;
+ }
+ src_row += src_stride;
+ dst_row += dst_stride/sizeof(*dst_row);
+ }
+}
+
+static inline void
+util_format_a1b5g5r5_unorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ unsigned x, y;
+ for(y = 0; y < height; y += 1) {
+ const float *src = src_row;
+ uint8_t *dst = dst_row;
+ for(x = 0; x < width; x += 1) {
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ uint16_t value = 0;
+ value |= ((uint16_t)util_iround(CLAMP(src[0], 0.0f, 1.0f) * 0x1f)) << 11;
+ value |= (((uint16_t)util_iround(CLAMP(src[1], 0.0f, 1.0f) * 0x1f)) & 0x1f) << 6;
+ value |= (((uint16_t)util_iround(CLAMP(src[2], 0.0f, 1.0f) * 0x1f)) & 0x1f) << 1;
+ value |= ((uint16_t)util_iround(CLAMP(src[3], 0.0f, 1.0f) * 0x1)) & 0x1;
+ *(uint16_t *)dst = value;
+#else
+ uint16_t value = 0;
+ value |= ((uint16_t)util_iround(CLAMP(src[3], 0.0f, 1.0f) * 0x1)) & 0x1;
+ value |= (((uint16_t)util_iround(CLAMP(src[2], 0.0f, 1.0f) * 0x1f)) & 0x1f) << 1;
+ value |= (((uint16_t)util_iround(CLAMP(src[1], 0.0f, 1.0f) * 0x1f)) & 0x1f) << 6;
+ value |= ((uint16_t)util_iround(CLAMP(src[0], 0.0f, 1.0f) * 0x1f)) << 11;
+ *(uint16_t *)dst = value;
+#endif
+ src += 4;
+ dst += 2;
+ }
+ dst_row += dst_stride;
+ src_row += src_stride/sizeof(*src_row);
+ }
+}
+
+static inline void
+util_format_a1b5g5r5_unorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j)
+{
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ uint16_t value = *(const uint16_t *)src;
+ uint16_t r;
+ uint16_t g;
+ uint16_t b;
+ uint16_t a;
+ r = value >> 11;
+ g = (value >> 6) & 0x1f;
+ b = (value >> 1) & 0x1f;
+ a = (value) & 0x1;
+ dst[0] = (float)(r * (1.0f/0x1f)); /* r */
+ dst[1] = (float)(g * (1.0f/0x1f)); /* g */
+ dst[2] = (float)(b * (1.0f/0x1f)); /* b */
+ dst[3] = (float)(a * (1.0f/0x1)); /* a */
+#else
+ uint16_t value = *(const uint16_t *)src;
+ uint16_t a;
+ uint16_t b;
+ uint16_t g;
+ uint16_t r;
+ a = (value) & 0x1;
+ b = (value >> 1) & 0x1f;
+ g = (value >> 6) & 0x1f;
+ r = value >> 11;
+ dst[0] = (float)(r * (1.0f/0x1f)); /* r */
+ dst[1] = (float)(g * (1.0f/0x1f)); /* g */
+ dst[2] = (float)(b * (1.0f/0x1f)); /* b */
+ dst[3] = (float)(a * (1.0f/0x1)); /* a */
+#endif
+}
+
+static inline void
+util_format_a1b5g5r5_unorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ unsigned x, y;
+ for(y = 0; y < height; y += 1) {
+ uint8_t *dst = dst_row;
+ const uint8_t *src = src_row;
+ for(x = 0; x < width; x += 1) {
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ uint16_t value = *(const uint16_t *)src;
+ uint16_t r;
+ uint16_t g;
+ uint16_t b;
+ uint16_t a;
+ r = value >> 11;
+ g = (value >> 6) & 0x1f;
+ b = (value >> 1) & 0x1f;
+ a = (value) & 0x1;
+ dst[0] = (uint8_t)(((uint32_t)r) * 0xff / 0x1f); /* r */
+ dst[1] = (uint8_t)(((uint32_t)g) * 0xff / 0x1f); /* g */
+ dst[2] = (uint8_t)(((uint32_t)b) * 0xff / 0x1f); /* b */
+ dst[3] = (uint8_t)(((uint32_t)a) * 0xff / 0x1); /* a */
+#else
+ uint16_t value = *(const uint16_t *)src;
+ uint16_t a;
+ uint16_t b;
+ uint16_t g;
+ uint16_t r;
+ a = (value) & 0x1;
+ b = (value >> 1) & 0x1f;
+ g = (value >> 6) & 0x1f;
+ r = value >> 11;
+ dst[0] = (uint8_t)(((uint32_t)r) * 0xff / 0x1f); /* r */
+ dst[1] = (uint8_t)(((uint32_t)g) * 0xff / 0x1f); /* g */
+ dst[2] = (uint8_t)(((uint32_t)b) * 0xff / 0x1f); /* b */
+ dst[3] = (uint8_t)(((uint32_t)a) * 0xff / 0x1); /* a */
+#endif
+ src += 2;
+ dst += 4;
+ }
+ src_row += src_stride;
+ dst_row += dst_stride/sizeof(*dst_row);
+ }
+}
+
+static inline void
+util_format_a1b5g5r5_unorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ unsigned x, y;
+ for(y = 0; y < height; y += 1) {
+ const uint8_t *src = src_row;
+ uint8_t *dst = dst_row;
+ for(x = 0; x < width; x += 1) {
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ uint16_t value = 0;
+ value |= ((uint16_t)(src[0] >> 3)) << 11;
+ value |= (((uint16_t)(src[1] >> 3)) & 0x1f) << 6;
+ value |= (((uint16_t)(src[2] >> 3)) & 0x1f) << 1;
+ value |= ((uint16_t)(src[3] >> 7)) & 0x1;
+ *(uint16_t *)dst = value;
+#else
+ uint16_t value = 0;
+ value |= ((uint16_t)(src[3] >> 7)) & 0x1;
+ value |= (((uint16_t)(src[2] >> 3)) & 0x1f) << 1;
+ value |= (((uint16_t)(src[1] >> 3)) & 0x1f) << 6;
+ value |= ((uint16_t)(src[0] >> 3)) << 11;
+ *(uint16_t *)dst = value;
+#endif
+ src += 4;
+ dst += 2;
+ }
+ dst_row += dst_stride;
+ src_row += src_stride/sizeof(*src_row);
+ }
+}
+
union util_format_b4g4r4a4_unorm {
uint16_t value;
struct {
@@ -2671,6 +3061,193 @@
}
}
+union util_format_r10g10b10x2_unorm {
+ uint32_t value;
+ struct {
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ unsigned x:2;
+ unsigned b:10;
+ unsigned g:10;
+ unsigned r:10;
+#else
+ unsigned r:10;
+ unsigned g:10;
+ unsigned b:10;
+ unsigned x:2;
+#endif
+ } chan;
+};
+
+static inline void
+util_format_r10g10b10x2_unorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ unsigned x, y;
+ for(y = 0; y < height; y += 1) {
+ float *dst = dst_row;
+ const uint8_t *src = src_row;
+ for(x = 0; x < width; x += 1) {
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ uint32_t value = *(const uint32_t *)src;
+ uint32_t b;
+ uint32_t g;
+ uint32_t r;
+ b = (value >> 20) & 0x3ff;
+ g = (value >> 10) & 0x3ff;
+ r = (value) & 0x3ff;
+ dst[0] = (float)(r * (1.0f/0x3ff)); /* r */
+ dst[1] = (float)(g * (1.0f/0x3ff)); /* g */
+ dst[2] = (float)(b * (1.0f/0x3ff)); /* b */
+ dst[3] = 1; /* a */
+#else
+ uint32_t value = *(const uint32_t *)src;
+ uint32_t r;
+ uint32_t g;
+ uint32_t b;
+ r = (value) & 0x3ff;
+ g = (value >> 10) & 0x3ff;
+ b = (value >> 20) & 0x3ff;
+ dst[0] = (float)(r * (1.0f/0x3ff)); /* r */
+ dst[1] = (float)(g * (1.0f/0x3ff)); /* g */
+ dst[2] = (float)(b * (1.0f/0x3ff)); /* b */
+ dst[3] = 1; /* a */
+#endif
+ src += 4;
+ dst += 4;
+ }
+ src_row += src_stride;
+ dst_row += dst_stride/sizeof(*dst_row);
+ }
+}
+
+static inline void
+util_format_r10g10b10x2_unorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ unsigned x, y;
+ for(y = 0; y < height; y += 1) {
+ const float *src = src_row;
+ uint8_t *dst = dst_row;
+ for(x = 0; x < width; x += 1) {
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ uint32_t value = 0;
+ value |= (((uint32_t)util_iround(CLAMP(src[2], 0.0f, 1.0f) * 0x3ff)) & 0x3ff) << 20;
+ value |= (((uint32_t)util_iround(CLAMP(src[1], 0.0f, 1.0f) * 0x3ff)) & 0x3ff) << 10;
+ value |= ((uint32_t)util_iround(CLAMP(src[0], 0.0f, 1.0f) * 0x3ff)) & 0x3ff;
+ *(uint32_t *)dst = value;
+#else
+ uint32_t value = 0;
+ value |= ((uint32_t)util_iround(CLAMP(src[0], 0.0f, 1.0f) * 0x3ff)) & 0x3ff;
+ value |= (((uint32_t)util_iround(CLAMP(src[1], 0.0f, 1.0f) * 0x3ff)) & 0x3ff) << 10;
+ value |= (((uint32_t)util_iround(CLAMP(src[2], 0.0f, 1.0f) * 0x3ff)) & 0x3ff) << 20;
+ *(uint32_t *)dst = value;
+#endif
+ src += 4;
+ dst += 4;
+ }
+ dst_row += dst_stride;
+ src_row += src_stride/sizeof(*src_row);
+ }
+}
+
+static inline void
+util_format_r10g10b10x2_unorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j)
+{
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ uint32_t value = *(const uint32_t *)src;
+ uint32_t b;
+ uint32_t g;
+ uint32_t r;
+ b = (value >> 20) & 0x3ff;
+ g = (value >> 10) & 0x3ff;
+ r = (value) & 0x3ff;
+ dst[0] = (float)(r * (1.0f/0x3ff)); /* r */
+ dst[1] = (float)(g * (1.0f/0x3ff)); /* g */
+ dst[2] = (float)(b * (1.0f/0x3ff)); /* b */
+ dst[3] = 1; /* a */
+#else
+ uint32_t value = *(const uint32_t *)src;
+ uint32_t r;
+ uint32_t g;
+ uint32_t b;
+ r = (value) & 0x3ff;
+ g = (value >> 10) & 0x3ff;
+ b = (value >> 20) & 0x3ff;
+ dst[0] = (float)(r * (1.0f/0x3ff)); /* r */
+ dst[1] = (float)(g * (1.0f/0x3ff)); /* g */
+ dst[2] = (float)(b * (1.0f/0x3ff)); /* b */
+ dst[3] = 1; /* a */
+#endif
+}
+
+static inline void
+util_format_r10g10b10x2_unorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ unsigned x, y;
+ for(y = 0; y < height; y += 1) {
+ uint8_t *dst = dst_row;
+ const uint8_t *src = src_row;
+ for(x = 0; x < width; x += 1) {
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ uint32_t value = *(const uint32_t *)src;
+ uint32_t b;
+ uint32_t g;
+ uint32_t r;
+ b = (value >> 20) & 0x3ff;
+ g = (value >> 10) & 0x3ff;
+ r = (value) & 0x3ff;
+ dst[0] = (uint8_t)(r >> 2); /* r */
+ dst[1] = (uint8_t)(g >> 2); /* g */
+ dst[2] = (uint8_t)(b >> 2); /* b */
+ dst[3] = 255; /* a */
+#else
+ uint32_t value = *(const uint32_t *)src;
+ uint32_t r;
+ uint32_t g;
+ uint32_t b;
+ r = (value) & 0x3ff;
+ g = (value >> 10) & 0x3ff;
+ b = (value >> 20) & 0x3ff;
+ dst[0] = (uint8_t)(r >> 2); /* r */
+ dst[1] = (uint8_t)(g >> 2); /* g */
+ dst[2] = (uint8_t)(b >> 2); /* b */
+ dst[3] = 255; /* a */
+#endif
+ src += 4;
+ dst += 4;
+ }
+ src_row += src_stride;
+ dst_row += dst_stride/sizeof(*dst_row);
+ }
+}
+
+static inline void
+util_format_r10g10b10x2_unorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ unsigned x, y;
+ for(y = 0; y < height; y += 1) {
+ const uint8_t *src = src_row;
+ uint8_t *dst = dst_row;
+ for(x = 0; x < width; x += 1) {
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ uint32_t value = 0;
+ value |= (((uint32_t)(((uint32_t)src[2]) * 0x3ff / 0xff)) & 0x3ff) << 20;
+ value |= (((uint32_t)(((uint32_t)src[1]) * 0x3ff / 0xff)) & 0x3ff) << 10;
+ value |= ((uint32_t)(((uint32_t)src[0]) * 0x3ff / 0xff)) & 0x3ff;
+ *(uint32_t *)dst = value;
+#else
+ uint32_t value = 0;
+ value |= ((uint32_t)(((uint32_t)src[0]) * 0x3ff / 0xff)) & 0x3ff;
+ value |= (((uint32_t)(((uint32_t)src[1]) * 0x3ff / 0xff)) & 0x3ff) << 10;
+ value |= (((uint32_t)(((uint32_t)src[2]) * 0x3ff / 0xff)) & 0x3ff) << 20;
+ *(uint32_t *)dst = value;
+#endif
+ src += 4;
+ dst += 4;
+ }
+ dst_row += dst_stride;
+ src_row += src_stride/sizeof(*src_row);
+ }
+}
+
union util_format_b10g10r10a2_unorm {
uint32_t value;
struct {
@@ -33273,6 +33850,130 @@
};
const struct util_format_description
+util_format_x1b5g5r5_unorm_description = {
+ PIPE_FORMAT_X1B5G5R5_UNORM,
+ "PIPE_FORMAT_X1B5G5R5_UNORM",
+ "x1b5g5r5_unorm",
+ {1, 1, 16}, /* block */
+ UTIL_FORMAT_LAYOUT_PLAIN,
+ 4, /* nr_channels */
+ FALSE, /* is_array */
+ TRUE, /* is_bitmask */
+ FALSE, /* is_mixed */
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ {
+ {UTIL_FORMAT_TYPE_UNSIGNED, TRUE, FALSE, 5, 11}, /* x = r */
+ {UTIL_FORMAT_TYPE_UNSIGNED, TRUE, FALSE, 5, 6}, /* y = g */
+ {UTIL_FORMAT_TYPE_UNSIGNED, TRUE, FALSE, 5, 1}, /* z = b */
+ {UTIL_FORMAT_TYPE_VOID, FALSE, FALSE, 1, 0} /* w = x */
+ },
+#else
+ {
+ {UTIL_FORMAT_TYPE_VOID, FALSE, FALSE, 1, 0}, /* x = x */
+ {UTIL_FORMAT_TYPE_UNSIGNED, TRUE, FALSE, 5, 1}, /* y = b */
+ {UTIL_FORMAT_TYPE_UNSIGNED, TRUE, FALSE, 5, 6}, /* z = g */
+ {UTIL_FORMAT_TYPE_UNSIGNED, TRUE, FALSE, 5, 11} /* w = r */
+ },
+#endif
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ {
+ PIPE_SWIZZLE_X, /* r */
+ PIPE_SWIZZLE_Y, /* g */
+ PIPE_SWIZZLE_Z, /* b */
+ PIPE_SWIZZLE_1 /* a */
+ },
+#else
+ {
+ PIPE_SWIZZLE_W, /* r */
+ PIPE_SWIZZLE_Z, /* g */
+ PIPE_SWIZZLE_Y, /* b */
+ PIPE_SWIZZLE_1 /* a */
+ },
+#endif
+ UTIL_FORMAT_COLORSPACE_RGB,
+ &util_format_x1b5g5r5_unorm_unpack_rgba_8unorm,
+ &util_format_x1b5g5r5_unorm_pack_rgba_8unorm,
+ NULL, /* fetch_rgba_8unorm */
+ &util_format_x1b5g5r5_unorm_unpack_rgba_float,
+ &util_format_x1b5g5r5_unorm_pack_rgba_float,
+ &util_format_x1b5g5r5_unorm_fetch_rgba_float,
+ NULL, /* unpack_z_32unorm */
+ NULL, /* pack_z_32unorm */
+ NULL, /* unpack_z_float */
+ NULL, /* pack_z_float */
+ NULL, /* unpack_s_8uint */
+ NULL, /* pack_s_8uint */
+ NULL, /* unpack_rgba_uint */
+ NULL, /* pack_rgba_uint */
+ NULL, /* unpack_rgba_sint */
+ NULL, /* pack_rgba_sint */
+ NULL, /* fetch_rgba_uint */
+ NULL /* fetch_rgba_sint */
+};
+
+const struct util_format_description
+util_format_a1b5g5r5_unorm_description = {
+ PIPE_FORMAT_A1B5G5R5_UNORM,
+ "PIPE_FORMAT_A1B5G5R5_UNORM",
+ "a1b5g5r5_unorm",
+ {1, 1, 16}, /* block */
+ UTIL_FORMAT_LAYOUT_PLAIN,
+ 4, /* nr_channels */
+ FALSE, /* is_array */
+ TRUE, /* is_bitmask */
+ FALSE, /* is_mixed */
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ {
+ {UTIL_FORMAT_TYPE_UNSIGNED, TRUE, FALSE, 5, 11}, /* x = r */
+ {UTIL_FORMAT_TYPE_UNSIGNED, TRUE, FALSE, 5, 6}, /* y = g */
+ {UTIL_FORMAT_TYPE_UNSIGNED, TRUE, FALSE, 5, 1}, /* z = b */
+ {UTIL_FORMAT_TYPE_UNSIGNED, TRUE, FALSE, 1, 0} /* w = a */
+ },
+#else
+ {
+ {UTIL_FORMAT_TYPE_UNSIGNED, TRUE, FALSE, 1, 0}, /* x = a */
+ {UTIL_FORMAT_TYPE_UNSIGNED, TRUE, FALSE, 5, 1}, /* y = b */
+ {UTIL_FORMAT_TYPE_UNSIGNED, TRUE, FALSE, 5, 6}, /* z = g */
+ {UTIL_FORMAT_TYPE_UNSIGNED, TRUE, FALSE, 5, 11} /* w = r */
+ },
+#endif
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ {
+ PIPE_SWIZZLE_X, /* r */
+ PIPE_SWIZZLE_Y, /* g */
+ PIPE_SWIZZLE_Z, /* b */
+ PIPE_SWIZZLE_W /* a */
+ },
+#else
+ {
+ PIPE_SWIZZLE_W, /* r */
+ PIPE_SWIZZLE_Z, /* g */
+ PIPE_SWIZZLE_Y, /* b */
+ PIPE_SWIZZLE_X /* a */
+ },
+#endif
+ UTIL_FORMAT_COLORSPACE_RGB,
+ &util_format_a1b5g5r5_unorm_unpack_rgba_8unorm,
+ &util_format_a1b5g5r5_unorm_pack_rgba_8unorm,
+ NULL, /* fetch_rgba_8unorm */
+ &util_format_a1b5g5r5_unorm_unpack_rgba_float,
+ &util_format_a1b5g5r5_unorm_pack_rgba_float,
+ &util_format_a1b5g5r5_unorm_fetch_rgba_float,
+ NULL, /* unpack_z_32unorm */
+ NULL, /* pack_z_32unorm */
+ NULL, /* unpack_z_float */
+ NULL, /* pack_z_float */
+ NULL, /* unpack_s_8uint */
+ NULL, /* pack_s_8uint */
+ NULL, /* unpack_rgba_uint */
+ NULL, /* pack_rgba_uint */
+ NULL, /* unpack_rgba_sint */
+ NULL, /* pack_rgba_sint */
+ NULL, /* fetch_rgba_uint */
+ NULL /* fetch_rgba_sint */
+};
+
+const struct util_format_description
util_format_b4g4r4a4_unorm_description = {
PIPE_FORMAT_B4G4R4A4_UNORM,
"PIPE_FORMAT_B4G4R4A4_UNORM",
@@ -33521,6 +34222,68 @@
};
const struct util_format_description
+util_format_r10g10b10x2_unorm_description = {
+ PIPE_FORMAT_R10G10B10X2_UNORM,
+ "PIPE_FORMAT_R10G10B10X2_UNORM",
+ "r10g10b10x2_unorm",
+ {1, 1, 32}, /* block */
+ UTIL_FORMAT_LAYOUT_PLAIN,
+ 4, /* nr_channels */
+ FALSE, /* is_array */
+ TRUE, /* is_bitmask */
+ FALSE, /* is_mixed */
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ {
+ {UTIL_FORMAT_TYPE_VOID, FALSE, FALSE, 2, 30}, /* x = x */
+ {UTIL_FORMAT_TYPE_UNSIGNED, TRUE, FALSE, 10, 20}, /* y = b */
+ {UTIL_FORMAT_TYPE_UNSIGNED, TRUE, FALSE, 10, 10}, /* z = g */
+ {UTIL_FORMAT_TYPE_UNSIGNED, TRUE, FALSE, 10, 0} /* w = r */
+ },
+#else
+ {
+ {UTIL_FORMAT_TYPE_UNSIGNED, TRUE, FALSE, 10, 0}, /* x = r */
+ {UTIL_FORMAT_TYPE_UNSIGNED, TRUE, FALSE, 10, 10}, /* y = g */
+ {UTIL_FORMAT_TYPE_UNSIGNED, TRUE, FALSE, 10, 20}, /* z = b */
+ {UTIL_FORMAT_TYPE_VOID, FALSE, FALSE, 2, 30} /* w = x */
+ },
+#endif
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ {
+ PIPE_SWIZZLE_W, /* r */
+ PIPE_SWIZZLE_Z, /* g */
+ PIPE_SWIZZLE_Y, /* b */
+ PIPE_SWIZZLE_1 /* a */
+ },
+#else
+ {
+ PIPE_SWIZZLE_X, /* r */
+ PIPE_SWIZZLE_Y, /* g */
+ PIPE_SWIZZLE_Z, /* b */
+ PIPE_SWIZZLE_1 /* a */
+ },
+#endif
+ UTIL_FORMAT_COLORSPACE_RGB,
+ &util_format_r10g10b10x2_unorm_unpack_rgba_8unorm,
+ &util_format_r10g10b10x2_unorm_pack_rgba_8unorm,
+ NULL, /* fetch_rgba_8unorm */
+ &util_format_r10g10b10x2_unorm_unpack_rgba_float,
+ &util_format_r10g10b10x2_unorm_pack_rgba_float,
+ &util_format_r10g10b10x2_unorm_fetch_rgba_float,
+ NULL, /* unpack_z_32unorm */
+ NULL, /* pack_z_32unorm */
+ NULL, /* unpack_z_float */
+ NULL, /* pack_z_float */
+ NULL, /* unpack_s_8uint */
+ NULL, /* pack_s_8uint */
+ NULL, /* unpack_rgba_uint */
+ NULL, /* pack_rgba_uint */
+ NULL, /* unpack_rgba_sint */
+ NULL, /* pack_rgba_sint */
+ NULL, /* fetch_rgba_uint */
+ NULL /* fetch_rgba_sint */
+};
+
+const struct util_format_description
util_format_b10g10r10a2_unorm_description = {
PIPE_FORMAT_B10G10R10A2_UNORM,
"PIPE_FORMAT_B10G10R10A2_UNORM",
@@ -49046,6 +49809,10 @@
return &util_format_b5g5r5x1_unorm_description;
case PIPE_FORMAT_B5G5R5A1_UNORM:
return &util_format_b5g5r5a1_unorm_description;
+ case PIPE_FORMAT_X1B5G5R5_UNORM:
+ return &util_format_x1b5g5r5_unorm_description;
+ case PIPE_FORMAT_A1B5G5R5_UNORM:
+ return &util_format_a1b5g5r5_unorm_description;
case PIPE_FORMAT_B4G4R4A4_UNORM:
return &util_format_b4g4r4a4_unorm_description;
case PIPE_FORMAT_B4G4R4X4_UNORM:
@@ -49054,6 +49821,8 @@
return &util_format_b5g6r5_unorm_description;
case PIPE_FORMAT_R10G10B10A2_UNORM:
return &util_format_r10g10b10a2_unorm_description;
+ case PIPE_FORMAT_R10G10B10X2_UNORM:
+ return &util_format_r10g10b10x2_unorm_description;
case PIPE_FORMAT_B10G10R10A2_UNORM:
return &util_format_b10g10r10a2_unorm_description;
case PIPE_FORMAT_B2G3R3_UNORM:
diff -Nru mesa-17.2.4/src/gallium/auxiliary/util/u_format_tests.c mesa-17.3.3/src/gallium/auxiliary/util/u_format_tests.c
--- mesa-17.2.4/src/gallium/auxiliary/util/u_format_tests.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/util/u_format_tests.c 2018-01-18 21:30:28.000000000 +0000
@@ -140,6 +140,12 @@
{PIPE_FORMAT_R10G10B10A2_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0xc0000000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
{PIPE_FORMAT_R10G10B10A2_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0xffffffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)},
+ {PIPE_FORMAT_R10G10B10X2_UNORM, PACKED_1x32(0x3fffffff), PACKED_1x32(0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R10G10B10X2_UNORM, PACKED_1x32(0x3fffffff), PACKED_1x32(0x000003ff), UNPACKED_1x1(1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R10G10B10X2_UNORM, PACKED_1x32(0x3fffffff), PACKED_1x32(0x000ffc00), UNPACKED_1x1(0.0, 1.0, 0.0, 1.0)},
+ {PIPE_FORMAT_R10G10B10X2_UNORM, PACKED_1x32(0x3fffffff), PACKED_1x32(0x3ff00000), UNPACKED_1x1(0.0, 0.0, 1.0, 1.0)},
+ {PIPE_FORMAT_R10G10B10X2_UNORM, PACKED_1x32(0x3fffffff), PACKED_1x32(0x3fffffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)},
+
{PIPE_FORMAT_B10G10R10A2_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x00000000), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)},
{PIPE_FORMAT_B10G10R10A2_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x000003ff), UNPACKED_1x1(0.0, 0.0, 1.0, 0.0)},
{PIPE_FORMAT_B10G10R10A2_UNORM, PACKED_1x32(0xffffffff), PACKED_1x32(0x000ffc00), UNPACKED_1x1(0.0, 1.0, 0.0, 0.0)},
@@ -164,6 +170,19 @@
{PIPE_FORMAT_B5G5R5A1_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x8000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
{PIPE_FORMAT_B5G5R5A1_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0xffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)},
+ {PIPE_FORMAT_X1B5G5R5_UNORM, PACKED_1x16(0xfffe), PACKED_1x16(0x0000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_X1B5G5R5_UNORM, PACKED_1x16(0xfffe), PACKED_1x16(0x003e), UNPACKED_1x1(0.0, 0.0, 1.0, 1.0)},
+ {PIPE_FORMAT_X1B5G5R5_UNORM, PACKED_1x16(0xfffe), PACKED_1x16(0x07c0), UNPACKED_1x1(0.0, 1.0, 0.0, 1.0)},
+ {PIPE_FORMAT_X1B5G5R5_UNORM, PACKED_1x16(0xfffe), PACKED_1x16(0xf800), UNPACKED_1x1(1.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_X1B5G5R5_UNORM, PACKED_1x16(0xfffe), PACKED_1x16(0xfffe), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)},
+
+ {PIPE_FORMAT_A1B5G5R5_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x0000), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_A1B5G5R5_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x003e), UNPACKED_1x1(0.0, 0.0, 1.0, 0.0)},
+ {PIPE_FORMAT_A1B5G5R5_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x07c0), UNPACKED_1x1(0.0, 1.0, 0.0, 0.0)},
+ {PIPE_FORMAT_A1B5G5R5_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0xf800), UNPACKED_1x1(1.0, 0.0, 0.0, 0.0)},
+ {PIPE_FORMAT_A1B5G5R5_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0x0001), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
+ {PIPE_FORMAT_A1B5G5R5_UNORM, PACKED_1x16(0xffff), PACKED_1x16(0xffff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)},
+
{PIPE_FORMAT_B4G4R4X4_UNORM, PACKED_1x16(0x0fff), PACKED_1x16(0x0000), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)},
{PIPE_FORMAT_B4G4R4X4_UNORM, PACKED_1x16(0x0fff), PACKED_1x16(0x000f), UNPACKED_1x1(0.0, 0.0, 1.0, 1.0)},
{PIPE_FORMAT_B4G4R4X4_UNORM, PACKED_1x16(0x0fff), PACKED_1x16(0x00f0), UNPACKED_1x1(0.0, 1.0, 0.0, 1.0)},
diff -Nru mesa-17.2.4/src/gallium/auxiliary/util/u_idalloc.c mesa-17.3.3/src/gallium/auxiliary/util/u_idalloc.c
--- mesa-17.2.4/src/gallium/auxiliary/util/u_idalloc.c 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/util/u_idalloc.c 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,96 @@
+/**************************************************************************
+ *
+ * Copyright 2017 Valve Corporation
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * A simple allocator that allocates and releases "numbers".
+ *
+ * @author Samuel Pitoiset
+ */
+
+#include "util/u_idalloc.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+void
+util_idalloc_init(struct util_idalloc *buf)
+{
+ memset(buf, 0, sizeof(*buf));
+}
+
+void
+util_idalloc_fini(struct util_idalloc *buf)
+{
+   /* free(NULL) is a no-op, so an allocator that never grew is fine too. */
+   free(buf->data);
+}
+
+void
+util_idalloc_resize(struct util_idalloc *buf, unsigned new_num_elements)
+{
+   const unsigned wanted = align(new_num_elements, 32); /* whole 32-bit words */
+   unsigned word = buf->num_elements / 32;
+
+   /* The bitmap only ever grows; smaller or equal sizes are ignored. */
+   if (wanted <= buf->num_elements)
+      return;
+
+   buf->data = realloc(buf->data, (wanted / 32) * sizeof(*buf->data));
+   /* Mark every id in the newly added words as free. */
+   for (; word < wanted / 32; word++)
+      buf->data[word] = 0;
+   buf->num_elements = wanted;
+}
+
+unsigned
+util_idalloc_alloc(struct util_idalloc *buf)
+{
+   unsigned num_elements = buf->num_elements;
+
+   /* Scan the bitmap word by word and take the lowest clear (free) bit. */
+   for (unsigned i = 0; i < num_elements / 32; i++) {
+      if (buf->data[i] == 0xffffffff)
+         continue;
+      unsigned bit = ffs(~buf->data[i]) - 1;
+      buf->data[i] |= 1u << bit;
+      return i * 32 + bit;
+   }
+
+   /* No slots available: double the capacity and return the first new id.
+    * An allocator that was never resized has 0 elements; start it at 32. */
+   util_idalloc_resize(buf, num_elements ? num_elements * 2 : 32);
+   buf->data[num_elements / 32] |= 1u << (num_elements % 32);
+
+   return num_elements;
+}
+
+void
+util_idalloc_free(struct util_idalloc *buf, unsigned id)
+{
+   assert(id < buf->num_elements); /* id must lie inside the bitmap */
+   buf->data[id / 32] &= ~(1u << (id % 32)); /* 1u: signed 1 << 31 is UB */
+}
diff -Nru mesa-17.2.4/src/gallium/auxiliary/util/u_idalloc.h mesa-17.3.3/src/gallium/auxiliary/util/u_idalloc.h
--- mesa-17.2.4/src/gallium/auxiliary/util/u_idalloc.h 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/util/u_idalloc.h 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,62 @@
+/**************************************************************************
+ *
+ * Copyright 2017 Valve Corporation
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef U_IDALLOC_H
+#define U_IDALLOC_H
+
+#include <inttypes.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct util_idalloc
+{
+ uint32_t *data;
+ unsigned num_elements;
+};
+
+void
+util_idalloc_init(struct util_idalloc *buf);
+
+void
+util_idalloc_fini(struct util_idalloc *buf);
+
+void
+util_idalloc_resize(struct util_idalloc *buf, unsigned new_num_elements);
+
+unsigned
+util_idalloc_alloc(struct util_idalloc *buf);
+
+void
+util_idalloc_free(struct util_idalloc *buf, unsigned id);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* U_IDALLOC_H */
diff -Nru mesa-17.2.4/src/gallium/auxiliary/util/u_inlines.h mesa-17.3.3/src/gallium/auxiliary/util/u_inlines.h
--- mesa-17.2.4/src/gallium/auxiliary/util/u_inlines.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/util/u_inlines.h 2018-01-18 21:30:28.000000000 +0000
@@ -536,7 +536,9 @@
{
switch (type) {
case PIPE_QUERY_OCCLUSION_PREDICATE:
+ case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
case PIPE_QUERY_GPU_FINISHED:
result->b = FALSE;
break;
@@ -562,7 +564,7 @@
}
/** Convert PIPE_TEXTURE_x to TGSI_TEXTURE_x */
-static inline unsigned
+static inline enum tgsi_texture_type
util_pipe_tex_to_tgsi_tex(enum pipe_texture_target pipe_tex_target,
unsigned nr_samples)
{
diff -Nru mesa-17.2.4/src/gallium/auxiliary/util/u_keymap.c mesa-17.3.3/src/gallium/auxiliary/util/u_keymap.c
--- mesa-17.2.4/src/gallium/auxiliary/util/u_keymap.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/util/u_keymap.c 1970-01-01 00:00:00.000000000 +0000
@@ -1,318 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2008 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * Key lookup/associative container.
- *
- * Like Jose's util_hash_table, based on CSO cache code for now.
- *
- * Author: Brian Paul
- */
-
-
-#include "pipe/p_compiler.h"
-#include "util/u_debug.h"
-
-#include "cso_cache/cso_hash.h"
-
-#include "util/u_memory.h"
-#include "util/u_keymap.h"
-
-
-struct keymap
-{
- struct cso_hash *cso;
- unsigned key_size;
- unsigned max_entries; /* XXX not obeyed net */
- unsigned num_entries;
- keymap_delete_func delete_func;
-};
-
-
-struct keymap_item
-{
- void *key, *value;
-};
-
-
-/**
- * This the default key-delete function used when the client doesn't
- * provide one.
- */
-static void
-default_delete_func(const struct keymap *map,
- const void *key, void *data, void *user)
-{
- FREE((void*) data);
-}
-
-
-static inline struct keymap_item *
-hash_table_item(struct cso_hash_iter iter)
-{
- return (struct keymap_item *) cso_hash_iter_data(iter);
-}
-
-
-/**
- * Return 4-byte hash key for a block of bytes.
- */
-static unsigned
-hash(const void *key, unsigned keySize)
-{
- unsigned i, hash;
-
- keySize /= 4; /* convert from bytes to uints */
-
- hash = 0;
- for (i = 0; i < keySize; i++) {
- hash ^= (i + 1) * ((const unsigned *) key)[i];
- }
-
- /*hash = hash ^ (hash >> 11) ^ (hash >> 22);*/
-
- return hash;
-}
-
-
-/**
- * Create a new map.
- * \param keySize size of the keys in bytes
- * \param maxEntries max number of entries to allow (~0 = infinity)
- * \param deleteFunc optional callback to call when entries
- * are deleted/replaced
- */
-struct keymap *
-util_new_keymap(unsigned keySize, unsigned maxEntries,
- keymap_delete_func deleteFunc)
-{
- struct keymap *map = MALLOC_STRUCT(keymap);
- if (!map)
- return NULL;
-
- map->cso = cso_hash_create();
- if (!map->cso) {
- FREE(map);
- return NULL;
- }
-
- map->max_entries = maxEntries;
- map->num_entries = 0;
- map->key_size = keySize;
- map->delete_func = deleteFunc ? deleteFunc : default_delete_func;
-
- return map;
-}
-
-
-/**
- * Delete/free a keymap and all entries. The deleteFunc that was given at
- * create time will be called for each entry.
- * \param user user-provided pointer passed through to the delete callback
- */
-void
-util_delete_keymap(struct keymap *map, void *user)
-{
- util_keymap_remove_all(map, user);
- cso_hash_delete(map->cso);
- FREE(map);
-}
-
-
-static inline struct cso_hash_iter
-hash_table_find_iter(const struct keymap *map, const void *key,
- unsigned key_hash)
-{
- struct cso_hash_iter iter;
- struct keymap_item *item;
-
- iter = cso_hash_find(map->cso, key_hash);
- while (!cso_hash_iter_is_null(iter)) {
- item = (struct keymap_item *) cso_hash_iter_data(iter);
- if (!memcmp(item->key, key, map->key_size))
- break;
- iter = cso_hash_iter_next(iter);
- }
-
- return iter;
-}
-
-
-static inline struct keymap_item *
-hash_table_find_item(const struct keymap *map, const void *key,
- unsigned key_hash)
-{
- struct cso_hash_iter iter = hash_table_find_iter(map, key, key_hash);
- if (cso_hash_iter_is_null(iter)) {
- return NULL;
- }
- else {
- return hash_table_item(iter);
- }
-}
-
-
-/**
- * Insert a new key + data pointer into the table.
- * Note: we create a copy of the key, but not the data!
- * If the key is already present in the table, replace the existing
- * entry (calling the delete callback on the previous entry).
- * If the maximum capacity of the map is reached an old entry
- * will be deleted (the delete callback will be called).
- */
-boolean
-util_keymap_insert(struct keymap *map, const void *key,
- const void *data, void *user)
-{
- unsigned key_hash;
- struct keymap_item *item;
- struct cso_hash_iter iter;
-
- assert(map);
- if (!map)
- return FALSE;
-
- key_hash = hash(key, map->key_size);
-
- item = hash_table_find_item(map, key, key_hash);
- if (item) {
- /* call delete callback for old entry/item */
- map->delete_func(map, item->key, item->value, user);
- item->value = (void *) data;
- return TRUE;
- }
-
- item = MALLOC_STRUCT(keymap_item);
- if (!item)
- return FALSE;
-
- item->key = mem_dup(key, map->key_size);
- item->value = (void *) data;
-
- iter = cso_hash_insert(map->cso, key_hash, item);
- if (cso_hash_iter_is_null(iter)) {
- FREE(item);
- return FALSE;
- }
-
- map->num_entries++;
-
- return TRUE;
-}
-
-
-/**
- * Look up a key in the map and return the associated data pointer.
- */
-const void *
-util_keymap_lookup(const struct keymap *map, const void *key)
-{
- unsigned key_hash;
- struct keymap_item *item;
-
- assert(map);
- if (!map)
- return NULL;
-
- key_hash = hash(key, map->key_size);
-
- item = hash_table_find_item(map, key, key_hash);
- if (!item)
- return NULL;
-
- return item->value;
-}
-
-
-/**
- * Remove an entry from the map.
- * The delete callback will be called if the given key/entry is found.
- * \param user passed to the delete callback as the last param.
- */
-void
-util_keymap_remove(struct keymap *map, const void *key, void *user)
-{
- unsigned key_hash;
- struct cso_hash_iter iter;
- struct keymap_item *item;
-
- assert(map);
- if (!map)
- return;
-
- key_hash = hash(key, map->key_size);
-
- iter = hash_table_find_iter(map, key, key_hash);
- if (cso_hash_iter_is_null(iter))
- return;
-
- item = hash_table_item(iter);
- assert(item);
- if (!item)
- return;
- map->delete_func(map, item->key, item->value, user);
- FREE(item->key);
- FREE(item);
-
- map->num_entries--;
-
- cso_hash_erase(map->cso, iter);
-}
-
-
-/**
- * Remove all entries from the map, calling the delete callback for each.
- * \param user passed to the delete callback as the last param.
- */
-void
-util_keymap_remove_all(struct keymap *map, void *user)
-{
- struct cso_hash_iter iter;
- struct keymap_item *item;
-
- assert(map);
- if (!map)
- return;
-
- iter = cso_hash_first_node(map->cso);
- while (!cso_hash_iter_is_null(iter)) {
- item = (struct keymap_item *)
- cso_hash_take(map->cso, cso_hash_iter_key(iter));
- map->delete_func(map, item->key, item->value, user);
- FREE(item->key);
- FREE(item);
- iter = cso_hash_first_node(map->cso);
- }
-}
-
-
-extern void
-util_keymap_info(const struct keymap *map)
-{
- debug_printf("Keymap %p: %u of max %u entries\n",
- (void *) map, map->num_entries, map->max_entries);
-}
diff -Nru mesa-17.2.4/src/gallium/auxiliary/util/u_keymap.h mesa-17.3.3/src/gallium/auxiliary/util/u_keymap.h
--- mesa-17.2.4/src/gallium/auxiliary/util/u_keymap.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/util/u_keymap.h 1970-01-01 00:00:00.000000000 +0000
@@ -1,68 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2008 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#ifndef U_KEYMAP_H
-#define U_KEYMAP_H
-
-#include "pipe/p_compiler.h"
-
-
-/** opaque keymap type */
-struct keymap;
-
-
-/** Delete/callback function type */
-typedef void (*keymap_delete_func)(const struct keymap *map,
- const void *key, void *data,
- void *user);
-
-
-extern struct keymap *
-util_new_keymap(unsigned keySize, unsigned maxEntries,
- keymap_delete_func deleteFunc);
-
-extern void
-util_delete_keymap(struct keymap *map, void *user);
-
-extern boolean
-util_keymap_insert(struct keymap *map, const void *key,
- const void *data, void *user);
-
-extern const void *
-util_keymap_lookup(const struct keymap *map, const void *key);
-
-extern void
-util_keymap_remove(struct keymap *map, const void *key, void *user);
-
-extern void
-util_keymap_remove_all(struct keymap *map, void *user);
-
-extern void
-util_keymap_info(const struct keymap *map);
-
-
-#endif /* U_KEYMAP_H */
diff -Nru mesa-17.2.4/src/gallium/auxiliary/util/u_log.c mesa-17.3.3/src/gallium/auxiliary/util/u_log.c
--- mesa-17.2.4/src/gallium/auxiliary/util/u_log.c 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/util/u_log.c 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,235 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "u_log.h"
+
+#include "u_memory.h"
+#include "util/u_string.h"
+
+struct page_entry {
+ const struct u_log_chunk_type *type;
+ void *data;
+};
+
+struct u_log_page {
+ struct page_entry *entries;
+ unsigned num_entries;
+ unsigned max_entries;
+};
+
+struct u_log_auto_logger {
+ u_auto_log_fn *callback;
+ void *data;
+};
+
+/**
+ * Initialize the given logging context.
+ */
+void
+u_log_context_init(struct u_log_context *ctx)
+{
+ memset(ctx, 0, sizeof(*ctx));
+}
+
+/**
+ * Free all resources associated with the given logging context.
+ *
+ * Pages taken from the context via \ref u_log_new_page must be destroyed
+ * separately.
+ */
+void
+u_log_context_destroy(struct u_log_context *ctx)
+{
+ u_log_page_destroy(ctx->cur);
+ FREE(ctx->auto_loggers);
+ memset(ctx, 0, sizeof(*ctx));
+}
+
+/**
+ * Add an auto logger.
+ *
+ * Auto loggers are called each time a chunk is added to the log.
+ */
+void
+u_log_add_auto_logger(struct u_log_context *ctx, u_auto_log_fn *callback,
+ void *data)
+{
+ struct u_log_auto_logger *new_auto_loggers =
+ REALLOC(ctx->auto_loggers,
+ sizeof(*new_auto_loggers) * ctx->num_auto_loggers,
+ sizeof(*new_auto_loggers) * (ctx->num_auto_loggers + 1));
+ if (!new_auto_loggers) {
+ fprintf(stderr, "Gallium u_log: out of memory\n");
+ return;
+ }
+
+ unsigned idx = ctx->num_auto_loggers++;
+ ctx->auto_loggers = new_auto_loggers;
+ ctx->auto_loggers[idx].callback = callback;
+ ctx->auto_loggers[idx].data = data;
+}
+
+/**
+ * Make sure that auto loggers have run.
+ */
+void
+u_log_flush(struct u_log_context *ctx)
+{
+ if (!ctx->num_auto_loggers)
+ return;
+
+ struct u_log_auto_logger *auto_loggers = ctx->auto_loggers;
+ unsigned num_auto_loggers = ctx->num_auto_loggers;
+
+ /* Prevent recursion. */
+ ctx->num_auto_loggers = 0;
+ ctx->auto_loggers = NULL;
+
+ for (unsigned i = 0; i < num_auto_loggers; ++i)
+ auto_loggers[i].callback(auto_loggers[i].data, ctx);
+
+ assert(!ctx->num_auto_loggers);
+ ctx->num_auto_loggers = num_auto_loggers;
+ ctx->auto_loggers = auto_loggers;
+}
+
+static void str_print(void *data, FILE *stream)
+{
+ fputs((char *)data, stream);
+}
+
+static const struct u_log_chunk_type str_chunk_type = {
+ .destroy = free,
+ .print = str_print,
+};
+
+void
+u_log_printf(struct u_log_context *ctx, const char *fmt, ...)
+{
+ va_list va;
+ char *str = NULL;
+
+ va_start(va, fmt);
+ int ret = util_vasprintf(&str, fmt, va);
+ va_end(va);
+
+ if (ret >= 0) {
+ u_log_chunk(ctx, &str_chunk_type, str);
+ } else {
+ fprintf(stderr, "Gallium u_log_printf: out of memory\n");
+ }
+}
+
+/**
+ * Add a custom chunk to the log.
+ *
+ * type->destroy will be called as soon as \p data is no longer needed.
+ */
+void
+u_log_chunk(struct u_log_context *ctx, const struct u_log_chunk_type *type,
+            void *data)
+{
+   struct u_log_page *page;
+
+   /* Flush first: an auto logger may add chunks and thereby create ctx->cur. */
+   u_log_flush(ctx);
+   page = ctx->cur;
+   if (!page) {
+      page = ctx->cur = CALLOC_STRUCT(u_log_page);
+      if (!page)
+         goto out_of_memory;
+   }
+
+   if (page->num_entries >= page->max_entries) {
+      unsigned new_max_entries = MAX2(16, page->num_entries * 2); /* grow 2x */
+      struct page_entry *new_entries = REALLOC(page->entries,
+                                               page->max_entries * sizeof(*page->entries),
+                                               new_max_entries * sizeof(*page->entries));
+      if (!new_entries)
+         goto out_of_memory; /* page->entries is left as it was */
+
+      page->entries = new_entries;
+      page->max_entries = new_max_entries;
+   }
+
+   page->entries[page->num_entries].type = type;
+   page->entries[page->num_entries].data = data;
+   page->num_entries++;
+   return;
+
+out_of_memory:
+   fprintf(stderr, "Gallium: u_log: out of memory\n");
+}
+
+/**
+ * Convenience helper that starts a new page and prints the previous one.
+ */
+void
+u_log_new_page_print(struct u_log_context *ctx, FILE *stream)
+{
+ if (ctx->cur) {
+ u_log_page_print(ctx->cur, stream);
+ u_log_page_destroy(ctx->cur);
+ ctx->cur = NULL;
+ }
+}
+
+/**
+ * Return the current page from the logging context and start a new one.
+ *
+ * The caller is responsible for destroying the returned page.
+ */
+struct u_log_page *
+u_log_new_page(struct u_log_context *ctx)
+{
+ struct u_log_page *page = ctx->cur;
+ ctx->cur = NULL;
+ return page;
+}
+
+/**
+ * Free all data associated with \p page.
+ */
+void
+u_log_page_destroy(struct u_log_page *page)
+{
+ if (!page)
+ return;
+
+ for (unsigned i = 0; i < page->num_entries; ++i) {
+ if (page->entries[i].type->destroy)
+ page->entries[i].type->destroy(page->entries[i].data);
+ }
+ FREE(page->entries);
+ FREE(page);
+}
+
+/**
+ * Print the given page to \p stream.
+ */
+void
+u_log_page_print(struct u_log_page *page, FILE *stream)
+{
+ for (unsigned i = 0; i < page->num_entries; ++i)
+ page->entries[i].type->print(page->entries[i].data, stream);
+}
diff -Nru mesa-17.2.4/src/gallium/auxiliary/util/u_log.h mesa-17.3.3/src/gallium/auxiliary/util/u_log.h
--- mesa-17.2.4/src/gallium/auxiliary/util/u_log.h 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/util/u_log.h 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,100 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * @file u_log.h
+ * @brief Context logging facilities
+ *
+ * Provides a means of logging context events (draw calls, command streams, ...)
+ * into files.
+ *
+ * Log entries start their life cycle as "chunks". Chunks can be plain text
+ * written by \ref u_log_printf or custom internal representations added by
+ * \ref u_log_chunk that are only converted to text on-demand (e.g. for higher
+ * performance pipelined hang-debugging).
+ *
+ * Chunks are accumulated into "pages". The manager of the log can periodically
+ * take out the current page using \ref u_log_new_page and dump it to a file.
+ *
+ * Furthermore, "auto loggers" can be added to a context, which are callbacks
+ * that are given the opportunity to add their own logging each time a chunk is
+ * added. Drivers can use this to lazily log chunks of their command stream.
+ * Auto loggers don't need to be re-entrant.
+ */
+
+#ifndef U_LOG_H
+#define U_LOG_H
+
+#include <stdio.h>
+
+#include "u_debug.h"
+
+struct u_log_page;
+struct u_log_auto_logger;
+
+struct u_log_chunk_type {
+ void (*destroy)(void *data); /* optional: u_log_page_destroy skips it when NULL */
+ void (*print)(void *data, FILE *stream); /* called by u_log_page_print for every entry */
+};
+
+struct u_log_context {
+ struct u_log_page *cur; /* current page; NULL after u_log_new_page / u_log_new_page_print */
+ struct u_log_auto_logger *auto_loggers; /* presumably the callbacks added via u_log_add_auto_logger -- confirm in u_log.c */
+ unsigned num_auto_loggers; /* presumably the number of entries in auto_loggers -- confirm */
+};
+
+typedef void (u_auto_log_fn)(void *data, struct u_log_context *ctx);
+
+void
+u_log_context_init(struct u_log_context *ctx);
+
+void
+u_log_context_destroy(struct u_log_context *ctx);
+
+void
+u_log_add_auto_logger(struct u_log_context *ctx, u_auto_log_fn *callback,
+ void *data);
+
+void
+u_log_flush(struct u_log_context *ctx);
+
+void
+u_log_printf(struct u_log_context *ctx, const char *fmt, ...) _util_printf_format(2,3);
+
+void
+u_log_chunk(struct u_log_context *ctx, const struct u_log_chunk_type *type,
+ void *data);
+
+void
+u_log_new_page_print(struct u_log_context *ctx, FILE *stream);
+
+struct u_log_page *
+u_log_new_page(struct u_log_context *ctx);
+
+void
+u_log_page_destroy(struct u_log_page *page);
+
+void
+u_log_page_print(struct u_log_page *page, FILE *stream);
+
+#endif /* U_LOG_H */
diff -Nru mesa-17.2.4/src/gallium/auxiliary/util/u_mm.c mesa-17.3.3/src/gallium/auxiliary/util/u_mm.c
--- mesa-17.2.4/src/gallium/auxiliary/util/u_mm.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/util/u_mm.c 2018-01-18 21:30:28.000000000 +0000
@@ -183,7 +183,10 @@
assert(size >= 0);
assert(align2 >= 0);
- assert(align2 <= 12); /* sanity check, 2^12 (4KB) enough? */
+ /* Make sure that a byte alignment isn't getting passed for our
+ * power-of-two alignment arg.
+ */
+ assert(align2 < 32);
if (!heap || align2 < 0 || size <= 0)
return NULL;
diff -Nru mesa-17.2.4/src/gallium/auxiliary/util/u_resource.c mesa-17.3.3/src/gallium/auxiliary/util/u_resource.c
--- mesa-17.2.4/src/gallium/auxiliary/util/u_resource.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/util/u_resource.c 2018-01-18 21:30:28.000000000 +0000
@@ -42,6 +42,7 @@
unsigned depth = res->depth0;
unsigned size = 0;
unsigned level;
+ unsigned samples = MAX2(1, res->nr_samples);
for (level = 0; level <= res->last_level; level++) {
unsigned slices;
@@ -54,7 +55,7 @@
slices = res->array_size;
size += (util_format_get_nblocksy(res->format, height) *
- util_format_get_stride(res->format, width) * slices);
+ util_format_get_stride(res->format, width) * slices * samples);
width = u_minify(width, 1);
height = u_minify(height, 1);
diff -Nru mesa-17.2.4/src/gallium/auxiliary/util/u_simple_shaders.c mesa-17.3.3/src/gallium/auxiliary/util/u_simple_shaders.c
--- mesa-17.2.4/src/gallium/auxiliary/util/u_simple_shaders.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/util/u_simple_shaders.c 2018-01-18 21:30:28.000000000 +0000
@@ -66,7 +66,7 @@
return util_make_vertex_passthrough_shader_with_so(pipe, num_attribs,
semantic_names,
semantic_indexes,
- window_space, NULL);
+ window_space, false, NULL);
}
void *
@@ -74,7 +74,7 @@
uint num_attribs,
const uint *semantic_names,
const uint *semantic_indexes,
- bool window_space,
+ bool window_space, bool layered,
const struct pipe_stream_output_info *so)
{
struct ureg_program *ureg;
@@ -100,6 +100,15 @@
ureg_MOV( ureg, dst, src );
}
+ if (layered) {
+ struct ureg_src instance_id =
+ ureg_DECL_system_value(ureg, TGSI_SEMANTIC_INSTANCEID, 0);
+ struct ureg_dst layer = ureg_DECL_output(ureg, TGSI_SEMANTIC_LAYER, 0);
+
+ ureg_MOV(ureg, ureg_writemask(layer, TGSI_WRITEMASK_X),
+ ureg_scalar(instance_id, TGSI_SWIZZLE_X));
+ }
+
ureg_END( ureg );
return ureg_create_shader_with_so_and_destroy( ureg, pipe, so );
@@ -108,28 +117,13 @@
void *util_make_layered_clear_vertex_shader(struct pipe_context *pipe)
{
- static const char text[] =
- "VERT\n"
- "DCL IN[0]\n"
- "DCL IN[1]\n"
- "DCL SV[0], INSTANCEID\n"
- "DCL OUT[0], POSITION\n"
- "DCL OUT[1], GENERIC[0]\n"
- "DCL OUT[2], LAYER\n"
-
- "MOV OUT[0], IN[0]\n"
- "MOV OUT[1], IN[1]\n"
- "MOV OUT[2].x, SV[0].xxxx\n"
- "END\n";
- struct tgsi_token tokens[1000];
- struct pipe_shader_state state;
-
- if (!tgsi_text_translate(text, tokens, ARRAY_SIZE(tokens))) {
- assert(0);
- return NULL;
- }
- pipe_shader_state_from_tgsi(&state, tokens);
- return pipe->create_vs_state(pipe, &state);
+ const unsigned semantic_names[] = {TGSI_SEMANTIC_POSITION,
+ TGSI_SEMANTIC_GENERIC};
+ const unsigned semantic_indices[] = {0, 0};
+
+ return util_make_vertex_passthrough_shader_with_so(pipe, 2, semantic_names,
+ semantic_indices, false,
+ true, NULL);
}
/**
@@ -594,7 +588,7 @@
static void *
util_make_fs_blit_msaa_gen(struct pipe_context *pipe,
- unsigned tgsi_tex,
+ enum tgsi_texture_type tgsi_tex,
const char *samp_type,
const char *output_semantic,
const char *output_mask,
@@ -648,7 +642,7 @@
*/
void *
util_make_fs_blit_msaa_color(struct pipe_context *pipe,
- unsigned tgsi_tex,
+ enum tgsi_texture_type tgsi_tex,
enum tgsi_return_type stype,
enum tgsi_return_type dtype)
{
@@ -688,7 +682,7 @@
*/
void *
util_make_fs_blit_msaa_depth(struct pipe_context *pipe,
- unsigned tgsi_tex)
+ enum tgsi_texture_type tgsi_tex)
{
return util_make_fs_blit_msaa_gen(pipe, tgsi_tex, "FLOAT",
"POSITION", ".z", "", "");
@@ -702,7 +696,7 @@
*/
void *
util_make_fs_blit_msaa_stencil(struct pipe_context *pipe,
- unsigned tgsi_tex)
+ enum tgsi_texture_type tgsi_tex)
{
return util_make_fs_blit_msaa_gen(pipe, tgsi_tex, "UINT",
"STENCIL", ".y", "", "");
@@ -718,7 +712,7 @@
*/
void *
util_make_fs_blit_msaa_depthstencil(struct pipe_context *pipe,
- unsigned tgsi_tex)
+ enum tgsi_texture_type tgsi_tex)
{
static const char shader_templ[] =
"FRAG\n"
@@ -759,7 +753,7 @@
void *
util_make_fs_msaa_resolve(struct pipe_context *pipe,
- unsigned tgsi_tex, unsigned nr_samples,
+ enum tgsi_texture_type tgsi_tex, unsigned nr_samples,
enum tgsi_return_type stype)
{
struct ureg_program *ureg;
@@ -819,7 +813,8 @@
void *
util_make_fs_msaa_resolve_bilinear(struct pipe_context *pipe,
- unsigned tgsi_tex, unsigned nr_samples,
+ enum tgsi_texture_type tgsi_tex,
+ unsigned nr_samples,
enum tgsi_return_type stype)
{
struct ureg_program *ureg;
diff -Nru mesa-17.2.4/src/gallium/auxiliary/util/u_simple_shaders.h mesa-17.3.3/src/gallium/auxiliary/util/u_simple_shaders.h
--- mesa-17.2.4/src/gallium/auxiliary/util/u_simple_shaders.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/util/u_simple_shaders.h 2018-01-18 21:30:28.000000000 +0000
@@ -56,7 +56,7 @@
uint num_attribs,
const uint *semantic_names,
const uint *semantic_indexes,
- bool window_space,
+ bool window_space, bool layered,
const struct pipe_stream_output_info *so);
extern void *
@@ -129,35 +129,36 @@
extern void *
util_make_fs_blit_msaa_color(struct pipe_context *pipe,
- unsigned tgsi_tex,
+ enum tgsi_texture_type tgsi_tex,
enum tgsi_return_type stype,
enum tgsi_return_type dtype);
extern void *
util_make_fs_blit_msaa_depth(struct pipe_context *pipe,
- unsigned tgsi_tex);
+ enum tgsi_texture_type tgsi_tex);
extern void *
util_make_fs_blit_msaa_depthstencil(struct pipe_context *pipe,
- unsigned tgsi_tex);
+ enum tgsi_texture_type tgsi_tex);
void *
util_make_fs_blit_msaa_stencil(struct pipe_context *pipe,
- unsigned tgsi_tex);
+ enum tgsi_texture_type tgsi_tex);
void *
util_make_fs_msaa_resolve(struct pipe_context *pipe,
- unsigned tgsi_tex, unsigned nr_samples,
+ enum tgsi_texture_type tgsi_tex, unsigned nr_samples,
enum tgsi_return_type stype);
void *
util_make_fs_msaa_resolve_bilinear(struct pipe_context *pipe,
- unsigned tgsi_tex, unsigned nr_samples,
+ enum tgsi_texture_type tgsi_tex,
+ unsigned nr_samples,
enum tgsi_return_type stype);
extern void *
diff -Nru mesa-17.2.4/src/gallium/auxiliary/util/u_tests.c mesa-17.3.3/src/gallium/auxiliary/util/u_tests.c
--- mesa-17.2.4/src/gallium/auxiliary/util/u_tests.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/util/u_tests.c 2018-01-18 21:30:28.000000000 +0000
@@ -418,10 +418,10 @@
{
static const char *text = /* I don't like ureg... */
"FRAG\n"
- "DCL CONST[0]\n"
+ "DCL CONST[0][0]\n"
"DCL OUT[0], COLOR\n"
- "MOV OUT[0], CONST[0]\n"
+ "MOV OUT[0], CONST[0][0]\n"
"END\n";
struct tgsi_token tokens[1000];
struct pipe_shader_state state;
@@ -490,6 +490,106 @@
util_report_result(qresult.u64 == 2);
}
+#if defined(PIPE_OS_LINUX) && defined(HAVE_LIBDRM)
+#include <libsync.h>
+#else
+#define sync_merge(str, fd1, fd2) (-1)
+#define sync_wait(fd, timeout) (-1)
+#endif
+
+static void
+test_sync_file_fences(struct pipe_context *ctx)
+{
+ struct pipe_screen *screen = ctx->screen;
+ bool pass = true;
+ /* Fence fd round trip: export two fences, merge them, re-import all three, then wait. */
+ if (!screen->get_param(screen, PIPE_CAP_NATIVE_FENCE_FD))
+ return; /* cap not reported: leave without calling util_report_result */
+
+ struct cso_context *cso = cso_create_context(ctx, 0); /* NOTE(review): only created and destroyed here, never otherwise used */
+ struct pipe_resource *buf =
+ pipe_buffer_create(screen, 0, PIPE_USAGE_DEFAULT, 1024 * 1024);
+ struct pipe_resource *tex =
+ util_create_texture2d(screen, 4096, 1024, PIPE_FORMAT_R8_UNORM);
+ struct pipe_fence_handle *buf_fence = NULL, *tex_fence = NULL;
+
+ /* Run 2 clears, get fences. */
+ uint32_t value = 0;
+ ctx->clear_buffer(ctx, buf, 0, buf->width0, &value, sizeof(value));
+ ctx->flush(ctx, &buf_fence, PIPE_FLUSH_FENCE_FD);
+
+ struct pipe_box box;
+ u_box_2d(0, 0, tex->width0, tex->height0, &box);
+ ctx->clear_texture(ctx, tex, 0, &box, &value);
+ ctx->flush(ctx, &tex_fence, PIPE_FLUSH_FENCE_FD);
+ pass = pass && buf_fence && tex_fence; /* && short-circuits: once pass is false, the right-hand checks of later chains are skipped */
+
+ /* Export fences. */
+ int buf_fd = screen->fence_get_fd(screen, buf_fence);
+ int tex_fd = screen->fence_get_fd(screen, tex_fence);
+ pass = pass && buf_fd >= 0 && tex_fd >= 0;
+
+ /* Merge fences. */
+ int merged_fd = sync_merge("test", buf_fd, tex_fd); /* the fallback macro above yields -1 when libsync is not available */
+ pass = pass && merged_fd >= 0;
+
+ /* (Re)import all fences. */
+ struct pipe_fence_handle *re_buf_fence = NULL, *re_tex_fence = NULL;
+ struct pipe_fence_handle *merged_fence = NULL;
+ ctx->create_fence_fd(ctx, &re_buf_fence, buf_fd);
+ ctx->create_fence_fd(ctx, &re_tex_fence, tex_fd);
+ ctx->create_fence_fd(ctx, &merged_fence, merged_fd);
+ pass = pass && re_buf_fence && re_tex_fence && merged_fence;
+
+ /* Run another clear after waiting for everything. */
+ struct pipe_fence_handle *final_fence = NULL;
+ ctx->fence_server_sync(ctx, merged_fence);
+ value = 0xff;
+ ctx->clear_buffer(ctx, buf, 0, buf->width0, &value, sizeof(value));
+ ctx->flush(ctx, &final_fence, PIPE_FLUSH_FENCE_FD);
+ pass = pass && final_fence;
+
+ /* Wait for the last fence. */
+ int final_fd = screen->fence_get_fd(screen, final_fence);
+ pass = pass && final_fd >= 0;
+ pass = pass && sync_wait(final_fd, -1) == 0; /* -1: presumably "no timeout" in libsync -- confirm */
+
+ /* Check that all fences are signalled (timeout argument 0 from here on). */
+ pass = pass && sync_wait(buf_fd, 0) == 0;
+ pass = pass && sync_wait(tex_fd, 0) == 0;
+ pass = pass && sync_wait(merged_fd, 0) == 0;
+
+ pass = pass && screen->fence_finish(screen, NULL, buf_fence, 0);
+ pass = pass && screen->fence_finish(screen, NULL, tex_fence, 0);
+ pass = pass && screen->fence_finish(screen, NULL, re_buf_fence, 0);
+ pass = pass && screen->fence_finish(screen, NULL, re_tex_fence, 0);
+ pass = pass && screen->fence_finish(screen, NULL, merged_fence, 0);
+ pass = pass && screen->fence_finish(screen, NULL, final_fence, 0);
+
+ /* Cleanup. */
+ if (buf_fd >= 0)
+ close(buf_fd);
+ if (tex_fd >= 0)
+ close(tex_fd);
+ if (merged_fd >= 0)
+ close(merged_fd);
+ if (final_fd >= 0)
+ close(final_fd);
+
+ screen->fence_reference(screen, &buf_fence, NULL);
+ screen->fence_reference(screen, &tex_fence, NULL);
+ screen->fence_reference(screen, &re_buf_fence, NULL);
+ screen->fence_reference(screen, &re_tex_fence, NULL);
+ screen->fence_reference(screen, &merged_fence, NULL);
+ screen->fence_reference(screen, &final_fence, NULL);
+
+ cso_destroy_context(cso);
+ pipe_resource_reference(&buf, NULL);
+ pipe_resource_reference(&tex, NULL);
+
+ util_report_result(pass);
+}
+
/**
* Run all tests. This should be run with a clean context after
* context_create.
@@ -504,6 +604,7 @@
null_sampler_view(ctx, TGSI_TEXTURE_2D);
null_sampler_view(ctx, TGSI_TEXTURE_BUFFER);
util_test_constant_buffer(ctx, NULL);
+ test_sync_file_fences(ctx);
ctx->destroy(ctx);
diff -Nru mesa-17.2.4/src/gallium/auxiliary/util/u_threaded_context.c mesa-17.3.3/src/gallium/auxiliary/util/u_threaded_context.c
--- mesa-17.2.4/src/gallium/auxiliary/util/u_threaded_context.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/util/u_threaded_context.c 2018-01-18 21:30:28.000000000 +0000
@@ -1297,10 +1297,10 @@
/* Handle CPU reads trivially. */
if (usage & PIPE_TRANSFER_READ) {
- /* Driver aren't allowed to do buffer invalidations. */
+ /* Drivers aren't allowed to do buffer invalidations. */
return (usage & ~PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) |
TC_TRANSFER_MAP_NO_INVALIDATE |
- TC_TRANSFER_MAP_IGNORE_VALID_RANGE;
+ TC_TRANSFER_MAP_NO_INFER_UNSYNCHRONIZED;
}
/* See if the buffer range being mapped has never been initialized,
@@ -1337,13 +1337,15 @@
usage &= ~PIPE_TRANSFER_DISCARD_RANGE;
/* Unsychronized buffer mappings don't have to synchronize the thread. */
- if (usage & PIPE_TRANSFER_UNSYNCHRONIZED)
+ if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
+ usage &= ~PIPE_TRANSFER_DISCARD_RANGE;
usage |= TC_TRANSFER_MAP_THREADED_UNSYNC; /* notify the driver */
+ }
/* Never invalidate inside the driver and never infer "unsynchronized". */
return usage |
TC_TRANSFER_MAP_NO_INVALIDATE |
- TC_TRANSFER_MAP_IGNORE_VALID_RANGE;
+ TC_TRANSFER_MAP_NO_INFER_UNSYNCHRONIZED;
}
static void *
@@ -1978,9 +1980,11 @@
tc_call_generate_mipmap(struct pipe_context *pipe, union tc_payload *payload)
{
struct tc_generate_mipmap *p = (struct tc_generate_mipmap *)payload;
- bool result = pipe->generate_mipmap(pipe, p->res, p->format, p->base_level,
- p->last_level, p->first_layer,
- p->last_layer);
+ bool MAYBE_UNUSED result = pipe->generate_mipmap(pipe, p->res, p->format,
+ p->base_level,
+ p->last_level,
+ p->first_layer,
+ p->last_layer);
assert(result);
pipe_resource_reference(&p->res, NULL);
}
@@ -2229,6 +2233,13 @@
struct threaded_context *tc = threaded_context(_pipe);
struct pipe_context *pipe = tc->pipe;
+ if (tc->base.const_uploader &&
+ tc->base.stream_uploader != tc->base.const_uploader)
+ u_upload_destroy(tc->base.const_uploader);
+
+ if (tc->base.stream_uploader)
+ u_upload_destroy(tc->base.stream_uploader);
+
tc_sync(tc);
if (util_queue_is_initialized(&tc->queue)) {
@@ -2238,14 +2249,8 @@
util_queue_fence_destroy(&tc->batch_slots[i].fence);
}
- if (tc->base.const_uploader &&
- tc->base.stream_uploader != tc->base.const_uploader)
- u_upload_destroy(tc->base.const_uploader);
-
- if (tc->base.stream_uploader)
- u_upload_destroy(tc->base.stream_uploader);
-
slab_destroy_child(&tc->pool_transfers);
+ assert(tc->batch_slots[tc->next].num_total_call_slots == 0);
pipe->destroy(pipe);
os_free_aligned(tc);
}
diff -Nru mesa-17.2.4/src/gallium/auxiliary/util/u_threaded_context.h mesa-17.3.3/src/gallium/auxiliary/util/u_threaded_context.h
--- mesa-17.2.4/src/gallium/auxiliary/util/u_threaded_context.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/util/u_threaded_context.h 2018-01-18 21:30:28.000000000 +0000
@@ -94,7 +94,7 @@
*
* 2) The driver isn't allowed to infer unsychronized mappings by tracking
* the valid buffer range. The threaded context always sends TC_TRANSFER_-
- * MAP_IGNORE_VALID_RANGE to indicate this. Ignoring the flag will lead
+ * MAP_NO_INFER_UNSYNCHRONIZED to indicate this. Ignoring the flag will lead
* to failures.
* The threaded context does its own detection of unsynchronized mappings.
*
@@ -166,7 +166,7 @@
/* These are transfer flags sent to drivers. */
/* Never infer whether it's safe to use unsychronized mappings: */
-#define TC_TRANSFER_MAP_IGNORE_VALID_RANGE (1u << 29)
+#define TC_TRANSFER_MAP_NO_INFER_UNSYNCHRONIZED (1u << 29)
/* Don't invalidate buffers: */
#define TC_TRANSFER_MAP_NO_INVALIDATE (1u << 30)
/* transfer_map is called from a non-driver thread: */
diff -Nru mesa-17.2.4/src/gallium/auxiliary/util/u_vbuf.c mesa-17.3.3/src/gallium/auxiliary/util/u_vbuf.c
--- mesa-17.2.4/src/gallium/auxiliary/util/u_vbuf.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/util/u_vbuf.c 2018-01-18 21:30:28.000000000 +0000
@@ -513,9 +513,9 @@
mgr->real_vertex_buffer[out_vb].stride = key->output_stride;
/* Move the buffer reference. */
- pipe_resource_reference(
- &mgr->real_vertex_buffer[out_vb].buffer.resource, NULL);
+ pipe_vertex_buffer_unreference(&mgr->real_vertex_buffer[out_vb]);
mgr->real_vertex_buffer[out_vb].buffer.resource = out_buffer;
+ mgr->real_vertex_buffer[out_vb].is_user_buffer = false;
return PIPE_OK;
}
@@ -833,8 +833,7 @@
unsigned dst_index = start_slot + i;
pipe_vertex_buffer_unreference(&mgr->vertex_buffer[dst_index]);
- pipe_resource_reference(&mgr->real_vertex_buffer[dst_index].buffer.resource,
- NULL);
+ pipe_vertex_buffer_unreference(&mgr->real_vertex_buffer[dst_index]);
}
pipe->set_vertex_buffers(pipe, start_slot, count, NULL);
diff -Nru mesa-17.2.4/src/gallium/auxiliary/util/u_video.h mesa-17.3.3/src/gallium/auxiliary/util/u_video.h
--- mesa-17.2.4/src/gallium/auxiliary/util/u_video.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/util/u_video.h 2018-01-18 21:30:28.000000000 +0000
@@ -76,6 +76,9 @@
case PIPE_VIDEO_PROFILE_HEVC_MAIN_444:
return PIPE_VIDEO_FORMAT_HEVC;
+ case PIPE_VIDEO_PROFILE_JPEG_BASELINE:
+ return PIPE_VIDEO_FORMAT_JPEG;
+
default:
return PIPE_VIDEO_FORMAT_UNKNOWN;
}
diff -Nru mesa-17.2.4/src/gallium/auxiliary/vl/vl_compositor.c mesa-17.3.3/src/gallium/auxiliary/vl/vl_compositor.c
--- mesa-17.2.4/src/gallium/auxiliary/vl/vl_compositor.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/vl/vl_compositor.c 2018-01-18 21:30:28.000000000 +0000
@@ -34,6 +34,7 @@
#include "util/u_draw.h"
#include "util/u_surface.h"
#include "util/u_upload_mgr.h"
+#include "util/u_sampler.h"
#include "tgsi/tgsi_ureg.h"
@@ -239,20 +240,13 @@
ureg_release_temporary(shader, temp[i]);
}
-static void *
-create_frag_shader_video_buffer(struct vl_compositor *c)
+static void
+create_frag_shader_yuv(struct ureg_program *shader, struct ureg_dst texel)
{
- struct ureg_program *shader;
struct ureg_src tc;
struct ureg_src sampler[3];
- struct ureg_dst texel;
- struct ureg_dst fragment;
unsigned i;
- shader = ureg_create(PIPE_SHADER_FRAGMENT);
- if (!shader)
- return false;
-
tc = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX, TGSI_INTERPOLATE_LINEAR);
for (i = 0; i < 3; ++i) {
sampler[i] = ureg_DECL_sampler(shader, i);
@@ -262,17 +256,29 @@
TGSI_RETURN_TYPE_FLOAT,
TGSI_RETURN_TYPE_FLOAT);
}
-
- texel = ureg_DECL_temporary(shader);
- fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
/*
* texel.xyz = tex(tc, sampler[i])
- * fragment = csc * texel
*/
for (i = 0; i < 3; ++i)
ureg_TEX(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), TGSI_TEXTURE_2D_ARRAY, tc, sampler[i]);
+}
+
+static void *
+create_frag_shader_video_buffer(struct vl_compositor *c)
+{
+ struct ureg_program *shader;
+ struct ureg_dst texel;
+ struct ureg_dst fragment;
+
+ shader = ureg_create(PIPE_SHADER_FRAGMENT);
+ if (!shader)
+ return false;
+
+ texel = ureg_DECL_temporary(shader);
+ fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
+ create_frag_shader_yuv(shader, texel);
create_frag_shader_csc(shader, texel, fragment);
ureg_release_temporary(shader, texel);
@@ -305,7 +311,7 @@
}
static void *
-create_frag_shader_weave_yuv(struct vl_compositor *c, bool y)
+create_frag_shader_deint_yuv(struct vl_compositor *c, bool y, bool w)
{
struct ureg_program *shader;
struct ureg_dst texel, fragment;
@@ -317,7 +323,10 @@
texel = ureg_DECL_temporary(shader);
fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
- create_frag_shader_weave(shader, texel);
+ if (w)
+ create_frag_shader_weave(shader, texel);
+ else
+ create_frag_shader_yuv(shader, texel);
if (y)
ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_X), ureg_src(texel));
@@ -424,6 +433,43 @@
return ureg_create_shader_and_destroy(shader, c->pipe);
}
+static void *
+create_frag_shader_rgb_yuv(struct vl_compositor *c, bool y)
+{
+   /* Fragment shader for RGB -> YUV: \p y selects the Y pass, else the UV pass. */
+   struct ureg_program *shader;
+   struct ureg_src tc, sampler;
+   struct ureg_dst texel, fragment;
+   struct ureg_src csc[3];
+   unsigned i;
+
+   shader = ureg_create(PIPE_SHADER_FRAGMENT);
+   if (!shader)
+      return NULL;
+
+   for (i = 0; i < 3; ++i)
+      csc[i] = ureg_DECL_constant(shader, i); /* CONST[i]: dotted with the texel below */
+
+   sampler = ureg_DECL_sampler(shader, 0);
+   tc = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX, TGSI_INTERPOLATE_LINEAR);
+   texel = ureg_DECL_temporary(shader);
+   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
+
+   ureg_TEX(shader, texel, TGSI_TEXTURE_2D, tc, sampler);
+
+   if (y) { /* Y pass: fragment.x = dot(csc[0], texel) */
+      ureg_DP4(shader, ureg_writemask(fragment, TGSI_WRITEMASK_X), csc[0], ureg_src(texel));
+   } else { /* UV pass: fragment.x = dot(csc[1], texel), fragment.y = dot(csc[2], texel) */
+      for (i = 0; i < 2; ++i)
+         ureg_DP4(shader, ureg_writemask(fragment, TGSI_WRITEMASK_X << i), csc[i + 1], ureg_src(texel));
+   }
+
+   ureg_release_temporary(shader, texel);
+   ureg_END(shader);
+
+   return ureg_create_shader_and_destroy(shader, c->pipe);
+}
+
static bool
init_shaders(struct vl_compositor *c)
{
@@ -447,10 +493,13 @@
return false;
}
- c->fs_weave_yuv.y = create_frag_shader_weave_yuv(c, true);
- c->fs_weave_yuv.uv = create_frag_shader_weave_yuv(c, false);
- if (!c->fs_weave_yuv.y || !c->fs_weave_yuv.uv) {
- debug_printf("Unable to create YCbCr i-to-YCbCr p weave fragment shader.\n");
+ c->fs_yuv.weave.y = create_frag_shader_deint_yuv(c, true, true);
+ c->fs_yuv.weave.uv = create_frag_shader_deint_yuv(c, false, true);
+ c->fs_yuv.bob.y = create_frag_shader_deint_yuv(c, true, false);
+ c->fs_yuv.bob.uv = create_frag_shader_deint_yuv(c, false, false);
+ if (!c->fs_yuv.weave.y || !c->fs_yuv.weave.uv ||
+ !c->fs_yuv.bob.y || !c->fs_yuv.bob.uv) {
+ debug_printf("Unable to create YCbCr i-to-YCbCr p deint fragment shader.\n");
return false;
}
@@ -472,6 +521,13 @@
return false;
}
+ c->fs_rgb_yuv.y = create_frag_shader_rgb_yuv(c, true);
+ c->fs_rgb_yuv.uv = create_frag_shader_rgb_yuv(c, false);
+ if (!c->fs_rgb_yuv.y || !c->fs_rgb_yuv.uv) {
+ debug_printf("Unable to create RGB-to-YUV fragment shader.\n");
+ return false;
+ }
+
return true;
}
@@ -482,11 +538,15 @@
c->pipe->delete_vs_state(c->pipe, c->vs);
c->pipe->delete_fs_state(c->pipe, c->fs_video_buffer);
c->pipe->delete_fs_state(c->pipe, c->fs_weave_rgb);
- c->pipe->delete_fs_state(c->pipe, c->fs_weave_yuv.y);
- c->pipe->delete_fs_state(c->pipe, c->fs_weave_yuv.uv);
+ c->pipe->delete_fs_state(c->pipe, c->fs_yuv.weave.y);
+ c->pipe->delete_fs_state(c->pipe, c->fs_yuv.weave.uv);
+ c->pipe->delete_fs_state(c->pipe, c->fs_yuv.bob.y);
+ c->pipe->delete_fs_state(c->pipe, c->fs_yuv.bob.uv);
c->pipe->delete_fs_state(c->pipe, c->fs_palette.yuv);
c->pipe->delete_fs_state(c->pipe, c->fs_palette.rgb);
c->pipe->delete_fs_state(c->pipe, c->fs_rgba);
+ c->pipe->delete_fs_state(c->pipe, c->fs_rgb_yuv.y);
+ c->pipe->delete_fs_state(c->pipe, c->fs_rgb_yuv.uv);
}
static bool
@@ -885,6 +945,85 @@
}
}
+static void
+set_yuv_layer(struct vl_compositor_state *s, struct vl_compositor *c,
+ unsigned layer, struct pipe_video_buffer *buffer,
+ struct u_rect *src_rect, struct u_rect *dst_rect,
+ bool y, enum vl_compositor_deinterlace deinterlace)
+{
+ struct pipe_sampler_view **sampler_views;
+ float half_a_line;
+ unsigned i;
+ /* Set up `layer` to sample the three component views of `buffer`; `y` picks the Y or UV shader. */
+ assert(s && c && buffer);
+
+ assert(layer < VL_COMPOSITOR_MAX_LAYERS);
+
+ s->used_layers |= 1 << layer;
+ sampler_views = buffer->get_sampler_view_components(buffer);
+ for (i = 0; i < 3; ++i) {
+ s->layers[layer].samplers[i] = c->sampler_linear;
+ pipe_sampler_view_reference(&s->layers[layer].sampler_views[i], sampler_views[i]);
+ }
+
+ calc_src_and_dst(&s->layers[layer], buffer->width, buffer->height,
+ src_rect ? *src_rect : default_rect(&s->layers[layer]), /* NULL rect: fall back to default_rect() */
+ dst_rect ? *dst_rect : default_rect(&s->layers[layer]));
+
+ half_a_line = 0.5f / s->layers[layer].zw.y; /* zw.y is presumably filled in by calc_src_and_dst above -- confirm */
+
+ switch(deinterlace) {
+ case VL_COMPOSITOR_BOB_TOP:
+ s->layers[layer].zw.x = 0.0f; /* NOTE(review): zw.x looks like the field selector (0 here, 1 for bottom) -- confirm in the shader */
+ s->layers[layer].src.tl.y += half_a_line;
+ s->layers[layer].src.br.y += half_a_line;
+ s->layers[layer].fs = (y) ? c->fs_yuv.bob.y : c->fs_yuv.bob.uv;
+ break;
+
+ case VL_COMPOSITOR_BOB_BOTTOM:
+ s->layers[layer].zw.x = 1.0f;
+ s->layers[layer].src.tl.y -= half_a_line; /* same half-line shift as BOB_TOP, opposite direction */
+ s->layers[layer].src.br.y -= half_a_line;
+ s->layers[layer].fs = (y) ? c->fs_yuv.bob.y : c->fs_yuv.bob.uv;
+ break;
+
+ default: /* every other deinterlace value uses the weave shaders, with no source shift */
+ s->layers[layer].fs = (y) ? c->fs_yuv.weave.y : c->fs_yuv.weave.uv;
+ break;
+ }
+}
+
+static void
+set_rgb_to_yuv_layer(struct vl_compositor_state *s, struct vl_compositor *c,
+ unsigned layer, struct pipe_sampler_view *v,
+ struct u_rect *src_rect, struct u_rect *dst_rect, bool y)
+{
+ vl_csc_matrix csc_matrix;
+ /* Set up `layer` to sample the single view `v` with the RGB->YUV shader; `y` picks Y or UV. */
+ assert(s && c && v);
+
+ assert(layer < VL_COMPOSITOR_MAX_LAYERS);
+
+ s->used_layers |= 1 << layer;
+
+ s->layers[layer].fs = y? c->fs_rgb_yuv.y : c->fs_rgb_yuv.uv;
+ /* Note: the matrix is installed on the state `s` as a whole, not on this layer alone. */
+ vl_csc_get_matrix(VL_CSC_COLOR_STANDARD_BT_709_REV, NULL, false, &csc_matrix);
+ vl_compositor_set_csc_matrix(s, (const vl_csc_matrix *)&csc_matrix, 1.0f, 0.0f);
+
+ s->layers[layer].samplers[0] = c->sampler_linear;
+ s->layers[layer].samplers[1] = NULL; /* only slot 0 is used here */
+ s->layers[layer].samplers[2] = NULL;
+
+ pipe_sampler_view_reference(&s->layers[layer].sampler_views[0], v);
+ pipe_sampler_view_reference(&s->layers[layer].sampler_views[1], NULL); /* clear views in the unused slots */
+ pipe_sampler_view_reference(&s->layers[layer].sampler_views[2], NULL);
+
+ calc_src_and_dst(&s->layers[layer], v->texture->width0, v->texture->height0,
+ src_rect ? *src_rect : default_rect(&s->layers[layer]), /* NULL rect: fall back to default_rect() */
+ dst_rect ? *dst_rect : default_rect(&s->layers[layer]));
+}
+
void
vl_compositor_reset_dirty_area(struct u_rect *dirty)
{
@@ -1143,33 +1282,70 @@
}
void
-vl_compositor_set_yuv_layer(struct vl_compositor_state *s,
- struct vl_compositor *c,
- unsigned layer,
- struct pipe_video_buffer *buffer,
- struct u_rect *src_rect,
- struct u_rect *dst_rect,
- bool y)
+vl_compositor_yuv_deint_full(struct vl_compositor_state *s,
+ struct vl_compositor *c,
+ struct pipe_video_buffer *src,
+ struct pipe_video_buffer *dst,
+ struct u_rect *src_rect,
+ struct u_rect *dst_rect,
+ enum vl_compositor_deinterlace deinterlace)
{
- struct pipe_sampler_view **sampler_views;
- unsigned i;
+ struct pipe_surface **dst_surfaces;
- assert(s && c && buffer);
+ dst_surfaces = dst->get_surfaces(dst);
+ vl_compositor_clear_layers(s);
- assert(layer < VL_COMPOSITOR_MAX_LAYERS);
+ set_yuv_layer(s, c, 0, src, src_rect, NULL, true, deinterlace);
+ vl_compositor_set_layer_dst_area(s, 0, dst_rect);
+ vl_compositor_render(s, c, dst_surfaces[0], NULL, false);
- s->used_layers |= 1 << layer;
- sampler_views = buffer->get_sampler_view_components(buffer);
- for (i = 0; i < 3; ++i) {
- s->layers[layer].samplers[i] = c->sampler_linear;
- pipe_sampler_view_reference(&s->layers[layer].sampler_views[i], sampler_views[i]);
+ if (dst_rect) {
+ dst_rect->x1 /= 2;
+ dst_rect->y1 /= 2;
}
- calc_src_and_dst(&s->layers[layer], buffer->width, buffer->height,
- src_rect ? *src_rect : default_rect(&s->layers[layer]),
- dst_rect ? *dst_rect : default_rect(&s->layers[layer]));
+ set_yuv_layer(s, c, 0, src, src_rect, NULL, false, deinterlace);
+ vl_compositor_set_layer_dst_area(s, 0, dst_rect);
+ vl_compositor_render(s, c, dst_surfaces[1], NULL, false);
+
+ s->pipe->flush(s->pipe, NULL, 0);
+}
+
+void
+vl_compositor_convert_rgb_to_yuv(struct vl_compositor_state *s,
+ struct vl_compositor *c,
+ unsigned layer,
+ struct pipe_resource *src_res,
+ struct pipe_video_buffer *dst,
+ struct u_rect *src_rect,
+ struct u_rect *dst_rect)
+{
+ struct pipe_sampler_view *sv, sv_templ;
+ struct pipe_surface **dst_surfaces;
+
+ dst_surfaces = dst->get_surfaces(dst);
+
+ memset(&sv_templ, 0, sizeof(sv_templ));
+ u_sampler_view_default_template(&sv_templ, src_res, src_res->format);
+ sv = s->pipe->create_sampler_view(s->pipe, src_res, &sv_templ);
+
+ vl_compositor_clear_layers(s);
+
+ set_rgb_to_yuv_layer(s, c, 0, sv, src_rect, NULL, true);
+ vl_compositor_set_layer_dst_area(s, 0, dst_rect);
+ vl_compositor_render(s, c, dst_surfaces[0], NULL, false);
+
+ if (dst_rect) {
+ dst_rect->x1 /= 2;
+ dst_rect->y1 /= 2;
+ }
+
+ set_rgb_to_yuv_layer(s, c, 0, sv, src_rect, NULL, false);
+ vl_compositor_set_layer_dst_area(s, 0, dst_rect);
+ vl_compositor_render(s, c, dst_surfaces[1], NULL, false);
+ pipe_sampler_view_reference(&sv, NULL);
- s->layers[layer].fs = (y) ? c->fs_weave_yuv.y : c->fs_weave_yuv.uv;
+ s->pipe->flush(s->pipe, NULL, 0);
}
void
diff -Nru mesa-17.2.4/src/gallium/auxiliary/vl/vl_compositor.h mesa-17.3.3/src/gallium/auxiliary/vl/vl_compositor.h
--- mesa-17.2.4/src/gallium/auxiliary/vl/vl_compositor.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/vl/vl_compositor.h 2018-01-18 21:30:28.000000000 +0000
@@ -116,14 +116,25 @@
void *fs_rgba;
struct {
- void *y;
- void *uv;
- } fs_weave_yuv;
+ struct {
+ void *y;
+ void *uv;
+ } weave;
+ struct {
+ void *y;
+ void *uv;
+ } bob;
+ } fs_yuv;
struct {
void *rgb;
void *yuv;
} fs_palette;
+
+ struct {
+ void *y;
+ void *uv;
+ } fs_rgb_yuv;
};
/**
@@ -241,16 +252,28 @@
enum vl_compositor_rotation rotate);
/**
- * set a layer of y or uv to render
+ * deinterlace yuv buffer with full abilities
*/
void
-vl_compositor_set_yuv_layer(struct vl_compositor_state *s,
- struct vl_compositor *c,
- unsigned layer,
- struct pipe_video_buffer *buffer,
- struct u_rect *src_rect,
- struct u_rect *dst_rect,
- bool y);
+vl_compositor_yuv_deint_full(struct vl_compositor_state *state,
+ struct vl_compositor *compositor,
+ struct pipe_video_buffer *src,
+ struct pipe_video_buffer *dst,
+ struct u_rect *src_rect,
+ struct u_rect *dst_rect,
+ enum vl_compositor_deinterlace deinterlace);
+
+/**
+ * convert rgb to yuv
+ */
+void
+vl_compositor_convert_rgb_to_yuv(struct vl_compositor_state *state,
+ struct vl_compositor *compositor,
+ unsigned layer,
+ struct pipe_resource *src_res,
+ struct pipe_video_buffer *dst,
+ struct u_rect *src_rect,
+ struct u_rect *dst_rect);
/*@}*/
diff -Nru mesa-17.2.4/src/gallium/auxiliary/vl/vl_csc.c mesa-17.3.3/src/gallium/auxiliary/vl/vl_csc.c
--- mesa-17.2.4/src/gallium/auxiliary/vl/vl_csc.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/vl/vl_csc.c 2018-01-18 21:30:28.000000000 +0000
@@ -131,6 +131,12 @@
{ 1.0f, 1.785f, 0.0f, 0.0f, }
};
+static const vl_csc_matrix bt_709_rev = {
+ { 0.183f, 0.614f, 0.062f, 0.0625f},
+ {-0.101f, -0.338f, 0.439f, 0.5f },
+ { 0.439f, -0.399f, -0.040f, 0.5f }
+};
+
static const vl_csc_matrix identity =
{
{ 1.0f, 0.0f, 0.0f, 0.0f, },
@@ -184,6 +190,9 @@
case VL_CSC_COLOR_STANDARD_SMPTE_240M:
cstd = &smpte240m;
break;
+ case VL_CSC_COLOR_STANDARD_BT_709_REV:
+ memcpy(matrix, bt_709_rev, sizeof(vl_csc_matrix));
+ return;
case VL_CSC_COLOR_STANDARD_IDENTITY:
default:
assert(cs == VL_CSC_COLOR_STANDARD_IDENTITY);
diff -Nru mesa-17.2.4/src/gallium/auxiliary/vl/vl_csc.h mesa-17.3.3/src/gallium/auxiliary/vl/vl_csc.h
--- mesa-17.2.4/src/gallium/auxiliary/vl/vl_csc.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/vl/vl_csc.h 2018-01-18 21:30:28.000000000 +0000
@@ -45,7 +45,8 @@
VL_CSC_COLOR_STANDARD_IDENTITY,
VL_CSC_COLOR_STANDARD_BT_601,
VL_CSC_COLOR_STANDARD_BT_709,
- VL_CSC_COLOR_STANDARD_SMPTE_240M
+ VL_CSC_COLOR_STANDARD_SMPTE_240M,
+ VL_CSC_COLOR_STANDARD_BT_709_REV
};
extern const struct vl_procamp vl_default_procamp;
diff -Nru mesa-17.2.4/src/gallium/auxiliary/vl/vl_winsys_dri3.c mesa-17.3.3/src/gallium/auxiliary/vl/vl_winsys_dri3.c
--- mesa-17.2.4/src/gallium/auxiliary/vl/vl_winsys_dri3.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/vl/vl_winsys_dri3.c 2018-01-18 21:30:28.000000000 +0000
@@ -817,7 +817,7 @@
free(geom_reply);
if (pipe_loader_drm_probe_fd(&scrn->base.dev, fd))
- scrn->base.pscreen = pipe_loader_create_screen(scrn->base.dev, 0);
+ scrn->base.pscreen = pipe_loader_create_screen(scrn->base.dev);
if (!scrn->base.pscreen)
goto release_pipe;
diff -Nru mesa-17.2.4/src/gallium/auxiliary/vl/vl_winsys_dri.c mesa-17.3.3/src/gallium/auxiliary/vl/vl_winsys_dri.c
--- mesa-17.2.4/src/gallium/auxiliary/vl/vl_winsys_dri.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/vl/vl_winsys_dri.c 2018-01-18 21:30:28.000000000 +0000
@@ -181,7 +181,7 @@
struct vl_dri_screen *scrn = (struct vl_dri_screen *)vscreen;
struct winsys_handle dri2_handle;
- struct pipe_resource template, *tex;
+ struct pipe_resource templ, *tex;
xcb_dri2_get_buffers_reply_t *reply;
xcb_dri2_dri2_buffer_t *buffers, *back_left;
@@ -235,19 +235,19 @@
dri2_handle.handle = back_left->name;
dri2_handle.stride = back_left->pitch;
- memset(&template, 0, sizeof(template));
- template.target = PIPE_TEXTURE_2D;
- template.format = PIPE_FORMAT_B8G8R8X8_UNORM;
- template.last_level = 0;
- template.width0 = reply->width;
- template.height0 = reply->height;
- template.depth0 = 1;
- template.array_size = 1;
- template.usage = PIPE_USAGE_DEFAULT;
- template.bind = PIPE_BIND_RENDER_TARGET;
- template.flags = 0;
+ memset(&templ, 0, sizeof(templ));
+ templ.target = PIPE_TEXTURE_2D;
+ templ.format = PIPE_FORMAT_B8G8R8X8_UNORM;
+ templ.last_level = 0;
+ templ.width0 = reply->width;
+ templ.height0 = reply->height;
+ templ.depth0 = 1;
+ templ.array_size = 1;
+ templ.usage = PIPE_USAGE_DEFAULT;
+ templ.bind = PIPE_BIND_RENDER_TARGET;
+ templ.flags = 0;
- tex = scrn->base.pscreen->resource_from_handle(scrn->base.pscreen, &template,
+ tex = scrn->base.pscreen->resource_from_handle(scrn->base.pscreen, &templ,
&dri2_handle,
PIPE_HANDLE_USAGE_READ_WRITE);
free(reply);
@@ -406,7 +406,7 @@
goto free_authenticate;
if (pipe_loader_drm_probe_fd(&scrn->base.dev, fd))
- scrn->base.pscreen = pipe_loader_create_screen(scrn->base.dev, 0);
+ scrn->base.pscreen = pipe_loader_create_screen(scrn->base.dev);
if (!scrn->base.pscreen)
goto release_pipe;
diff -Nru mesa-17.2.4/src/gallium/auxiliary/vl/vl_winsys_drm.c mesa-17.3.3/src/gallium/auxiliary/vl/vl_winsys_drm.c
--- mesa-17.2.4/src/gallium/auxiliary/vl/vl_winsys_drm.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/auxiliary/vl/vl_winsys_drm.c 2018-01-18 21:30:28.000000000 +0000
@@ -52,7 +52,7 @@
goto free_screen;
if (pipe_loader_drm_probe_fd(&vscreen->dev, new_fd))
- vscreen->pscreen = pipe_loader_create_screen(vscreen->dev, 0);
+ vscreen->pscreen = pipe_loader_create_screen(vscreen->dev);
if (!vscreen->pscreen)
goto release_pipe;
diff -Nru mesa-17.2.4/src/gallium/docs/llvm-todo.txt mesa-17.3.3/src/gallium/docs/llvm-todo.txt
--- mesa-17.2.4/src/gallium/docs/llvm-todo.txt 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/docs/llvm-todo.txt 1970-01-01 00:00:00.000000000 +0000
@@ -1,14 +0,0 @@
-TODO covering gallivm/llvmpipe
-==============================
-
-Goal: GL3.0 support in llvmpipe
--------------------------------
-
-EXT_packed_float support.
-ARB_depth_buffer_float support.
-EXT_framebuffer_sRGB support.
-MSAA support.
-
-Goal: extension parity with softpipe:
--------------------------------------
-
diff -Nru mesa-17.2.4/src/gallium/docs/source/context.rst mesa-17.3.3/src/gallium/docs/source/context.rst
--- mesa-17.2.4/src/gallium/docs/source/context.rst 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/docs/source/context.rst 2018-01-18 21:30:28.000000000 +0000
@@ -118,7 +118,7 @@
is being cast to another format. Casting can be done only between compatible
formats, that is formats that have matching component order and sizes.
-Swizzle fields specify they way in which fetched texel components are placed
+Swizzle fields specify the way in which fetched texel components are placed
in the result register. For example, ``swizzle_r`` specifies what is going to be
placed in first component of result register.
@@ -394,6 +394,12 @@
for all other cases.
This query can be used with ``render_condition``.
+In cases where a conservative approximation of an occlusion query is enough,
+``PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE`` should be used. It behaves
+like ``PIPE_QUERY_OCCLUSION_PREDICATE``, except that it may return TRUE in
+additional, implementation-dependent cases.
+This query can be used with ``render_condition``.
+
``PIPE_QUERY_TIME_ELAPSED`` returns the amount of time, in nanoseconds,
the context takes to perform operations.
The result is an unsigned 64-bit integer.
@@ -428,9 +434,17 @@
unclear if it should be increased if stream output is not active.
``PIPE_QUERY_SO_OVERFLOW_PREDICATE`` returns a boolean value indicating
-whether the stream output targets have overflowed as a result of the
+whether a selected stream output target has overflowed as a result of the
commands issued between ``begin_query`` and ``end_query``.
-This query can be used with ``render_condition``.
+This query can be used with ``render_condition``. The output stream is
+selected by the stream number passed to ``create_query``.
+
+``PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE`` returns a boolean value indicating
+whether any stream output target has overflowed as a result of the commands
+issued between ``begin_query`` and ``end_query``. This query can be used
+with ``render_condition``, and its result is the logical OR of multiple
+``PIPE_QUERY_SO_OVERFLOW_PREDICATE`` queries, one for each stream output
+target.
``PIPE_QUERY_GPU_FINISHED`` returns a boolean value indicating whether
all commands issued before ``end_query`` have completed. However, this
diff -Nru mesa-17.2.4/src/gallium/docs/source/screen.rst mesa-17.3.3/src/gallium/docs/source/screen.rst
--- mesa-17.2.4/src/gallium/docs/source/screen.rst 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/docs/source/screen.rst 2018-01-18 21:30:28.000000000 +0000
@@ -396,6 +396,21 @@
``TGSI_PROPERTY_FS_POST_DEPTH_COVERAGE`` is supported.
* ``PIPE_CAP_BINDLESS_TEXTURE``: Whether bindless texture operations are
supported.
+* ``PIPE_CAP_NIR_SAMPLERS_AS_DEREF``: Whether NIR tex instructions should
+ reference texture and sampler as NIR derefs instead of by indices.
+* ``PIPE_CAP_QUERY_SO_OVERFLOW``: Whether the
+ ``PIPE_QUERY_SO_OVERFLOW_PREDICATE`` and
+ ``PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE`` query types are supported. Note that
+ for a driver that does not support multiple output streams (i.e.,
+ ``PIPE_CAP_MAX_VERTEX_STREAMS`` is 1), both query types are identical.
+* ``PIPE_CAP_MEMOBJ``: Whether operations on memory objects are supported.
+* ``PIPE_CAP_LOAD_CONSTBUF``: True if the driver supports TGSI_OPCODE_LOAD use
+ with constant buffers.
+* ``PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS``: Any TGSI register can be used as
+ an address for indirect register indexing.
+* ``PIPE_CAP_TILE_RASTER_ORDER``: Whether the driver supports
+ GL_MESA_tile_raster_order, using the tile_raster_order_* fields in
+ pipe_rasterizer_state.
.. _pipe_capf:
@@ -438,21 +453,16 @@
* ``PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE``: The maximum size per constant buffer in bytes.
* ``PIPE_SHADER_CAP_MAX_CONST_BUFFERS``: Maximum number of constant buffers that can be bound
to any shader stage using ``set_constant_buffer``. If 0 or 1, the pipe will
- only permit binding one constant buffer per shader, and the shaders will
- not permit two-dimensional access to constants.
+ only permit binding one constant buffer per shader.
If a value greater than 0 is returned, the driver can have multiple
-constant buffers bound to shader stages. The CONST register file can
-be accessed with two-dimensional indices, like in the example below.
+constant buffers bound to shader stages. The CONST register file is
+accessed with two-dimensional indices, like in the example below.
DCL CONST[0][0..7] # declare first 8 vectors of constbuf 0
DCL CONST[3][0] # declare first vector of constbuf 3
MOV OUT[0], CONST[0][3] # copy vector 3 of constbuf 0
-For backwards compatibility, one-dimensional access to CONST register
-file is still supported. In that case, the constbuf index is assumed
-to be 0.
-
* ``PIPE_SHADER_CAP_MAX_TEMPS``: The maximum number of temporary registers.
* ``PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED``: Whether the continue opcode is supported.
* ``PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR``: Whether indirect addressing
@@ -467,6 +477,9 @@
BGNSUB, ENDSUB, CAL, and RET, including RET in the main block.
* ``PIPE_SHADER_CAP_INTEGERS``: Whether integer opcodes are supported.
If unsupported, only float opcodes are supported.
+* ``PIPE_SHADER_CAP_INT64_ATOMICS``: Whether int64 atomic opcodes are supported. The device needs to support add, sub, swap, cmpswap, and, or, xor, min, and max.
+* ``PIPE_SHADER_CAP_FP16``: Whether half precision floating-point opcodes are supported.
+ If unsupported, half precision ops need to be lowered to full precision.
* ``PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS``: The maximum number of texture
samplers.
* ``PIPE_SHADER_CAP_PREFERRED_IR``: Preferred representation of the
@@ -477,6 +490,7 @@
is supported. If it is, DTRUNC/DCEIL/DFLR/DROUND opcodes may be used.
* ``PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED``: Whether DFRACEXP and
DLDEXP are supported.
+* ``PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED``: Whether LDEXP is supported.
* ``PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED``: Whether FMA and DFMA (doubles only)
are supported.
* ``PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE``: Whether the driver doesn't
@@ -662,7 +676,7 @@
Get a floating-point screen parameter.
-**param** is one of the :ref:`PIPE_CAP` names.
+**param** is one of the :ref:`PIPE_CAPF` names.
context_create
^^^^^^^^^^^^^^
@@ -731,9 +745,9 @@
**nr_samples** the nr of msaa samples. 0 (or 1) specifies a resource
which isn't multisampled.
-**usage** one of the PIPE_USAGE flags.
+**usage** one of the :ref:`PIPE_USAGE` flags.
-**bind** bitmask of the PIPE_BIND flags.
+**bind** bitmask of the :ref:`PIPE_BIND` flags.
**flags** bitmask of PIPE_RESOURCE_FLAG flags.
diff -Nru mesa-17.2.4/src/gallium/docs/source/tgsi.rst mesa-17.3.3/src/gallium/docs/source/tgsi.rst
--- mesa-17.2.4/src/gallium/docs/source/tgsi.rst 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/docs/source/tgsi.rst 2018-01-18 21:30:28.000000000 +0000
@@ -285,19 +285,6 @@
dst.w = src0.w \times src1.w + src2.w
-.. opcode:: DP2A - 2-component Dot Product And Add
-
-.. math::
-
- dst.x = src0.x \times src1.x + src0.y \times src1.y + src2.x
-
- dst.y = src0.x \times src1.x + src0.y \times src1.y + src2.x
-
- dst.z = src0.x \times src1.x + src0.y \times src1.y + src2.x
-
- dst.w = src0.x \times src1.x + src0.y \times src1.y + src2.x
-
-
.. opcode:: FRC - Fraction
.. math::
@@ -363,26 +350,17 @@
dst = src0.x^{src1.x}
-.. opcode:: XPD - Cross Product
-.. math::
-
- dst.x = src0.y \times src1.z - src1.y \times src0.z
-
- dst.y = src0.z \times src1.x - src1.z \times src0.x
-
- dst.z = src0.x \times src1.y - src1.x \times src0.y
+.. opcode:: LDEXP - Multiply Number by Integral Power of 2
- dst.w = 1
-
-
-.. opcode:: DPH - Homogeneous Dot Product
-
-This instruction replicates its result.
+src1 is an integer.
.. math::
- dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w
+ dst.x = src0.x \times 2^{src1.x}
+ dst.y = src0.y \times 2^{src1.y}
+ dst.z = src0.z \times 2^{src1.z}
+ dst.w = src0.w \times 2^{src1.w}
.. opcode:: COS - Cosine
@@ -439,17 +417,35 @@
.. opcode:: PK2US - Pack Two Unsigned 16-bit Scalars
- TBD
+This instruction replicates its result.
+
+.. math::
+
+ dst = f32\_to\_unorm16(src.x) | f32\_to\_unorm16(src.y) << 16
.. opcode:: PK4B - Pack Four Signed 8-bit Scalars
- TBD
+This instruction replicates its result.
+
+.. math::
+
+ dst = f32\_to\_snorm8(src.x) |
+ (f32\_to\_snorm8(src.y) << 8) |
+ (f32\_to\_snorm8(src.z) << 16) |
+ (f32\_to\_snorm8(src.w) << 24)
.. opcode:: PK4UB - Pack Four Unsigned 8-bit Scalars
- TBD
+This instruction replicates its result.
+
+.. math::
+
+ dst = f32\_to\_unorm8(src.x) |
+ (f32\_to\_unorm8(src.y) << 8) |
+ (f32\_to\_unorm8(src.z) << 16) |
+ (f32\_to\_unorm8(src.w) << 24)
.. opcode:: SEQ - Set On Equal
@@ -685,19 +681,6 @@
Unconditional discard. Allowed in fragment shaders only.
-.. opcode:: SCS - Sine Cosine
-
-.. math::
-
- dst.x = \cos{src.x}
-
- dst.y = \sin{src.x}
-
- dst.z = 0
-
- dst.w = 1
-
-
.. opcode:: TXB - Texture Lookup With Bias
for cube map array textures and shadow cube maps, the bias value
@@ -835,50 +818,6 @@
dst = texture\_sample(unit, coord, lod)
-.. opcode:: PUSHA - Push Address Register On Stack
-
- push(src.x)
- push(src.y)
- push(src.z)
- push(src.w)
-
-.. note::
-
- Considered for cleanup.
-
-.. note::
-
- Considered for removal.
-
-.. opcode:: POPA - Pop Address Register From Stack
-
- dst.w = pop()
- dst.z = pop()
- dst.y = pop()
- dst.x = pop()
-
-.. note::
-
- Considered for cleanup.
-
-.. note::
-
- Considered for removal.
-
-
-.. opcode:: CALLNZ - Subroutine Call If Not Zero
-
- TBD
-
-.. note::
-
- Considered for cleanup.
-
-.. note::
-
- Considered for removal.
-
-
Compute ISA
^^^^^^^^^^^^^^^^^^^^^^^^
@@ -932,26 +871,15 @@
destination register, which is assumed to be an address (ADDR) register.
-.. opcode:: SAD - Sum Of Absolute Differences
-
-.. math::
-
- dst.x = |src0.x - src1.x| + src2.x
-
- dst.y = |src0.y - src1.y| + src2.y
-
- dst.z = |src0.z - src1.z| + src2.z
-
- dst.w = |src0.w - src1.w| + src2.w
-
-
.. opcode:: TXF - Texel Fetch
As per NV_gpu_shader4, extract a single texel from a specified texture
image or PIPE_BUFFER resource. The source sampler may not be a CUBE or
SHADOW. src 0 is a
four-component signed integer vector used to identify the single texel
- accessed. 3 components + level. Just like texture instructions, an optional
+ accessed. 3 components + level. If the texture is multisampled, then
+ the fourth component indicates the sample, not the mipmap level.
+ Just like texture instructions, an optional
offset vector is provided, which is subject to various driver restrictions
(regarding range, source of offsets). This instruction ignores the sampler
state.
@@ -959,12 +887,6 @@
TXF(uint_vec coord, int_vec offset).
-.. opcode:: TXF_LZ - Texel Fetch
-
- This is the same as TXF with level = 0. Like TXF, it obeys
- pipe_sampler_view::u.tex.first_level.
-
-
.. opcode:: TXQ - Texture Size Query
As per NV_gpu_program4, retrieve the dimensions of the texture depending on
@@ -1691,7 +1613,7 @@
These opcodes are part of :term:`GLSL`'s opcode set. Support for these
opcodes is determined by a special capability bit, ``GLSL``.
-Some require glsl version 1.30 (UIF/BREAKC/SWITCH/CASE/DEFAULT/ENDSWITCH).
+Some require glsl version 1.30 (UIF/SWITCH/CASE/DEFAULT/ENDSWITCH).
.. opcode:: CAL - Subroutine Call
@@ -1747,20 +1669,6 @@
or switch/endswitch.
-.. opcode:: BREAKC - Break Conditional
-
- Conditionally moves the point of execution to the instruction after the
- next endloop or endswitch. The instruction must appear within a loop/endloop
- or switch/endswitch.
- Condition evaluates to true if src0.x != 0 where src0.x is interpreted
- as an integer register.
-
-.. note::
-
- Considered for removal as it's quite inconsistent wrt other opcodes
- (could emulate with UIF/BRK/ENDIF).
-
-
.. opcode:: IF - Float If
Start an IF ... ELSE .. ENDIF block. Condition evaluates to true if
@@ -1884,7 +1792,7 @@
dst.z = src0.zw == src1.zw ? \sim 0 : 0
-.. opcode:: DSNE - Set on Equal
+.. opcode:: DSNE - Set on Not Equal
.. math::
@@ -1960,17 +1868,15 @@
Like the ``frexp()`` routine in many math libraries, this opcode stores the
exponent of its source to ``dst0``, and the significand to ``dst1``, such that
-:math:`dst1 \times 2^{dst0} = src` .
+:math:`dst1 \times 2^{dst0} = src` . The results are replicated across
+channels.
.. math::
- dst0.xy = exp(src.xy)
-
- dst1.xy = frac(src.xy)
+ dst0.xy = dst0.zw = frac(src.xy)
- dst0.zw = exp(src.zw)
+ dst1 = exp(src.xy)
- dst1.zw = frac(src.zw)
.. opcode:: DLDEXP - Multiply Number by Integral Power of 2
@@ -1981,7 +1887,7 @@
dst.xy = src0.xy \times 2^{src1.x}
- dst.zw = src0.zw \times 2^{src1.y}
+ dst.zw = src0.zw \times 2^{src1.z}
.. opcode:: DMIN - Minimum
@@ -2321,9 +2227,9 @@
.. math::
- dst.xy = (uint64_t) src0.x
+ dst.xy = (int64_t) src0.x
- dst.zw = (uint64_t) src0.y
+ dst.zw = (int64_t) src0.y
.. opcode:: I2I64 - Signed Integer to 64-bit Integer
@@ -2591,6 +2497,18 @@
NOTE: no driver has implemented this opcode yet (and no state tracker
emits it). This information is subject to change.
+.. opcode:: LOD - level of detail
+
+ Same syntax as the SAMPLE opcode but instead of performing an actual
+ texture lookup/filter, return the computed LOD information that the
+ texture pipe would use to access the texture. The Y component contains
+ the computed LOD lambda_prime. The X component contains the LOD that will
+ be accessed, based on min/max LODs and mipmap filters.
+ The Z and W components are set to 0.
+
+ Syntax: ``LOD dst, address, sampler_view, sampler``
+
+
.. _resourceopcodes:
Resource Access Opcodes
@@ -2683,36 +2601,6 @@
within the same compute grid. For now they're only valid in compute
programs.
-.. opcode:: MFENCE - Memory fence
-
- Syntax: ``MFENCE resource``
-
- Example: ``MFENCE RES[0]``
-
- This opcode forces strong ordering between any memory access
- operations that affect the specified resource. This means that
- previous loads and stores (and only those) will be performed and
- visible to other threads before the program execution continues.
-
-
-.. opcode:: LFENCE - Load memory fence
-
- Syntax: ``LFENCE resource``
-
- Example: ``LFENCE RES[0]``
-
- Similar to MFENCE, but it only affects the ordering of memory loads.
-
-
-.. opcode:: SFENCE - Store memory fence
-
- Syntax: ``SFENCE resource``
-
- Example: ``SFENCE RES[0]``
-
- Similar to MFENCE, but it only affects the ordering of memory stores.
-
-
.. opcode:: BARRIER - Thread group barrier
``BARRIER``
@@ -3331,6 +3219,9 @@
Only the X component is used. If per-sample shading is not enabled,
the result is (0, undef, undef, undef).
+Note that if the fragment shader uses this system value, the fragment
+shader is automatically executed at per-sample frequency.
+
TGSI_SEMANTIC_SAMPLEPOS
"""""""""""""""""""""""
@@ -3340,6 +3231,9 @@
is in effect. Position values are in the range [0, 1] where 0.5 is
the center of the fragment.
+Note that if the fragment shader uses this system value, the fragment
+shader is automatically executed at per-sample frequency.
+
TGSI_SEMANTIC_SAMPLEMASK
""""""""""""""""""""""""
@@ -3543,7 +3437,7 @@
TGSI_SEMANTIC_SUBGROUP_LT_MASK
""""""""""""""""""""""""""""""
-A bit mask of ``bit index > TGSI_SEMANTIC_SUBGROUP_INVOCATION``, i.e.
+A bit mask of ``bit index < TGSI_SEMANTIC_SUBGROUP_INVOCATION``, i.e.
``(1 << subgroup_invocation) - 1`` in arbitrary precision arithmetic.
@@ -3770,7 +3664,7 @@
should be set the same way for an entire pipeline. Note that this
applies not only to the literal MUL TGSI opcode, but all FP32
multiplications implied by other operations, such as MAD, FMA, DP2,
-DP3, DP4, DPH, DST, LOG, LRP, XPD, and possibly others. If there is a
+DP3, DP4, DST, LOG, LRP, and possibly others. If there is a
mismatch between shaders, then it is unspecified whether this behavior
will be enabled.
diff -Nru mesa-17.2.4/src/gallium/drivers/ddebug/dd_context.c mesa-17.3.3/src/gallium/drivers/ddebug/dd_context.c
--- mesa-17.2.4/src/gallium/drivers/ddebug/dd_context.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/ddebug/dd_context.c 2018-01-18 21:30:28.000000000 +0000
@@ -45,22 +45,6 @@
* queries
*/
-static struct dd_query *
-dd_query(struct pipe_query *query)
-{
- return (struct dd_query *)query;
-}
-
-static struct pipe_query *
-dd_query_unwrap(struct pipe_query *query)
-{
- if (query) {
- return dd_query(query)->query;
- } else {
- return NULL;
- }
-}
-
static struct pipe_query *
dd_context_create_query(struct pipe_context *_pipe, unsigned query_type,
unsigned index)
@@ -151,21 +135,6 @@
}
static void
-dd_context_get_query_result_resource(struct pipe_context *_pipe,
- struct pipe_query *query,
- boolean wait,
- enum pipe_query_value_type result_type,
- int index,
- struct pipe_resource *resource,
- unsigned offset)
-{
- struct pipe_context *pipe = dd_context(_pipe)->pipe;
-
- pipe->get_query_result_resource(pipe, dd_query_unwrap(query), wait,
- result_type, index, resource, offset);
-}
-
-static void
dd_context_set_active_query_state(struct pipe_context *_pipe, boolean enable)
{
struct pipe_context *pipe = dd_context(_pipe)->pipe;
@@ -614,6 +583,22 @@
pipe->transfer_unmap(pipe, dctx->fence_transfer);
pipe_resource_reference(&dctx->fence, NULL);
}
+
+ if (pipe->set_log_context) {
+ pipe->set_log_context(pipe, NULL);
+
+ if (dd_screen(dctx->base.screen)->mode == DD_DUMP_ALL_CALLS) {
+ FILE *f = dd_get_file_stream(dd_screen(dctx->base.screen), 0);
+ if (f) {
+ fprintf(f, "Remainder of driver log:\n\n");
+ }
+
+ u_log_new_page_print(&dctx->log, f);
+ fclose(f);
+ }
+ }
+ u_log_context_destroy(&dctx->log);
+
pipe->destroy(pipe);
FREE(dctx);
}
@@ -847,7 +832,6 @@
CTX_INIT(begin_query);
CTX_INIT(end_query);
CTX_INIT(get_query_result);
- CTX_INIT(get_query_result_resource);
CTX_INIT(set_active_query_state);
CTX_INIT(create_blend_state);
CTX_INIT(bind_blend_state);
@@ -931,6 +915,10 @@
dd_init_draw_functions(dctx);
+ u_log_context_init(&dctx->log);
+ if (pipe->set_log_context)
+ pipe->set_log_context(pipe, &dctx->log);
+
dctx->draw_state.sample_mask = ~0;
if (dscreen->mode == DD_DETECT_HANGS_PIPELINED) {
diff -Nru mesa-17.2.4/src/gallium/drivers/ddebug/dd_draw.c mesa-17.3.3/src/gallium/drivers/ddebug/dd_draw.c
--- mesa-17.2.4/src/gallium/drivers/ddebug/dd_draw.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/ddebug/dd_draw.c 2018-01-18 21:30:28.000000000 +0000
@@ -39,7 +39,7 @@
#include
-static FILE *
+FILE *
dd_get_file_stream(struct dd_screen *dscreen, unsigned apitrace_call_number)
{
struct pipe_screen *screen = dscreen->screen;
@@ -154,6 +154,12 @@
}
static void
+util_dump_int(FILE *f, int i)
+{
+ fprintf(f, "%d", i);
+}
+
+static void
util_dump_hex(FILE *f, unsigned i)
{
fprintf(f, "0x%x", i);
@@ -180,21 +186,11 @@
}
static void
-util_dump_query(FILE *f, struct dd_query *query)
-{
- if (query->type >= PIPE_QUERY_DRIVER_SPECIFIC)
- fprintf(f, "PIPE_QUERY_DRIVER_SPECIFIC + %i",
- query->type - PIPE_QUERY_DRIVER_SPECIFIC);
- else
- fprintf(f, "%s", util_dump_query_type(query->type, false));
-}
-
-static void
dd_dump_render_condition(struct dd_draw_state *dstate, FILE *f)
{
if (dstate->render_cond.query) {
fprintf(f, "render condition:\n");
- DUMP_M(query, &dstate->render_cond, query);
+ DUMP_M(query_type, &dstate->render_cond, query->type);
DUMP_M(uint, &dstate->render_cond, condition);
DUMP_M(uint, &dstate->render_cond, mode);
fprintf(f, "\n");
@@ -429,6 +425,18 @@
}
static void
+dd_dump_get_query_result_resource(struct call_get_query_result_resource *info, FILE *f)
+{
+ fprintf(f, "%s:\n", __func__ + 8);
+ DUMP_M(query_type, info, query_type);
+ DUMP_M(uint, info, wait);
+ DUMP_M(query_value_type, info, result_type);
+ DUMP_M(int, info, index);
+ DUMP_M(resource, info, resource);
+ DUMP_M(uint, info, offset);
+}
+
+static void
dd_dump_flush_resource(struct dd_draw_state *dstate, struct pipe_resource *res,
FILE *f)
{
@@ -535,6 +543,9 @@
case CALL_GENERATE_MIPMAP:
dd_dump_generate_mipmap(state, f);
break;
+ case CALL_GET_QUERY_RESULT_RESOURCE:
+ dd_dump_get_query_result_resource(&call->info.get_query_result_resource, f);
+ break;
}
}
@@ -550,6 +561,12 @@
dd_dump_call(f, &dctx->draw_state, call);
dd_dump_driver_state(dctx, f, flags);
+
+ fprintf(f,"\n\n**************************************************"
+ "***************************\n");
+ fprintf(f, "Context Log:\n\n");
+ u_log_new_page_print(&dctx->log, f);
+
if (dump_dmesg)
dd_dump_dmesg(f);
dd_close_file_stream(f);
@@ -603,10 +620,7 @@
if (f) {
fprintf(f, "dd: %s.\n", cause);
dd_dump_driver_state(dctx, f,
- PIPE_DUMP_DEVICE_STATUS_REGISTERS |
- PIPE_DUMP_CURRENT_STATES |
- PIPE_DUMP_CURRENT_SHADERS |
- PIPE_DUMP_LAST_COMMAND_BUFFER);
+ PIPE_DUMP_DEVICE_STATUS_REGISTERS);
dd_dump_dmesg(f);
dd_close_file_stream(f);
}
@@ -658,6 +672,9 @@
case CALL_GENERATE_MIPMAP:
pipe_resource_reference(&dst->info.generate_mipmap.res, NULL);
break;
+ case CALL_GET_QUERY_RESULT_RESOURCE:
+ pipe_resource_reference(&dst->info.get_query_result_resource.resource, NULL);
+ break;
}
}
@@ -735,6 +752,12 @@
src->info.generate_mipmap.res);
dst->info.generate_mipmap = src->info.generate_mipmap;
break;
+ case CALL_GET_QUERY_RESULT_RESOURCE:
+ pipe_resource_reference(&dst->info.get_query_result_resource.resource,
+ src->info.get_query_result_resource.resource);
+ dst->info.get_query_result_resource = src->info.get_query_result_resource;
+ dst->info.get_query_result_resource.query = NULL;
+ break;
}
}
@@ -824,7 +847,7 @@
}
dst->num_so_targets = src->num_so_targets;
- for (i = 0; i < ARRAY_SIZE(src->so_targets); i++)
+ for (i = 0; i < src->num_so_targets; i++)
pipe_so_target_reference(&dst->so_targets[i], src->so_targets[i]);
memcpy(dst->so_offsets, src->so_offsets, sizeof(src->so_offsets));
@@ -916,9 +939,9 @@
{
struct dd_draw_record *next = (*record)->next;
+ u_log_page_destroy((*record)->log_page);
dd_unreference_copy_of_call(&(*record)->call);
dd_unreference_copy_of_draw_state(&(*record)->draw_state);
- FREE((*record)->driver_state_log);
FREE(*record);
*record = next;
}
@@ -938,7 +961,11 @@
(now - record->timestamp) / 1000);
dd_dump_call(f, &record->draw_state.base, &record->call);
- fprintf(f, "%s\n", record->driver_state_log);
+
+ fprintf(f,"\n\n**************************************************"
+ "***************************\n");
+ fprintf(f, "Context Log:\n\n");
+ u_log_page_print(record->log_page, f);
dctx->pipe->dump_debug_state(dctx->pipe, f,
PIPE_DUMP_DEVICE_STATUS_REGISTERS);
@@ -1003,71 +1030,17 @@
return 0;
}
-static char *
-dd_get_driver_shader_log(struct dd_context *dctx)
-{
-#if defined(PIPE_OS_LINUX)
- FILE *f;
- char *buf;
- int written_bytes;
-
- if (!dctx->max_log_buffer_size)
- dctx->max_log_buffer_size = 16 * 1024;
-
- /* Keep increasing the buffer size until there is enough space.
- *
- * open_memstream can resize automatically, but it's VERY SLOW.
- * fmemopen is much faster.
- */
- while (1) {
- buf = malloc(dctx->max_log_buffer_size);
- buf[0] = 0;
-
- f = fmemopen(buf, dctx->max_log_buffer_size, "a");
- if (!f) {
- free(buf);
- return NULL;
- }
-
- dd_dump_driver_state(dctx, f, PIPE_DUMP_CURRENT_SHADERS);
- written_bytes = ftell(f);
- fclose(f);
-
- /* Return if the backing buffer is large enough. */
- if (written_bytes < dctx->max_log_buffer_size - 1)
- break;
-
- /* Try again. */
- free(buf);
- dctx->max_log_buffer_size *= 2;
- }
-
- return buf;
-#else
- /* Return an empty string. */
- return (char*)calloc(1, 4);
-#endif
-}
-
static void
dd_pipelined_process_draw(struct dd_context *dctx, struct dd_call *call)
{
struct pipe_context *pipe = dctx->pipe;
struct dd_draw_record *record;
- char *log;
/* Make a record of the draw call. */
record = MALLOC_STRUCT(dd_draw_record);
if (!record)
return;
- /* Create the log. */
- log = dd_get_driver_shader_log(dctx);
- if (!log) {
- FREE(record);
- return;
- }
-
/* Update the fence with the GPU.
*
* radeonsi/clear_buffer waits in the command processor until shaders are
@@ -1080,7 +1053,7 @@
/* Initialize the record. */
record->timestamp = os_time_get();
record->sequence_no = dctx->sequence_no;
- record->driver_state_log = log;
+ record->log_page = u_log_new_page(&dctx->log);
memset(&record->call, 0, sizeof(record->call));
dd_copy_call(&record->call, call);
@@ -1142,14 +1115,13 @@
if (!dscreen->no_flush &&
dd_flush_and_check_hang(dctx, NULL, 0)) {
dd_write_report(dctx, call,
- PIPE_DUMP_DEVICE_STATUS_REGISTERS |
- PIPE_DUMP_CURRENT_STATES |
- PIPE_DUMP_CURRENT_SHADERS |
- PIPE_DUMP_LAST_COMMAND_BUFFER,
+ PIPE_DUMP_DEVICE_STATUS_REGISTERS,
true);
/* Terminate the process to prevent future hangs. */
dd_kill_process();
+ } else {
+ u_log_page_destroy(u_log_new_page(&dctx->log));
}
break;
case DD_DETECT_HANGS_PIPELINED:
@@ -1158,21 +1130,16 @@
case DD_DUMP_ALL_CALLS:
if (!dscreen->no_flush)
pipe->flush(pipe, NULL, 0);
- dd_write_report(dctx, call,
- PIPE_DUMP_CURRENT_STATES |
- PIPE_DUMP_CURRENT_SHADERS |
- PIPE_DUMP_LAST_COMMAND_BUFFER,
- false);
+ dd_write_report(dctx, call, 0, false);
break;
case DD_DUMP_APITRACE_CALL:
if (dscreen->apitrace_dump_call ==
dctx->draw_state.apitrace_call_number) {
- dd_write_report(dctx, call,
- PIPE_DUMP_CURRENT_STATES |
- PIPE_DUMP_CURRENT_SHADERS,
- false);
+ dd_write_report(dctx, call, 0, false);
/* No need to continue. */
exit(0);
+ } else {
+ u_log_page_destroy(u_log_new_page(&dctx->log));
}
break;
default:
@@ -1297,6 +1264,39 @@
}
static void
+dd_context_get_query_result_resource(struct pipe_context *_pipe,
+ struct pipe_query *query,
+ boolean wait,
+ enum pipe_query_value_type result_type,
+ int index,
+ struct pipe_resource *resource,
+ unsigned offset)
+{
+ struct dd_context *dctx = dd_context(_pipe);
+ struct dd_query *dquery = dd_query(query);
+ struct pipe_context *pipe = dctx->pipe;
+ struct dd_call call;
+
+ call.type = CALL_GET_QUERY_RESULT_RESOURCE;
+ call.info.get_query_result_resource.query = query;
+ call.info.get_query_result_resource.wait = wait;
+ call.info.get_query_result_resource.result_type = result_type;
+ call.info.get_query_result_resource.index = index;
+ call.info.get_query_result_resource.resource = resource;
+ call.info.get_query_result_resource.offset = offset;
+
+ /* In pipelined mode, the query may be deleted by the time we need to
+ * print it.
+ */
+ call.info.get_query_result_resource.query_type = dquery->type;
+
+ dd_before_draw(dctx);
+ pipe->get_query_result_resource(pipe, dquery->query, wait,
+ result_type, index, resource, offset);
+ dd_after_draw(dctx, &call);
+}
+
+static void
dd_context_flush_resource(struct pipe_context *_pipe,
struct pipe_resource *resource)
{
@@ -1426,4 +1426,5 @@
CTX_INIT(clear_texture);
CTX_INIT(flush_resource);
CTX_INIT(generate_mipmap);
+ CTX_INIT(get_query_result_resource);
}
diff -Nru mesa-17.2.4/src/gallium/drivers/ddebug/dd_pipe.h mesa-17.3.3/src/gallium/drivers/ddebug/dd_pipe.h
--- mesa-17.2.4/src/gallium/drivers/ddebug/dd_pipe.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/ddebug/dd_pipe.h 2018-01-18 21:30:28.000000000 +0000
@@ -33,6 +33,7 @@
#include "pipe/p_screen.h"
#include "dd_util.h"
#include "os/os_thread.h"
+#include "util/u_log.h"
enum dd_mode {
DD_DETECT_HANGS,
@@ -66,6 +67,7 @@
CALL_CLEAR_RENDER_TARGET,
CALL_CLEAR_DEPTH_STENCIL,
CALL_GENERATE_MIPMAP,
+ CALL_GET_QUERY_RESULT_RESOURCE,
};
struct call_resource_copy_region
@@ -109,6 +111,16 @@
struct pipe_draw_indirect_info indirect;
};
+/*
+ * Arguments recorded for a CALL_GET_QUERY_RESULT_RESOURCE entry.
+ *
+ * query_type is stored separately from the query pointer; it is filled in
+ * from the wrapper's type field by dd_context_get_query_result_resource(),
+ * whose comment explains that the query may already be deleted when the
+ * record is printed.
+ */
+struct call_get_query_result_resource {
+ struct pipe_query *query;
+ enum pipe_query_type query_type;
+ boolean wait;
+ enum pipe_query_value_type result_type;
+ int index;
+ struct pipe_resource *resource;
+ unsigned offset;
+};
+
struct dd_call
{
enum call_type type;
@@ -122,6 +134,7 @@
struct call_clear clear;
struct call_clear_buffer clear_buffer;
struct call_generate_mipmap generate_mipmap;
+ struct call_get_query_result_resource get_query_result_resource;
} info;
};
@@ -212,7 +225,7 @@
struct dd_call call;
struct dd_draw_state_copy draw_state;
- char *driver_state_log;
+ struct u_log_page *log_page;
};
struct dd_context
@@ -223,6 +236,8 @@
struct dd_draw_state draw_state;
unsigned num_draw_calls;
+ struct u_log_context log;
+
/* Pipelined hang detection.
*
* This is without unnecessary flushes and waits. There is a memory-based
@@ -259,6 +274,8 @@
int
dd_thread_pipelined_hang_detect(void *input);
+FILE *
+dd_get_file_stream(struct dd_screen *dscreen, unsigned apitrace_call_number);
static inline struct dd_context *
dd_context(struct pipe_context *pipe)
@@ -272,6 +289,22 @@
return (struct dd_screen*)screen;
}
+/*
+ * Reinterpret a pipe_query handle as the ddebug query wrapper. This is a
+ * plain pointer cast with no checking.
+ * NOTE(review): presumably only valid for queries that were created through
+ * the ddebug context - confirm against the query-creation path.
+ */
+static inline struct dd_query *
+dd_query(struct pipe_query *query)
+{
+ return (struct dd_query *)query;
+}
+
+/*
+ * Return the "query" member stored in the dd_query wrapper behind the given
+ * handle, or NULL when the handle itself is NULL.
+ */
+static inline struct pipe_query *
+dd_query_unwrap(struct pipe_query *query)
+{
+ if (query) {
+ return dd_query(query)->query;
+ } else {
+ return NULL;
+ }
+}
+
#define CTX_INIT(_member) \
dctx->base._member = dctx->pipe->_member ? dd_context_##_member : NULL
diff -Nru mesa-17.2.4/src/gallium/drivers/ddebug/dd_screen.c mesa-17.3.3/src/gallium/drivers/ddebug/dd_screen.c
--- mesa-17.2.4/src/gallium/drivers/ddebug/dd_screen.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/ddebug/dd_screen.c 2018-01-18 21:30:28.000000000 +0000
@@ -197,6 +197,22 @@
}
+/*
+ * Forward get_driver_uuid to the wrapped screen; "uuid" is passed through
+ * unchanged. The wrapped hook is called without a NULL check.
+ * NOTE(review): presumably this wrapper is only installed (via SCR_INIT)
+ * when the wrapped screen provides the hook - confirm against the macro.
+ */
+static void
+dd_screen_get_driver_uuid(struct pipe_screen *_screen, char *uuid)
+{
+ struct pipe_screen *screen = dd_screen(_screen)->screen;
+
+ screen->get_driver_uuid(screen, uuid);
+}
+
+/*
+ * Forward get_device_uuid to the wrapped screen; "uuid" is passed through
+ * unchanged. The wrapped hook is called without a NULL check.
+ * NOTE(review): presumably this wrapper is only installed (via SCR_INIT)
+ * when the wrapped screen provides the hook - confirm against the macro.
+ */
+static void
+dd_screen_get_device_uuid(struct pipe_screen *_screen, char *uuid)
+{
+ struct pipe_screen *screen = dd_screen(_screen)->screen;
+
+ screen->get_device_uuid(screen, uuid);
+}
+
/********************************************************************
* resource
*/
@@ -245,6 +261,22 @@
return res;
}
+/*
+ * Forward resource_from_memobj to the wrapped screen.
+ *
+ * Returns NULL when the wrapped screen returns NULL. On success the
+ * resource's screen pointer is overwritten with the ddebug screen
+ * (_screen) before the resource is returned to the caller.
+ */
+static struct pipe_resource *
+dd_screen_resource_from_memobj(struct pipe_screen *_screen,
+ const struct pipe_resource *templ,
+ struct pipe_memory_object *memobj,
+ uint64_t offset)
+{
+ struct pipe_screen *screen = dd_screen(_screen)->screen;
+ struct pipe_resource *res =
+ screen->resource_from_memobj(screen, templ, memobj, offset);
+
+ if (!res)
+ return NULL;
+ /* Make the resource point back at the wrapper, not the wrapped screen. */
+ res->screen = _screen;
+ return res;
+}
+
static void
dd_screen_resource_changed(struct pipe_screen *_screen,
struct pipe_resource *res)
@@ -276,6 +308,16 @@
return screen->resource_get_handle(screen, pipe, resource, handle, usage);
}
+/*
+ * Forward check_resource_capability to the wrapped screen and return its
+ * result. "resource" and "bind" are passed through unchanged.
+ */
+static bool
+dd_screen_check_resource_capability(struct pipe_screen *_screen,
+ struct pipe_resource *resource,
+ unsigned bind)
+{
+ struct pipe_screen *screen = dd_screen(_screen)->screen;
+
+ return screen->check_resource_capability(screen, resource, bind);
+}
+
/********************************************************************
* fence
@@ -303,7 +345,28 @@
return screen->fence_finish(screen, ctx, fence, timeout);
}
+/********************************************************************
+ * memobj
+ */
+
+/*
+ * Forward memobj_create_from_handle to the wrapped screen and return the
+ * memory object it produces as-is (no wrapping is applied here).
+ */
+static struct pipe_memory_object *
+dd_screen_memobj_create_from_handle(struct pipe_screen *_screen,
+ struct winsys_handle *handle,
+ bool dedicated)
+{
+ struct pipe_screen *screen = dd_screen(_screen)->screen;
+
+ return screen->memobj_create_from_handle(screen, handle, dedicated);
+}
+
+/*
+ * Forward memobj_destroy to the wrapped screen; "memobj" is passed through
+ * unchanged.
+ */
+static void
+dd_screen_memobj_destroy(struct pipe_screen *_screen,
+ struct pipe_memory_object *memobj)
+{
+ struct pipe_screen *screen = dd_screen(_screen)->screen;
+ screen->memobj_destroy(screen, memobj);
+}
/********************************************************************
* screen
*/
@@ -412,16 +475,22 @@
SCR_INIT(can_create_resource);
dscreen->base.resource_create = dd_screen_resource_create;
dscreen->base.resource_from_handle = dd_screen_resource_from_handle;
+ SCR_INIT(resource_from_memobj);
SCR_INIT(resource_from_user_memory);
+ SCR_INIT(check_resource_capability);
dscreen->base.resource_get_handle = dd_screen_resource_get_handle;
SCR_INIT(resource_changed);
dscreen->base.resource_destroy = dd_screen_resource_destroy;
SCR_INIT(flush_frontbuffer);
SCR_INIT(fence_reference);
SCR_INIT(fence_finish);
+ SCR_INIT(memobj_create_from_handle);
+ SCR_INIT(memobj_destroy);
SCR_INIT(get_driver_query_info);
SCR_INIT(get_driver_query_group_info);
SCR_INIT(get_compiler_options);
+ SCR_INIT(get_driver_uuid);
+ SCR_INIT(get_device_uuid);
#undef SCR_INIT
diff -Nru mesa-17.2.4/src/gallium/drivers/ddebug/Makefile.in mesa-17.3.3/src/gallium/drivers/ddebug/Makefile.in
--- mesa-17.2.4/src/gallium/drivers/ddebug/Makefile.in 2017-10-30 14:49:59.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/ddebug/Makefile.in 2018-01-18 21:30:39.000000000 +0000
@@ -101,7 +101,8 @@
subdir = src/gallium/drivers/ddebug
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_gnu_make.m4 \
+am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_compile_flag.m4 \
+ $(top_srcdir)/m4/ax_check_gnu_make.m4 \
$(top_srcdir)/m4/ax_check_python_mako_module.m4 \
$(top_srcdir)/m4/ax_gcc_builtin.m4 \
$(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
@@ -328,9 +329,9 @@
NVVIEUX_LIBS = @NVVIEUX_LIBS@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
-OMX_CFLAGS = @OMX_CFLAGS@
-OMX_LIBS = @OMX_LIBS@
-OMX_LIB_INSTALL_DIR = @OMX_LIB_INSTALL_DIR@
+OMX_BELLAGIO_CFLAGS = @OMX_BELLAGIO_CFLAGS@
+OMX_BELLAGIO_LIBS = @OMX_BELLAGIO_LIBS@
+OMX_BELLAGIO_LIB_INSTALL_DIR = @OMX_BELLAGIO_LIB_INSTALL_DIR@
OPENCL_LIBNAME = @OPENCL_LIBNAME@
OPENCL_VERSION = @OPENCL_VERSION@
OSMESA_LIB = @OSMESA_LIB@
@@ -384,6 +385,8 @@
VA_LIB_INSTALL_DIR = @VA_LIB_INSTALL_DIR@
VA_MAJOR = @VA_MAJOR@
VA_MINOR = @VA_MINOR@
+VC5_SIMULATOR_CFLAGS = @VC5_SIMULATOR_CFLAGS@
+VC5_SIMULATOR_LIBS = @VC5_SIMULATOR_LIBS@
VDPAU_CFLAGS = @VDPAU_CFLAGS@
VDPAU_LIBS = @VDPAU_LIBS@
VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@
@@ -395,12 +398,15 @@
VL_CFLAGS = @VL_CFLAGS@
VL_LIBS = @VL_LIBS@
VULKAN_ICD_INSTALL_DIR = @VULKAN_ICD_INSTALL_DIR@
-WAYLAND_CFLAGS = @WAYLAND_CFLAGS@
-WAYLAND_LIBS = @WAYLAND_LIBS@
+WAYLAND_CLIENT_CFLAGS = @WAYLAND_CLIENT_CFLAGS@
+WAYLAND_CLIENT_LIBS = @WAYLAND_CLIENT_LIBS@
WAYLAND_PROTOCOLS_DATADIR = @WAYLAND_PROTOCOLS_DATADIR@
WAYLAND_SCANNER = @WAYLAND_SCANNER@
WAYLAND_SCANNER_CFLAGS = @WAYLAND_SCANNER_CFLAGS@
WAYLAND_SCANNER_LIBS = @WAYLAND_SCANNER_LIBS@
+WAYLAND_SERVER_CFLAGS = @WAYLAND_SERVER_CFLAGS@
+WAYLAND_SERVER_LIBS = @WAYLAND_SERVER_LIBS@
+WNO_OVERRIDE_INIT = @WNO_OVERRIDE_INIT@
X11_INCLUDES = @X11_INCLUDES@
XA_MAJOR = @XA_MAJOR@
XA_MINOR = @XA_MINOR@
@@ -534,6 +540,8 @@
-I$(top_srcdir)/src/gallium/auxiliary \
-I$(top_srcdir)/src/gallium/drivers \
-I$(top_srcdir)/src/gallium/winsys \
+ -I$(top_builddir)/src/util/ \
+ -I$(top_builddir)/src/gallium/drivers/ \
$(DEFINES) \
$(PTHREAD_CFLAGS) \
$(LIBDRM_CFLAGS) \
diff -Nru mesa-17.2.4/src/gallium/drivers/ddebug/meson.build mesa-17.3.3/src/gallium/drivers/ddebug/meson.build
--- mesa-17.2.4/src/gallium/drivers/ddebug/meson.build 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/ddebug/meson.build 2018-02-01 16:17:32.000000000 +0000
@@ -0,0 +1,28 @@
+# Copyright © 2017 Dylan Baker
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+# Static library 'ddebug' built from the dd_* sources and headers in this
+# directory. build_by_default is false, so the target is not part of the
+# default build on its own.
+libddebug = static_library(
+ 'ddebug',
+ files('dd_context.c', 'dd_draw.c', 'dd_pipe.h', 'dd_public.h', 'dd_screen.c',
+ 'dd_util.h'),
+ c_args : [c_vis_args],
+ include_directories : [inc_gallium, inc_include, inc_src, inc_gallium_aux],
+ build_by_default : false,
+)
diff -Nru mesa-17.2.4/src/gallium/drivers/etnaviv/etnaviv_asm.c mesa-17.3.3/src/gallium/drivers/etnaviv/etnaviv_asm.c
--- mesa-17.2.4/src/gallium/drivers/etnaviv/etnaviv_asm.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/etnaviv/etnaviv_asm.c 2018-01-18 21:30:28.000000000 +0000
@@ -68,7 +68,9 @@
if (!check_uniforms(inst))
BUG("error: generating instruction that accesses two different uniforms");
- out[0] = VIV_ISA_WORD_0_OPCODE(inst->opcode) |
+ assert(!(inst->opcode&~0x7f));
+
+ out[0] = VIV_ISA_WORD_0_OPCODE(inst->opcode & 0x3f) |
VIV_ISA_WORD_0_COND(inst->cond) |
COND(inst->sat, VIV_ISA_WORD_0_SAT) |
COND(inst->dst.use, VIV_ISA_WORD_0_DST_USE) |
@@ -88,6 +90,7 @@
VIV_ISA_WORD_2_SRC0_RGROUP(inst->src[0].rgroup) |
COND(inst->src[1].use, VIV_ISA_WORD_2_SRC1_USE) |
VIV_ISA_WORD_2_SRC1_REG(inst->src[1].reg) |
+ COND(inst->opcode & 0x40, VIV_ISA_WORD_2_OPCODE_BIT6) |
VIV_ISA_WORD_2_SRC1_SWIZ(inst->src[1].swiz) |
COND(inst->src[1].neg, VIV_ISA_WORD_2_SRC1_NEG) |
COND(inst->src[1].abs, VIV_ISA_WORD_2_SRC1_ABS) |
diff -Nru mesa-17.2.4/src/gallium/drivers/etnaviv/etnaviv_blend.c mesa-17.3.3/src/gallium/drivers/etnaviv/etnaviv_blend.c
--- mesa-17.2.4/src/gallium/drivers/etnaviv/etnaviv_blend.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/etnaviv/etnaviv_blend.c 2018-01-18 21:30:28.000000000 +0000
@@ -27,7 +27,9 @@
#include "etnaviv_blend.h"
#include "etnaviv_context.h"
+#include "etnaviv_screen.h"
#include "etnaviv_translate.h"
+#include "hw/common.xml.h"
#include "pipe/p_defines.h"
#include "util/u_memory.h"
@@ -35,8 +37,10 @@
etna_blend_state_create(struct pipe_context *pctx,
const struct pipe_blend_state *so)
{
+ struct etna_context *ctx = etna_context(pctx);
const struct pipe_rt_blend_state *rt0 = &so->rt[0];
struct etna_blend_state *co = CALLOC_STRUCT(etna_blend_state);
+ bool alpha_enable, logicop_enable;
if (!co)
return NULL;
@@ -48,7 +52,7 @@
* - NOT source factor is ONE and destination factor ZERO for both rgb and
* alpha (which would mean that blending is effectively disabled)
*/
- co->enable = rt0->blend_enable &&
+ alpha_enable = rt0->blend_enable &&
!(rt0->rgb_src_factor == PIPE_BLENDFACTOR_ONE &&
rt0->rgb_dst_factor == PIPE_BLENDFACTOR_ZERO &&
rt0->alpha_src_factor == PIPE_BLENDFACTOR_ONE &&
@@ -59,11 +63,11 @@
* - NOT source factor is equal to destination factor for both rgb abd
* alpha (which would effectively that mean alpha is not separate)
*/
- bool separate_alpha = co->enable &&
+ bool separate_alpha = alpha_enable &&
!(rt0->rgb_src_factor == rt0->alpha_src_factor &&
rt0->rgb_dst_factor == rt0->alpha_dst_factor);
- if (co->enable) {
+ if (alpha_enable) {
co->PE_ALPHA_CONFIG =
VIVS_PE_ALPHA_CONFIG_BLEND_ENABLE_COLOR |
COND(separate_alpha, VIVS_PE_ALPHA_CONFIG_BLEND_SEPARATE_ALPHA) |
@@ -77,10 +81,15 @@
co->PE_ALPHA_CONFIG = 0;
}
+ logicop_enable = so->logicop_enable &&
+ VIV_FEATURE(ctx->screen, chipMinorFeatures2, LOGIC_OP);
+
co->PE_LOGIC_OP =
- VIVS_PE_LOGIC_OP_OP(so->logicop_enable ? so->logicop_func : LOGIC_OP_COPY) |
+ VIVS_PE_LOGIC_OP_OP(logicop_enable ? so->logicop_func : LOGIC_OP_COPY) |
0x000E4000 /* ??? */;
+ co->fo_allowed = !alpha_enable && !logicop_enable;
+
/* independent_blend_enable not needed: only one rt supported */
/* XXX alpha_to_coverage / alpha_to_one? */
/* Set dither registers based on dither status. These registers set the
@@ -122,7 +131,8 @@
* - The color mask is 1111
* - No blending is used
*/
- bool full_overwrite = (rt0->colormask == 0xf) && !blend->enable;
+ bool full_overwrite = (rt0->colormask == 0xf) &&
+ blend->fo_allowed;
blend->PE_COLOR_FORMAT =
VIVS_PE_COLOR_FORMAT_COMPONENTS(colormask) |
COND(full_overwrite, VIVS_PE_COLOR_FORMAT_OVERWRITE);
diff -Nru mesa-17.2.4/src/gallium/drivers/etnaviv/etnaviv_blend.h mesa-17.3.3/src/gallium/drivers/etnaviv/etnaviv_blend.h
--- mesa-17.2.4/src/gallium/drivers/etnaviv/etnaviv_blend.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/etnaviv/etnaviv_blend.h 2018-01-18 21:30:28.000000000 +0000
@@ -35,7 +35,7 @@
struct etna_blend_state {
struct pipe_blend_state base;
- bool enable;
+ bool fo_allowed;
uint32_t PE_ALPHA_CONFIG;
uint32_t PE_COLOR_FORMAT;
diff -Nru mesa-17.2.4/src/gallium/drivers/etnaviv/etnaviv_clear_blit.c mesa-17.3.3/src/gallium/drivers/etnaviv/etnaviv_clear_blit.c
--- mesa-17.2.4/src/gallium/drivers/etnaviv/etnaviv_clear_blit.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/etnaviv/etnaviv_clear_blit.c 2018-01-18 21:30:28.000000000 +0000
@@ -130,7 +130,7 @@
}
surf->level->ts_valid = true;
- ctx->dirty |= ETNA_DIRTY_TS;
+ ctx->dirty |= ETNA_DIRTY_TS | ETNA_DIRTY_DERIVE_TS;
} else if (unlikely(new_clear_value != surf->level->clear_value)) { /* Queue normal RS clear for non-TS surfaces */
/* If clear color changed, re-generate stored command */
etna_rs_gen_clear_surface(ctx, surf, new_clear_value);
@@ -189,7 +189,7 @@
}
surf->level->ts_valid = true;
- ctx->dirty |= ETNA_DIRTY_TS;
+ ctx->dirty |= ETNA_DIRTY_TS | ETNA_DIRTY_DERIVE_TS;
} else {
if (unlikely(new_clear_value != surf->level->clear_value)) { /* Queue normal RS clear for non-TS surfaces */
/* If clear depth value changed, re-generate stored command */
@@ -358,6 +358,59 @@
return true;
}
+/*
+ * Compute the offset of the origin (box->x, box->y) of a box inside a
+ * surface with the given layout. Only the x and y fields of the box are
+ * read; the caller adds the result to the level/layer offset.
+ *
+ * - LINEAR: y * stride + x * blocksize.
+ * - TILED / MULTI_TILED: x and y must be multiples of 4 (asserted).
+ * - SUPER_TILED / MULTI_SUPERTILED: x and y must be multiples of 64
+ *   (asserted).
+ * For the MULTI_* layouts y is halved first, so the alignment assert is
+ * applied to the halved value.
+ *
+ * Any other layout hits unreachable().
+ * NOTE(review): stride and the returned offset are presumably in bytes
+ * (blocksize comes from util_format_get_blocksize) - confirm.
+ */
+static inline size_t
+etna_compute_tileoffset(const struct pipe_box *box, enum pipe_format format,
+ size_t stride, enum etna_surface_layout layout)
+{
+ size_t offset;
+ unsigned int x = box->x, y = box->y;
+ unsigned int blocksize = util_format_get_blocksize(format);
+
+ switch (layout) {
+ case ETNA_LAYOUT_LINEAR:
+ offset = y * stride + x * blocksize;
+ break;
+ case ETNA_LAYOUT_MULTI_TILED:
+ y >>= 1;
+ /* fall-through */
+ case ETNA_LAYOUT_TILED:
+ assert(!(x & 0x03) && !(y & 0x03));
+ /* A row of tiles spans 4 lines, so a 4-aligned x advances by 4 * x blocks. */
+ offset = (y & ~0x03) * stride + blocksize * ((x & ~0x03) << 2);
+ break;
+ case ETNA_LAYOUT_MULTI_SUPERTILED:
+ y >>= 1;
+ /* fall-through */
+ case ETNA_LAYOUT_SUPER_TILED:
+ assert(!(x & 0x3f) && !(y & 0x3f));
+ /* Same scheme as above with 64-line rows: x advances by 64 * x blocks. */
+ offset = (y & ~0x3f) * stride + blocksize * ((x & ~0x3f) << 6);
+ break;
+ default:
+ unreachable("invalid resource layout");
+ }
+
+ return offset;
+}
+
+/*
+ * Report the alignment masks for a surface layout: a coordinate is
+ * acceptable to the caller when (coord & mask) == 0.
+ *
+ * Layouts with ETNA_LAYOUT_BIT_SUPER set use a 64x64 alignment; all others
+ * use ETNA_RS_WIDTH_MASK + 1 by ETNA_RS_HEIGHT_MASK + 1. The height
+ * alignment is then multiplied by the number of pixel pipes.
+ *
+ * The results are written to *width_mask and *height_mask as
+ * (alignment - 1).
+ * NOTE(review): this is only a valid bit mask if the alignments are powers
+ * of two, i.e. presumably pixel_pipes is itself a power of two - confirm.
+ */
+static inline void
+etna_get_rs_alignment_mask(const struct etna_context *ctx,
+ const enum etna_surface_layout layout,
+ unsigned int *width_mask, unsigned int *height_mask)
+{
+ unsigned int h_align, w_align;
+
+ if (layout & ETNA_LAYOUT_BIT_SUPER) {
+ w_align = h_align = 64;
+ } else {
+ w_align = ETNA_RS_WIDTH_MASK + 1;
+ h_align = ETNA_RS_HEIGHT_MASK + 1;
+ }
+
+ h_align *= ctx->screen->specs.pixel_pipes;
+
+ *width_mask = w_align - 1;
+ *height_mask = h_align -1;
+}
+
static bool
etna_try_rs_blit(struct pipe_context *pctx,
const struct pipe_blit_info *blit_info)
@@ -399,14 +452,22 @@
unsigned dst_format = etna_compatible_rs_format(blit_info->dst.format);
if (translate_rs_format(src_format) == ETNA_NO_MATCH ||
translate_rs_format(dst_format) == ETNA_NO_MATCH ||
- blit_info->scissor_enable || blit_info->src.box.x != 0 ||
- blit_info->src.box.y != 0 || blit_info->dst.box.x != 0 ||
- blit_info->dst.box.y != 0 ||
+ blit_info->scissor_enable ||
blit_info->dst.box.depth != blit_info->src.box.depth ||
blit_info->dst.box.depth != 1) {
return FALSE;
}
+ unsigned w_mask, h_mask;
+
+ etna_get_rs_alignment_mask(ctx, src->layout, &w_mask, &h_mask);
+ if ((blit_info->src.box.x & w_mask) || (blit_info->src.box.y & h_mask))
+ return FALSE;
+
+ etna_get_rs_alignment_mask(ctx, dst->layout, &w_mask, &h_mask);
+ if ((blit_info->dst.box.x & w_mask) || (blit_info->dst.box.y & h_mask))
+ return FALSE;
+
/* Ensure that the Z coordinate is sane */
if (dst->base.target != PIPE_TEXTURE_CUBE)
assert(blit_info->dst.box.z == 0);
@@ -426,10 +487,18 @@
assert(blit_info->dst.box.x + blit_info->dst.box.width <= dst_lev->padded_width);
assert(blit_info->dst.box.y + blit_info->dst.box.height <= dst_lev->padded_height);
- unsigned src_offset =
- src_lev->offset + blit_info->src.box.z * src_lev->layer_stride;
- unsigned dst_offset =
- dst_lev->offset + blit_info->dst.box.z * dst_lev->layer_stride;
+ unsigned src_offset = src_lev->offset +
+ blit_info->src.box.z * src_lev->layer_stride +
+ etna_compute_tileoffset(&blit_info->src.box,
+ blit_info->src.format,
+ src_lev->stride,
+ src->layout);
+ unsigned dst_offset = dst_lev->offset +
+ blit_info->dst.box.z * dst_lev->layer_stride +
+ etna_compute_tileoffset(&blit_info->dst.box,
+ blit_info->dst.format,
+ dst_lev->stride,
+ dst->layout);
if (src_lev->padded_width <= ETNA_RS_WIDTH_MASK ||
dst_lev->padded_width <= ETNA_RS_WIDTH_MASK ||
@@ -486,6 +555,7 @@
}
/* Set up color TS to source surface before blit, if needed */
+ bool source_ts_valid = false;
if (src->levels[blit_info->src.level].ts_size &&
src->levels[blit_info->src.level].ts_valid) {
struct etna_reloc reloc;
@@ -503,12 +573,15 @@
memset(&reloc, 0, sizeof(struct etna_reloc));
reloc.bo = src->bo;
- reloc.offset = src_offset;
+ reloc.offset = src_lev->offset +
+ blit_info->src.box.z * src_lev->layer_stride;
reloc.flags = ETNA_RELOC_READ;
etna_set_state_reloc(ctx->stream, VIVS_TS_COLOR_SURFACE_BASE, &reloc);
etna_set_state(ctx->stream, VIVS_TS_COLOR_CLEAR_VALUE,
src->levels[blit_info->src.level].clear_value);
+
+ source_ts_valid = true;
} else {
etna_set_state(ctx->stream, VIVS_TS_MEM_CONFIG, ts_mem_config);
}
@@ -521,7 +594,9 @@
.source = src->bo,
.source_offset = src_offset,
.source_stride = src_lev->stride,
+ .source_padded_width = src_lev->padded_width,
.source_padded_height = src_lev->padded_height,
+ .source_ts_valid = source_ts_valid,
.dest_format = translate_rs_format(dst_format),
.dest_tiling = dst->layout,
.dest = dst->bo,
@@ -541,6 +616,7 @@
resource_written(ctx, &dst->base);
dst->seqno++;
dst->levels[blit_info->dst.level].ts_valid = false;
+ ctx->dirty |= ETNA_DIRTY_DERIVE_TS;
return TRUE;
@@ -656,6 +732,33 @@
}
}
+/*
+ * Copy one box of a single mip level from src to dst through pctx->blit.
+ *
+ * Both resources must have the same format and array_size (asserted). The
+ * same box is used for source and destination, the blit mask comes from
+ * util_format_get_mask(dst->format) and the filter is nearest.
+ *
+ * The z and depth fields of the caller's box are not honoured: depth is
+ * forced to 1 and one blit is issued per layer, with z running from 0 to
+ * dst->array_size - 1.
+ */
+void
+etna_copy_resource_box(struct pipe_context *pctx, struct pipe_resource *dst,
+ struct pipe_resource *src, int level,
+ struct pipe_box *box)
+{
+ assert(src->format == dst->format);
+ assert(src->array_size == dst->array_size);
+
+ struct pipe_blit_info blit = {};
+ blit.mask = util_format_get_mask(dst->format);
+ blit.filter = PIPE_TEX_FILTER_NEAREST;
+ blit.src.resource = src;
+ blit.src.format = src->format;
+ blit.src.box = *box;
+ blit.dst.resource = dst;
+ blit.dst.format = dst->format;
+ blit.dst.box = *box;
+
+ /* Blit a single slice at a time, at the requested level on both sides. */
+ blit.dst.box.depth = blit.src.box.depth = 1;
+ blit.src.level = blit.dst.level = level;
+
+ for (int layer = 0; layer < dst->array_size; layer++) {
+ blit.src.box.z = blit.dst.box.z = layer;
+ pctx->blit(pctx, &blit);
+ }
+}
+
void
etna_clear_blit_init(struct pipe_context *pctx)
{
diff -Nru mesa-17.2.4/src/gallium/drivers/etnaviv/etnaviv_clear_blit.h mesa-17.3.3/src/gallium/drivers/etnaviv/etnaviv_clear_blit.h
--- mesa-17.2.4/src/gallium/drivers/etnaviv/etnaviv_clear_blit.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/etnaviv/etnaviv_clear_blit.h 2018-01-18 21:30:28.000000000 +0000
@@ -43,6 +43,11 @@
struct pipe_resource *src, int first_level, int last_level);
void
+etna_copy_resource_box(struct pipe_context *pctx, struct pipe_resource *dst,
+ struct pipe_resource *src, int level,
+ struct pipe_box *box);
+
+void
etna_clear_blit_init(struct pipe_context *pctx);
#endif
diff -Nru mesa-17.2.4/src/gallium/drivers/etnaviv/etnaviv_compiler.c mesa-17.3.3/src/gallium/drivers/etnaviv/etnaviv_compiler.c
--- mesa-17.2.4/src/gallium/drivers/etnaviv/etnaviv_compiler.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/etnaviv/etnaviv_compiler.c 2018-01-18 21:30:28.000000000 +0000
@@ -1474,9 +1474,6 @@
* - Output an x and y component, which need to be multiplied to
* get the result
*/
- /* TGSI lowering should deal with SCS */
- assert(inst->Instruction.Opcode != TGSI_OPCODE_SCS);
-
struct etna_native_reg temp = etna_compile_get_inner_temp(c); /* only using .xyz */
emit_inst(c, &(struct etna_inst) {
.opcode = INST_OPCODE_MUL,
@@ -1503,9 +1500,6 @@
});
} else if (c->specs->has_sin_cos_sqrt) {
- /* TGSI lowering should deal with SCS */
- assert(inst->Instruction.Opcode != TGSI_OPCODE_SCS);
-
struct etna_native_reg temp = etna_compile_get_inner_temp(c);
/* add divide by PI/2, using a temp register. GC2000
* fails with src==dst for the trig instruction. */
@@ -1540,8 +1534,6 @@
* DP3 t.x___, t.xyww, C, void (for scs)
* MAD t._y_w, t,xxzz, |t.xxzz|, -t.xxzz
* MAD dst, t.ywyw, .2225, t.xzxz
- *
- * TODO: we don't set dst.zw correctly for SCS.
*/
struct etna_inst *p, ins[9] = { };
struct etna_native_reg t0 = etna_compile_get_inner_temp(c);
@@ -1597,19 +1589,7 @@
ins[4].src[0] = swizzle(t0s, dp3_swiz);
ins[4].src[1] = swizzle(sincos[0], SWIZZLE(Z, W, W, W));
- if (inst->Instruction.Opcode == TGSI_OPCODE_SCS) {
- ins[5] = ins[3];
- ins[6] = ins[4];
- ins[4].dst.comps = INST_COMPS_X;
- ins[6].dst.comps = INST_COMPS_Z;
- ins[5].src[0] = swizzle(t0s, SWIZZLE(W, Z, W, W));
- ins[6].src[0] = swizzle(t0s, SWIZZLE(Z, Y, W, W));
- ins[5].src[1] = absolute(ins[5].src[0]);
- p = &ins[7];
- } else {
- p = &ins[5];
- }
-
+ p = &ins[5];
p->opcode = INST_OPCODE_MAD;
p->dst = etna_native_to_dst(t0, INST_COMPS_Y | INST_COMPS_W);
p->src[0] = swizzle(t0s, SWIZZLE(X, X, Z, Z));
@@ -1664,33 +1644,6 @@
}
static void
-trans_dph(const struct instr_translater *t, struct etna_compile *c,
- const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
-{
- /*
- DP3 tmp.xyzw, src0.xyzw, src1,xyzw, void
- ADD dst.xyzw, tmp.xyzw, void, src1.wwww
- */
- struct etna_native_reg temp = etna_compile_get_inner_temp(c);
- struct etna_inst ins[2] = { };
-
- ins[0].opcode = INST_OPCODE_DP3;
- ins[0].dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
- INST_COMPS_Z | INST_COMPS_W);
- ins[0].src[0] = src[0];
- ins[0].src[1] = src[1];
-
- ins[1].opcode = INST_OPCODE_ADD;
- ins[1].sat = inst->Instruction.Saturate;
- ins[1].dst = convert_dst(c, &inst->Dst[0]);
- ins[1].src[0] = etna_native_to_src(temp, INST_SWIZ_IDENTITY);
- ins[1].src[2] = swizzle(src[1], SWIZZLE(W, W, W, W));
-
- emit_inst(c, &ins[0]);
- emit_inst(c, &ins[1]);
-}
-
-static void
trans_sampler(const struct instr_translater *t, struct etna_compile *c,
const struct tgsi_full_instruction *inst,
struct etna_inst_src *src)
@@ -1799,6 +1752,7 @@
INSTR(RSQ, trans_instr, .opc = INST_OPCODE_RSQ, .src = {2, -1, -1}),
INSTR(MUL, trans_instr, .opc = INST_OPCODE_MUL, .src = {0, 1, -1}),
INSTR(ADD, trans_instr, .opc = INST_OPCODE_ADD, .src = {0, 2, -1}),
+ INSTR(DP2, trans_instr, .opc = INST_OPCODE_DP2, .src = {0, 1, -1}),
INSTR(DP3, trans_instr, .opc = INST_OPCODE_DP3, .src = {0, 1, -1}),
INSTR(DP4, trans_instr, .opc = INST_OPCODE_DP4, .src = {0, 1, -1}),
INSTR(DST, trans_instr, .opc = INST_OPCODE_DST, .src = {0, 1, -1}),
@@ -1833,11 +1787,9 @@
INSTR(LRP, trans_lrp),
INSTR(LIT, trans_lit),
INSTR(SSG, trans_ssg),
- INSTR(DPH, trans_dph),
INSTR(SIN, trans_trig),
INSTR(COS, trans_trig),
- INSTR(SCS, trans_trig),
INSTR(SLT, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_LT),
INSTR(SGE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_GE),
@@ -2174,6 +2126,10 @@
for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) {
struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx];
assert(sf->num_reg < ETNA_NUM_INPUTS);
+
+ if (!reg->native.valid)
+ continue;
+
/* XXX exclude inputs with special semantics such as gl_frontFacing */
sf->reg[sf->num_reg].reg = reg->native.id;
sf->reg[sf->num_reg].semantic = reg->semantic;
@@ -2277,7 +2233,7 @@
/* round up number of uniforms, including immediates, in units of four */
int num_uniforms = c->imm_base / 4 + (c->imm_size + 3) / 4;
- if (c->inst_ptr > c->specs->max_instructions) {
+ if (!c->specs->has_icache && c->inst_ptr > c->specs->max_instructions) {
DBG("Number of instructions (%d) exceeds maximum %d", c->inst_ptr,
c->specs->max_instructions);
return false;
@@ -2337,16 +2293,13 @@
const struct etna_specs *specs = v->shader->specs;
struct tgsi_lowering_config lconfig = {
- .lower_SCS = specs->has_sin_cos_sqrt,
.lower_FLR = !specs->has_sign_floor_ceil,
.lower_CEIL = !specs->has_sign_floor_ceil,
.lower_POW = true,
.lower_EXP = true,
.lower_LOG = true,
- .lower_DP2 = true,
- .lower_DP2A = true,
+ .lower_DP2 = !specs->has_halti2_instructions,
.lower_TRUNC = true,
- .lower_XPD = true
};
c = CALLOC_STRUCT(etna_compile);
@@ -2501,6 +2454,7 @@
v->vs_pointsize_out_reg = -1;
v->ps_color_out_reg = -1;
v->ps_depth_out_reg = -1;
+ v->needs_icache = c->inst_ptr > c->specs->max_instructions;
copy_uniform_state_to_shader(c, v);
if (c->info.processor == PIPE_SHADER_VERTEX) {
@@ -2607,6 +2561,7 @@
const struct etna_shader_inout *fsio = &fs->infile.reg[idx];
const struct etna_shader_inout *vsio = etna_shader_vs_lookup(vs, fsio);
struct etna_varying *varying;
+ bool interpolate_always = fsio->semantic.Name != TGSI_SEMANTIC_COLOR;
assert(fsio->reg > 0 && fsio->reg <= ARRAY_SIZE(info->varyings));
@@ -2616,27 +2571,24 @@
varying = &info->varyings[fsio->reg - 1];
varying->num_components = fsio->num_components;
- if (fsio->semantic.Name == TGSI_SEMANTIC_COLOR) /* colors affected by flat shading */
+ if (!interpolate_always) /* colors affected by flat shading */
varying->pa_attributes = 0x200;
else /* texture coord or other bypasses flat shading */
varying->pa_attributes = 0x2f1;
- if (fsio->semantic.Name == TGSI_SEMANTIC_PCOORD) {
- varying->use[0] = VARYING_COMPONENT_USE_POINTCOORD_X;
- varying->use[1] = VARYING_COMPONENT_USE_POINTCOORD_Y;
- varying->use[2] = VARYING_COMPONENT_USE_USED;
- varying->use[3] = VARYING_COMPONENT_USE_USED;
- varying->reg = 0; /* replaced by point coord -- doesn't matter */
+ varying->use[0] = interpolate_always ? VARYING_COMPONENT_USE_POINTCOORD_X : VARYING_COMPONENT_USE_USED;
+ varying->use[1] = interpolate_always ? VARYING_COMPONENT_USE_POINTCOORD_Y : VARYING_COMPONENT_USE_USED;
+ varying->use[2] = VARYING_COMPONENT_USE_USED;
+ varying->use[3] = VARYING_COMPONENT_USE_USED;
+
+
+ /* point coord is position output from VS, so has no dedicated reg */
+ if (fsio->semantic.Name == TGSI_SEMANTIC_PCOORD)
continue;
- }
if (vsio == NULL)
return true; /* not found -- link error */
- varying->use[0] = VARYING_COMPONENT_USE_USED;
- varying->use[1] = VARYING_COMPONENT_USE_USED;
- varying->use[2] = VARYING_COMPONENT_USE_USED;
- varying->use[3] = VARYING_COMPONENT_USE_USED;
varying->reg = vsio->reg;
}
diff -Nru mesa-17.2.4/src/gallium/drivers/etnaviv/etnaviv_compiler.h mesa-17.3.3/src/gallium/drivers/etnaviv/etnaviv_compiler.h
--- mesa-17.2.4/src/gallium/drivers/etnaviv/etnaviv_compiler.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/etnaviv/etnaviv_compiler.h 2018-01-18 21:30:28.000000000 +0000
@@ -94,12 +94,17 @@
/* unknown input property (XX_INPUT_COUNT, field UNK8) */
uint32_t input_count_unk8;
+ /* shader is larger than GPU instruction limit, thus needs icache */
+ bool needs_icache;
+
/* shader variants form a linked list */
struct etna_shader_variant *next;
/* replicated here to avoid passing extra ptrs everywhere */
struct etna_shader *shader;
struct etna_shader_key key;
+
+ struct etna_bo *bo; /* cached code memory bo handle (for icache) */
};
struct etna_varying {
diff -Nru mesa-17.2.4/src/gallium/drivers/etnaviv/etnaviv_context.c mesa-17.3.3/src/gallium/drivers/etnaviv/etnaviv_context.c
--- mesa-17.2.4/src/gallium/drivers/etnaviv/etnaviv_context.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/etnaviv/etnaviv_context.c 2018-01-18 21:30:28.000000000 +0000
@@ -34,6 +34,7 @@
#include "etnaviv_emit.h"
#include "etnaviv_fence.h"
#include "etnaviv_query.h"
+#include "etnaviv_query_hw.h"
#include "etnaviv_rasterizer.h"
#include "etnaviv_screen.h"
#include "etnaviv_shader.h"
@@ -260,6 +261,9 @@
if (ctx->sampler_view[i])
resource_read(ctx, ctx->sampler_view[i]->texture);
+ list_for_each_entry(struct etna_hw_query, hq, &ctx->active_hw_queries, node)
+ resource_written(ctx, hq->prsc);
+
ctx->stats.prims_emitted += u_reduced_prims_for_vertices(info->mode, info->count);
ctx->stats.draw_calls++;
@@ -299,10 +303,16 @@
struct etna_context *ctx = etna_context(pctx);
int out_fence_fd = -1;
+ list_for_each_entry(struct etna_hw_query, hq, &ctx->active_hw_queries, node)
+ etna_hw_query_suspend(hq, ctx);
+
etna_cmd_stream_flush2(ctx->stream, ctx->in_fence_fd,
(flags & PIPE_FLUSH_FENCE_FD) ? &out_fence_fd :
NULL);
+ list_for_each_entry(struct etna_hw_query, hq, &ctx->active_hw_queries, node)
+ etna_hw_query_resume(hq, ctx);
+
if (fence)
*fence = etna_fence_create(pctx, out_fence_fd);
}
@@ -317,6 +327,19 @@
etna_set_state(stream, VIVS_GL_VERTEX_ELEMENT_CONFIG, 0x00000001);
etna_set_state(stream, VIVS_RA_EARLY_DEPTH, 0x00000031);
etna_set_state(stream, VIVS_PA_W_CLIP_LIMIT, 0x34000001);
+ etna_set_state(stream, VIVS_PA_FLAGS, 0x00000000); /* blob sets ZCONVERT_BYPASS on GC3000, this messes up z for us */
+ etna_set_state(stream, VIVS_RA_UNK00E0C, 0x00000000);
+ etna_set_state(stream, VIVS_PA_VIEWPORT_UNK00A80, 0x38a01404);
+ etna_set_state(stream, VIVS_PA_VIEWPORT_UNK00A84, fui(8192.0));
+ etna_set_state(stream, VIVS_PA_ZFARCLIPPING, 0x00000000);
+ etna_set_state(stream, VIVS_PE_ALPHA_COLOR_EXT0, 0x00000000);
+ etna_set_state(stream, VIVS_PE_ALPHA_COLOR_EXT1, 0x00000000);
+ etna_set_state(stream, VIVS_RA_HDEPTH_CONTROL, 0x00007000);
+ etna_set_state(stream, VIVS_PE_STENCIL_CONFIG_EXT2, 0x00000000);
+ etna_set_state(stream, VIVS_GL_UNK03834, 0x00000000);
+ etna_set_state(stream, VIVS_GL_UNK03838, 0x00000000);
+ etna_set_state(stream, VIVS_GL_UNK03854, 0x00000000);
+ etna_set_state(stream, VIVS_PS_CONTROL_EXT, 0x00000000);
/* Enable SINGLE_BUFFER for resolve, if supported */
etna_set_state(stream, VIVS_RS_SINGLE_BUFFER, COND(ctx->specs.single_buffer, VIVS_RS_SINGLE_BUFFER_ENABLE));
@@ -423,6 +446,7 @@
goto fail;
slab_create_child(&ctx->transfer_pool, &screen->transfer_pool);
+ list_inithead(&ctx->active_hw_queries);
return pctx;
diff -Nru mesa-17.2.4/src/gallium/drivers/etnaviv/etnaviv_context.h mesa-17.3.3/src/gallium/drivers/etnaviv/etnaviv_context.h
--- mesa-17.2.4/src/gallium/drivers/etnaviv/etnaviv_context.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/etnaviv/etnaviv_context.h 2018-01-18 21:30:28.000000000 +0000
@@ -124,6 +124,7 @@
ETNA_DIRTY_SHADER = (1 << 16),
ETNA_DIRTY_TS = (1 << 17),
ETNA_DIRTY_TEXTURE_CACHES = (1 << 18),
+ ETNA_DIRTY_DERIVE_TS = (1 << 19),
} dirty;
uint32_t prim_hwsupport;
@@ -179,6 +180,9 @@
struct pipe_debug_callback debug;
int in_fence_fd;
+
+ /* list of active hardware queries */
+ struct list_head active_hw_queries;
};
static inline struct etna_context *
diff -Nru mesa-17.2.4/src/gallium/drivers/etnaviv/etnaviv_disasm.c mesa-17.3.3/src/gallium/drivers/etnaviv/etnaviv_disasm.c
--- mesa-17.2.4/src/gallium/drivers/etnaviv/etnaviv_disasm.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/etnaviv/etnaviv_disasm.c 2018-01-18 21:30:28.000000000 +0000
@@ -513,6 +513,7 @@
OPC(AND),
OPC(XOR),
OPC(NOT),
+ OPC(DP2),
};
static void
diff -Nru mesa-17.2.4/src/gallium/drivers/etnaviv/etnaviv_emit.c mesa-17.3.3/src/gallium/drivers/etnaviv/etnaviv_emit.c
--- mesa-17.2.4/src/gallium/drivers/etnaviv/etnaviv_emit.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/etnaviv/etnaviv_emit.c 2018-01-18 21:30:28.000000000 +0000
@@ -171,9 +171,20 @@
struct etna_cmd_stream *stream = ctx->stream;
struct etna_coalesce coalesce;
+ if (cs->RS_KICKER_INPLACE && !cs->source_ts_valid)
+ /* Inplace resolve is no-op if TS is not configured */
+ return;
+
ctx->stats.rs_operations++;
- if (screen->specs.pixel_pipes == 1) {
+ if (cs->RS_KICKER_INPLACE) {
+ etna_cmd_stream_reserve(stream, 6);
+ etna_coalesce_start(stream, &coalesce);
+ /* 0/1 */ EMIT_STATE(RS_EXTRA_CONFIG, cs->RS_EXTRA_CONFIG);
+ /* 2/3 */ EMIT_STATE(RS_SOURCE_STRIDE, cs->RS_SOURCE_STRIDE);
+ /* 4/5 */ EMIT_STATE(RS_KICKER_INPLACE, cs->RS_KICKER_INPLACE);
+ etna_coalesce_end(stream, &coalesce);
+ } else if (screen->specs.pixel_pipes == 1) {
etna_cmd_stream_reserve(stream, 22);
etna_coalesce_start(stream, &coalesce);
/* 0/1 */ EMIT_STATE(RS_CONFIG, cs->RS_CONFIG);
@@ -421,9 +432,6 @@
if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
/*00830*/ EMIT_STATE(VS_LOAD_BALANCING, ctx->shader_state.VS_LOAD_BALANCING);
/*00838*/ EMIT_STATE(VS_START_PC, ctx->shader_state.VS_START_PC);
- if (ctx->specs.has_shader_range_registers) {
- /*0085C*/ EMIT_STATE(VS_RANGE, (ctx->shader_state.vs_inst_mem_size / 4 - 1) << 16);
- }
}
if (unlikely(dirty & (ETNA_DIRTY_VIEWPORT))) {
/*00A00*/ EMIT_STATE_FIXP(PA_VIEWPORT_SCALE_X, ctx->viewport.PA_VIEWPORT_SCALE_X);
@@ -534,10 +542,6 @@
: ctx->shader_state.PS_TEMP_REGISTER_CONTROL);
/*01010*/ EMIT_STATE(PS_CONTROL, ctx->shader_state.PS_CONTROL);
/*01018*/ EMIT_STATE(PS_START_PC, ctx->shader_state.PS_START_PC);
- if (ctx->specs.has_shader_range_registers) {
- /*0101C*/ EMIT_STATE(PS_RANGE, ((ctx->shader_state.ps_inst_mem_size / 4 - 1 + 0x100) << 16) |
- 0x100);
- }
}
if (unlikely(dirty & (ETNA_DIRTY_ZSA | ETNA_DIRTY_FRAMEBUFFER))) {
uint32_t val = etna_zsa_state(ctx->zsa)->PE_DEPTH_CONFIG;
@@ -739,18 +743,54 @@
if (dirty & (ETNA_DIRTY_SHADER)) {
/* Special case: a new shader was loaded; simply re-load all uniforms and
* shader code at once */
- /*04000 or 0C000*/
- etna_set_state_multi(stream, ctx->specs.vs_offset,
- ctx->shader_state.vs_inst_mem_size,
- ctx->shader_state.VS_INST_MEM);
- /*06000 or 0D000*/
- etna_set_state_multi(stream, ctx->specs.ps_offset,
- ctx->shader_state.ps_inst_mem_size,
- ctx->shader_state.PS_INST_MEM);
- /*05000*/ etna_set_state_multi(stream, VIVS_VS_UNIFORMS(0),
+ if (ctx->shader_state.VS_INST_ADDR.bo || ctx->shader_state.PS_INST_ADDR.bo) {
+ assert(ctx->specs.has_icache && ctx->specs.has_shader_range_registers);
+ /* Set icache (VS) */
+ etna_set_state(stream, VIVS_VS_RANGE, (ctx->shader_state.vs_inst_mem_size / 4 - 1) << 16);
+ etna_set_state(stream, VIVS_VS_ICACHE_CONTROL,
+ VIVS_VS_ICACHE_CONTROL_ENABLE |
+ VIVS_VS_ICACHE_CONTROL_FLUSH_VS);
+ assert(ctx->shader_state.VS_INST_ADDR.bo);
+ etna_set_state_reloc(stream, VIVS_VS_INST_ADDR, &ctx->shader_state.VS_INST_ADDR);
+
+ /* Set icache (PS) */
+ etna_set_state(stream, VIVS_PS_RANGE, (ctx->shader_state.ps_inst_mem_size / 4 - 1) << 16);
+ etna_set_state(stream, VIVS_VS_ICACHE_CONTROL,
+ VIVS_VS_ICACHE_CONTROL_ENABLE |
+ VIVS_VS_ICACHE_CONTROL_FLUSH_PS);
+ assert(ctx->shader_state.PS_INST_ADDR.bo);
+ etna_set_state_reloc(stream, VIVS_PS_INST_ADDR, &ctx->shader_state.PS_INST_ADDR);
+ } else {
+ /* Upload shader directly, first flushing and disabling icache if
+ * supported on this hw */
+ if (ctx->specs.has_icache) {
+ etna_set_state(stream, VIVS_VS_ICACHE_CONTROL,
+ VIVS_VS_ICACHE_CONTROL_FLUSH_PS |
+ VIVS_VS_ICACHE_CONTROL_FLUSH_VS);
+ }
+ if (ctx->specs.has_shader_range_registers) {
+ etna_set_state(stream, VIVS_VS_RANGE, (ctx->shader_state.vs_inst_mem_size / 4 - 1) << 16);
+ etna_set_state(stream, VIVS_PS_RANGE, ((ctx->shader_state.ps_inst_mem_size / 4 - 1 + 0x100) << 16) |
+ 0x100);
+ }
+ etna_set_state_multi(stream, ctx->specs.vs_offset,
+ ctx->shader_state.vs_inst_mem_size,
+ ctx->shader_state.VS_INST_MEM);
+ etna_set_state_multi(stream, ctx->specs.ps_offset,
+ ctx->shader_state.ps_inst_mem_size,
+ ctx->shader_state.PS_INST_MEM);
+ }
+
+ if (ctx->specs.has_unified_uniforms) {
+ etna_set_state(stream, VIVS_VS_UNIFORM_BASE, 0);
+ etna_set_state(stream, VIVS_PS_UNIFORM_BASE, ctx->specs.max_vs_uniforms);
+ }
+ etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH);
+ etna_set_state_multi(stream, ctx->specs.vs_uniforms_offset,
ctx->shader_state.vs_uniforms_size,
ctx->shader_state.VS_UNIFORMS);
- /*07000*/ etna_set_state_multi(stream, VIVS_PS_UNIFORMS(0),
+ etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH | VIVS_VS_UNIFORM_CACHE_PS);
+ etna_set_state_multi(stream, ctx->specs.ps_uniforms_offset,
ctx->shader_state.ps_uniforms_size,
ctx->shader_state.PS_UNIFORMS);
@@ -764,19 +804,23 @@
memcpy(ctx->gpu3d.PS_UNIFORMS, ctx->shader_state.PS_UNIFORMS,
ctx->shader_state.ps_uniforms_size * 4);
} else {
+ /* ideally this cache would only be flushed if there are VS uniform changes */
+ etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH);
etna_coalesce_start(stream, &coalesce);
for (int x = 0; x < ctx->shader.vs->uniforms.const_count; ++x) {
if (ctx->gpu3d.VS_UNIFORMS[x] != ctx->shader_state.VS_UNIFORMS[x]) {
- /*05000*/ EMIT_STATE(VS_UNIFORMS(x), ctx->shader_state.VS_UNIFORMS[x]);
+ etna_coalsence_emit(stream, &coalesce, ctx->specs.vs_uniforms_offset + x*4, ctx->shader_state.VS_UNIFORMS[x]);
ctx->gpu3d.VS_UNIFORMS[x] = ctx->shader_state.VS_UNIFORMS[x];
}
}
etna_coalesce_end(stream, &coalesce);
+ /* ideally this cache would only be flushed if there are PS uniform changes */
+ etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH | VIVS_VS_UNIFORM_CACHE_PS);
etna_coalesce_start(stream, &coalesce);
for (int x = 0; x < ctx->shader.fs->uniforms.const_count; ++x) {
if (ctx->gpu3d.PS_UNIFORMS[x] != ctx->shader_state.PS_UNIFORMS[x]) {
- /*07000*/ EMIT_STATE(PS_UNIFORMS(x), ctx->shader_state.PS_UNIFORMS[x]);
+ etna_coalsence_emit(stream, &coalesce, ctx->specs.ps_uniforms_offset + x*4, ctx->shader_state.PS_UNIFORMS[x]);
ctx->gpu3d.PS_UNIFORMS[x] = ctx->shader_state.PS_UNIFORMS[x];
}
}
diff -Nru mesa-17.2.4/src/gallium/drivers/etnaviv/etnaviv_format.c mesa-17.3.3/src/gallium/drivers/etnaviv/etnaviv_format.c
--- mesa-17.2.4/src/gallium/drivers/etnaviv/etnaviv_format.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/etnaviv/etnaviv_format.c 2018-01-18 21:30:28.000000000 +0000
@@ -90,7 +90,7 @@
static struct etna_format formats[PIPE_FORMAT_COUNT] = {
/* 8-bit */
- V_(R8_UNORM, UNSIGNED_BYTE, NONE),
+ VT(R8_UNORM, UNSIGNED_BYTE, L8, SWIZ(X, 0, 0, 1), NONE),
V_(R8_SNORM, BYTE, NONE),
V_(R8_UINT, UNSIGNED_BYTE, NONE),
V_(R8_SINT, BYTE, NONE),
diff -Nru mesa-17.2.4/src/gallium/drivers/etnaviv/etnaviv_internal.h mesa-17.3.3/src/gallium/drivers/etnaviv/etnaviv_internal.h
--- mesa-17.2.4/src/gallium/drivers/etnaviv/etnaviv_internal.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/etnaviv/etnaviv_internal.h 2018-01-18 21:30:28.000000000 +0000
@@ -72,8 +72,14 @@
unsigned has_shader_range_registers : 1;
/* has the new sin/cos/log functions */
unsigned has_new_transcendentals : 1;
+ /* has the new dp2/dpX_norm instructions, among others */
+ unsigned has_halti2_instructions : 1;
/* supports single-buffer rendering with multiple pixel pipes */
unsigned single_buffer : 1;
+ /* has unified uniforms memory */
+ unsigned has_unified_uniforms : 1;
+ /* can load shader instructions from memory */
+ unsigned has_icache : 1;
/* can use any kind of wrapping mode on npot textures */
unsigned npot_tex_any_wrap;
/* number of bits per TS tile */
@@ -100,6 +106,10 @@
uint32_t vs_offset;
/* pixel shader memory address*/
uint32_t ps_offset;
+ /* vertex shader uniforms address*/
+ uint32_t vs_uniforms_offset;
+ /* pixel shader uniforms address*/
+ uint32_t ps_uniforms_offset;
/* vertex/fragment shader max instructions */
uint32_t max_instructions;
/* maximum number of varyings */
@@ -244,6 +254,8 @@
uint32_t VS_UNIFORMS[ETNA_MAX_UNIFORMS * 4];
uint32_t *PS_INST_MEM;
uint32_t PS_UNIFORMS[ETNA_MAX_UNIFORMS * 4];
+ struct etna_reloc PS_INST_ADDR;
+ struct etna_reloc VS_INST_ADDR;
};
/* state of some 3d and common registers relevant to etna driver */
diff -Nru mesa-17.2.4/src/gallium/drivers/etnaviv/etnaviv_query.c mesa-17.3.3/src/gallium/drivers/etnaviv/etnaviv_query.c
--- mesa-17.2.4/src/gallium/drivers/etnaviv/etnaviv_query.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/etnaviv/etnaviv_query.c 2018-01-18 21:30:28.000000000 +0000
@@ -26,9 +26,11 @@
*/
#include "pipe/p_screen.h"
+#include "util/u_inlines.h"
#include "etnaviv_context.h"
#include "etnaviv_query.h"
+#include "etnaviv_query_hw.h"
#include "etnaviv_query_sw.h"
static struct pipe_query *
@@ -39,6 +41,8 @@
struct etna_query *q;
q = etna_sw_create_query(ctx, query_type);
+ if (!q)
+ q = etna_hw_create_query(ctx, query_type);
return (struct pipe_query *)q;
}
@@ -55,8 +59,15 @@
etna_begin_query(struct pipe_context *pctx, struct pipe_query *pq)
{
struct etna_query *q = etna_query(pq);
+ boolean ret;
- return q->funcs->begin_query(etna_context(pctx), q);
+ if (q->active)
+ return false;
+
+ ret = q->funcs->begin_query(etna_context(pctx), q);
+ q->active = ret;
+
+ return ret;
}
static bool
@@ -64,7 +75,12 @@
{
struct etna_query *q = etna_query(pq);
+ if (!q->active)
+ return false;
+
q->funcs->end_query(etna_context(pctx), q);
+ q->active = false;
+
return true;
}
@@ -74,6 +90,11 @@
{
struct etna_query *q = etna_query(pq);
+ if (q->active)
+ return false;
+
+ util_query_clear_result(result, q->type);
+
return q->funcs->get_query_result(etna_context(pctx), q, wait, result);
}
@@ -81,21 +102,12 @@
etna_get_driver_query_info(struct pipe_screen *pscreen, unsigned index,
struct pipe_driver_query_info *info)
{
- struct pipe_driver_query_info list[] = {
- {"prims-emitted", PIPE_QUERY_PRIMITIVES_EMITTED, { 0 }},
- {"draw-calls", ETNA_QUERY_DRAW_CALLS, { 0 }},
- {"rs-operations", ETNA_QUERY_RS_OPERATIONS, { 0 }},
- };
+ int nr_sw_queries = etna_sw_get_driver_query_info(pscreen, 0, NULL);
if (!info)
- return ARRAY_SIZE(list);
-
- if (index >= ARRAY_SIZE(list))
- return 0;
-
- *info = list[index];
+ return nr_sw_queries;
- return 1;
+ return etna_sw_get_driver_query_info(pscreen, index, info);
}
static void
diff -Nru mesa-17.2.4/src/gallium/drivers/etnaviv/etnaviv_query.h mesa-17.3.3/src/gallium/drivers/etnaviv/etnaviv_query.h
--- mesa-17.2.4/src/gallium/drivers/etnaviv/etnaviv_query.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/etnaviv/etnaviv_query.h 2018-01-18 21:30:28.000000000 +0000
@@ -44,7 +44,7 @@
struct etna_query {
const struct etna_query_funcs *funcs;
bool active;
- int type;
+ unsigned type;
};
static inline struct etna_query *
@@ -53,8 +53,7 @@
return (struct etna_query *)pq;
}
-#define ETNA_QUERY_DRAW_CALLS (PIPE_QUERY_DRIVER_SPECIFIC + 0)
-#define ETNA_QUERY_RS_OPERATIONS (PIPE_QUERY_DRIVER_SPECIFIC + 1)
+#define ETNA_SW_QUERY_BASE (PIPE_QUERY_DRIVER_SPECIFIC + 0)
void
etna_query_screen_init(struct pipe_screen *pscreen);
diff -Nru mesa-17.2.4/src/gallium/drivers/etnaviv/etnaviv_query_hw.c mesa-17.3.3/src/gallium/drivers/etnaviv/etnaviv_query_hw.c
--- mesa-17.2.4/src/gallium/drivers/etnaviv/etnaviv_query_hw.c 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/etnaviv/etnaviv_query_hw.c 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,263 @@
+/*
+ * Copyright (c) 2017 Etnaviv Project
+ * Copyright (C) 2017 Zodiac Inflight Innovations
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark
+ * Christian Gmeiner
+ */
+
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+
+#include "etnaviv_context.h"
+#include "etnaviv_debug.h"
+#include "etnaviv_emit.h"
+#include "etnaviv_query_hw.h"
+#include "etnaviv_screen.h"
+
+/*
+ * Occlusion Query:
+ *
+ * OCCLUSION_COUNTER and OCCLUSION_PREDICATE differ only in how they
+ * interpret results
+ */
+
+static void
+occlusion_start(struct etna_hw_query *hq, struct etna_context *ctx)
+{
+ struct etna_resource *rsc = etna_resource(hq->prsc);
+ struct etna_reloc r = {
+ .bo = rsc->bo,
+ .flags = ETNA_RELOC_WRITE
+ };
+
+ if (hq->samples > 63) {
+ hq->samples = 63;
+ BUG("samples overflow");
+ }
+
+ r.offset = hq->samples * 8; /* 64bit value */
+
+ etna_set_state_reloc(ctx->stream, VIVS_GL_OCCLUSION_QUERY_ADDR, &r);
+}
+
+static void
+occlusion_stop(struct etna_hw_query *hq, struct etna_context *ctx)
+{
+ /* 0x1DF5E76 is the value used by blob - but any random value will work */
+ etna_set_state(ctx->stream, VIVS_GL_OCCLUSION_QUERY_CONTROL, 0x1DF5E76);
+}
+
+static void
+occlusion_suspend(struct etna_hw_query *hq, struct etna_context *ctx)
+{
+ occlusion_stop(hq, ctx);
+}
+
+static void
+occlusion_resume(struct etna_hw_query *hq, struct etna_context *ctx)
+{
+ hq->samples++;
+ occlusion_start(hq, ctx);
+}
+
+static void
+occlusion_result(struct etna_hw_query *hq, void *buf,
+ union pipe_query_result *result)
+{
+ uint64_t sum = 0;
+ uint64_t *ptr = (uint64_t *)buf;
+
+ for (unsigned i = 0; i <= hq->samples; i++)
+ sum += *(ptr + i);
+
+ if (hq->base.type == PIPE_QUERY_OCCLUSION_COUNTER)
+ result->u64 = sum;
+ else
+ result->b = !!sum;
+}
+
+static void
+etna_hw_destroy_query(struct etna_context *ctx, struct etna_query *q)
+{
+ struct etna_hw_query *hq = etna_hw_query(q);
+
+ pipe_resource_reference(&hq->prsc, NULL);
+ list_del(&hq->node);
+
+ FREE(hq);
+}
+
+static const struct etna_hw_sample_provider occlusion_provider = {
+ .start = occlusion_start,
+ .stop = occlusion_stop,
+ .suspend = occlusion_suspend,
+ .resume = occlusion_resume,
+ .result = occlusion_result,
+};
+
+static void
+realloc_query_bo(struct etna_context *ctx, struct etna_hw_query *hq)
+{
+ struct etna_resource *rsc;
+ void *map;
+
+ pipe_resource_reference(&hq->prsc, NULL);
+
+ /* allocate a 0x1000-byte resource (room for at least 64 * 64bit values) */
+ hq->prsc = pipe_buffer_create(&ctx->screen->base, PIPE_BIND_QUERY_BUFFER,
+ 0, 0x1000);
+
+ /* don't assume the buffer is zero-initialized */
+ rsc = etna_resource(hq->prsc);
+
+ etna_bo_cpu_prep(rsc->bo, DRM_ETNA_PREP_WRITE);
+
+ map = etna_bo_map(rsc->bo);
+ memset(map, 0, 0x1000);
+ etna_bo_cpu_fini(rsc->bo);
+}
+
+static boolean
+etna_hw_begin_query(struct etna_context *ctx, struct etna_query *q)
+{
+ struct etna_hw_query *hq = etna_hw_query(q);
+ const struct etna_hw_sample_provider *p = hq->provider;
+
+ /* ->begin_query() discards previous results, so realloc bo */
+ realloc_query_bo(ctx, hq);
+
+ p->start(hq, ctx);
+
+ /* add to active list */
+ assert(list_empty(&hq->node));
+ list_addtail(&hq->node, &ctx->active_hw_queries);
+
+ return true;
+}
+
+static void
+etna_hw_end_query(struct etna_context *ctx, struct etna_query *q)
+{
+ struct etna_hw_query *hq = etna_hw_query(q);
+ const struct etna_hw_sample_provider *p = hq->provider;
+
+ p->stop(hq, ctx);
+
+ /* remove from active list */
+ list_delinit(&hq->node);
+}
+
+static boolean
+etna_hw_get_query_result(struct etna_context *ctx, struct etna_query *q,
+ boolean wait, union pipe_query_result *result)
+{
+ struct etna_hw_query *hq = etna_hw_query(q);
+ struct etna_resource *rsc = etna_resource(hq->prsc);
+ const struct etna_hw_sample_provider *p = hq->provider;
+
+ assert(LIST_IS_EMPTY(&hq->node));
+
+ if (!wait) {
+ int ret;
+
+ if (rsc->status & ETNA_PENDING_WRITE) {
+ /* piglit spec@arb_occlusion_query@occlusion_query_conform
+ * test, and silly apps perhaps, get stuck in a loop trying
+ * to get query result forever with wait==false.. we don't
+ * want to flush unnecessarily but we also don't want to
+ * spin forever.
+ */
+ if (hq->no_wait_cnt++ > 5)
+ ctx->base.flush(&ctx->base, NULL, 0);
+ return false;
+ }
+
+ ret = etna_bo_cpu_prep(rsc->bo, DRM_ETNA_PREP_READ | DRM_ETNA_PREP_NOSYNC);
+ if (ret)
+ return false;
+
+ etna_bo_cpu_fini(rsc->bo);
+ }
+
+ /* flush so that the GPU executes all query-related actions */
+ ctx->base.flush(&ctx->base, NULL, 0);
+
+ /* get the result */
+ etna_bo_cpu_prep(rsc->bo, DRM_ETNA_PREP_READ);
+
+ void *ptr = etna_bo_map(rsc->bo);
+ p->result(hq, ptr, result);
+
+ etna_bo_cpu_fini(rsc->bo);
+
+ return true;
+}
+
+static const struct etna_query_funcs hw_query_funcs = {
+ .destroy_query = etna_hw_destroy_query,
+ .begin_query = etna_hw_begin_query,
+ .end_query = etna_hw_end_query,
+ .get_query_result = etna_hw_get_query_result,
+};
+
+static inline const struct etna_hw_sample_provider *
+query_sample_provider(unsigned query_type)
+{
+ switch (query_type) {
+ case PIPE_QUERY_OCCLUSION_COUNTER:
+ /* fallthrough */
+ case PIPE_QUERY_OCCLUSION_PREDICATE:
+ /* fallthrough */
+ case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
+ return &occlusion_provider;
+ default:
+ return NULL;
+ }
+}
+
+struct etna_query *
+etna_hw_create_query(struct etna_context *ctx, unsigned query_type)
+{
+ struct etna_hw_query *hq;
+ struct etna_query *q;
+ const struct etna_hw_sample_provider *p;
+
+ p = query_sample_provider(query_type);
+ if (!p)
+ return NULL;
+
+ hq = CALLOC_STRUCT(etna_hw_query);
+ if (!hq)
+ return NULL;
+
+ hq->provider = p;
+
+ list_inithead(&hq->node);
+
+ q = &hq->base;
+ q->funcs = &hw_query_funcs;
+ q->type = query_type;
+
+ return q;
+}
diff -Nru mesa-17.2.4/src/gallium/drivers/etnaviv/etnaviv_query_hw.h mesa-17.3.3/src/gallium/drivers/etnaviv/etnaviv_query_hw.h
--- mesa-17.2.4/src/gallium/drivers/etnaviv/etnaviv_query_hw.h 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/etnaviv/etnaviv_query_hw.h 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2017 Etnaviv Project
+ * Copyright (C) 2017 Zodiac Inflight Innovations
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark
+ * Christian Gmeiner
+ */
+
+#ifndef H_ETNAVIV_QUERY_HW
+#define H_ETNAVIV_QUERY_HW
+
+#include "etnaviv_query.h"
+
+struct etna_hw_query;
+
+struct etna_hw_sample_provider {
+ void (*start)(struct etna_hw_query *hq, struct etna_context *ctx);
+ void (*stop)(struct etna_hw_query *hq, struct etna_context *ctx);
+ void (*suspend)(struct etna_hw_query *hq, struct etna_context *ctx);
+ void (*resume)(struct etna_hw_query *hq, struct etna_context *ctx);
+
+ void (*result)(struct etna_hw_query *hq, void *buf,
+ union pipe_query_result *result);
+};
+
+struct etna_hw_query {
+ struct etna_query base;
+
+ struct pipe_resource *prsc;
+ unsigned samples; /* number of samples stored in resource */
+ unsigned no_wait_cnt; /* see etna_hw_get_query_result() */
+ struct list_head node; /* list-node in ctx->active_hw_queries */
+
+ const struct etna_hw_sample_provider *provider;
+};
+
+static inline struct etna_hw_query *
+etna_hw_query(struct etna_query *q)
+{
+ return (struct etna_hw_query *)q;
+}
+
+struct etna_query *
+etna_hw_create_query(struct etna_context *ctx, unsigned query_type);
+
+static inline void
+etna_hw_query_suspend(struct etna_hw_query *hq, struct etna_context *ctx)
+{
+ const struct etna_hw_sample_provider *p = hq->provider;
+
+ if (!hq->base.active)
+ return;
+
+ p->suspend(hq, ctx);
+}
+
+static inline void
+etna_hw_query_resume(struct etna_hw_query *hq, struct etna_context *ctx)
+{
+ const struct etna_hw_sample_provider *p = hq->provider;
+
+ if (!hq->base.active)
+ return;
+
+ p->resume(hq, ctx);
+}
+
+#endif
diff -Nru mesa-17.2.4/src/gallium/drivers/etnaviv/etnaviv_query_sw.c mesa-17.3.3/src/gallium/drivers/etnaviv/etnaviv_query_sw.c
--- mesa-17.2.4/src/gallium/drivers/etnaviv/etnaviv_query_sw.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/etnaviv/etnaviv_query_sw.c 2018-01-18 21:30:28.000000000 +0000
@@ -27,7 +27,6 @@
#include "os/os_time.h"
#include "pipe/p_state.h"
-#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_string.h"
@@ -43,7 +42,7 @@
}
static uint64_t
-read_counter(struct etna_context *ctx, int type)
+read_counter(struct etna_context *ctx, unsigned type)
{
switch (type) {
case PIPE_QUERY_PRIMITIVES_EMITTED:
@@ -62,7 +61,6 @@
{
struct etna_sw_query *sq = etna_sw_query(q);
- q->active = true;
sq->begin_value = read_counter(ctx, q->type);
return true;
@@ -73,7 +71,6 @@
{
struct etna_sw_query *sq = etna_sw_query(q);
- q->active = false;
sq->end_value = read_counter(ctx, q->type);
}
@@ -83,10 +80,6 @@
{
struct etna_sw_query *sq = etna_sw_query(q);
- if (q->active)
- return false;
-
- util_query_clear_result(result, q->type);
result->u64 = sq->end_value - sq->begin_value;
return true;
@@ -124,3 +117,24 @@
return q;
}
+
+int
+etna_sw_get_driver_query_info(struct pipe_screen *pscreen, unsigned index,
+ struct pipe_driver_query_info *info)
+{
+ static const struct pipe_driver_query_info list[] = {
+ {"prims-emitted", PIPE_QUERY_PRIMITIVES_EMITTED, { 0 }},
+ {"draw-calls", ETNA_QUERY_DRAW_CALLS, { 0 }},
+ {"rs-operations", ETNA_QUERY_RS_OPERATIONS, { 0 }},
+ };
+
+ if (!info)
+ return ARRAY_SIZE(list);
+
+ if (index >= ARRAY_SIZE(list))
+ return 0;
+
+ *info = list[index];
+
+ return 1;
+}
diff -Nru mesa-17.2.4/src/gallium/drivers/etnaviv/etnaviv_query_sw.h mesa-17.3.3/src/gallium/drivers/etnaviv/etnaviv_query_sw.h
--- mesa-17.2.4/src/gallium/drivers/etnaviv/etnaviv_query_sw.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/etnaviv/etnaviv_query_sw.h 2018-01-18 21:30:28.000000000 +0000
@@ -30,6 +30,9 @@
#include "etnaviv_query.h"
+#define ETNA_QUERY_DRAW_CALLS (ETNA_SW_QUERY_BASE + 0)
+#define ETNA_QUERY_RS_OPERATIONS (ETNA_SW_QUERY_BASE + 1)
+
struct etna_sw_query {
struct etna_query base;
uint64_t begin_value, end_value;
@@ -44,4 +47,8 @@
struct etna_query *
etna_sw_create_query(struct etna_context *ctx, unsigned query_type);
+int
+etna_sw_get_driver_query_info(struct pipe_screen *pscreen, unsigned index,
+ struct pipe_driver_query_info *info);
+
#endif
diff -Nru mesa-17.2.4/src/gallium/drivers/etnaviv/etnaviv_rasterizer.c mesa-17.3.3/src/gallium/drivers/etnaviv/etnaviv_rasterizer.c
--- mesa-17.2.4/src/gallium/drivers/etnaviv/etnaviv_rasterizer.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/etnaviv/etnaviv_rasterizer.c 2018-01-18 21:30:28.000000000 +0000
@@ -38,10 +38,6 @@
struct etna_rasterizer_state *cs;
struct etna_context *ctx = etna_context(pctx);
- /* Disregard flatshading on GC880+, as a HW bug there seem to disable all
- * varying interpolation if it's enabled */
- bool flatshade = ctx->screen->model < 880 ? so->flatshade : false;
-
if (so->fill_front != so->fill_back)
DBG("Different front and back fill mode not supported");
@@ -51,7 +47,7 @@
cs->base = *so;
- cs->PA_CONFIG = (flatshade ? VIVS_PA_CONFIG_SHADE_MODEL_FLAT : VIVS_PA_CONFIG_SHADE_MODEL_SMOOTH) |
+ cs->PA_CONFIG = (so->flatshade ? VIVS_PA_CONFIG_SHADE_MODEL_FLAT : VIVS_PA_CONFIG_SHADE_MODEL_SMOOTH) |
translate_cull_face(so->cull_face, so->front_ccw) |
translate_polygon_mode(so->fill_front) |
COND(so->point_quad_rasterization, VIVS_PA_CONFIG_POINT_SPRITE_ENABLE) |
@@ -65,7 +61,8 @@
/* XXX anything else? */
/* XXX bottom_edge_rule */
cs->PA_SYSTEM_MODE =
- COND(so->half_pixel_center, VIVS_PA_SYSTEM_MODE_UNK0 | VIVS_PA_SYSTEM_MODE_UNK4);
+ COND(!so->flatshade_first, VIVS_PA_SYSTEM_MODE_PROVOKING_VERTEX_LAST) |
+ COND(so->half_pixel_center, VIVS_PA_SYSTEM_MODE_HALF_PIXEL_CENTER);
/* so->scissor overrides the scissor, defaulting to the whole framebuffer,
* with the scissor state */
diff -Nru mesa-17.2.4/src/gallium/drivers/etnaviv/etnaviv_rs.c mesa-17.3.3/src/gallium/drivers/etnaviv/etnaviv_rs.c
--- mesa-17.2.4/src/gallium/drivers/etnaviv/etnaviv_rs.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/etnaviv/etnaviv_rs.c 2018-01-18 21:30:28.000000000 +0000
@@ -118,10 +118,23 @@
cs->RS_FILL_VALUE[3] = rs->clear_value[3];
cs->RS_EXTRA_CONFIG = VIVS_RS_EXTRA_CONFIG_AA(rs->aa) |
VIVS_RS_EXTRA_CONFIG_ENDIAN(rs->endian_mode);
- /* TODO: cs->RS_UNK016B0 = s->size / 64 ?
- * The blob does this consistently but there seems to be no currently supported
- * model that needs it.
+
+ /* If the source is the same as the destination, and the hardware supports this,
+ * do an in-place resolve to fill in unrendered tiles.
*/
+ if (ctx->specs.single_buffer && rs->source == rs->dest &&
+ rs->source_offset == rs->dest_offset &&
+ rs->source_format == rs->dest_format &&
+ rs->source_tiling == rs->dest_tiling &&
+ (rs->source_tiling & ETNA_LAYOUT_BIT_SUPER) &&
+ rs->source_stride == rs->dest_stride &&
+ !rs->downsample_x && !rs->downsample_y &&
+ !rs->swap_rb && !rs->flip &&
+ !rs->clear_mode && rs->source_padded_width) {
+ /* Total number of tiles (same as for autodisable) */
+ cs->RS_KICKER_INPLACE = rs->source_padded_width * rs->source_padded_height / 16;
+ }
+ cs->source_ts_valid = rs->source_ts_valid;
}
void
diff -Nru mesa-17.2.4/src/gallium/drivers/etnaviv/etnaviv_rs.h mesa-17.3.3/src/gallium/drivers/etnaviv/etnaviv_rs.h
--- mesa-17.2.4/src/gallium/drivers/etnaviv/etnaviv_rs.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/etnaviv/etnaviv_rs.h 2018-01-18 21:30:28.000000000 +0000
@@ -33,6 +33,7 @@
struct rs_state {
uint8_t downsample_x : 1; /* Downsample in x direction */
uint8_t downsample_y : 1; /* Downsample in y direction */
+ uint8_t source_ts_valid : 1;
uint8_t source_format; /* RS_FORMAT_XXX */
uint8_t source_tiling; /* ETNA_LAYOUT_XXX */
@@ -43,6 +44,7 @@
struct etna_bo *source;
uint32_t source_offset;
uint32_t source_stride;
+ uint32_t source_padded_width; /* total padded width (only needed for source) */
uint32_t source_padded_height; /* total padded height */
struct etna_bo *dest;
uint32_t dest_offset;
@@ -60,6 +62,7 @@
/* treat this as opaque structure */
struct compiled_rs_state {
+ uint8_t source_ts_valid : 1;
uint32_t RS_CONFIG;
uint32_t RS_SOURCE_STRIDE;
uint32_t RS_DEST_STRIDE;
@@ -69,6 +72,7 @@
uint32_t RS_FILL_VALUE[4];
uint32_t RS_EXTRA_CONFIG;
uint32_t RS_PIPE_OFFSET[2];
+ uint32_t RS_KICKER_INPLACE; /* Set if source is destination */
struct etna_reloc source[2];
struct etna_reloc dest[2];
diff -Nru mesa-17.2.4/src/gallium/drivers/etnaviv/etnaviv_screen.c mesa-17.3.3/src/gallium/drivers/etnaviv/etnaviv_screen.c
--- mesa-17.2.4/src/gallium/drivers/etnaviv/etnaviv_screen.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/etnaviv/etnaviv_screen.c 2018-01-18 21:30:28.000000000 +0000
@@ -260,6 +260,12 @@
case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
case PIPE_CAP_POST_DEPTH_COVERAGE:
case PIPE_CAP_BINDLESS_TEXTURE:
+ case PIPE_CAP_NIR_SAMPLERS_AS_DEREF:
+ case PIPE_CAP_QUERY_SO_OVERFLOW:
+ case PIPE_CAP_MEMOBJ:
+ case PIPE_CAP_LOAD_CONSTBUF:
+ case PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS:
+ case PIPE_CAP_TILE_RASTER_ORDER:
return 0;
/* Stream output. */
@@ -313,8 +319,9 @@
/* Timer queries. */
case PIPE_CAP_QUERY_TIME_ELAPSED:
- case PIPE_CAP_OCCLUSION_QUERY:
return 0;
+ case PIPE_CAP_OCCLUSION_QUERY:
+ return VIV_FEATURE(screen, chipMinorFeatures1, HALTI0);
case PIPE_CAP_QUERY_TIMESTAMP:
return 1;
case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
@@ -424,6 +431,8 @@
case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
return VIV_FEATURE(screen, chipMinorFeatures0, HAS_SQRT_TRIG);
case PIPE_SHADER_CAP_INTEGERS:
+ case PIPE_SHADER_CAP_INT64_ATOMICS:
+ case PIPE_SHADER_CAP_FP16:
return 0;
case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
@@ -436,6 +445,7 @@
return 4096;
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
return false;
@@ -693,6 +703,8 @@
VIV_FEATURE(screen, chipMinorFeatures1, NON_POWER_OF_TWO);
screen->specs.has_new_transcendentals =
VIV_FEATURE(screen, chipMinorFeatures3, HAS_FAST_TRANSCENDENTALS);
+ screen->specs.has_halti2_instructions =
+ VIV_FEATURE(screen, chipMinorFeatures4, HALTI2);
if (VIV_FEATURE(screen, chipMinorFeatures3, INSTRUCTION_CACHE)) {
/* GC3000 - this core is capable of loading shaders from
@@ -707,7 +719,8 @@
* same.
*/
screen->specs.ps_offset = 0x8000 + 0x1000;
- screen->specs.max_instructions = 256;
+ screen->specs.max_instructions = 256; /* maximum number of instructions for non-icache use */
+ screen->specs.has_icache = true;
} else {
if (instruction_count > 256) { /* unified instruction memory? */
screen->specs.vs_offset = 0xC000;
@@ -718,6 +731,7 @@
screen->specs.ps_offset = 0x6000;
screen->specs.max_instructions = instruction_count / 2;
}
+ screen->specs.has_icache = false;
}
if (VIV_FEATURE(screen, chipMinorFeatures1, HALTI0)) {
@@ -745,6 +759,21 @@
screen->specs.max_vs_uniforms = 256;
screen->specs.max_ps_uniforms = 256;
}
+ /* unified uniform memory on GC3000 - HALTI1 feature bit is just a guess
+ */
+ if (VIV_FEATURE(screen, chipMinorFeatures2, HALTI1)) {
+ screen->specs.has_unified_uniforms = true;
+ screen->specs.vs_uniforms_offset = VIVS_SH_UNIFORMS(0);
+ /* hardcode PS uniforms to start after end of VS uniforms -
+ * for more flexibility this offset could be variable based on the
+ * shader.
+ */
+ screen->specs.ps_uniforms_offset = VIVS_SH_UNIFORMS(screen->specs.max_vs_uniforms*4);
+ } else {
+ screen->specs.has_unified_uniforms = false;
+ screen->specs.vs_uniforms_offset = VIVS_VS_UNIFORMS(0);
+ screen->specs.ps_uniforms_offset = VIVS_PS_UNIFORMS(0);
+ }
screen->specs.max_texture_size =
VIV_FEATURE(screen, chipMinorFeatures0, TEXTURE_8K) ? 8192 : 2048;
diff -Nru mesa-17.2.4/src/gallium/drivers/etnaviv/etnaviv_shader.c mesa-17.3.3/src/gallium/drivers/etnaviv/etnaviv_shader.c
--- mesa-17.2.4/src/gallium/drivers/etnaviv/etnaviv_shader.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/etnaviv/etnaviv_shader.c 2018-01-18 21:30:28.000000000 +0000
@@ -29,12 +29,30 @@
#include "etnaviv_compiler.h"
#include "etnaviv_context.h"
#include "etnaviv_debug.h"
+#include "etnaviv_screen.h"
#include "etnaviv_util.h"
#include "tgsi/tgsi_parse.h"
#include "util/u_math.h"
#include "util/u_memory.h"
+/* Upload shader code to bo, if not already done */
+static bool etna_icache_upload_shader(struct etna_context *ctx, struct etna_shader_variant *v)
+{
+ if (v->bo)
+ return true;
+ v->bo = etna_bo_new(ctx->screen->dev, v->code_size*4, DRM_ETNA_GEM_CACHE_UNCACHED);
+ if (!v->bo)
+ return false;
+
+ void *buf = etna_bo_map(v->bo);
+ etna_bo_cpu_prep(v->bo, DRM_ETNA_PREP_WRITE);
+ memcpy(buf, v->code, v->code_size*4);
+ etna_bo_cpu_fini(v->bo);
+ DBG("Uploaded %s of %u words to bo %p", v->processor == PIPE_SHADER_FRAGMENT ? "fs":"vs", v->code_size, v->bo);
+ return true;
+}
+
/* Link vs and fs together: fill in shader_state from vs and fs
* as this function is called every time a new fs or vs is bound, the goal is to
* do little processing as possible here, and to precompute as much as possible in
@@ -45,7 +63,7 @@
*/
static bool
etna_link_shaders(struct etna_context *ctx, struct compiled_shader_state *cs,
- const struct etna_shader_variant *vs, const struct etna_shader_variant *fs)
+ struct etna_shader_variant *vs, struct etna_shader_variant *fs)
{
struct etna_shader_link_info link = { };
@@ -164,9 +182,32 @@
/* reference instruction memory */
cs->vs_inst_mem_size = vs->code_size;
cs->VS_INST_MEM = vs->code;
+
cs->ps_inst_mem_size = fs->code_size;
cs->PS_INST_MEM = fs->code;
+ if (vs->needs_icache | fs->needs_icache) {
+ /* If either of the shaders needs ICACHE, we use it for both. It is
+ * either switched on or off for the entire shader processor.
+ */
+ if (!etna_icache_upload_shader(ctx, vs) ||
+ !etna_icache_upload_shader(ctx, fs)) {
+ assert(0);
+ return false;
+ }
+
+ cs->VS_INST_ADDR.bo = vs->bo;
+ cs->VS_INST_ADDR.offset = 0;
+ cs->VS_INST_ADDR.flags = ETNA_RELOC_READ;
+ cs->PS_INST_ADDR.bo = fs->bo;
+ cs->PS_INST_ADDR.offset = 0;
+ cs->PS_INST_ADDR.flags = ETNA_RELOC_READ;
+ } else {
+ /* clear relocs */
+ memset(&cs->VS_INST_ADDR, 0, sizeof(cs->VS_INST_ADDR));
+ memset(&cs->PS_INST_ADDR, 0, sizeof(cs->PS_INST_ADDR));
+ }
+
return true;
}
@@ -352,6 +393,8 @@
while (v) {
t = v;
v = v->next;
+ if (t->bo)
+ etna_bo_del(t->bo);
etna_destroy_shader(t);
}
diff -Nru mesa-17.2.4/src/gallium/drivers/etnaviv/etnaviv_state.c mesa-17.3.3/src/gallium/drivers/etnaviv/etnaviv_state.c
--- mesa-17.2.4/src/gallium/drivers/etnaviv/etnaviv_state.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/etnaviv/etnaviv_state.c 2018-01-18 21:30:28.000000000 +0000
@@ -165,7 +165,6 @@
cs->PE_COLOR_STRIDE = cbuf->surf.stride;
if (cbuf->surf.ts_size) {
- ts_mem_config |= VIVS_TS_MEM_CONFIG_COLOR_FAST_CLEAR;
cs->TS_COLOR_CLEAR_VALUE = cbuf->level->clear_value;
cs->TS_COLOR_STATUS_BASE = cbuf->ts_reloc;
@@ -231,7 +230,6 @@
cs->PE_DEPTH_NORMALIZE = fui(exp2f(depth_bits) - 1.0f);
if (zsbuf->surf.ts_size) {
- ts_mem_config |= VIVS_TS_MEM_CONFIG_DEPTH_FAST_CLEAR;
cs->TS_DEPTH_CLEAR_VALUE = zsbuf->level->clear_value;
cs->TS_DEPTH_STATUS_BASE = zsbuf->ts_reloc;
@@ -325,7 +323,7 @@
cs->PE_LOGIC_OP = VIVS_PE_LOGIC_OP_SINGLE_BUFFER(ctx->specs.single_buffer ? 2 : 0);
ctx->framebuffer_s = *sv; /* keep copy of original structure */
- ctx->dirty |= ETNA_DIRTY_FRAMEBUFFER;
+ ctx->dirty |= ETNA_DIRTY_FRAMEBUFFER | ETNA_DIRTY_DERIVE_TS;
}
static void
@@ -572,6 +570,41 @@
ctx->dirty |= ETNA_DIRTY_VERTEX_ELEMENTS;
}
+static bool
+etna_update_ts_config(struct etna_context *ctx)
+{
+ uint32_t new_ts_config = ctx->framebuffer.TS_MEM_CONFIG;
+
+ if (ctx->framebuffer_s.nr_cbufs > 0) {
+ struct etna_surface *c_surf = etna_surface(ctx->framebuffer_s.cbufs[0]);
+
+ if(c_surf->level->ts_size && c_surf->level->ts_valid) {
+ new_ts_config |= VIVS_TS_MEM_CONFIG_COLOR_FAST_CLEAR;
+ } else {
+ new_ts_config &= ~VIVS_TS_MEM_CONFIG_COLOR_FAST_CLEAR;
+ }
+ }
+
+ if (ctx->framebuffer_s.zsbuf) {
+ struct etna_surface *zs_surf = etna_surface(ctx->framebuffer_s.zsbuf);
+
+ if(zs_surf->level->ts_size && zs_surf->level->ts_valid) {
+ new_ts_config |= VIVS_TS_MEM_CONFIG_DEPTH_FAST_CLEAR;
+ } else {
+ new_ts_config &= ~VIVS_TS_MEM_CONFIG_DEPTH_FAST_CLEAR;
+ }
+ }
+
+ if (new_ts_config != ctx->framebuffer.TS_MEM_CONFIG) {
+ ctx->framebuffer.TS_MEM_CONFIG = new_ts_config;
+ ctx->dirty |= ETNA_DIRTY_TS;
+ }
+
+ ctx->dirty &= ~ETNA_DIRTY_DERIVE_TS;
+
+ return true;
+}
+
struct etna_state_updater {
bool (*update)(struct etna_context *ctx);
uint32_t dirty;
@@ -589,6 +622,9 @@
},
{
etna_update_blend_color, ETNA_DIRTY_BLEND_COLOR | ETNA_DIRTY_FRAMEBUFFER,
+ },
+ {
+ etna_update_ts_config, ETNA_DIRTY_DERIVE_TS,
}
};
diff -Nru mesa-17.2.4/src/gallium/drivers/etnaviv/etnaviv_transfer.c mesa-17.3.3/src/gallium/drivers/etnaviv/etnaviv_transfer.c
--- mesa-17.2.4/src/gallium/drivers/etnaviv/etnaviv_transfer.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/etnaviv/etnaviv_transfer.c 2018-01-18 21:30:28.000000000 +0000
@@ -28,6 +28,7 @@
#include "etnaviv_clear_blit.h"
#include "etnaviv_context.h"
#include "etnaviv_debug.h"
+#include "etnaviv_screen.h"
#include "pipe/p_defines.h"
#include "pipe/p_format.h"
@@ -84,8 +85,7 @@
/* We have a temporary resource due to either tile status or
* tiling format. Write back the updated buffer contents.
* FIXME: we need to invalidate the tile status. */
- etna_copy_resource(pctx, ptrans->resource, trans->rsc, ptrans->level,
- trans->rsc->last_level);
+ etna_copy_resource_box(pctx, ptrans->resource, trans->rsc, ptrans->level, &ptrans->box);
} else if (trans->staging) {
/* map buffer object */
struct etna_resource_level *res_level = &rsc->levels[ptrans->level];
@@ -212,9 +212,30 @@
return NULL;
}
+ /* Need to align the transfer region to satisfy RS restrictions, as we
+ * really want to hit the RS blit path here.
+ */
+ unsigned w_align, h_align;
+
+ if (rsc->layout & ETNA_LAYOUT_BIT_SUPER) {
+ w_align = h_align = 64;
+ } else {
+ w_align = ETNA_RS_WIDTH_MASK + 1;
+ h_align = ETNA_RS_HEIGHT_MASK + 1;
+ }
+ h_align *= ctx->screen->specs.pixel_pipes;
+
+ ptrans->box.width += ptrans->box.x & (w_align - 1);
+ ptrans->box.x = ptrans->box.x & ~(w_align - 1);
+ ptrans->box.width = align(ptrans->box.width, (ETNA_RS_WIDTH_MASK + 1));
+ ptrans->box.height += ptrans->box.y & (h_align - 1);
+ ptrans->box.y = ptrans->box.y & ~(h_align - 1);
+ ptrans->box.height = align(ptrans->box.height,
+ (ETNA_RS_HEIGHT_MASK + 1) *
+ ctx->screen->specs.pixel_pipes);
+
if (!(usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE))
- etna_copy_resource(pctx, trans->rsc, prsc, level,
- trans->rsc->last_level);
+ etna_copy_resource_box(pctx, trans->rsc, prsc, level, &ptrans->box);
/* Switch to using the temporary resource instead */
rsc = etna_resource(trans->rsc);
@@ -222,11 +243,16 @@
struct etna_resource_level *res_level = &rsc->levels[level];
- /* Always sync if we have the temporary resource. The PIPE_TRANSFER_READ
- * case could be optimised if we knew whether the resource has outstanding
- * rendering. */
- if ((usage & PIPE_TRANSFER_READ || trans->rsc) &&
- rsc->status & ETNA_PENDING_WRITE)
+ /*
+ * Always flush if we have the temporary resource and a copy to it is still
+ * outstanding. Otherwise infer flush requirement from resource access and
+ * current GPU usage (reads must wait for GPU writes, writes must have
+ * exclusive access to the buffer).
+ */
+ if ((trans->rsc && (etna_resource(trans->rsc)->status & ETNA_PENDING_WRITE)) ||
+ (!trans->rsc &&
+ (((usage & PIPE_TRANSFER_READ) && (rsc->status & ETNA_PENDING_WRITE)) ||
+ ((usage & PIPE_TRANSFER_WRITE) && rsc->status))))
pctx->flush(pctx, NULL, 0);
/* XXX we don't handle PIPE_TRANSFER_FLUSH_EXPLICIT; this flag can be ignored
diff -Nru mesa-17.2.4/src/gallium/drivers/etnaviv/hw/cmdstream.xml.h mesa-17.3.3/src/gallium/drivers/etnaviv/hw/cmdstream.xml.h
--- mesa-17.2.4/src/gallium/drivers/etnaviv/hw/cmdstream.xml.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/etnaviv/hw/cmdstream.xml.h 2018-01-18 21:30:28.000000000 +0000
@@ -8,11 +8,11 @@
git clone git://0x04.net/rules-ng-ng
The rules-ng-ng source files this header was generated from are:
-- cmdstream.xml ( 14313 bytes, from 2016-11-17 18:46:23)
-- copyright.xml ( 1597 bytes, from 2016-10-29 07:29:22)
-- common.xml ( 23473 bytes, from 2017-01-07 14:27:54)
+- cmdstream.xml ( 16595 bytes, from 2017-10-05 21:20:32)
+- copyright.xml ( 1597 bytes, from 2016-11-13 13:46:17)
+- common.xml ( 26135 bytes, from 2017-10-05 21:20:32)
-Copyright (C) 2012-2016 by the following authors:
+Copyright (C) 2012-2017 by the following authors:
- Wladimir J. van der Laan
- Christian Gmeiner
- Lucas Stach
@@ -52,6 +52,8 @@
#define FE_OPCODE_RETURN 0x0000000b
#define FE_OPCODE_DRAW_INSTANCED 0x0000000c
#define FE_OPCODE_CHIP_SELECT 0x0000000d
+#define FE_OPCODE_WAIT_FENCE 0x0000000f
+#define FE_OPCODE_SNAP_PAGES 0x00000013
#define PRIMITIVE_TYPE_POINTS 0x00000001
#define PRIMITIVE_TYPE_LINES 0x00000002
#define PRIMITIVE_TYPE_LINE_STRIP 0x00000003
@@ -192,6 +194,9 @@
#define VIV_FE_STALL_TOKEN_TO__MASK 0x00001f00
#define VIV_FE_STALL_TOKEN_TO__SHIFT 8
#define VIV_FE_STALL_TOKEN_TO(x) (((x) << VIV_FE_STALL_TOKEN_TO__SHIFT) & VIV_FE_STALL_TOKEN_TO__MASK)
+#define VIV_FE_STALL_TOKEN_UNK28__MASK 0x30000000
+#define VIV_FE_STALL_TOKEN_UNK28__SHIFT 28
+#define VIV_FE_STALL_TOKEN_UNK28(x) (((x) << VIV_FE_STALL_TOKEN_UNK28__SHIFT) & VIV_FE_STALL_TOKEN_UNK28__MASK)
#define VIV_FE_CALL 0x00000000
@@ -266,5 +271,30 @@
#define VIV_FE_DRAW_INSTANCED_START_INDEX__SHIFT 0
#define VIV_FE_DRAW_INSTANCED_START_INDEX(x) (((x) << VIV_FE_DRAW_INSTANCED_START_INDEX__SHIFT) & VIV_FE_DRAW_INSTANCED_START_INDEX__MASK)
+#define VIV_FE_WAIT_FENCE 0x00000000
+
+#define VIV_FE_WAIT_FENCE_HEADER 0x00000000
+#define VIV_FE_WAIT_FENCE_HEADER_OP__MASK 0xf8000000
+#define VIV_FE_WAIT_FENCE_HEADER_OP__SHIFT 27
+#define VIV_FE_WAIT_FENCE_HEADER_OP_WAIT_FENCE 0x78000000
+#define VIV_FE_WAIT_FENCE_HEADER_UNK16__MASK 0x00030000
+#define VIV_FE_WAIT_FENCE_HEADER_UNK16__SHIFT 16
+#define VIV_FE_WAIT_FENCE_HEADER_UNK16(x) (((x) << VIV_FE_WAIT_FENCE_HEADER_UNK16__SHIFT) & VIV_FE_WAIT_FENCE_HEADER_UNK16__MASK)
+#define VIV_FE_WAIT_FENCE_HEADER_WAITCOUNT__MASK 0x0000ffff
+#define VIV_FE_WAIT_FENCE_HEADER_WAITCOUNT__SHIFT 0
+#define VIV_FE_WAIT_FENCE_HEADER_WAITCOUNT(x) (((x) << VIV_FE_WAIT_FENCE_HEADER_WAITCOUNT__SHIFT) & VIV_FE_WAIT_FENCE_HEADER_WAITCOUNT__MASK)
+
+#define VIV_FE_WAIT_FENCE_ADDRESS 0x00000004
+
+#define VIV_FE_SNAP_PAGES 0x00000000
+
+#define VIV_FE_SNAP_PAGES_HEADER 0x00000000
+#define VIV_FE_SNAP_PAGES_HEADER_OP__MASK 0xf8000000
+#define VIV_FE_SNAP_PAGES_HEADER_OP__SHIFT 27
+#define VIV_FE_SNAP_PAGES_HEADER_OP_SNAP_PAGES 0x98000000
+#define VIV_FE_SNAP_PAGES_HEADER_UNK0__MASK 0x0000001f
+#define VIV_FE_SNAP_PAGES_HEADER_UNK0__SHIFT 0
+#define VIV_FE_SNAP_PAGES_HEADER_UNK0(x) (((x) << VIV_FE_SNAP_PAGES_HEADER_UNK0__SHIFT) & VIV_FE_SNAP_PAGES_HEADER_UNK0__MASK)
+
#endif /* CMDSTREAM_XML */
diff -Nru mesa-17.2.4/src/gallium/drivers/etnaviv/hw/common.xml.h mesa-17.3.3/src/gallium/drivers/etnaviv/hw/common.xml.h
--- mesa-17.2.4/src/gallium/drivers/etnaviv/hw/common.xml.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/etnaviv/hw/common.xml.h 2018-01-18 21:30:28.000000000 +0000
@@ -8,13 +8,13 @@
git clone git://0x04.net/rules-ng-ng
The rules-ng-ng source files this header was generated from are:
-- state.xml ( 19930 bytes, from 2017-01-07 14:27:54)
-- common.xml ( 23473 bytes, from 2017-01-07 14:27:54)
-- state_hi.xml ( 26403 bytes, from 2017-01-07 14:27:54)
-- copyright.xml ( 1597 bytes, from 2016-10-29 07:29:22)
-- state_2d.xml ( 51552 bytes, from 2016-10-29 07:29:22)
-- state_3d.xml ( 66964 bytes, from 2017-04-13 12:38:05)
-- state_vg.xml ( 5975 bytes, from 2016-10-29 07:29:22)
+- state.xml ( 26245 bytes, from 2017-10-05 21:32:06)
+- common.xml ( 26135 bytes, from 2017-10-05 21:20:32)
+- state_hi.xml ( 27733 bytes, from 2017-10-05 21:20:32)
+- copyright.xml ( 1597 bytes, from 2016-11-13 13:46:17)
+- state_2d.xml ( 51552 bytes, from 2016-11-13 13:46:17)
+- state_3d.xml ( 80819 bytes, from 2017-10-05 21:20:32)
+- state_vg.xml ( 5975 bytes, from 2016-11-13 13:46:17)
Copyright (C) 2012-2017 by the following authors:
- Wladimir J. van der Laan
@@ -49,12 +49,7 @@
#define SYNC_RECIPIENT_RA 0x00000005
#define SYNC_RECIPIENT_PE 0x00000007
#define SYNC_RECIPIENT_DE 0x0000000b
-#define SYNC_RECIPIENT_VG 0x0000000f
-#define SYNC_RECIPIENT_TESSELATOR 0x00000010
-#define SYNC_RECIPIENT_VG2 0x00000011
-#define SYNC_RECIPIENT_TESSELATOR2 0x00000012
-#define SYNC_RECIPIENT_VG3 0x00000013
-#define SYNC_RECIPIENT_TESSELATOR3 0x00000014
+#define SYNC_RECIPIENT_BLT 0x00000010
#define ENDIAN_MODE_NO_SWAP 0x00000000
#define ENDIAN_MODE_SWAP_16 0x00000001
#define ENDIAN_MODE_SWAP_32 0x00000002
@@ -77,6 +72,7 @@
#define chipModel_GC800 0x00000800
#define chipModel_GC860 0x00000860
#define chipModel_GC880 0x00000880
+#define chipModel_GC900 0x00000900
#define chipModel_GC1000 0x00001000
#define chipModel_GC1500 0x00001500
#define chipModel_GC2000 0x00002000
@@ -88,6 +84,12 @@
#define chipModel_GC5000 0x00005000
#define chipModel_GC5200 0x00005200
#define chipModel_GC6400 0x00006400
+#define chipModel_GC7000 0x00007000
+#define chipModel_GC7400 0x00007400
+#define chipModel_GC8000 0x00008000
+#define chipModel_GC8100 0x00008100
+#define chipModel_GC8200 0x00008200
+#define chipModel_GC8400 0x00008400
#define RGBA_BITS_R 0x00000001
#define RGBA_BITS_G 0x00000002
#define RGBA_BITS_B 0x00000004
@@ -203,7 +205,7 @@
#define chipMinorFeatures2_RGB888 0x00001000
#define chipMinorFeatures2_TX__YUV_ASSEMBLER 0x00002000
#define chipMinorFeatures2_DYNAMIC_FREQUENCY_SCALING 0x00004000
-#define chipMinorFeatures2_EXTRA_TEXTURE_STATE 0x00008000
+#define chipMinorFeatures2_TX_FILTER 0x00008000
#define chipMinorFeatures2_FULL_DIRECTFB 0x00010000
#define chipMinorFeatures2_2D_TILING 0x00020000
#define chipMinorFeatures2_THREAD_WALKER_IN_PS 0x00040000
@@ -242,36 +244,36 @@
#define chipMinorFeatures3_TX_ENHANCEMENTS1 0x00080000
#define chipMinorFeatures3_SH_ENHANCEMENTS1 0x00100000
#define chipMinorFeatures3_SH_ENHANCEMENTS2 0x00200000
-#define chipMinorFeatures3_UNK22 0x00400000
+#define chipMinorFeatures3_PE_ENHANCEMENTS1 0x00400000
#define chipMinorFeatures3_2D_FC_SOURCE 0x00800000
-#define chipMinorFeatures3_UNK24 0x01000000
-#define chipMinorFeatures3_UNK25 0x02000000
+#define chipMinorFeatures3_BUG_FIXES_14 0x01000000
+#define chipMinorFeatures3_POWER_OPTIMIZATIONS_0 0x02000000
#define chipMinorFeatures3_NEW_HZ 0x04000000
-#define chipMinorFeatures3_UNK27 0x08000000
-#define chipMinorFeatures3_UNK28 0x10000000
+#define chipMinorFeatures3_PE_DITHER_FIX 0x08000000
+#define chipMinorFeatures3_DE_ENHANCEMENTS3 0x10000000
#define chipMinorFeatures3_SH_ENHANCEMENTS3 0x20000000
-#define chipMinorFeatures3_UNK30 0x40000000
-#define chipMinorFeatures3_UNK31 0x80000000
-#define chipMinorFeatures4_UNK0 0x00000001
+#define chipMinorFeatures3_SH_ENHANCEMENTS4 0x40000000
+#define chipMinorFeatures3_TX_ENHANCEMENTS2 0x80000000
+#define chipMinorFeatures4_FE_ENHANCEMENTS1 0x00000001
#define chipMinorFeatures4_PE_ENHANCEMENTS2 0x00000002
#define chipMinorFeatures4_FRUSTUM_CLIP_FIX 0x00000004
-#define chipMinorFeatures4_UNK3 0x00000008
-#define chipMinorFeatures4_UNK4 0x00000010
+#define chipMinorFeatures4_DE_NO_GAMMA 0x00000008
+#define chipMinorFeatures4_PA_ENHANCEMENTS_2 0x00000010
#define chipMinorFeatures4_2D_GAMMA 0x00000020
#define chipMinorFeatures4_SINGLE_BUFFER 0x00000040
-#define chipMinorFeatures4_UNK7 0x00000080
-#define chipMinorFeatures4_UNK8 0x00000100
-#define chipMinorFeatures4_UNK9 0x00000200
-#define chipMinorFeatures4_UNK10 0x00000400
+#define chipMinorFeatures4_HI_ENHANCEMENTS_1 0x00000080
+#define chipMinorFeatures4_TX_ENHANCEMENTS_3 0x00000100
+#define chipMinorFeatures4_SH_ENHANCEMENTS_5 0x00000200
+#define chipMinorFeatures4_FE_ENHANCEMENTS_2 0x00000400
#define chipMinorFeatures4_TX_LERP_PRECISION_FIX 0x00000800
#define chipMinorFeatures4_2D_COLOR_SPACE_CONVERSION 0x00001000
#define chipMinorFeatures4_TEXTURE_ASTC 0x00002000
-#define chipMinorFeatures4_UNK14 0x00004000
-#define chipMinorFeatures4_UNK15 0x00008000
+#define chipMinorFeatures4_PE_ENHANCEMENTS_4 0x00004000
+#define chipMinorFeatures4_MC_ENHANCEMENTS_1 0x00008000
#define chipMinorFeatures4_HALTI2 0x00010000
-#define chipMinorFeatures4_UNK17 0x00020000
+#define chipMinorFeatures4_2D_MIRROR_EXTENSION 0x00020000
#define chipMinorFeatures4_SMALL_MSAA 0x00040000
-#define chipMinorFeatures4_UNK19 0x00080000
+#define chipMinorFeatures4_BUG_FIXES_17 0x00080000
#define chipMinorFeatures4_NEW_RA 0x00100000
#define chipMinorFeatures4_2D_OPF_YUV_OUTPUT 0x00200000
#define chipMinorFeatures4_2D_MULTI_SOURCE_BLT_EX2 0x00400000
@@ -280,41 +282,46 @@
#define chipMinorFeatures4_BUG_FIXES18 0x02000000
#define chipMinorFeatures4_2D_COMPRESSION 0x04000000
#define chipMinorFeatures4_PROBE 0x08000000
-#define chipMinorFeatures4_UNK28 0x10000000
+#define chipMinorFeatures4_MEDIUM_PRECISION 0x10000000
#define chipMinorFeatures4_2D_SUPER_TILE_VERSION 0x20000000
-#define chipMinorFeatures4_UNK30 0x40000000
-#define chipMinorFeatures4_UNK31 0x80000000
-#define chipMinorFeatures5_UNK0 0x00000001
-#define chipMinorFeatures5_UNK1 0x00000002
-#define chipMinorFeatures5_UNK2 0x00000004
-#define chipMinorFeatures5_UNK3 0x00000008
+#define chipMinorFeatures4_BUG_FIXES19 0x40000000
+#define chipMinorFeatures4_SH_ENHANCEMENTS6 0x80000000
+#define chipMinorFeatures5_SH_ENHANCEMENTS7 0x00000001
+#define chipMinorFeatures5_BUG_FIXES20 0x00000002
+#define chipMinorFeatures5_DE_ADDRESS_40 0x00000004
+#define chipMinorFeatures5_MINI_MMU_FIX 0x00000008
#define chipMinorFeatures5_EEZ 0x00000010
-#define chipMinorFeatures5_UNK5 0x00000020
-#define chipMinorFeatures5_UNK6 0x00000040
-#define chipMinorFeatures5_UNK7 0x00000080
-#define chipMinorFeatures5_UNK8 0x00000100
+#define chipMinorFeatures5_BUG_FIXES21 0x00000020
+#define chipMinorFeatures5_EXTRA_VG_CAPS 0x00000040
+#define chipMinorFeatures5_MULTI_SRC_V15 0x00000080
+#define chipMinorFeatures5_BUG_FIXES22 0x00000100
#define chipMinorFeatures5_HALTI3 0x00000200
-#define chipMinorFeatures5_UNK10 0x00000400
+#define chipMinorFeatures5_TESSELATION_SHADERS 0x00000400
#define chipMinorFeatures5_2D_ONE_PASS_FILTER_TAP 0x00000800
-#define chipMinorFeatures5_UNK12 0x00001000
+#define chipMinorFeatures5_MULTI_SRC_V2_STR_QUAD 0x00001000
#define chipMinorFeatures5_SEPARATE_SRC_DST 0x00002000
#define chipMinorFeatures5_HALTI4 0x00004000
-#define chipMinorFeatures5_UNK15 0x00008000
+#define chipMinorFeatures5_RA_WRITE_DEPTH 0x00008000
#define chipMinorFeatures5_ANDROID_ONLY 0x00010000
#define chipMinorFeatures5_HAS_PRODUCTID 0x00020000
-#define chipMinorFeatures5_UNK18 0x00040000
-#define chipMinorFeatures5_UNK19 0x00080000
+#define chipMinorFeatures5_TX_SUPPORT_DEC 0x00040000
+#define chipMinorFeatures5_S8_MSAA_COMPRESSION 0x00080000
#define chipMinorFeatures5_PE_DITHER_FIX2 0x00100000
-#define chipMinorFeatures5_UNK21 0x00200000
-#define chipMinorFeatures5_UNK22 0x00400000
-#define chipMinorFeatures5_UNK23 0x00800000
-#define chipMinorFeatures5_UNK24 0x01000000
-#define chipMinorFeatures5_UNK25 0x02000000
-#define chipMinorFeatures5_UNK26 0x04000000
+#define chipMinorFeatures5_L2_CACHE_REMOVE 0x00200000
+#define chipMinorFeatures5_FE_ALLOW_RND_VTX_CNT 0x00400000
+#define chipMinorFeatures5_CUBE_MAP_FL28 0x00800000
+#define chipMinorFeatures5_TX_6BIT_FRAC 0x01000000
+#define chipMinorFeatures5_FE_ALLOW_STALL_PREFETCH_ENG 0x02000000
+#define chipMinorFeatures5_THIRD_PARTY_COMPRESSION 0x04000000
#define chipMinorFeatures5_RS_DEPTHSTENCIL_NATIVE_SUPPORT 0x08000000
#define chipMinorFeatures5_V2_MSAA_COMP_FIX 0x10000000
-#define chipMinorFeatures5_UNK29 0x20000000
-#define chipMinorFeatures5_UNK30 0x40000000
-#define chipMinorFeatures5_UNK31 0x80000000
+#define chipMinorFeatures5_HALTI5 0x20000000
+#define chipMinorFeatures5_EVIS 0x40000000
+#define chipMinorFeatures5_BLT_ENGINE 0x80000000
+#define chipMinorFeatures6_BUG_FIXES_23 0x00000001
+#define chipMinorFeatures6_BUG_FIXES_24 0x00000002
+#define chipMinorFeatures6_DEC 0x00000004
+#define chipMinorFeatures6_VS_TILE_NV12 0x00000008
+#define chipMinorFeatures6_VS_TILE_NV12_10BIT 0x00000010
#endif /* COMMON_XML */
diff -Nru mesa-17.2.4/src/gallium/drivers/etnaviv/hw/isa.xml.h mesa-17.3.3/src/gallium/drivers/etnaviv/hw/isa.xml.h
--- mesa-17.2.4/src/gallium/drivers/etnaviv/hw/isa.xml.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/etnaviv/hw/isa.xml.h 2018-01-18 21:30:28.000000000 +0000
@@ -8,8 +8,8 @@
git clone git://0x04.net/rules-ng-ng
The rules-ng-ng source files this header was generated from are:
-- isa.xml ( 34392 bytes, from 2017-04-13 12:44:36)
-- copyright.xml ( 1597 bytes, from 2016-10-29 07:29:22)
+- isa.xml ( 35432 bytes, from 2017-10-05 21:20:32)
+- copyright.xml ( 1597 bytes, from 2016-11-13 13:46:17)
Copyright (C) 2012-2017 by the following authors:
- Wladimir J. van der Laan
diff -Nru mesa-17.2.4/src/gallium/drivers/etnaviv/hw/state_3d.xml.h mesa-17.3.3/src/gallium/drivers/etnaviv/hw/state_3d.xml.h
--- mesa-17.2.4/src/gallium/drivers/etnaviv/hw/state_3d.xml.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/etnaviv/hw/state_3d.xml.h 2018-01-18 21:30:28.000000000 +0000
@@ -8,13 +8,13 @@
git clone git://0x04.net/rules-ng-ng
The rules-ng-ng source files this header was generated from are:
-- state.xml ( 19930 bytes, from 2017-01-07 14:27:54)
-- common.xml ( 23473 bytes, from 2017-01-07 14:27:54)
-- state_hi.xml ( 26403 bytes, from 2017-01-07 14:27:54)
-- copyright.xml ( 1597 bytes, from 2016-10-29 07:29:22)
-- state_2d.xml ( 51552 bytes, from 2016-10-29 07:29:22)
-- state_3d.xml ( 66964 bytes, from 2017-04-13 12:38:05)
-- state_vg.xml ( 5975 bytes, from 2016-10-29 07:29:22)
+- state.xml ( 26245 bytes, from 2017-10-05 21:32:06)
+- common.xml ( 26135 bytes, from 2017-10-05 21:20:32)
+- state_hi.xml ( 27733 bytes, from 2017-10-05 21:20:32)
+- copyright.xml ( 1597 bytes, from 2016-11-13 13:46:17)
+- state_2d.xml ( 51552 bytes, from 2016-11-13 13:46:17)
+- state_3d.xml ( 80819 bytes, from 2017-10-05 21:20:32)
+- state_vg.xml ( 5975 bytes, from 2016-11-13 13:46:17)
Copyright (C) 2012-2017 by the following authors:
- Wladimir J. van der Laan
@@ -88,20 +88,21 @@
#define RS_FORMAT_A8R8G8B8 0x00000006
#define RS_FORMAT_YUY2 0x00000007
#define RS_FORMAT_R16F 0x00000011
-#define RS_FORMAT_X16R16F 0x00000012
-#define RS_FORMAT_X16B16G16R16F 0x00000013
+#define RS_FORMAT_G16R16F 0x00000012
+#define RS_FORMAT_A16B16G16R16F 0x00000013
#define RS_FORMAT_R32F 0x00000014
-#define RS_FORMAT_X32R32F 0x00000015
+#define RS_FORMAT_G32R32F 0x00000015
#define RS_FORMAT_A2B10G10R10 0x00000016
#define RS_FORMAT_R8I 0x00000017
-#define RS_FORMAT_X8R8I 0x00000018
-#define RS_FORMAT_X8B8G8R8I 0x00000019
+#define RS_FORMAT_G8R8I 0x00000018
+#define RS_FORMAT_A8B8G8R8I 0x00000019
#define RS_FORMAT_R16I 0x0000001a
-#define RS_FORMAT_X16R16I 0x0000001b
-#define RS_FORMAT_X16B16G16R16I 0x0000001c
+#define RS_FORMAT_G16R16I 0x0000001b
+#define RS_FORMAT_A16B16G16R16I 0x0000001c
#define RS_FORMAT_B10G11R11F 0x0000001d
#define RS_FORMAT_A2B10G10R10UI 0x0000001e
#define RS_FORMAT_G8R8 0x0000001f
+#define RS_FORMAT_R8 0x00000023
#define TEXTURE_FORMAT_NONE 0x00000000
#define TEXTURE_FORMAT_A8 0x00000001
#define TEXTURE_FORMAT_L8 0x00000002
@@ -132,11 +133,11 @@
#define TEXTURE_FORMAT_EXT_RG11_EAC 0x00000004
#define TEXTURE_FORMAT_EXT_SIGNED_RG11_EAC 0x00000005
#define TEXTURE_FORMAT_EXT_G8R8 0x00000006
-#define TEXTURE_FORMAT_EXT_A16F 0x00000007
-#define TEXTURE_FORMAT_EXT_A16L16F 0x00000008
+#define TEXTURE_FORMAT_EXT_R16F 0x00000007
+#define TEXTURE_FORMAT_EXT_G16R16F 0x00000008
#define TEXTURE_FORMAT_EXT_A16B16G16R16F 0x00000009
-#define TEXTURE_FORMAT_EXT_A32F 0x0000000a
-#define TEXTURE_FORMAT_EXT_A32L32F 0x0000000b
+#define TEXTURE_FORMAT_EXT_R32F 0x0000000a
+#define TEXTURE_FORMAT_EXT_G32R32F 0x0000000b
#define TEXTURE_FORMAT_EXT_A2B10G10R10 0x0000000c
#define TEXTURE_FORMAT_EXT_SIGNED_R11_EAC 0x0000000d
#define TEXTURE_FORMAT_EXT_R8_SNORM 0x0000000e
@@ -157,7 +158,9 @@
#define TEXTURE_FILTER_LINEAR 0x00000002
#define TEXTURE_FILTER_ANISOTROPIC 0x00000003
#define TEXTURE_TYPE_NONE 0x00000000
+#define TEXTURE_TYPE_1D 0x00000001
#define TEXTURE_TYPE_2D 0x00000002
+#define TEXTURE_TYPE_3D 0x00000003
#define TEXTURE_TYPE_CUBE_MAP 0x00000005
#define TEXTURE_WRAPMODE_REPEAT 0x00000000
#define TEXTURE_WRAPMODE_MIRRORED_REPEAT 0x00000001
@@ -278,10 +281,10 @@
#define VIVS_VS_RANGE_HIGH__SHIFT 16
#define VIVS_VS_RANGE_HIGH(x) (((x) << VIVS_VS_RANGE_HIGH__SHIFT) & VIVS_VS_RANGE_HIGH__MASK)
-#define VIVS_VS_NEW_UNK00860 0x00000860
-#define VIVS_VS_NEW_UNK00860_UNK0 0x00000001
-#define VIVS_VS_NEW_UNK00860_PS 0x00000010
-#define VIVS_VS_NEW_UNK00860_UNK12 0x00001000
+#define VIVS_VS_UNIFORM_CACHE 0x00000860
+#define VIVS_VS_UNIFORM_CACHE_FLUSH 0x00000001
+#define VIVS_VS_UNIFORM_CACHE_PS 0x00000010
+#define VIVS_VS_UNIFORM_CACHE_UNK12 0x00001000
#define VIVS_VS_UNIFORM_BASE 0x00000864
@@ -292,6 +295,49 @@
#define VIVS_VS_INST_ADDR 0x0000086c
+#define VIVS_VS_HALTI5_UNK00870 0x00000870
+
+#define VIVS_VS_HALTI5_UNK00874 0x00000874
+
+#define VIVS_VS_HALTI5_UNK00878 0x00000878
+
+#define VIVS_VS_HALTI5_UNK0087C 0x0000087c
+
+#define VIVS_VS_HALTI5_UNK00880 0x00000880
+
+#define VIVS_VS_HALTI1_UNK00884 0x00000884
+
+#define VIVS_VS_UNK0088C 0x0000088c
+
+#define VIVS_VS_ICACHE_UNK00890 0x00000890
+
+#define VIVS_VS_HALTI5_UNK00898(i0) (0x00000898 + 0x4*(i0))
+#define VIVS_VS_HALTI5_UNK00898__ESIZE 0x00000004
+#define VIVS_VS_HALTI5_UNK00898__LEN 0x00000002
+
+#define VIVS_VS_HALTI5_UNK008A0 0x000008a0
+
+#define VIVS_VS_HALTI5_UNK008A8 0x000008a8
+
+#define VIVS_VS_ICACHE_INVALIDATE 0x000008b0
+#define VIVS_VS_ICACHE_INVALIDATE_UNK0 0x00000001
+#define VIVS_VS_ICACHE_INVALIDATE_UNK1 0x00000002
+#define VIVS_VS_ICACHE_INVALIDATE_UNK2 0x00000004
+#define VIVS_VS_ICACHE_INVALIDATE_UNK3 0x00000008
+#define VIVS_VS_ICACHE_INVALIDATE_UNK4 0x00000010
+
+#define VIVS_VS_HALTI5_UNK008B8 0x000008b8
+
+#define VIVS_VS_HALTI5_UNK008BC 0x000008bc
+
+#define VIVS_VS_HALTI5_UNK008C0(i0) (0x000008c0 + 0x4*(i0))
+#define VIVS_VS_HALTI5_UNK008C0__ESIZE 0x00000004
+#define VIVS_VS_HALTI5_UNK008C0__LEN 0x00000008
+
+#define VIVS_VS_HALTI5_UNK008E0(i0) (0x000008e0 + 0x4*(i0))
+#define VIVS_VS_HALTI5_UNK008E0__ESIZE 0x00000004
+#define VIVS_VS_HALTI5_UNK008E0__LEN 0x00000008
+
#define VIVS_VS_INST_MEM(i0) (0x00004000 + 0x4*(i0))
#define VIVS_VS_INST_MEM__ESIZE 0x00000004
#define VIVS_VS_INST_MEM__LEN 0x00000400
@@ -300,6 +346,10 @@
#define VIVS_VS_UNIFORMS__ESIZE 0x00000004
#define VIVS_VS_UNIFORMS__LEN 0x00000400
+#define VIVS_VS_HALTI5_UNK15600 0x00015600
+
+#define VIVS_VS_HALTI5_UNK15604 0x00015604
+
#define VIVS_CL 0x00000000
#define VIVS_CL_CONFIG 0x00000900
@@ -391,6 +441,12 @@
#define VIVS_CL_UNK00954 0x00000954
+#define VIVS_CL_HALTI5_UNK00958 0x00000958
+
+#define VIVS_CL_HALTI5_UNK0095C 0x0000095c
+
+#define VIVS_CL_HALTI5_UNK00960 0x00000960
+
#define VIVS_PA 0x00000000
#define VIVS_PA_VIEWPORT_SCALE_X 0x00000a00
@@ -409,9 +465,11 @@
#define VIVS_PA_POINT_SIZE 0x00000a1c
+#define VIVS_PA_UNK00A24 0x00000a24
+
#define VIVS_PA_SYSTEM_MODE 0x00000a28
-#define VIVS_PA_SYSTEM_MODE_UNK0 0x00000001
-#define VIVS_PA_SYSTEM_MODE_UNK4 0x00000010
+#define VIVS_PA_SYSTEM_MODE_PROVOKING_VERTEX_LAST 0x00000001
+#define VIVS_PA_SYSTEM_MODE_HALF_PIXEL_CENTER 0x00000010
#define VIVS_PA_W_CLIP_LIMIT 0x00000a2c
@@ -473,6 +531,12 @@
#define VIVS_PA_ZFARCLIPPING 0x00000a8c
+#define VIVS_PA_HALTI5_UNK00A90(i0) (0x00000a90 + 0x4*(i0))
+#define VIVS_PA_HALTI5_UNK00A90__ESIZE 0x00000004
+#define VIVS_PA_HALTI5_UNK00A90__LEN 0x00000004
+
+#define VIVS_PA_HALTI5_UNK00AA8 0x00000aa8
+
#define VIVS_SE 0x00000000
#define VIVS_SE_SCISSOR_LEFT 0x00000c00
@@ -518,6 +582,10 @@
#define VIVS_RA_HDEPTH_CONTROL_COMPARE__SHIFT 12
#define VIVS_RA_HDEPTH_CONTROL_COMPARE(x) (((x) << VIVS_RA_HDEPTH_CONTROL_COMPARE__SHIFT) & VIVS_RA_HDEPTH_CONTROL_COMPARE__MASK)
+#define VIVS_RA_UNK00E24 0x00000e24
+
+#define VIVS_RA_HALTI5_UNK00E34 0x00000e34
+
#define VIVS_RA_CENTROID_TABLE(i0) (0x00000e40 + 0x4*(i0))
#define VIVS_RA_CENTROID_TABLE__ESIZE 0x00000004
#define VIVS_RA_CENTROID_TABLE__LEN 0x00000010
@@ -561,11 +629,41 @@
#define VIVS_PS_INST_ADDR 0x00001028
+#define VIVS_PS_UNK0102C 0x0000102c
+
#define VIVS_PS_CONTROL_EXT 0x00001030
#define VIVS_PS_CONTROL_EXT_COLOR_OUTPUT_COUNT__MASK 0x00000003
#define VIVS_PS_CONTROL_EXT_COLOR_OUTPUT_COUNT__SHIFT 0
#define VIVS_PS_CONTROL_EXT_COLOR_OUTPUT_COUNT(x) (((x) << VIVS_PS_CONTROL_EXT_COLOR_OUTPUT_COUNT__SHIFT) & VIVS_PS_CONTROL_EXT_COLOR_OUTPUT_COUNT__MASK)
+#define VIVS_PS_UNK01034 0x00001034
+
+#define VIVS_PS_UNK01038 0x00001038
+
+#define VIVS_PS_HALTI3_UNK0103C 0x0000103c
+
+#define VIVS_PS_UNK01040(i0) (0x00001040 + 0x4*(i0))
+#define VIVS_PS_UNK01040__ESIZE 0x00000004
+#define VIVS_PS_UNK01040__LEN 0x00000002
+
+#define VIVS_PS_UNK01048 0x00001048
+
+#define VIVS_PS_ICACHE_UNK0104C 0x0000104c
+
+#define VIVS_PS_HALTI4_UNK01054 0x00001054
+
+#define VIVS_PS_HALTI5_UNK01058 0x00001058
+
+#define VIVS_PS_HALTI5_UNK01080(i0) (0x00001080 + 0x4*(i0))
+#define VIVS_PS_HALTI5_UNK01080__ESIZE 0x00000004
+#define VIVS_PS_HALTI5_UNK01080__LEN 0x00000004
+
+#define VIVS_PS_HALTI5_UNK01090 0x00001090
+
+#define VIVS_PS_HALTI5_UNK01094 0x00001094
+
+#define VIVS_PS_HALTI5_UNK01098 0x00001098
+
#define VIVS_PS_INST_MEM(i0) (0x00006000 + 0x4*(i0))
#define VIVS_PS_INST_MEM__ESIZE 0x00000004
#define VIVS_PS_INST_MEM__LEN 0x00000400
@@ -574,6 +672,122 @@
#define VIVS_PS_UNIFORMS__ESIZE 0x00000004
#define VIVS_PS_UNIFORMS__LEN 0x00000400
+#define VIVS_GS 0x00000000
+
+#define VIVS_GS_UNK01100 0x00001100
+
+#define VIVS_GS_UNK01104 0x00001104
+
+#define VIVS_GS_UNK01108 0x00001108
+
+#define VIVS_GS_UNK0110C 0x0000110c
+
+#define VIVS_GS_UNK01110 0x00001110
+
+#define VIVS_GS_UNK01114 0x00001114
+
+#define VIVS_GS_UNK0111C 0x0000111c
+
+#define VIVS_GS_UNK01120(i0) (0x00001120 + 0x4*(i0))
+#define VIVS_GS_UNK01120__ESIZE 0x00000004
+#define VIVS_GS_UNK01120__LEN 0x00000008
+
+#define VIVS_GS_UNK01140 0x00001140
+
+#define VIVS_GS_UNK01144 0x00001144
+
+#define VIVS_GS_UNK01148 0x00001148
+
+#define VIVS_GS_UNK0114C 0x0000114c
+
+#define VIVS_GS_UNK01154 0x00001154
+
+#define VIVS_TCS 0x00000000
+
+#define VIVS_TCS_UNK007C0 0x000007c0
+
+#define VIVS_TCS_UNK14A00 0x00014a00
+
+#define VIVS_TCS_UNK14A04 0x00014a04
+
+#define VIVS_TCS_UNK14A08 0x00014a08
+
+#define VIVS_TCS_UNK14A10 0x00014a10
+
+#define VIVS_TCS_UNK14A14 0x00014a14
+
+#define VIVS_TCS_UNK14A18 0x00014a18
+
+#define VIVS_TCS_UNK14A1C 0x00014a1c
+
+#define VIVS_TCS_UNK14A20(i0) (0x00014a20 + 0x4*(i0))
+#define VIVS_TCS_UNK14A20__ESIZE 0x00000004
+#define VIVS_TCS_UNK14A20__LEN 0x00000008
+
+#define VIVS_TCS_UNK14A40 0x00014a40
+
+#define VIVS_TCS_UNK14A44 0x00014a44
+
+#define VIVS_TCS_UNK14A4C 0x00014a4c
+
+#define VIVS_TES 0x00000000
+
+#define VIVS_TES_UNK14B00 0x00014b00
+
+#define VIVS_TES_UNK14B04 0x00014b04
+
+#define VIVS_TES_UNK14B08 0x00014b08
+
+#define VIVS_TES_UNK14B0C 0x00014b0c
+
+#define VIVS_TES_UNK14B14 0x00014b14
+
+#define VIVS_TES_UNK14B18 0x00014b18
+
+#define VIVS_TES_UNK14B1C 0x00014b1c
+
+#define VIVS_TES_UNK14B20 0x00014b20
+
+#define VIVS_TES_UNK14B24 0x00014b24
+
+#define VIVS_TES_UNK14B2C 0x00014b2c
+
+#define VIVS_TES_UNK14B34 0x00014b34
+
+#define VIVS_TES_UNK14B40(i0) (0x00014b40 + 0x4*(i0))
+#define VIVS_TES_UNK14B40__ESIZE 0x00000004
+#define VIVS_TES_UNK14B40__LEN 0x00000008
+
+#define VIVS_TFB 0x00000000
+
+#define VIVS_TFB_UNK1C000 0x0001c000
+
+#define VIVS_TFB_UNK1C008 0x0001c008
+
+#define VIVS_TFB_FLUSH 0x0001c00c
+
+#define VIVS_TFB_UNK1C014 0x0001c014
+
+#define VIVS_TFB_UNK1C040(i0) (0x0001c040 + 0x4*(i0))
+#define VIVS_TFB_UNK1C040__ESIZE 0x00000004
+#define VIVS_TFB_UNK1C040__LEN 0x00000004
+
+#define VIVS_TFB_UNK1C080(i0) (0x0001c080 + 0x4*(i0))
+#define VIVS_TFB_UNK1C080__ESIZE 0x00000004
+#define VIVS_TFB_UNK1C080__LEN 0x00000004
+
+#define VIVS_TFB_UNK1C0C0(i0) (0x0001c0c0 + 0x4*(i0))
+#define VIVS_TFB_UNK1C0C0__ESIZE 0x00000004
+#define VIVS_TFB_UNK1C0C0__LEN 0x00000004
+
+#define VIVS_TFB_UNK1C100(i0) (0x0001c100 + 0x4*(i0))
+#define VIVS_TFB_UNK1C100__ESIZE 0x00000004
+#define VIVS_TFB_UNK1C100__LEN 0x00000004
+
+#define VIVS_TFB_UNK1C800(i0) (0x0001c800 + 0x4*(i0))
+#define VIVS_TFB_UNK1C800__ESIZE 0x00000004
+#define VIVS_TFB_UNK1C800__LEN 0x00000200
+
#define VIVS_PE 0x00000000
#define VIVS_PE_DEPTH_CONFIG 0x00001400
@@ -738,7 +952,7 @@
#define VIVS_PE_COLOR_FORMAT_OVERWRITE_MASK 0x00020000
#define VIVS_PE_COLOR_FORMAT_SUPER_TILED 0x00100000
#define VIVS_PE_COLOR_FORMAT_SUPER_TILED_MASK 0x00200000
-#define VIVS_PE_COLOR_FORMAT_FORMAT_EXT__MASK 0x3f000000
+#define VIVS_PE_COLOR_FORMAT_FORMAT_EXT__MASK 0x7f000000
#define VIVS_PE_COLOR_FORMAT_FORMAT_EXT__SHIFT 24
#define VIVS_PE_COLOR_FORMAT_FORMAT_EXT(x) (((x) << VIVS_PE_COLOR_FORMAT_FORMAT_EXT__SHIFT) & VIVS_PE_COLOR_FORMAT_FORMAT_EXT__MASK)
#define VIVS_PE_COLOR_FORMAT_FORMAT_EXT_MASK 0x80000000
@@ -770,6 +984,8 @@
#define VIVS_PE_PIPE_ADDR_UNK01520(i0) (0x00001520 + 0x4*(i0))
+#define VIVS_PE_PIPE_ADDR_UNK01540(i0) (0x00001540 + 0x4*(i0))
+
#define VIVS_PE_STENCIL_CONFIG_EXT 0x000014a0
#define VIVS_PE_STENCIL_CONFIG_EXT_REF_BACK__MASK 0x000000ff
#define VIVS_PE_STENCIL_CONFIG_EXT_REF_BACK__SHIFT 0
@@ -799,6 +1015,8 @@
#define VIVS_PE_LOGIC_OP_UNK24__SHIFT 24
#define VIVS_PE_LOGIC_OP_UNK24(x) (((x) << VIVS_PE_LOGIC_OP_UNK24__SHIFT) & VIVS_PE_LOGIC_OP_UNK24__MASK)
#define VIVS_PE_LOGIC_OP_UNK24_MASK 0x08000000
+#define VIVS_PE_LOGIC_OP_UNK31_MASK 0x40000000
+#define VIVS_PE_LOGIC_OP_UNK31 0x80000000
#define VIVS_PE_DITHER(i0) (0x000014a8 + 0x4*(i0))
#define VIVS_PE_DITHER__ESIZE 0x00000004
@@ -828,6 +1046,12 @@
#define VIVS_PE_STENCIL_CONFIG_EXT2_WRITE_MASK_BACK__SHIFT 8
#define VIVS_PE_STENCIL_CONFIG_EXT2_WRITE_MASK_BACK(x) (((x) << VIVS_PE_STENCIL_CONFIG_EXT2_WRITE_MASK_BACK__SHIFT) & VIVS_PE_STENCIL_CONFIG_EXT2_WRITE_MASK_BACK__MASK)
+#define VIVS_PE_HALTI3_UNK014BC 0x000014bc
+
+#define VIVS_PE_HALTI4_UNK014C0 0x000014c0
+
+#define VIVS_PE_ROBUSTNESS_UNK014C4 0x000014c4
+
#define VIVS_PE_UNK01580(i0) (0x00001580 + 0x4*(i0))
#define VIVS_PE_UNK01580__ESIZE 0x00000004
#define VIVS_PE_UNK01580__LEN 0x00000003
@@ -850,6 +1074,30 @@
#define VIVS_PE_RT_CONFIG_UNK16__SHIFT 16
#define VIVS_PE_RT_CONFIG_UNK16(x) (((x) << VIVS_PE_RT_CONFIG_UNK16__SHIFT) & VIVS_PE_RT_CONFIG_UNK16__MASK)
+#define VIVS_PE_HALTI5_UNK14920(i0) (0x00014920 + 0x4*(i0))
+#define VIVS_PE_HALTI5_UNK14920__ESIZE 0x00000004
+#define VIVS_PE_HALTI5_UNK14920__LEN 0x00000007
+
+#define VIVS_PE_HALTI5_UNK14940(i0) (0x00014940 + 0x4*(i0))
+#define VIVS_PE_HALTI5_UNK14940__ESIZE 0x00000004
+#define VIVS_PE_HALTI5_UNK14940__LEN 0x00000007
+
+#define VIVS_PE_HALTI5_UNK14960(i0) (0x00014960 + 0x4*(i0))
+#define VIVS_PE_HALTI5_UNK14960__ESIZE 0x00000004
+#define VIVS_PE_HALTI5_UNK14960__LEN 0x00000007
+
+#define VIVS_PE_HALTI5_UNK14980(i0) (0x00014980 + 0x4*(i0))
+#define VIVS_PE_HALTI5_UNK14980__ESIZE 0x00000004
+#define VIVS_PE_HALTI5_UNK14980__LEN 0x00000007
+
+#define VIVS_PE_HALTI5_UNK149A0(i0) (0x000149a0 + 0x4*(i0))
+#define VIVS_PE_HALTI5_UNK149A0__ESIZE 0x00000004
+#define VIVS_PE_HALTI5_UNK149A0__LEN 0x00000007
+
+#define VIVS_PE_ROBUSTNESS_UNK149C0(i0) (0x000149c0 + 0x4*(i0))
+#define VIVS_PE_ROBUSTNESS_UNK149C0__ESIZE 0x00000004
+#define VIVS_PE_ROBUSTNESS_UNK149C0__LEN 0x00000008
+
#define VIVS_CO 0x00000000
#define VIVS_CO_UNK03008 0x00003008
@@ -874,6 +1122,8 @@
#define VIVS_CO_UNK03048 0x00003048
+#define VIVS_CO_ICACHE_UNK0304C 0x0000304c
+
#define VIVS_CO_SAMPLER(i0) (0x00000000 + 0x4*(i0))
#define VIVS_CO_SAMPLER__ESIZE 0x00000004
#define VIVS_CO_SAMPLER__LEN 0x00000008
@@ -985,15 +1235,13 @@
#define VIVS_RS_EXTRA_CONFIG_UNK20 0x00100000
#define VIVS_RS_EXTRA_CONFIG_UNK28 0x10000000
-#define VIVS_RS_UNK016B0 0x000016b0
+#define VIVS_RS_KICKER_INPLACE 0x000016b0
#define VIVS_RS_UNK016B4 0x000016b4
#define VIVS_RS_SINGLE_BUFFER 0x000016b8
#define VIVS_RS_SINGLE_BUFFER_ENABLE 0x00000001
-#define VIVS_RS_UNK016BC 0x000016bc
-
#define VIVS_RS_PIPE(i0) (0x00000000 + 0x4*(i0))
#define VIVS_RS_PIPE__ESIZE 0x00000004
#define VIVS_RS_PIPE__LEN 0x00000008
@@ -1032,6 +1280,8 @@
#define VIVS_TS_MEM_CONFIG_MSAA_FORMAT_X8R8G8B8 0x00000400
#define VIVS_TS_MEM_CONFIG_UNK12 0x00001000
#define VIVS_TS_MEM_CONFIG_HDEPTH_AUTO_DISABLE 0x00002000
+#define VIVS_TS_MEM_CONFIG_UNK14 0x00004000
+#define VIVS_TS_MEM_CONFIG_UNK21 0x00200000
#define VIVS_TS_COLOR_STATUS_BASE 0x00001658
@@ -1055,6 +1305,8 @@
#define VIVS_TS_HDEPTH_SIZE 0x000016ac
+#define VIVS_TS_COLOR_CLEAR_VALUE_EXT 0x000016bc
+
#define VIVS_TS_SAMPLER(i0) (0x00000000 + 0x4*(i0))
#define VIVS_TS_SAMPLER__ESIZE 0x00000004
#define VIVS_TS_SAMPLER__LEN 0x00000008
@@ -1066,6 +1318,9 @@
#define VIVS_TS_SAMPLER_CONFIG_FORMAT__MASK 0x000000f0
#define VIVS_TS_SAMPLER_CONFIG_FORMAT__SHIFT 4
#define VIVS_TS_SAMPLER_CONFIG_FORMAT(x) (((x) << VIVS_TS_SAMPLER_CONFIG_FORMAT__SHIFT) & VIVS_TS_SAMPLER_CONFIG_FORMAT__MASK)
+#define VIVS_TS_SAMPLER_CONFIG_UNK11__MASK 0x00003800
+#define VIVS_TS_SAMPLER_CONFIG_UNK11__SHIFT 11
+#define VIVS_TS_SAMPLER_CONFIG_UNK11(x) (((x) << VIVS_TS_SAMPLER_CONFIG_UNK11__SHIFT) & VIVS_TS_SAMPLER_CONFIG_UNK11__MASK)
#define VIVS_TS_SAMPLER_STATUS_BASE(i0) (0x00001740 + 0x4*(i0))
@@ -1073,6 +1328,8 @@
#define VIVS_TS_SAMPLER_CLEAR_VALUE2(i0) (0x00001780 + 0x4*(i0))
+#define VIVS_TS_SAMPLER_SURFACE_BASE(i0) (0x00001a80 + 0x4*(i0))
+
#define VIVS_TS_RT(i0) (0x00000000 + 0x4*(i0))
#define VIVS_TS_RT__ESIZE 0x00000004
#define VIVS_TS_RT__LEN 0x00000008
@@ -1162,6 +1419,8 @@
#define VIVS_TE_SAMPLER_LOG_SIZE_HEIGHT__MASK 0x000ffc00
#define VIVS_TE_SAMPLER_LOG_SIZE_HEIGHT__SHIFT 10
#define VIVS_TE_SAMPLER_LOG_SIZE_HEIGHT(x) (((x) << VIVS_TE_SAMPLER_LOG_SIZE_HEIGHT__SHIFT) & VIVS_TE_SAMPLER_LOG_SIZE_HEIGHT__MASK)
+#define VIVS_TE_SAMPLER_LOG_SIZE_RGB 0x20000000
+#define VIVS_TE_SAMPLER_LOG_SIZE_SRGB 0x80000000
#define VIVS_TE_SAMPLER_LOD_CONFIG(i0) (0x000020c0 + 0x4*(i0))
#define VIVS_TE_SAMPLER_LOD_CONFIG_BIAS_ENABLE 0x00000001
@@ -1179,7 +1438,16 @@
#define VIVS_TE_SAMPLER_UNK02140(i0) (0x00002140 + 0x4*(i0))
-#define VIVS_TE_SAMPLER_UNK02180(i0) (0x00002180 + 0x4*(i0))
+#define VIVS_TE_SAMPLER_3D_CONFIG(i0) (0x00002180 + 0x4*(i0))
+#define VIVS_TE_SAMPLER_3D_CONFIG_DEPTH__MASK 0x00003fff
+#define VIVS_TE_SAMPLER_3D_CONFIG_DEPTH__SHIFT 0
+#define VIVS_TE_SAMPLER_3D_CONFIG_DEPTH(x) (((x) << VIVS_TE_SAMPLER_3D_CONFIG_DEPTH__SHIFT) & VIVS_TE_SAMPLER_3D_CONFIG_DEPTH__MASK)
+#define VIVS_TE_SAMPLER_3D_CONFIG_LOG_DEPTH__MASK 0x03ff0000
+#define VIVS_TE_SAMPLER_3D_CONFIG_LOG_DEPTH__SHIFT 16
+#define VIVS_TE_SAMPLER_3D_CONFIG_LOG_DEPTH(x) (((x) << VIVS_TE_SAMPLER_3D_CONFIG_LOG_DEPTH__SHIFT) & VIVS_TE_SAMPLER_3D_CONFIG_LOG_DEPTH__MASK)
+#define VIVS_TE_SAMPLER_3D_CONFIG_WRAP__MASK 0x30000000
+#define VIVS_TE_SAMPLER_3D_CONFIG_WRAP__SHIFT 28
+#define VIVS_TE_SAMPLER_3D_CONFIG_WRAP(x) (((x) << VIVS_TE_SAMPLER_3D_CONFIG_WRAP__SHIFT) & VIVS_TE_SAMPLER_3D_CONFIG_WRAP__MASK)
#define VIVS_TE_SAMPLER_CONFIG1(i0) (0x000021c0 + 0x4*(i0))
#define VIVS_TE_SAMPLER_CONFIG1_FORMAT_EXT__MASK 0x0000001f
@@ -1197,6 +1465,8 @@
#define VIVS_TE_SAMPLER_CONFIG1_SWIZZLE_A__MASK 0x00700000
#define VIVS_TE_SAMPLER_CONFIG1_SWIZZLE_A__SHIFT 20
#define VIVS_TE_SAMPLER_CONFIG1_SWIZZLE_A(x) (((x) << VIVS_TE_SAMPLER_CONFIG1_SWIZZLE_A__SHIFT) & VIVS_TE_SAMPLER_CONFIG1_SWIZZLE_A__MASK)
+#define VIVS_TE_SAMPLER_CONFIG1_TEXTURE_ARRAY 0x01000000
+#define VIVS_TE_SAMPLER_CONFIG1_UNK25 0x02000000
#define VIVS_TE_SAMPLER_CONFIG1_HALIGN__MASK 0x1c000000
#define VIVS_TE_SAMPLER_CONFIG1_HALIGN__SHIFT 26
#define VIVS_TE_SAMPLER_CONFIG1_HALIGN(x) (((x) << VIVS_TE_SAMPLER_CONFIG1_HALIGN__SHIFT) & VIVS_TE_SAMPLER_CONFIG1_HALIGN__MASK)
@@ -1209,6 +1479,10 @@
#define VIVS_TE_SAMPLER_LOD_ADDR__ESIZE 0x00000040
#define VIVS_TE_SAMPLER_LOD_ADDR__LEN 0x0000000e
+#define VIVS_TE_SAMPLER_LINEAR_STRIDE(i0, i1) (0x00002c00 + 0x4*(i0) + 0x40*(i1))
+#define VIVS_TE_SAMPLER_LINEAR_STRIDE__ESIZE 0x00000040
+#define VIVS_TE_SAMPLER_LINEAR_STRIDE__LEN 0x0000000e
+
#define VIVS_NTE 0x00000000
#define VIVS_NTE_SAMPLER(i0) (0x00000000 + 0x4*(i0))
@@ -1260,6 +1534,8 @@
#define VIVS_NTE_SAMPLER_LOG_SIZE_HEIGHT__MASK 0x000ffc00
#define VIVS_NTE_SAMPLER_LOG_SIZE_HEIGHT__SHIFT 10
#define VIVS_NTE_SAMPLER_LOG_SIZE_HEIGHT(x) (((x) << VIVS_NTE_SAMPLER_LOG_SIZE_HEIGHT__SHIFT) & VIVS_NTE_SAMPLER_LOG_SIZE_HEIGHT__MASK)
+#define VIVS_NTE_SAMPLER_LOG_SIZE_RGB 0x20000000
+#define VIVS_NTE_SAMPLER_LOG_SIZE_SRGB 0x80000000
#define VIVS_NTE_SAMPLER_LOD_CONFIG(i0) (0x00010180 + 0x4*(i0))
#define VIVS_NTE_SAMPLER_LOD_CONFIG_BIAS_ENABLE 0x00000001
@@ -1295,6 +1571,8 @@
#define VIVS_NTE_SAMPLER_CONFIG1_SWIZZLE_A__MASK 0x00700000
#define VIVS_NTE_SAMPLER_CONFIG1_SWIZZLE_A__SHIFT 20
#define VIVS_NTE_SAMPLER_CONFIG1_SWIZZLE_A(x) (((x) << VIVS_NTE_SAMPLER_CONFIG1_SWIZZLE_A__SHIFT) & VIVS_NTE_SAMPLER_CONFIG1_SWIZZLE_A__MASK)
+#define VIVS_NTE_SAMPLER_CONFIG1_TEXTURE_ARRAY 0x01000000
+#define VIVS_NTE_SAMPLER_CONFIG1_UNK25 0x02000000
#define VIVS_NTE_SAMPLER_CONFIG1_HALIGN__MASK 0x1c000000
#define VIVS_NTE_SAMPLER_CONFIG1_HALIGN__SHIFT 26
#define VIVS_NTE_SAMPLER_CONFIG1_HALIGN(x) (((x) << VIVS_NTE_SAMPLER_CONFIG1_HALIGN__SHIFT) & VIVS_NTE_SAMPLER_CONFIG1_HALIGN__MASK)
@@ -1303,7 +1581,31 @@
#define VIVS_NTE_SAMPLER_UNK10480(i0) (0x00010480 + 0x4*(i0))
-#define VIVS_NTE_SAMPLER_UNK10500(i0) (0x00010500 + 0x4*(i0))
+#define VIVS_NTE_SAMPLER_ASTC_UNK10500(i0) (0x00010500 + 0x4*(i0))
+
+#define VIVS_NTE_SAMPLER_ASTC_UNK10580(i0) (0x00010580 + 0x4*(i0))
+
+#define VIVS_NTE_SAMPLER_ASTC_UNK10600(i0) (0x00010600 + 0x4*(i0))
+
+#define VIVS_NTE_SAMPLER_ASTC_UNK10680(i0) (0x00010680 + 0x4*(i0))
+
+#define VIVS_NTE_SAMPLER_BASELOD(i0) (0x00010700 + 0x4*(i0))
+
+#define VIVS_NTE_SAMPLER_UNK10780(i0) (0x00010780 + 0x4*(i0))
+
+#define VIVS_NTE_SAMPLER_FRAC_UNK11000(i0) (0x00011000 + 0x4*(i0))
+
+#define VIVS_NTE_SAMPLER_FRAC_UNK11080(i0) (0x00011080 + 0x4*(i0))
+
+#define VIVS_NTE_SAMPLER_FRAC_UNK11100(i0) (0x00011100 + 0x4*(i0))
+
+#define VIVS_NTE_SAMPLER_FRAC_UNK11180(i0) (0x00011180 + 0x4*(i0))
+
+#define VIVS_NTE_SAMPLER_HALTI4_UNK11200(i0) (0x00011200 + 0x4*(i0))
+
+#define VIVS_NTE_SAMPLER_HALTI4_UNK11280(i0) (0x00011280 + 0x4*(i0))
+
+#define VIVS_NTE_SAMPLER_FRAC_UNK11300(i0) (0x00011300 + 0x4*(i0))
#define VIVS_NTE_SAMPLER_ADDR(i0) (0x00010800 + 0x40*(i0))
#define VIVS_NTE_SAMPLER_ADDR__ESIZE 0x00000040
@@ -1321,6 +1623,50 @@
#define VIVS_NTE_UNK12400__ESIZE 0x00000004
#define VIVS_NTE_UNK12400__LEN 0x00000100
+#define VIVS_NTE_HALTI3_UNK14C00(i0) (0x00014c00 + 0x4*(i0))
+#define VIVS_NTE_HALTI3_UNK14C00__ESIZE 0x00000004
+#define VIVS_NTE_HALTI3_UNK14C00__LEN 0x00000010
+
+#define VIVS_NTE_DESCRIPTOR_UNK14C40 0x00014c40
+
+#define VIVS_NTE_DESCRIPTOR_INVALIDATE 0x00014c48
+#define VIVS_NTE_DESCRIPTOR_INVALIDATE_IDX__MASK 0x000001ff
+#define VIVS_NTE_DESCRIPTOR_INVALIDATE_IDX__SHIFT 0
+#define VIVS_NTE_DESCRIPTOR_INVALIDATE_IDX(x) (((x) << VIVS_NTE_DESCRIPTOR_INVALIDATE_IDX__SHIFT) & VIVS_NTE_DESCRIPTOR_INVALIDATE_IDX__MASK)
+#define VIVS_NTE_DESCRIPTOR_INVALIDATE_UNK29 0x20000000
+
+#define VIVS_NTE_DESCRIPTOR(i0) (0x00000000 + 0x4*(i0))
+#define VIVS_NTE_DESCRIPTOR__ESIZE 0x00000004
+#define VIVS_NTE_DESCRIPTOR__LEN 0x00000080
+
+#define VIVS_NTE_DESCRIPTOR_MIRROR_UNK15C00(i0) (0x00015800 + 0x4*(i0))
+
+#define VIVS_NTE_DESCRIPTOR_MIRROR_UNK15E00(i0) (0x00015a00 + 0x4*(i0))
+
+#define VIVS_NTE_DESCRIPTOR_UNK15C00(i0) (0x00015c00 + 0x4*(i0))
+
+#define VIVS_NTE_DESCRIPTOR_UNK15E00(i0) (0x00015e00 + 0x4*(i0))
+
+#define VIVS_NTE_DESCRIPTOR_MIRROR_UNK16C00(i0) (0x00016000 + 0x4*(i0))
+
+#define VIVS_NTE_DESCRIPTOR_MIRROR_UNK16E00(i0) (0x00016200 + 0x4*(i0))
+
+#define VIVS_NTE_DESCRIPTOR_MIRROR_UNK17000(i0) (0x00016400 + 0x4*(i0))
+
+#define VIVS_NTE_DESCRIPTOR_MIRROR_UNK17200(i0) (0x00016600 + 0x4*(i0))
+
+#define VIVS_NTE_DESCRIPTOR_MIRROR_UNK17400(i0) (0x00016800 + 0x4*(i0))
+
+#define VIVS_NTE_DESCRIPTOR_UNK16C00(i0) (0x00016c00 + 0x4*(i0))
+
+#define VIVS_NTE_DESCRIPTOR_UNK16E00(i0) (0x00016e00 + 0x4*(i0))
+
+#define VIVS_NTE_DESCRIPTOR_UNK17000(i0) (0x00017000 + 0x4*(i0))
+
+#define VIVS_NTE_DESCRIPTOR_UNK17200(i0) (0x00017200 + 0x4*(i0))
+
+#define VIVS_NTE_DESCRIPTOR_UNK17400(i0) (0x00017400 + 0x4*(i0))
+
#define VIVS_SH 0x00000000
#define VIVS_SH_UNK20000(i0) (0x00020000 + 0x4*(i0))
@@ -1339,5 +1685,13 @@
#define VIVS_SH_UNIFORMS__ESIZE 0x00000004
#define VIVS_SH_UNIFORMS__LEN 0x00000800
+#define VIVS_SH_HALTI5_UNIFORMS_MIRROR(i0) (0x00034000 + 0x4*(i0))
+#define VIVS_SH_HALTI5_UNIFORMS_MIRROR__ESIZE 0x00000004
+#define VIVS_SH_HALTI5_UNIFORMS_MIRROR__LEN 0x00000800
+
+#define VIVS_SH_HALTI5_UNIFORMS(i0) (0x00036000 + 0x4*(i0))
+#define VIVS_SH_HALTI5_UNIFORMS__ESIZE 0x00000004
+#define VIVS_SH_HALTI5_UNIFORMS__LEN 0x00000800
+
#endif /* STATE_3D_XML */
diff -Nru mesa-17.2.4/src/gallium/drivers/etnaviv/hw/state.xml.h mesa-17.3.3/src/gallium/drivers/etnaviv/hw/state.xml.h
--- mesa-17.2.4/src/gallium/drivers/etnaviv/hw/state.xml.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/etnaviv/hw/state.xml.h 2018-01-18 21:30:28.000000000 +0000
@@ -8,13 +8,13 @@
git clone git://0x04.net/rules-ng-ng
The rules-ng-ng source files this header was generated from are:
-- state.xml ( 19930 bytes, from 2017-01-07 14:27:54)
-- common.xml ( 23473 bytes, from 2017-01-07 14:27:54)
-- state_hi.xml ( 26403 bytes, from 2017-01-07 14:27:54)
-- copyright.xml ( 1597 bytes, from 2016-10-29 07:29:22)
-- state_2d.xml ( 51552 bytes, from 2016-10-29 07:29:22)
-- state_3d.xml ( 66964 bytes, from 2017-04-13 12:38:05)
-- state_vg.xml ( 5975 bytes, from 2016-10-29 07:29:22)
+- state.xml ( 26245 bytes, from 2017-10-05 21:32:06)
+- common.xml ( 26135 bytes, from 2017-10-05 21:20:32)
+- state_hi.xml ( 27733 bytes, from 2017-10-05 21:20:32)
+- copyright.xml ( 1597 bytes, from 2016-11-13 13:46:17)
+- state_2d.xml ( 51552 bytes, from 2016-11-13 13:46:17)
+- state_3d.xml ( 80819 bytes, from 2017-10-05 21:20:32)
+- state_vg.xml ( 5975 bytes, from 2016-11-13 13:46:17)
Copyright (C) 2012-2017 by the following authors:
- Wladimir J. van der Laan
@@ -192,17 +192,40 @@
#define VIVS_FE_VERTEX_STREAMS_CONTROL(i0) (0x000006a0 + 0x4*(i0))
-#define VIVS_FE_UNK00700(i0) (0x00000700 + 0x4*(i0))
-#define VIVS_FE_UNK00700__ESIZE 0x00000004
-#define VIVS_FE_UNK00700__LEN 0x00000010
-
-#define VIVS_FE_UNK00740(i0) (0x00000740 + 0x4*(i0))
-#define VIVS_FE_UNK00740__ESIZE 0x00000004
-#define VIVS_FE_UNK00740__LEN 0x00000010
-
-#define VIVS_FE_UNK00780(i0) (0x00000780 + 0x4*(i0))
-#define VIVS_FE_UNK00780__ESIZE 0x00000004
-#define VIVS_FE_UNK00780__LEN 0x00000010
+#define VIVS_FE_GENERIC_ATTRIB(i0) (0x00000000 + 0x4*(i0))
+#define VIVS_FE_GENERIC_ATTRIB__ESIZE 0x00000004
+#define VIVS_FE_GENERIC_ATTRIB__LEN 0x00000010
+
+#define VIVS_FE_GENERIC_ATTRIB_UNK006C0(i0) (0x000006c0 + 0x4*(i0))
+
+#define VIVS_FE_GENERIC_ATTRIB_UNK00700(i0) (0x00000700 + 0x4*(i0))
+
+#define VIVS_FE_GENERIC_ATTRIB_UNK00740(i0) (0x00000740 + 0x4*(i0))
+
+#define VIVS_FE_GENERIC_ATTRIB_UNK00780(i0) (0x00000780 + 0x4*(i0))
+
+#define VIVS_FE_HALTI5_UNK007C4 0x000007c4
+
+#define VIVS_FE_HALTI5_UNK007D0(i0) (0x000007d0 + 0x4*(i0))
+#define VIVS_FE_HALTI5_UNK007D0__ESIZE 0x00000004
+#define VIVS_FE_HALTI5_UNK007D0__LEN 0x00000002
+
+#define VIVS_FE_HALTI5_UNK007D8 0x000007d8
+
+#define VIVS_FE_DESC_START 0x000007dc
+
+#define VIVS_FE_DESC_END 0x000007e0
+
+#define VIVS_FE_DESC_AVAIL 0x000007e4
+#define VIVS_FE_DESC_AVAIL_COUNT__MASK 0x0000007f
+#define VIVS_FE_DESC_AVAIL_COUNT__SHIFT 0
+#define VIVS_FE_DESC_AVAIL_COUNT(x) (((x) << VIVS_FE_DESC_AVAIL_COUNT__SHIFT) & VIVS_FE_DESC_AVAIL_COUNT__MASK)
+
+#define VIVS_FE_FENCE_WAIT_DATA_LOW 0x000007e8
+
+#define VIVS_FE_FENCE_WAIT_DATA_HIGH 0x000007f4
+
+#define VIVS_FE_ROBUSTNESS_UNK007F8 0x000007f8
#define VIVS_GL 0x00000000
@@ -228,6 +251,9 @@
#define VIVS_GL_SEMAPHORE_TOKEN_TO__MASK 0x00001f00
#define VIVS_GL_SEMAPHORE_TOKEN_TO__SHIFT 8
#define VIVS_GL_SEMAPHORE_TOKEN_TO(x) (((x) << VIVS_GL_SEMAPHORE_TOKEN_TO__SHIFT) & VIVS_GL_SEMAPHORE_TOKEN_TO__MASK)
+#define VIVS_GL_SEMAPHORE_TOKEN_UNK28__MASK 0x30000000
+#define VIVS_GL_SEMAPHORE_TOKEN_UNK28__SHIFT 28
+#define VIVS_GL_SEMAPHORE_TOKEN_UNK28(x) (((x) << VIVS_GL_SEMAPHORE_TOKEN_UNK28__SHIFT) & VIVS_GL_SEMAPHORE_TOKEN_UNK28__MASK)
#define VIVS_GL_FLUSH_CACHE 0x0000380c
#define VIVS_GL_FLUSH_CACHE_DEPTH 0x00000001
@@ -237,6 +263,10 @@
#define VIVS_GL_FLUSH_CACHE_TEXTUREVS 0x00000010
#define VIVS_GL_FLUSH_CACHE_SHADER_L1 0x00000020
#define VIVS_GL_FLUSH_CACHE_SHADER_L2 0x00000040
+#define VIVS_GL_FLUSH_CACHE_UNK10 0x00000400
+#define VIVS_GL_FLUSH_CACHE_UNK11 0x00000800
+#define VIVS_GL_FLUSH_CACHE_DESCRIPTOR_UNK12 0x00001000
+#define VIVS_GL_FLUSH_CACHE_DESCRIPTOR_UNK13 0x00002000
#define VIVS_GL_FLUSH_MMU 0x00003810
#define VIVS_GL_FLUSH_MMU_FLUSH_FEMMU 0x00000001
@@ -298,6 +328,8 @@
#define VIVS_GL_VARYING_NUM_COMPONENTS_VAR7__SHIFT 28
#define VIVS_GL_VARYING_NUM_COMPONENTS_VAR7(x) (((x) << VIVS_GL_VARYING_NUM_COMPONENTS_VAR7__SHIFT) & VIVS_GL_VARYING_NUM_COMPONENTS_VAR7__MASK)
+#define VIVS_GL_OCCLUSION_QUERY_ADDR 0x00003824
+
#define VIVS_GL_VARYING_COMPONENT_USE(i0) (0x00003828 + 0x4*(i0))
#define VIVS_GL_VARYING_COMPONENT_USE__ESIZE 0x00000004
#define VIVS_GL_VARYING_COMPONENT_USE__LEN 0x00000002
@@ -350,6 +382,10 @@
#define VIVS_GL_VARYING_COMPONENT_USE_COMP15__SHIFT 30
#define VIVS_GL_VARYING_COMPONENT_USE_COMP15(x) (((x) << VIVS_GL_VARYING_COMPONENT_USE_COMP15__SHIFT) & VIVS_GL_VARYING_COMPONENT_USE_COMP15__MASK)
+#define VIVS_GL_UNK0382C 0x0000382c
+
+#define VIVS_GL_OCCLUSION_QUERY_CONTROL 0x00003830
+
#define VIVS_GL_UNK03834 0x00003834
#define VIVS_GL_UNK03838 0x00003838
@@ -363,8 +399,44 @@
#define VIVS_GL_UNK03854 0x00003854
+#define VIVS_GL_BUG_FIXES 0x00003860
+
+#define VIVS_GL_FENCE_OUT_ADDRESS 0x00003868
+
+#define VIVS_GL_FENCE_OUT_DATA_LOW 0x0000386c
+
+#define VIVS_GL_HALTI5_UNK03884 0x00003884
+
+#define VIVS_GL_HALTI5_UNK03888 0x00003888
+
+#define VIVS_GL_GS_UNK0388C 0x0000388c
+
+#define VIVS_GL_FENCE_OUT_DATA_HIGH 0x00003898
+
+#define VIVS_GL_SHADER_INDEX 0x0000389c
+
+#define VIVS_GL_GS_UNK038A0(i0) (0x000038a0 + 0x4*(i0))
+#define VIVS_GL_GS_UNK038A0__ESIZE 0x00000004
+#define VIVS_GL_GS_UNK038A0__LEN 0x00000008
+
+#define VIVS_GL_HALTI5_UNK038C0(i0) (0x000038c0 + 0x4*(i0))
+#define VIVS_GL_HALTI5_UNK038C0__ESIZE 0x00000004
+#define VIVS_GL_HALTI5_UNK038C0__LEN 0x00000010
+
+#define VIVS_GL_SECURITY_UNK3900 0x00003900
+
+#define VIVS_GL_SECURITY_UNK3904 0x00003904
+
#define VIVS_GL_UNK03A00 0x00003a00
+#define VIVS_GL_UNK03A04 0x00003a04
+
+#define VIVS_GL_UNK03A08 0x00003a08
+
+#define VIVS_GL_UNK03A0C 0x00003a0c
+
+#define VIVS_GL_UNK03A10 0x00003a10
+
#define VIVS_GL_STALL_TOKEN 0x00003c00
#define VIVS_GL_STALL_TOKEN_FROM__MASK 0x0000001f
#define VIVS_GL_STALL_TOKEN_FROM__SHIFT 0
@@ -387,6 +459,99 @@
#define VIVS_NFE_VERTEX_STREAMS_UNK14680(i0) (0x00014680 + 0x4*(i0))
+#define VIVS_NFE_VERTEX_STREAMS_ROBUSTNESS_UNK146C0(i0) (0x000146c0 + 0x4*(i0))
+
+#define VIVS_NFE_HALTI5_UNK17800(i0) (0x00017800 + 0x4*(i0))
+#define VIVS_NFE_HALTI5_UNK17800__ESIZE 0x00000004
+#define VIVS_NFE_HALTI5_UNK17800__LEN 0x00000020
+
+#define VIVS_NFE_HALTI5_UNK17880(i0) (0x00017880 + 0x4*(i0))
+#define VIVS_NFE_HALTI5_UNK17880__ESIZE 0x00000004
+#define VIVS_NFE_HALTI5_UNK17880__LEN 0x00000020
+
+#define VIVS_NFE_HALTI5_UNK17900(i0) (0x00017900 + 0x4*(i0))
+#define VIVS_NFE_HALTI5_UNK17900__ESIZE 0x00000004
+#define VIVS_NFE_HALTI5_UNK17900__LEN 0x00000020
+
+#define VIVS_NFE_HALTI5_UNK17980(i0) (0x00017980 + 0x4*(i0))
+#define VIVS_NFE_HALTI5_UNK17980__ESIZE 0x00000004
+#define VIVS_NFE_HALTI5_UNK17980__LEN 0x00000020
+
+#define VIVS_NFE_HALTI5_UNK17A00(i0) (0x00017a00 + 0x4*(i0))
+#define VIVS_NFE_HALTI5_UNK17A00__ESIZE 0x00000004
+#define VIVS_NFE_HALTI5_UNK17A00__LEN 0x00000020
+
+#define VIVS_NFE_HALTI5_UNK17A80(i0) (0x00017a80 + 0x4*(i0))
+#define VIVS_NFE_HALTI5_UNK17A80__ESIZE 0x00000004
+#define VIVS_NFE_HALTI5_UNK17A80__LEN 0x00000020
+
+#define VIVS_BLT 0x00000000
+
+#define VIVS_BLT_UNK14000 0x00014000
+
+#define VIVS_BLT_UNK14008 0x00014008
+
+#define VIVS_BLT_UNK1400C 0x0001400c
+
+#define VIVS_BLT_UNK14010 0x00014010
+
+#define VIVS_BLT_UNK14014 0x00014014
+
+#define VIVS_BLT_UNK14018 0x00014018
+
+#define VIVS_BLT_UNK14020 0x00014020
+
+#define VIVS_BLT_UNK14024 0x00014024
+
+#define VIVS_BLT_UNK14028 0x00014028
+
+#define VIVS_BLT_UNK1402C 0x0001402c
+
+#define VIVS_BLT_UNK14030 0x00014030
+
+#define VIVS_BLT_UNK14034 0x00014034
+
+#define VIVS_BLT_UNK14038 0x00014038
+
+#define VIVS_BLT_UNK1403C 0x0001403c
+
+#define VIVS_BLT_UNK14040 0x00014040
+
+#define VIVS_BLT_UNK14044 0x00014044
+
+#define VIVS_BLT_UNK14048 0x00014048
+
+#define VIVS_BLT_UNK1404C 0x0001404c
+
+#define VIVS_BLT_UNK14050 0x00014050
+
+#define VIVS_BLT_UNK14054 0x00014054
+
+#define VIVS_BLT_UNK14058 0x00014058
+
+#define VIVS_BLT_UNK1405C 0x0001405c
+
+#define VIVS_BLT_UNK14060 0x00014060
+
+#define VIVS_BLT_UNK14064 0x00014064
+
+#define VIVS_BLT_UNK1409C 0x0001409c
+
+#define VIVS_BLT_UNK140A0 0x000140a0
+
+#define VIVS_BLT_FENCE_OUT_ADDRESS 0x000140a4
+
+#define VIVS_BLT_FENCE_OUT_DATA_LOW 0x000140a8
+
+#define VIVS_BLT_UNK140AC 0x000140ac
+
+#define VIVS_BLT_FENCE_OUT_DATA_HIGH 0x000140b4
+
+#define VIVS_BLT_ENABLE 0x000140b8
+#define VIVS_BLT_ENABLE_ENABLE 0x00000001
+
+#define VIVS_BLT_UNK140BC 0x000140bc
+
#define VIVS_DUMMY 0x00000000
#define VIVS_DUMMY_DUMMY 0x0003fffc
diff -Nru mesa-17.2.4/src/gallium/drivers/etnaviv/Makefile.in mesa-17.3.3/src/gallium/drivers/etnaviv/Makefile.in
--- mesa-17.2.4/src/gallium/drivers/etnaviv/Makefile.in 2017-10-30 14:49:59.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/etnaviv/Makefile.in 2018-01-18 21:30:39.000000000 +0000
@@ -125,7 +125,8 @@
noinst_PROGRAMS = etnaviv_compiler$(EXEEXT)
subdir = src/gallium/drivers/etnaviv
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_gnu_make.m4 \
+am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_compile_flag.m4 \
+ $(top_srcdir)/m4/ax_check_gnu_make.m4 \
$(top_srcdir)/m4/ax_check_python_mako_module.m4 \
$(top_srcdir)/m4/ax_gcc_builtin.m4 \
$(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
@@ -146,11 +147,11 @@
am__objects_1 = etnaviv_asm.lo etnaviv_blend.lo etnaviv_clear_blit.lo \
etnaviv_compiler.lo etnaviv_context.lo etnaviv_disasm.lo \
etnaviv_emit.lo etnaviv_fence.lo etnaviv_format.lo \
- etnaviv_query.lo etnaviv_query_sw.lo etnaviv_rasterizer.lo \
- etnaviv_resource.lo etnaviv_rs.lo etnaviv_screen.lo \
- etnaviv_shader.lo etnaviv_state.lo etnaviv_surface.lo \
- etnaviv_texture.lo etnaviv_tiling.lo etnaviv_transfer.lo \
- etnaviv_uniforms.lo etnaviv_zsa.lo
+ etnaviv_query.lo etnaviv_query_hw.lo etnaviv_query_sw.lo \
+ etnaviv_rasterizer.lo etnaviv_resource.lo etnaviv_rs.lo \
+ etnaviv_screen.lo etnaviv_shader.lo etnaviv_state.lo \
+ etnaviv_surface.lo etnaviv_texture.lo etnaviv_tiling.lo \
+ etnaviv_transfer.lo etnaviv_uniforms.lo etnaviv_zsa.lo
am_libetnaviv_la_OBJECTS = $(am__objects_1)
libetnaviv_la_OBJECTS = $(am_libetnaviv_la_OBJECTS)
AM_V_lt = $(am__v_lt_@AM_V@)
@@ -371,9 +372,9 @@
NVVIEUX_LIBS = @NVVIEUX_LIBS@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
-OMX_CFLAGS = @OMX_CFLAGS@
-OMX_LIBS = @OMX_LIBS@
-OMX_LIB_INSTALL_DIR = @OMX_LIB_INSTALL_DIR@
+OMX_BELLAGIO_CFLAGS = @OMX_BELLAGIO_CFLAGS@
+OMX_BELLAGIO_LIBS = @OMX_BELLAGIO_LIBS@
+OMX_BELLAGIO_LIB_INSTALL_DIR = @OMX_BELLAGIO_LIB_INSTALL_DIR@
OPENCL_LIBNAME = @OPENCL_LIBNAME@
OPENCL_VERSION = @OPENCL_VERSION@
OSMESA_LIB = @OSMESA_LIB@
@@ -427,6 +428,8 @@
VA_LIB_INSTALL_DIR = @VA_LIB_INSTALL_DIR@
VA_MAJOR = @VA_MAJOR@
VA_MINOR = @VA_MINOR@
+VC5_SIMULATOR_CFLAGS = @VC5_SIMULATOR_CFLAGS@
+VC5_SIMULATOR_LIBS = @VC5_SIMULATOR_LIBS@
VDPAU_CFLAGS = @VDPAU_CFLAGS@
VDPAU_LIBS = @VDPAU_LIBS@
VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@
@@ -438,12 +441,15 @@
VL_CFLAGS = @VL_CFLAGS@
VL_LIBS = @VL_LIBS@
VULKAN_ICD_INSTALL_DIR = @VULKAN_ICD_INSTALL_DIR@
-WAYLAND_CFLAGS = @WAYLAND_CFLAGS@
-WAYLAND_LIBS = @WAYLAND_LIBS@
+WAYLAND_CLIENT_CFLAGS = @WAYLAND_CLIENT_CFLAGS@
+WAYLAND_CLIENT_LIBS = @WAYLAND_CLIENT_LIBS@
WAYLAND_PROTOCOLS_DATADIR = @WAYLAND_PROTOCOLS_DATADIR@
WAYLAND_SCANNER = @WAYLAND_SCANNER@
WAYLAND_SCANNER_CFLAGS = @WAYLAND_SCANNER_CFLAGS@
WAYLAND_SCANNER_LIBS = @WAYLAND_SCANNER_LIBS@
+WAYLAND_SERVER_CFLAGS = @WAYLAND_SERVER_CFLAGS@
+WAYLAND_SERVER_LIBS = @WAYLAND_SERVER_LIBS@
+WNO_OVERRIDE_INIT = @WNO_OVERRIDE_INIT@
X11_INCLUDES = @X11_INCLUDES@
XA_MAJOR = @XA_MAJOR@
XA_MINOR = @XA_MINOR@
@@ -556,6 +562,8 @@
etnaviv_internal.h \
etnaviv_query.c \
etnaviv_query.h \
+ etnaviv_query_hw.c \
+ etnaviv_query_hw.h \
etnaviv_query_sw.c \
etnaviv_query_sw.h \
etnaviv_rasterizer.c \
@@ -627,6 +635,8 @@
-I$(top_srcdir)/src/gallium/auxiliary \
-I$(top_srcdir)/src/gallium/drivers \
-I$(top_srcdir)/src/gallium/winsys \
+ -I$(top_builddir)/src/util/ \
+ -I$(top_builddir)/src/gallium/drivers/ \
$(DEFINES) \
$(PTHREAD_CFLAGS) \
$(LIBDRM_CFLAGS) \
@@ -741,6 +751,7 @@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/etnaviv_fence.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/etnaviv_format.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/etnaviv_query.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/etnaviv_query_hw.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/etnaviv_query_sw.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/etnaviv_rasterizer.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/etnaviv_resource.Plo@am__quote@
diff -Nru mesa-17.2.4/src/gallium/drivers/etnaviv/Makefile.sources mesa-17.3.3/src/gallium/drivers/etnaviv/Makefile.sources
--- mesa-17.2.4/src/gallium/drivers/etnaviv/Makefile.sources 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/etnaviv/Makefile.sources 2018-01-18 21:30:28.000000000 +0000
@@ -27,6 +27,8 @@
etnaviv_internal.h \
etnaviv_query.c \
etnaviv_query.h \
+ etnaviv_query_hw.c \
+ etnaviv_query_hw.h \
etnaviv_query_sw.c \
etnaviv_query_sw.h \
etnaviv_rasterizer.c \
diff -Nru mesa-17.2.4/src/gallium/drivers/freedreno/a2xx/fd2_blend.c mesa-17.3.3/src/gallium/drivers/freedreno/a2xx/fd2_blend.c
--- mesa-17.2.4/src/gallium/drivers/freedreno/a2xx/fd2_blend.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/freedreno/a2xx/fd2_blend.c 2018-01-18 21:30:28.000000000 +0000
@@ -27,6 +27,7 @@
*/
#include "pipe/p_state.h"
+#include "util/u_blend.h"
#include "util/u_string.h"
#include "util/u_memory.h"
@@ -61,11 +62,10 @@
{
const struct pipe_rt_blend_state *rt = &cso->rt[0];
struct fd2_blend_stateobj *so;
+ unsigned rop = PIPE_LOGICOP_COPY;
- if (cso->logicop_enable) {
- DBG("Unsupported! logicop");
- return NULL;
- }
+ if (cso->logicop_enable)
+ rop = cso->logicop_func; /* 1:1 mapping with hw */
if (cso->independent_blend_enable) {
DBG("Unsupported! independent blend state");
@@ -78,16 +78,23 @@
so->base = *cso;
- so->rb_colorcontrol = A2XX_RB_COLORCONTROL_ROP_CODE(12);
+ so->rb_colorcontrol = A2XX_RB_COLORCONTROL_ROP_CODE(rop);
- so->rb_blendcontrol =
+ so->rb_blendcontrol_rgb =
A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND(fd_blend_factor(rt->rgb_src_factor)) |
A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN(blend_func(rt->rgb_func)) |
- A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND(fd_blend_factor(rt->rgb_dst_factor)) |
+ A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND(fd_blend_factor(rt->rgb_dst_factor));
+
+ so->rb_blendcontrol_alpha =
A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND(fd_blend_factor(rt->alpha_src_factor)) |
A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN(blend_func(rt->alpha_func)) |
A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND(fd_blend_factor(rt->alpha_dst_factor));
+ so->rb_blendcontrol_no_alpha_rgb =
+ A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND(fd_blend_factor(util_blend_dst_alpha_to_one(rt->rgb_src_factor))) |
+ A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN(blend_func(rt->rgb_func)) |
+ A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND(fd_blend_factor(util_blend_dst_alpha_to_one(rt->rgb_dst_factor)));
+
if (rt->colormask & PIPE_MASK_R)
so->rb_colormask |= A2XX_RB_COLOR_MASK_WRITE_RED;
if (rt->colormask & PIPE_MASK_G)
diff -Nru mesa-17.2.4/src/gallium/drivers/freedreno/a2xx/fd2_blend.h mesa-17.3.3/src/gallium/drivers/freedreno/a2xx/fd2_blend.h
--- mesa-17.2.4/src/gallium/drivers/freedreno/a2xx/fd2_blend.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/freedreno/a2xx/fd2_blend.h 2018-01-18 21:30:28.000000000 +0000
@@ -34,7 +34,9 @@
struct fd2_blend_stateobj {
struct pipe_blend_state base;
- uint32_t rb_blendcontrol;
+ uint32_t rb_blendcontrol_rgb;
+ uint32_t rb_blendcontrol_alpha;
+ uint32_t rb_blendcontrol_no_alpha_rgb;
uint32_t rb_colorcontrol; /* must be OR'd w/ zsa->rb_colorcontrol */
uint32_t rb_colormask;
};
diff -Nru mesa-17.2.4/src/gallium/drivers/freedreno/a2xx/fd2_emit.c mesa-17.3.3/src/gallium/drivers/freedreno/a2xx/fd2_emit.c
--- mesa-17.2.4/src/gallium/drivers/freedreno/a2xx/fd2_emit.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/freedreno/a2xx/fd2_emit.c 2018-01-18 21:30:28.000000000 +0000
@@ -217,7 +217,7 @@
OUT_RING(ring, zsa->rb_alpha_ref);
}
- if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_FRAMEBUFFER)) {
+ if (ctx->rasterizer && dirty & FD_DIRTY_RASTERIZER) {
struct fd2_rasterizer_stateobj *rasterizer =
fd2_rasterizer_stateobj(ctx->rasterizer);
OUT_PKT3(ring, CP_SET_CONSTANT, 3);
@@ -299,16 +299,31 @@
OUT_RING(ring, zsa->rb_colorcontrol | blend->rb_colorcontrol);
}
- if (dirty & FD_DIRTY_BLEND) {
+ if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER)) {
+ enum pipe_format format =
+ pipe_surface_format(ctx->batch->framebuffer.cbufs[0]);
+ bool has_alpha = util_format_has_alpha(format);
+
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_CONTROL));
- OUT_RING(ring, blend->rb_blendcontrol);
+ OUT_RING(ring, blend->rb_blendcontrol_alpha |
+ COND(has_alpha, blend->rb_blendcontrol_rgb) |
+ COND(!has_alpha, blend->rb_blendcontrol_no_alpha_rgb));
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_MASK));
OUT_RING(ring, blend->rb_colormask);
}
+ if (dirty & FD_DIRTY_BLEND_COLOR) {
+ OUT_PKT3(ring, CP_SET_CONSTANT, 5);
+ OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_RED));
+ OUT_RING(ring, float_to_ubyte(ctx->blend_color.color[0]));
+ OUT_RING(ring, float_to_ubyte(ctx->blend_color.color[1]));
+ OUT_RING(ring, float_to_ubyte(ctx->blend_color.color[2]));
+ OUT_RING(ring, float_to_ubyte(ctx->blend_color.color[3]));
+ }
+
if (dirty & (FD_DIRTY_TEX | FD_DIRTY_PROG))
emit_textures(ring, ctx);
}
diff -Nru mesa-17.2.4/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c mesa-17.3.3/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c
--- mesa-17.2.4/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c 2018-01-18 21:30:28.000000000 +0000
@@ -46,6 +46,12 @@
{
switch (format) {
case PIPE_FORMAT_B8G8R8A8_UNORM:
+ case PIPE_FORMAT_B8G8R8X8_UNORM:
+ case PIPE_FORMAT_B5G6R5_UNORM:
+ case PIPE_FORMAT_B5G5R5A1_UNORM:
+ case PIPE_FORMAT_B5G5R5X1_UNORM:
+ case PIPE_FORMAT_B4G4R4A4_UNORM:
+ case PIPE_FORMAT_B4G4R4X4_UNORM:
/* TODO probably some more.. */
return 1;
default:
diff -Nru mesa-17.2.4/src/gallium/drivers/freedreno/a2xx/fd2_screen.c mesa-17.3.3/src/gallium/drivers/freedreno/a2xx/fd2_screen.c
--- mesa-17.2.4/src/gallium/drivers/freedreno/a2xx/fd2_screen.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/freedreno/a2xx/fd2_screen.c 2018-01-18 21:30:28.000000000 +0000
@@ -52,8 +52,15 @@
/* TODO figure out how to render to other formats.. */
if ((usage & PIPE_BIND_RENDER_TARGET) &&
- ((format != PIPE_FORMAT_B8G8R8A8_UNORM) &&
- (format != PIPE_FORMAT_B8G8R8X8_UNORM))) {
+ ((format != PIPE_FORMAT_B5G6R5_UNORM) &&
+ (format != PIPE_FORMAT_B5G5R5A1_UNORM) &&
+ (format != PIPE_FORMAT_B5G5R5X1_UNORM) &&
+ (format != PIPE_FORMAT_B4G4R4A4_UNORM) &&
+ (format != PIPE_FORMAT_B4G4R4X4_UNORM) &&
+ (format != PIPE_FORMAT_B8G8R8A8_UNORM) &&
+ (format != PIPE_FORMAT_B8G8R8X8_UNORM) &&
+ (format != PIPE_FORMAT_R8G8B8A8_UNORM) &&
+ (format != PIPE_FORMAT_R8G8B8X8_UNORM))) {
DBG("not supported render target: format=%s, target=%d, sample_count=%d, usage=%x",
util_format_name(format), target, sample_count, usage);
return FALSE;
diff -Nru mesa-17.2.4/src/gallium/drivers/freedreno/a3xx/fd3_query.c mesa-17.3.3/src/gallium/drivers/freedreno/a3xx/fd3_query.c
--- mesa-17.2.4/src/gallium/drivers/freedreno/a3xx/fd3_query.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/freedreno/a3xx/fd3_query.c 2018-01-18 21:30:28.000000000 +0000
@@ -131,6 +131,13 @@
.accumulate_result = occlusion_predicate_accumulate_result,
};
+static const struct fd_hw_sample_provider occlusion_predicate_conservative = {
+ .query_type = PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE,
+ .active = FD_STAGE_DRAW,
+ .get_sample = occlusion_get_sample,
+ .accumulate_result = occlusion_predicate_accumulate_result,
+};
+
void fd3_query_context_init(struct pipe_context *pctx)
{
struct fd_context *ctx = fd_context(pctx);
@@ -142,4 +149,5 @@
fd_hw_query_register_provider(pctx, &occlusion_counter);
fd_hw_query_register_provider(pctx, &occlusion_predicate);
+ fd_hw_query_register_provider(pctx, &occlusion_predicate_conservative);
}
diff -Nru mesa-17.2.4/src/gallium/drivers/freedreno/a4xx/fd4_query.c mesa-17.3.3/src/gallium/drivers/freedreno/a4xx/fd4_query.c
--- mesa-17.2.4/src/gallium/drivers/freedreno/a4xx/fd4_query.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/freedreno/a4xx/fd4_query.c 2018-01-18 21:30:28.000000000 +0000
@@ -251,6 +251,13 @@
.accumulate_result = occlusion_predicate_accumulate_result,
};
+static const struct fd_hw_sample_provider occlusion_predicate_conservative = {
+ .query_type = PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE,
+ .active = FD_STAGE_DRAW,
+ .get_sample = occlusion_get_sample,
+ .accumulate_result = occlusion_predicate_accumulate_result,
+};
+
static const struct fd_hw_sample_provider time_elapsed = {
.query_type = PIPE_QUERY_TIME_ELAPSED,
.active = FD_STAGE_DRAW | FD_STAGE_CLEAR,
@@ -284,6 +291,7 @@
fd_hw_query_register_provider(pctx, &occlusion_counter);
fd_hw_query_register_provider(pctx, &occlusion_predicate);
+ fd_hw_query_register_provider(pctx, &occlusion_predicate_conservative);
fd_hw_query_register_provider(pctx, &time_elapsed);
fd_hw_query_register_provider(pctx, &timestamp);
}
diff -Nru mesa-17.2.4/src/gallium/drivers/freedreno/a5xx/fd5_query.c mesa-17.3.3/src/gallium/drivers/freedreno/a5xx/fd5_query.c
--- mesa-17.2.4/src/gallium/drivers/freedreno/a5xx/fd5_query.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/freedreno/a5xx/fd5_query.c 2018-01-18 21:30:28.000000000 +0000
@@ -144,6 +144,15 @@
.result = occlusion_predicate_result,
};
+static const struct fd_acc_sample_provider occlusion_predicate_conservative = {
+ .query_type = PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE,
+ .active = FD_STAGE_DRAW,
+ .size = sizeof(struct fd5_query_sample),
+ .resume = occlusion_resume,
+ .pause = occlusion_pause,
+ .result = occlusion_predicate_result,
+};
+
/*
* Timestamp Queries:
*/
@@ -247,6 +256,7 @@
fd_acc_query_register_provider(pctx, &occlusion_counter);
fd_acc_query_register_provider(pctx, &occlusion_predicate);
+ fd_acc_query_register_provider(pctx, &occlusion_predicate_conservative);
fd_acc_query_register_provider(pctx, &time_elapsed);
fd_acc_query_register_provider(pctx, &timestamp);
diff -Nru mesa-17.2.4/src/gallium/drivers/freedreno/freedreno_batch.h mesa-17.3.3/src/gallium/drivers/freedreno/freedreno_batch.h
--- mesa-17.2.4/src/gallium/drivers/freedreno/freedreno_batch.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/freedreno/freedreno_batch.h 2018-01-18 21:30:28.000000000 +0000
@@ -55,7 +55,7 @@
FD_STAGE_ALL = 0xff,
};
-#define MAX_HW_SAMPLE_PROVIDERS 4
+#define MAX_HW_SAMPLE_PROVIDERS 5
struct fd_hw_sample_provider;
struct fd_hw_sample;
diff -Nru mesa-17.2.4/src/gallium/drivers/freedreno/freedreno_query.h mesa-17.3.3/src/gallium/drivers/freedreno/freedreno_query.h
--- mesa-17.2.4/src/gallium/drivers/freedreno/freedreno_query.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/freedreno/freedreno_query.h 2018-01-18 21:30:28.000000000 +0000
@@ -86,13 +86,15 @@
return 0;
case PIPE_QUERY_OCCLUSION_PREDICATE:
return 1;
+ case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
+ return 2;
/* TODO currently queries only emitted in main pass (not in binning pass)..
* which is fine for occlusion query, but pretty much not anything else.
*/
case PIPE_QUERY_TIME_ELAPSED:
- return 2;
- case PIPE_QUERY_TIMESTAMP:
return 3;
+ case PIPE_QUERY_TIMESTAMP:
+ return 4;
default:
return -1;
}
diff -Nru mesa-17.2.4/src/gallium/drivers/freedreno/freedreno_screen.c mesa-17.3.3/src/gallium/drivers/freedreno/freedreno_screen.c
--- mesa-17.2.4/src/gallium/drivers/freedreno/freedreno_screen.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/freedreno/freedreno_screen.c 2018-01-18 21:30:28.000000000 +0000
@@ -321,6 +321,12 @@
case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
case PIPE_CAP_POST_DEPTH_COVERAGE:
case PIPE_CAP_BINDLESS_TEXTURE:
+ case PIPE_CAP_NIR_SAMPLERS_AS_DEREF:
+ case PIPE_CAP_QUERY_SO_OVERFLOW:
+ case PIPE_CAP_MEMOBJ:
+ case PIPE_CAP_LOAD_CONSTBUF:
+ case PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS:
+ case PIPE_CAP_TILE_RASTER_ORDER:
return 0;
case PIPE_CAP_MAX_VIEWPORTS:
@@ -514,6 +520,7 @@
case PIPE_SHADER_CAP_SUBROUTINES:
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
return 0;
@@ -523,6 +530,10 @@
if (glsl120)
return 0;
return is_ir3(screen) ? 1 : 0;
+ case PIPE_SHADER_CAP_INT64_ATOMICS:
+ return 0;
+ case PIPE_SHADER_CAP_FP16:
+ return 0;
case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
return 16;
@@ -875,8 +886,6 @@
slab_create_parent(&screen->transfer_pool, sizeof(struct fd_transfer), 16);
- util_format_s3tc_init();
-
return pscreen;
fail:
diff -Nru mesa-17.2.4/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c mesa-17.3.3/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
--- mesa-17.2.4/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c 2018-01-18 21:30:28.000000000 +0000
@@ -46,6 +46,7 @@
#include "compiler/glsl/standalone.h"
#include "compiler/glsl/glsl_to_nir.h"
+#include "compiler/nir_types.h"
static void dump_info(struct ir3_shader_variant *so, const char *str)
{
@@ -57,8 +58,6 @@
free(bin);
}
-int st_glsl_type_size(const struct glsl_type *type);
-
static void
insert_sorted(struct exec_list *var_list, nir_variable *new_var)
{
@@ -131,7 +130,7 @@
case MESA_SHADER_VERTEX:
nir_assign_var_locations(&nir->inputs,
&nir->num_inputs,
- st_glsl_type_size);
+ ir3_glsl_type_size);
/* Re-lower global vars, to deal with any dead VS inputs. */
NIR_PASS_V(nir, nir_lower_global_vars_to_local);
@@ -139,18 +138,18 @@
sort_varyings(&nir->outputs);
nir_assign_var_locations(&nir->outputs,
&nir->num_outputs,
- st_glsl_type_size);
+ ir3_glsl_type_size);
fixup_varying_slots(&nir->outputs);
break;
case MESA_SHADER_FRAGMENT:
sort_varyings(&nir->inputs);
nir_assign_var_locations(&nir->inputs,
&nir->num_inputs,
- st_glsl_type_size);
+ ir3_glsl_type_size);
fixup_varying_slots(&nir->inputs);
nir_assign_var_locations(&nir->outputs,
&nir->num_outputs,
- st_glsl_type_size);
+ ir3_glsl_type_size);
break;
default:
errx(1, "unhandled shader stage: %d", stage);
@@ -158,10 +157,10 @@
nir_assign_var_locations(&nir->uniforms,
&nir->num_uniforms,
- st_glsl_type_size);
+ ir3_glsl_type_size);
NIR_PASS_V(nir, nir_lower_system_values);
- NIR_PASS_V(nir, nir_lower_io, nir_var_all, st_glsl_type_size, 0);
+ NIR_PASS_V(nir, nir_lower_io, nir_var_all, ir3_glsl_type_size, 0);
NIR_PASS_V(nir, nir_lower_samplers, prog);
return nir;
@@ -400,7 +399,7 @@
v.key = key;
v.shader = &s;
- switch (nir->stage) {
+ switch (nir->info.stage) {
case MESA_SHADER_FRAGMENT:
s.type = v.type = SHADER_FRAGMENT;
break;
@@ -411,7 +410,7 @@
s.type = v.type = SHADER_COMPUTE;
break;
default:
- errx(1, "unhandled shader stage: %d", nir->stage);
+ errx(1, "unhandled shader stage: %d", nir->info.stage);
}
info = "NIR compiler";
diff -Nru mesa-17.2.4/src/gallium/drivers/freedreno/ir3/ir3_nir.c mesa-17.3.3/src/gallium/drivers/freedreno/ir3/ir3_nir.c
--- mesa-17.2.4/src/gallium/drivers/freedreno/ir3/ir3_nir.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/freedreno/ir3/ir3_nir.c 2018-01-18 21:30:28.000000000 +0000
@@ -167,11 +167,11 @@
OPT_V(s, nir_lower_regs_to_ssa);
if (key) {
- if (s->stage == MESA_SHADER_VERTEX) {
+ if (s->info.stage == MESA_SHADER_VERTEX) {
OPT_V(s, nir_lower_clip_vs, key->ucp_enables);
if (key->vclamp_color)
OPT_V(s, nir_lower_clamp_color_outputs);
- } else if (s->stage == MESA_SHADER_FRAGMENT) {
+ } else if (s->info.stage == MESA_SHADER_FRAGMENT) {
OPT_V(s, nir_lower_clip_fs, key->ucp_enables);
if (key->fclamp_color)
OPT_V(s, nir_lower_clamp_color_outputs);
diff -Nru mesa-17.2.4/src/gallium/drivers/freedreno/ir3/ir3_shader.c mesa-17.3.3/src/gallium/drivers/freedreno/ir3/ir3_shader.c
--- mesa-17.2.4/src/gallium/drivers/freedreno/ir3/ir3_shader.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/freedreno/ir3/ir3_shader.c 2018-01-18 21:30:28.000000000 +0000
@@ -41,6 +41,12 @@
#include "ir3_compiler.h"
#include "ir3_nir.h"
+int
+ir3_glsl_type_size(const struct glsl_type *type)
+{
+ return glsl_count_attribute_slots(type, false);
+}
+
static void
delete_variant(struct ir3_shader_variant *v)
{
@@ -290,6 +296,9 @@
if (cso->type == PIPE_SHADER_IR_NIR) {
/* we take ownership of the reference: */
nir = cso->ir.nir;
+
+ NIR_PASS_V(nir, nir_lower_io, nir_var_all, ir3_glsl_type_size,
+ (nir_lower_io_options)0);
} else {
debug_assert(cso->type == PIPE_SHADER_IR_TGSI);
if (fd_mesa_debug & FD_DBG_DISASM) {
@@ -336,6 +345,9 @@
if (cso->ir_type == PIPE_SHADER_IR_NIR) {
/* we take ownership of the reference: */
nir = (nir_shader *)cso->prog;
+
+ NIR_PASS_V(nir, nir_lower_io, nir_var_all, ir3_glsl_type_size,
+ (nir_lower_io_options)0);
} else {
debug_assert(cso->ir_type == PIPE_SHADER_IR_TGSI);
if (fd_mesa_debug & FD_DBG_DISASM) {
diff -Nru mesa-17.2.4/src/gallium/drivers/freedreno/ir3/ir3_shader.h mesa-17.3.3/src/gallium/drivers/freedreno/ir3/ir3_shader.h
--- mesa-17.2.4/src/gallium/drivers/freedreno/ir3/ir3_shader.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/freedreno/ir3/ir3_shader.h 2018-01-18 21:30:28.000000000 +0000
@@ -36,6 +36,8 @@
#include "ir3.h"
#include "disasm.h"
+struct glsl_type;
+
/* driver param indices: */
enum ir3_driver_param {
/* compute shader driver params: */
@@ -339,6 +341,9 @@
void ir3_emit_cs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
struct fd_context *ctx, const struct pipe_grid_info *info);
+int
+ir3_glsl_type_size(const struct glsl_type *type);
+
static inline const char *
ir3_shader_stage(struct ir3_shader *shader)
{
diff -Nru mesa-17.2.4/src/gallium/drivers/freedreno/Makefile.in mesa-17.3.3/src/gallium/drivers/freedreno/Makefile.in
--- mesa-17.2.4/src/gallium/drivers/freedreno/Makefile.in 2017-10-30 14:49:59.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/freedreno/Makefile.in 2018-01-18 21:30:39.000000000 +0000
@@ -103,7 +103,8 @@
noinst_PROGRAMS = ir3_compiler$(EXEEXT)
subdir = src/gallium/drivers/freedreno
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_gnu_make.m4 \
+am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_compile_flag.m4 \
+ $(top_srcdir)/m4/ax_check_gnu_make.m4 \
$(top_srcdir)/m4/ax_check_python_mako_module.m4 \
$(top_srcdir)/m4/ax_gcc_builtin.m4 \
$(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
@@ -397,9 +398,9 @@
NVVIEUX_LIBS = @NVVIEUX_LIBS@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
-OMX_CFLAGS = @OMX_CFLAGS@
-OMX_LIBS = @OMX_LIBS@
-OMX_LIB_INSTALL_DIR = @OMX_LIB_INSTALL_DIR@
+OMX_BELLAGIO_CFLAGS = @OMX_BELLAGIO_CFLAGS@
+OMX_BELLAGIO_LIBS = @OMX_BELLAGIO_LIBS@
+OMX_BELLAGIO_LIB_INSTALL_DIR = @OMX_BELLAGIO_LIB_INSTALL_DIR@
OPENCL_LIBNAME = @OPENCL_LIBNAME@
OPENCL_VERSION = @OPENCL_VERSION@
OSMESA_LIB = @OSMESA_LIB@
@@ -453,6 +454,8 @@
VA_LIB_INSTALL_DIR = @VA_LIB_INSTALL_DIR@
VA_MAJOR = @VA_MAJOR@
VA_MINOR = @VA_MINOR@
+VC5_SIMULATOR_CFLAGS = @VC5_SIMULATOR_CFLAGS@
+VC5_SIMULATOR_LIBS = @VC5_SIMULATOR_LIBS@
VDPAU_CFLAGS = @VDPAU_CFLAGS@
VDPAU_LIBS = @VDPAU_LIBS@
VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@
@@ -464,12 +467,15 @@
VL_CFLAGS = @VL_CFLAGS@
VL_LIBS = @VL_LIBS@
VULKAN_ICD_INSTALL_DIR = @VULKAN_ICD_INSTALL_DIR@
-WAYLAND_CFLAGS = @WAYLAND_CFLAGS@
-WAYLAND_LIBS = @WAYLAND_LIBS@
+WAYLAND_CLIENT_CFLAGS = @WAYLAND_CLIENT_CFLAGS@
+WAYLAND_CLIENT_LIBS = @WAYLAND_CLIENT_LIBS@
WAYLAND_PROTOCOLS_DATADIR = @WAYLAND_PROTOCOLS_DATADIR@
WAYLAND_SCANNER = @WAYLAND_SCANNER@
WAYLAND_SCANNER_CFLAGS = @WAYLAND_SCANNER_CFLAGS@
WAYLAND_SCANNER_LIBS = @WAYLAND_SCANNER_LIBS@
+WAYLAND_SERVER_CFLAGS = @WAYLAND_SERVER_CFLAGS@
+WAYLAND_SERVER_LIBS = @WAYLAND_SERVER_LIBS@
+WNO_OVERRIDE_INIT = @WNO_OVERRIDE_INIT@
X11_INCLUDES = @X11_INCLUDES@
XA_MAJOR = @XA_MAJOR@
XA_MINOR = @XA_MINOR@
@@ -772,6 +778,8 @@
-I$(top_srcdir)/src/gallium/auxiliary \
-I$(top_srcdir)/src/gallium/drivers \
-I$(top_srcdir)/src/gallium/winsys \
+ -I$(top_builddir)/src/util/ \
+ -I$(top_builddir)/src/gallium/drivers/ \
$(DEFINES) \
$(PTHREAD_CFLAGS) \
$(LIBDRM_CFLAGS) \
diff -Nru mesa-17.2.4/src/gallium/drivers/i915/i915_fpc_optimize.c mesa-17.3.3/src/gallium/drivers/i915/i915_fpc_optimize.c
--- mesa-17.2.4/src/gallium/drivers/i915/i915_fpc_optimize.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/i915/i915_fpc_optimize.c 2018-01-18 21:30:28.000000000 +0000
@@ -85,7 +85,6 @@
[ TGSI_OPCODE_DP2 ] = { false, true, TGSI_SWIZZLE_ONE, 1, 2 },
[ TGSI_OPCODE_DP3 ] = { false, true, TGSI_SWIZZLE_ONE, 1, 2 },
[ TGSI_OPCODE_DP4 ] = { false, true, TGSI_SWIZZLE_ONE, 1, 2 },
- [ TGSI_OPCODE_DPH ] = { false, false, 0, 1, 2 },
[ TGSI_OPCODE_DST ] = { false, false, 0, 1, 2 },
[ TGSI_OPCODE_END ] = { false, false, 0, 0, 0 },
[ TGSI_OPCODE_EX2 ] = { false, false, 0, 1, 1 },
@@ -106,7 +105,6 @@
[ TGSI_OPCODE_RCP ] = { false, false, 0, 1, 1 },
[ TGSI_OPCODE_RET ] = { false, false, 0, 0, 0 },
[ TGSI_OPCODE_RSQ ] = { false, false, 0, 1, 1 },
- [ TGSI_OPCODE_SCS ] = { false, false, 0, 1, 1 },
[ TGSI_OPCODE_SEQ ] = { false, false, 0, 1, 2 },
[ TGSI_OPCODE_SGE ] = { false, false, 0, 1, 2 },
[ TGSI_OPCODE_SGT ] = { false, false, 0, 1, 2 },
@@ -119,7 +117,6 @@
[ TGSI_OPCODE_TRUNC ] = { false, false, 0, 1, 1 },
[ TGSI_OPCODE_TXB ] = { true, false, 0, 1, 2 },
[ TGSI_OPCODE_TXP ] = { true, false, 0, 1, 2 },
- [ TGSI_OPCODE_XPD ] = { false, false, 0, 1, 2 },
};
static boolean op_has_dst(unsigned opcode)
diff -Nru mesa-17.2.4/src/gallium/drivers/i915/i915_fpc_translate.c mesa-17.3.3/src/gallium/drivers/i915/i915_fpc_translate.c
--- mesa-17.2.4/src/gallium/drivers/i915/i915_fpc_translate.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/i915/i915_fpc_translate.c 2018-01-18 21:30:28.000000000 +0000
@@ -77,21 +77,6 @@
0
};
-
-/* 1, -1/3!, 1/5!, -1/7! */
-static const float scs_sin_constants[4] = { 1.0,
- -1.0f / (3 * 2 * 1),
- 1.0f / (5 * 4 * 3 * 2 * 1),
- -1.0f / (7 * 6 * 5 * 4 * 3 * 2 * 1)
-};
-
-/* 1, -1/2!, 1/4!, -1/6! */
-static const float scs_cos_constants[4] = { 1.0,
- -1.0f / (2 * 1),
- 1.0f / (4 * 3 * 2 * 1),
- -1.0f / (6 * 5 * 4 * 3 * 2 * 1)
-};
-
/* 2*pi, -(2*pi)^3/3!, (2*pi)^5/5!, -(2*pi)^7/7! */
static const float sin_constants[4] = { 2.0 * M_PI,
-8.0f * M_PI * M_PI * M_PI / (3 * 2 * 1),
@@ -495,7 +480,6 @@
const struct i915_full_instruction *inst,
struct i915_fragment_shader *fs)
{
- uint writemask;
uint src0, src1, src2, flags;
uint tmp = 0;
@@ -604,17 +588,6 @@
emit_simple_arith(p, inst, A0_DP4, 2, fs);
break;
- case TGSI_OPCODE_DPH:
- src0 = src_vector(p, &inst->Src[0], fs);
- src1 = src_vector(p, &inst->Src[1], fs);
-
- i915_emit_arith(p,
- A0_DP4,
- get_result_vector(p, &inst->Dst[0]),
- get_result_flags(inst), 0,
- swizzle(src0, X, Y, Z, ONE), src1, 0);
- break;
-
case TGSI_OPCODE_DST:
src0 = src_vector(p, &inst->Src[0], fs);
src1 = src_vector(p, &inst->Src[1], fs);
@@ -814,70 +787,6 @@
swizzle(src0, X, X, X, X), 0, 0);
break;
- case TGSI_OPCODE_SCS:
- src0 = src_vector(p, &inst->Src[0], fs);
- tmp = i915_get_utemp(p);
-
- /*
- * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1
- * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
- * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x
- * scs.x = DP4 t1, scs_sin_constants
- * t1 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1
- * scs.y = DP4 t1, scs_cos_constants
- */
- i915_emit_arith(p,
- A0_MUL,
- tmp, A0_DEST_CHANNEL_XY, 0,
- swizzle(src0, X, X, ONE, ONE),
- swizzle(src0, X, ONE, ONE, ONE), 0);
-
- i915_emit_arith(p,
- A0_MUL,
- tmp, A0_DEST_CHANNEL_ALL, 0,
- swizzle(tmp, X, Y, X, Y),
- swizzle(tmp, X, X, ONE, ONE), 0);
-
- writemask = inst->Dst[0].Register.WriteMask;
-
- if (writemask & TGSI_WRITEMASK_Y) {
- uint tmp1;
-
- if (writemask & TGSI_WRITEMASK_X)
- tmp1 = i915_get_utemp(p);
- else
- tmp1 = tmp;
-
- i915_emit_arith(p,
- A0_MUL,
- tmp1, A0_DEST_CHANNEL_ALL, 0,
- swizzle(tmp, X, Y, Y, W),
- swizzle(tmp, X, Z, ONE, ONE), 0);
-
- i915_emit_arith(p,
- A0_DP4,
- get_result_vector(p, &inst->Dst[0]),
- A0_DEST_CHANNEL_Y, 0,
- swizzle(tmp1, W, Z, Y, X),
- i915_emit_const4fv(p, scs_sin_constants), 0);
- }
-
- if (writemask & TGSI_WRITEMASK_X) {
- i915_emit_arith(p,
- A0_MUL,
- tmp, A0_DEST_CHANNEL_XYZ, 0,
- swizzle(tmp, X, X, Z, ONE),
- swizzle(tmp, Z, ONE, ONE, ONE), 0);
-
- i915_emit_arith(p,
- A0_DP4,
- get_result_vector(p, &inst->Dst[0]),
- A0_DEST_CHANNEL_X, 0,
- swizzle(tmp, ONE, Z, Y, X),
- i915_emit_const4fv(p, scs_cos_constants), 0);
- }
- break;
-
case TGSI_OPCODE_SEQ:
/* if we're both >= and <= then we're == */
src0 = src_vector(p, &inst->Src[0], fs);
@@ -1038,32 +947,6 @@
emit_tex(p, inst, T0_TEXLDP, fs);
break;
- case TGSI_OPCODE_XPD:
- /* Cross product:
- * result.x = src0.y * src1.z - src0.z * src1.y;
- * result.y = src0.z * src1.x - src0.x * src1.z;
- * result.z = src0.x * src1.y - src0.y * src1.x;
- * result.w = undef;
- */
- src0 = src_vector(p, &inst->Src[0], fs);
- src1 = src_vector(p, &inst->Src[1], fs);
- tmp = i915_get_utemp(p);
-
- i915_emit_arith(p,
- A0_MUL,
- tmp, A0_DEST_CHANNEL_ALL, 0,
- swizzle(src0, Z, X, Y, ONE),
- swizzle(src1, Y, Z, X, ONE), 0);
-
- i915_emit_arith(p,
- A0_MAD,
- get_result_vector(p, &inst->Dst[0]),
- get_result_flags(inst), 0,
- swizzle(src0, Y, Z, X, ONE),
- swizzle(src1, Z, X, Y, ONE),
- negate(tmp, 1, 1, 1, 0));
- break;
-
default:
i915_program_error(p, "bad opcode %d", inst->Instruction.Opcode);
p->error = 1;
diff -Nru mesa-17.2.4/src/gallium/drivers/i915/i915_screen.c mesa-17.3.3/src/gallium/drivers/i915/i915_screen.c
--- mesa-17.2.4/src/gallium/drivers/i915/i915_screen.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/i915/i915_screen.c 2018-01-18 21:30:28.000000000 +0000
@@ -157,12 +157,15 @@
case PIPE_SHADER_CAP_SUBROUTINES:
return 0;
case PIPE_SHADER_CAP_INTEGERS:
+ case PIPE_SHADER_CAP_INT64_ATOMICS:
+ case PIPE_SHADER_CAP_FP16:
return 0;
case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
return I915_TEX_UNITS;
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
return 0;
@@ -310,6 +313,12 @@
case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX:
case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
case PIPE_CAP_BINDLESS_TEXTURE:
+ case PIPE_CAP_NIR_SAMPLERS_AS_DEREF:
+ case PIPE_CAP_QUERY_SO_OVERFLOW:
+ case PIPE_CAP_MEMOBJ:
+ case PIPE_CAP_LOAD_CONSTBUF:
+ case PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS:
+ case PIPE_CAP_TILE_RASTER_ORDER:
return 0;
case PIPE_CAP_MAX_VIEWPORTS:
@@ -621,7 +630,5 @@
i915_debug_init(is);
- util_format_s3tc_init();
-
return &is->base;
}
diff -Nru mesa-17.2.4/src/gallium/drivers/i915/i915_state_derived.c mesa-17.3.3/src/gallium/drivers/i915/i915_state_derived.c
--- mesa-17.2.4/src/gallium/drivers/i915/i915_state_derived.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/i915/i915_state_derived.c 2018-01-18 21:30:28.000000000 +0000
@@ -216,6 +216,23 @@
if (I915_DBG_ON(DBG_ATOMS))
i915_dump_dirty(i915, __FUNCTION__);
+ if (!i915->fs) {
+ i915->dirty &= ~(I915_NEW_FS_CONSTANTS | I915_NEW_FS);
+ i915->hardware_dirty &= ~(I915_HW_PROGRAM | I915_HW_CONSTANTS);
+ }
+
+ if (!i915->vs)
+ i915->dirty &= ~I915_NEW_VS;
+
+ if (!i915->blend)
+ i915->dirty &= ~I915_NEW_BLEND;
+
+ if (!i915->rasterizer)
+ i915->dirty &= ~I915_NEW_RASTERIZER;
+
+ if (!i915->depth_stencil)
+ i915->dirty &= ~I915_NEW_DEPTH_STENCIL;
+
for (i = 0; atoms[i]; i++)
if (atoms[i]->dirty & i915->dirty)
atoms[i]->update(i915);
diff -Nru mesa-17.2.4/src/gallium/drivers/i915/i915_state_dynamic.c mesa-17.3.3/src/gallium/drivers/i915/i915_state_dynamic.c
--- mesa-17.2.4/src/gallium/drivers/i915/i915_state_dynamic.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/i915/i915_state_dynamic.c 2018-01-18 21:30:28.000000000 +0000
@@ -213,7 +213,8 @@
/* I915_NEW_RASTERIZER
*/
- st[1] |= i915->rasterizer->st;
+ if (i915->rasterizer)
+ st[1] |= i915->rasterizer->st;
/* I915_NEW_STIPPLE
*/
diff -Nru mesa-17.2.4/src/gallium/drivers/i915/i915_state_immediate.c mesa-17.3.3/src/gallium/drivers/i915/i915_state_immediate.c
--- mesa-17.2.4/src/gallium/drivers/i915/i915_state_immediate.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/i915/i915_state_immediate.c 2018-01-18 21:30:28.000000000 +0000
@@ -168,11 +168,13 @@
/* I915_NEW_BLEND
*/
- LIS6 |= i915->blend->LIS6;
+ if (i915->blend)
+ LIS6 |= i915->blend->LIS6;
/* I915_NEW_DEPTH
*/
- LIS6 |= i915->depth_stencil->depth_LIS6;
+ if (i915->depth_stencil)
+ LIS6 |= i915->depth_stencil->depth_LIS6;
set_immediate(i915, I915_IMMEDIATE_S6, LIS6);
}
diff -Nru mesa-17.2.4/src/gallium/drivers/i915/i915_state_static.c mesa-17.3.3/src/gallium/drivers/i915/i915_state_static.c
--- mesa-17.2.4/src/gallium/drivers/i915/i915_state_static.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/i915/i915_state_static.c 2018-01-18 21:30:28.000000000 +0000
@@ -216,7 +216,7 @@
zformat = translate_depth_format(depth_surface->format);
if (is->is_i945 && tex->tiling != I915_TILE_NONE
- && !i915->fs->info.writes_z)
+ && (i915->fs && !i915->fs->info.writes_z))
early_z = CLASSIC_EARLY_DEPTH;
} else
zformat = 0;
diff -Nru mesa-17.2.4/src/gallium/drivers/i915/Makefile.in mesa-17.3.3/src/gallium/drivers/i915/Makefile.in
--- mesa-17.2.4/src/gallium/drivers/i915/Makefile.in 2017-10-30 14:49:59.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/i915/Makefile.in 2018-01-18 21:30:39.000000000 +0000
@@ -123,7 +123,8 @@
subdir = src/gallium/drivers/i915
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_gnu_make.m4 \
+am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_compile_flag.m4 \
+ $(top_srcdir)/m4/ax_check_gnu_make.m4 \
$(top_srcdir)/m4/ax_check_python_mako_module.m4 \
$(top_srcdir)/m4/ax_gcc_builtin.m4 \
$(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
@@ -357,9 +358,9 @@
NVVIEUX_LIBS = @NVVIEUX_LIBS@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
-OMX_CFLAGS = @OMX_CFLAGS@
-OMX_LIBS = @OMX_LIBS@
-OMX_LIB_INSTALL_DIR = @OMX_LIB_INSTALL_DIR@
+OMX_BELLAGIO_CFLAGS = @OMX_BELLAGIO_CFLAGS@
+OMX_BELLAGIO_LIBS = @OMX_BELLAGIO_LIBS@
+OMX_BELLAGIO_LIB_INSTALL_DIR = @OMX_BELLAGIO_LIB_INSTALL_DIR@
OPENCL_LIBNAME = @OPENCL_LIBNAME@
OPENCL_VERSION = @OPENCL_VERSION@
OSMESA_LIB = @OSMESA_LIB@
@@ -413,6 +414,8 @@
VA_LIB_INSTALL_DIR = @VA_LIB_INSTALL_DIR@
VA_MAJOR = @VA_MAJOR@
VA_MINOR = @VA_MINOR@
+VC5_SIMULATOR_CFLAGS = @VC5_SIMULATOR_CFLAGS@
+VC5_SIMULATOR_LIBS = @VC5_SIMULATOR_LIBS@
VDPAU_CFLAGS = @VDPAU_CFLAGS@
VDPAU_LIBS = @VDPAU_LIBS@
VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@
@@ -424,12 +427,15 @@
VL_CFLAGS = @VL_CFLAGS@
VL_LIBS = @VL_LIBS@
VULKAN_ICD_INSTALL_DIR = @VULKAN_ICD_INSTALL_DIR@
-WAYLAND_CFLAGS = @WAYLAND_CFLAGS@
-WAYLAND_LIBS = @WAYLAND_LIBS@
+WAYLAND_CLIENT_CFLAGS = @WAYLAND_CLIENT_CFLAGS@
+WAYLAND_CLIENT_LIBS = @WAYLAND_CLIENT_LIBS@
WAYLAND_PROTOCOLS_DATADIR = @WAYLAND_PROTOCOLS_DATADIR@
WAYLAND_SCANNER = @WAYLAND_SCANNER@
WAYLAND_SCANNER_CFLAGS = @WAYLAND_SCANNER_CFLAGS@
WAYLAND_SCANNER_LIBS = @WAYLAND_SCANNER_LIBS@
+WAYLAND_SERVER_CFLAGS = @WAYLAND_SERVER_CFLAGS@
+WAYLAND_SERVER_LIBS = @WAYLAND_SERVER_LIBS@
+WNO_OVERRIDE_INIT = @WNO_OVERRIDE_INIT@
X11_INCLUDES = @X11_INCLUDES@
XA_MAJOR = @XA_MAJOR@
XA_MINOR = @XA_MINOR@
@@ -598,6 +604,8 @@
-I$(top_srcdir)/src/gallium/auxiliary \
-I$(top_srcdir)/src/gallium/drivers \
-I$(top_srcdir)/src/gallium/winsys \
+ -I$(top_builddir)/src/util/ \
+ -I$(top_builddir)/src/gallium/drivers/ \
$(DEFINES) \
$(PTHREAD_CFLAGS) \
$(LIBDRM_CFLAGS) \
diff -Nru mesa-17.2.4/src/gallium/drivers/imx/Makefile.in mesa-17.3.3/src/gallium/drivers/imx/Makefile.in
--- mesa-17.2.4/src/gallium/drivers/imx/Makefile.in 2017-10-30 14:49:59.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/imx/Makefile.in 2018-01-18 21:30:39.000000000 +0000
@@ -101,7 +101,8 @@
subdir = src/gallium/drivers/imx
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_gnu_make.m4 \
+am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_compile_flag.m4 \
+ $(top_srcdir)/m4/ax_check_gnu_make.m4 \
$(top_srcdir)/m4/ax_check_python_mako_module.m4 \
$(top_srcdir)/m4/ax_gcc_builtin.m4 \
$(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
@@ -305,9 +306,9 @@
NVVIEUX_LIBS = @NVVIEUX_LIBS@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
-OMX_CFLAGS = @OMX_CFLAGS@
-OMX_LIBS = @OMX_LIBS@
-OMX_LIB_INSTALL_DIR = @OMX_LIB_INSTALL_DIR@
+OMX_BELLAGIO_CFLAGS = @OMX_BELLAGIO_CFLAGS@
+OMX_BELLAGIO_LIBS = @OMX_BELLAGIO_LIBS@
+OMX_BELLAGIO_LIB_INSTALL_DIR = @OMX_BELLAGIO_LIB_INSTALL_DIR@
OPENCL_LIBNAME = @OPENCL_LIBNAME@
OPENCL_VERSION = @OPENCL_VERSION@
OSMESA_LIB = @OSMESA_LIB@
@@ -361,6 +362,8 @@
VA_LIB_INSTALL_DIR = @VA_LIB_INSTALL_DIR@
VA_MAJOR = @VA_MAJOR@
VA_MINOR = @VA_MINOR@
+VC5_SIMULATOR_CFLAGS = @VC5_SIMULATOR_CFLAGS@
+VC5_SIMULATOR_LIBS = @VC5_SIMULATOR_LIBS@
VDPAU_CFLAGS = @VDPAU_CFLAGS@
VDPAU_LIBS = @VDPAU_LIBS@
VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@
@@ -372,12 +375,15 @@
VL_CFLAGS = @VL_CFLAGS@
VL_LIBS = @VL_LIBS@
VULKAN_ICD_INSTALL_DIR = @VULKAN_ICD_INSTALL_DIR@
-WAYLAND_CFLAGS = @WAYLAND_CFLAGS@
-WAYLAND_LIBS = @WAYLAND_LIBS@
+WAYLAND_CLIENT_CFLAGS = @WAYLAND_CLIENT_CFLAGS@
+WAYLAND_CLIENT_LIBS = @WAYLAND_CLIENT_LIBS@
WAYLAND_PROTOCOLS_DATADIR = @WAYLAND_PROTOCOLS_DATADIR@
WAYLAND_SCANNER = @WAYLAND_SCANNER@
WAYLAND_SCANNER_CFLAGS = @WAYLAND_SCANNER_CFLAGS@
WAYLAND_SCANNER_LIBS = @WAYLAND_SCANNER_LIBS@
+WAYLAND_SERVER_CFLAGS = @WAYLAND_SERVER_CFLAGS@
+WAYLAND_SERVER_LIBS = @WAYLAND_SERVER_LIBS@
+WNO_OVERRIDE_INIT = @WNO_OVERRIDE_INIT@
X11_INCLUDES = @X11_INCLUDES@
XA_MAJOR = @XA_MAJOR@
XA_MINOR = @XA_MINOR@
@@ -503,6 +509,8 @@
-I$(top_srcdir)/src/gallium/auxiliary \
-I$(top_srcdir)/src/gallium/drivers \
-I$(top_srcdir)/src/gallium/winsys \
+ -I$(top_builddir)/src/util/ \
+ -I$(top_builddir)/src/gallium/drivers/ \
$(DEFINES) \
$(PTHREAD_CFLAGS) \
$(LIBDRM_CFLAGS) \
diff -Nru mesa-17.2.4/src/gallium/drivers/llvmpipe/lp_limits.h mesa-17.3.3/src/gallium/drivers/llvmpipe/lp_limits.h
--- mesa-17.2.4/src/gallium/drivers/llvmpipe/lp_limits.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/llvmpipe/lp_limits.h 2018-01-18 21:30:28.000000000 +0000
@@ -78,10 +78,8 @@
/**
* Max number of instructions (for all fragment shaders combined per context)
* that will be kept around (counted in terms of llvm ir).
- * Note: the definition looks odd, but there's branches which use a different
- * number of max shader variants.
*/
-#define LP_MAX_SHADER_INSTRUCTIONS MAX2(256*1024, 512*LP_MAX_SHADER_VARIANTS)
+#define LP_MAX_SHADER_INSTRUCTIONS (2048 * LP_MAX_SHADER_VARIANTS)
/**
* Max number of setup variants that will be kept around.
diff -Nru mesa-17.2.4/src/gallium/drivers/llvmpipe/lp_query.c mesa-17.3.3/src/gallium/drivers/llvmpipe/lp_query.c
--- mesa-17.2.4/src/gallium/drivers/llvmpipe/lp_query.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/llvmpipe/lp_query.c 2018-01-18 21:30:28.000000000 +0000
@@ -125,6 +125,7 @@
}
break;
case PIPE_QUERY_OCCLUSION_PREDICATE:
+ case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
for (i = 0; i < num_threads; i++) {
/* safer (still not guaranteed) when there's an overflow */
vresult->b = vresult->b || pq->end[i];
@@ -155,6 +156,7 @@
*result = pq->num_primitives_written;
break;
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
vresult->b = pq->num_primitives_generated > pq->num_primitives_written;
break;
case PIPE_QUERY_SO_STATISTICS: {
@@ -215,6 +217,7 @@
pq->num_primitives_generated = llvmpipe->so_stats.primitives_storage_needed;
break;
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
pq->num_primitives_written = llvmpipe->so_stats.num_primitives_written;
pq->num_primitives_generated = llvmpipe->so_stats.primitives_storage_needed;
break;
@@ -229,6 +232,7 @@
break;
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
+ case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
llvmpipe->active_occlusion_queries++;
llvmpipe->dirty |= LP_NEW_OCCLUSION_QUERY;
break;
@@ -264,6 +268,7 @@
llvmpipe->so_stats.primitives_storage_needed - pq->num_primitives_generated;
break;
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
pq->num_primitives_written =
llvmpipe->so_stats.num_primitives_written - pq->num_primitives_written;
pq->num_primitives_generated =
@@ -291,6 +296,7 @@
break;
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
+ case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
assert(llvmpipe->active_occlusion_queries);
llvmpipe->active_occlusion_queries--;
llvmpipe->dirty |= LP_NEW_OCCLUSION_QUERY;
diff -Nru mesa-17.2.4/src/gallium/drivers/llvmpipe/lp_rast.c mesa-17.3.3/src/gallium/drivers/llvmpipe/lp_rast.c
--- mesa-17.2.4/src/gallium/drivers/llvmpipe/lp_rast.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/llvmpipe/lp_rast.c 2018-01-18 21:30:28.000000000 +0000
@@ -486,6 +486,7 @@
switch (pq->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
+ case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
pq->start[task->thread_index] = task->thread_data.vis_counter;
break;
case PIPE_QUERY_PIPELINE_STATISTICS:
@@ -512,6 +513,7 @@
switch (pq->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
+ case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
pq->end[task->thread_index] +=
task->thread_data.vis_counter - pq->start[task->thread_index];
pq->start[task->thread_index] = 0;
diff -Nru mesa-17.2.4/src/gallium/drivers/llvmpipe/lp_screen.c mesa-17.3.3/src/gallium/drivers/llvmpipe/lp_screen.c
--- mesa-17.2.4/src/gallium/drivers/llvmpipe/lp_screen.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/llvmpipe/lp_screen.c 2018-01-18 21:30:28.000000000 +0000
@@ -132,7 +132,7 @@
case PIPE_CAP_QUERY_TIMESTAMP:
return 1;
case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
- return 0;
+ return 1;
case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
return 1;
case PIPE_CAP_TEXTURE_SHADOW_MAP:
@@ -252,7 +252,6 @@
case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
return 4;
case PIPE_CAP_TEXTURE_GATHER_SM5:
- case PIPE_CAP_TEXTURE_QUERY_LOD:
case PIPE_CAP_SAMPLE_SHADING:
case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
return 0;
@@ -265,11 +264,13 @@
return 1;
case PIPE_CAP_FAKE_SW_MSAA:
return 1;
+ case PIPE_CAP_TEXTURE_QUERY_LOD:
case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
case PIPE_CAP_TGSI_ARRAY_COMPONENTS:
case PIPE_CAP_DOUBLES:
case PIPE_CAP_INT64:
case PIPE_CAP_INT64_DIVMOD:
+ case PIPE_CAP_QUERY_SO_OVERFLOW:
return 1;
case PIPE_CAP_VENDOR_ID:
@@ -356,6 +357,11 @@
case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
case PIPE_CAP_POST_DEPTH_COVERAGE:
case PIPE_CAP_BINDLESS_TEXTURE:
+ case PIPE_CAP_NIR_SAMPLERS_AS_DEREF:
+ case PIPE_CAP_MEMOBJ:
+ case PIPE_CAP_LOAD_CONSTBUF:
+ case PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS:
+ case PIPE_CAP_TILE_RASTER_ORDER:
return 0;
}
/* should only get here on unhandled cases */
@@ -528,10 +534,6 @@
format != PIPE_FORMAT_ETC1_RGB8)
return FALSE;
- if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
- return util_format_s3tc_enabled;
- }
-
/*
* Everything can be supported by u_format
* (those without fetch_rgba_float might be not but shouldn't hit that)
@@ -682,7 +684,5 @@
}
(void) mtx_init(&screen->rast_mutex, mtx_plain);
- util_format_s3tc_init();
-
return &screen->base;
}
diff -Nru mesa-17.2.4/src/gallium/drivers/llvmpipe/lp_setup.c mesa-17.3.3/src/gallium/drivers/llvmpipe/lp_setup.c
--- mesa-17.2.4/src/gallium/drivers/llvmpipe/lp_setup.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/llvmpipe/lp_setup.c 2018-01-18 21:30:28.000000000 +0000
@@ -1380,6 +1380,7 @@
if (!(pq->type == PIPE_QUERY_OCCLUSION_COUNTER ||
pq->type == PIPE_QUERY_OCCLUSION_PREDICATE ||
+ pq->type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE ||
pq->type == PIPE_QUERY_PIPELINE_STATISTICS))
return;
@@ -1430,6 +1431,7 @@
if (pq->type == PIPE_QUERY_OCCLUSION_COUNTER ||
pq->type == PIPE_QUERY_OCCLUSION_PREDICATE ||
+ pq->type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE ||
pq->type == PIPE_QUERY_PIPELINE_STATISTICS ||
pq->type == PIPE_QUERY_TIMESTAMP) {
if (pq->type == PIPE_QUERY_TIMESTAMP &&
@@ -1466,6 +1468,7 @@
*/
if (pq->type == PIPE_QUERY_OCCLUSION_COUNTER ||
pq->type == PIPE_QUERY_OCCLUSION_PREDICATE ||
+ pq->type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE ||
pq->type == PIPE_QUERY_PIPELINE_STATISTICS) {
unsigned i;
diff -Nru mesa-17.2.4/src/gallium/drivers/llvmpipe/lp_state_fs.c mesa-17.3.3/src/gallium/drivers/llvmpipe/lp_state_fs.c
--- mesa-17.2.4/src/gallium/drivers/llvmpipe/lp_state_fs.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/llvmpipe/lp_state_fs.c 2018-01-18 21:30:28.000000000 +0000
@@ -84,6 +84,7 @@
#include "gallivm/lp_bld_flow.h"
#include "gallivm/lp_bld_debug.h"
#include "gallivm/lp_bld_arit.h"
+#include "gallivm/lp_bld_bitarit.h"
#include "gallivm/lp_bld_pack.h"
#include "gallivm/lp_bld_format.h"
#include "gallivm/lp_bld_quad.h"
@@ -347,7 +348,8 @@
if (!shader->info.base.writes_z && !shader->info.base.writes_stencil) {
if (key->alpha.enabled ||
key->blend.alpha_to_coverage ||
- shader->info.base.uses_kill) {
+ shader->info.base.uses_kill ||
+ shader->info.base.writes_samplemask) {
/* With alpha test and kill, can do the depth test early
* and hopefully eliminate some quads. But need to do a
* special deferred depth write once the final mask value
@@ -516,6 +518,25 @@
}
}
+ if (shader->info.base.writes_samplemask) {
+ int smaski = find_output_by_semantic(&shader->info.base,
+ TGSI_SEMANTIC_SAMPLEMASK,
+ 0);
+ LLVMValueRef smask;
+ struct lp_build_context smask_bld;
+ lp_build_context_init(&smask_bld, gallivm, int_type);
+
+ assert(smaski >= 0);
+ smask = LLVMBuildLoad(builder, outputs[smaski][0], "smask");
+ /*
+ * Pixel is alive according to the first sample in the mask.
+ */
+ smask = LLVMBuildBitCast(builder, smask, smask_bld.vec_type, "");
+ smask = lp_build_and(&smask_bld, smask, smask_bld.one);
+ smask = lp_build_cmp(&smask_bld, PIPE_FUNC_NOTEQUAL, smask, smask_bld.zero);
+ lp_build_mask_update(&mask, smask);
+ }
+
/* Late Z test */
if (depth_mode & LATE_DEPTH_TEST) {
int pos0 = find_output_by_semantic(&shader->info.base,
@@ -2679,23 +2700,23 @@
debug_printf("depth.format = %s\n", util_format_name(key->zsbuf_format));
}
if (key->depth.enabled) {
- debug_printf("depth.func = %s\n", util_dump_func(key->depth.func, TRUE));
+ debug_printf("depth.func = %s\n", util_str_func(key->depth.func, TRUE));
debug_printf("depth.writemask = %u\n", key->depth.writemask);
}
for (i = 0; i < 2; ++i) {
if (key->stencil[i].enabled) {
- debug_printf("stencil[%u].func = %s\n", i, util_dump_func(key->stencil[i].func, TRUE));
- debug_printf("stencil[%u].fail_op = %s\n", i, util_dump_stencil_op(key->stencil[i].fail_op, TRUE));
- debug_printf("stencil[%u].zpass_op = %s\n", i, util_dump_stencil_op(key->stencil[i].zpass_op, TRUE));
- debug_printf("stencil[%u].zfail_op = %s\n", i, util_dump_stencil_op(key->stencil[i].zfail_op, TRUE));
+ debug_printf("stencil[%u].func = %s\n", i, util_str_func(key->stencil[i].func, TRUE));
+ debug_printf("stencil[%u].fail_op = %s\n", i, util_str_stencil_op(key->stencil[i].fail_op, TRUE));
+ debug_printf("stencil[%u].zpass_op = %s\n", i, util_str_stencil_op(key->stencil[i].zpass_op, TRUE));
+ debug_printf("stencil[%u].zfail_op = %s\n", i, util_str_stencil_op(key->stencil[i].zfail_op, TRUE));
debug_printf("stencil[%u].valuemask = 0x%x\n", i, key->stencil[i].valuemask);
debug_printf("stencil[%u].writemask = 0x%x\n", i, key->stencil[i].writemask);
}
}
if (key->alpha.enabled) {
- debug_printf("alpha.func = %s\n", util_dump_func(key->alpha.func, TRUE));
+ debug_printf("alpha.func = %s\n", util_str_func(key->alpha.func, TRUE));
}
if (key->occlusion_count) {
@@ -2703,15 +2724,15 @@
}
if (key->blend.logicop_enable) {
- debug_printf("blend.logicop_func = %s\n", util_dump_logicop(key->blend.logicop_func, TRUE));
+ debug_printf("blend.logicop_func = %s\n", util_str_logicop(key->blend.logicop_func, TRUE));
}
else if (key->blend.rt[0].blend_enable) {
- debug_printf("blend.rgb_func = %s\n", util_dump_blend_func (key->blend.rt[0].rgb_func, TRUE));
- debug_printf("blend.rgb_src_factor = %s\n", util_dump_blend_factor(key->blend.rt[0].rgb_src_factor, TRUE));
- debug_printf("blend.rgb_dst_factor = %s\n", util_dump_blend_factor(key->blend.rt[0].rgb_dst_factor, TRUE));
- debug_printf("blend.alpha_func = %s\n", util_dump_blend_func (key->blend.rt[0].alpha_func, TRUE));
- debug_printf("blend.alpha_src_factor = %s\n", util_dump_blend_factor(key->blend.rt[0].alpha_src_factor, TRUE));
- debug_printf("blend.alpha_dst_factor = %s\n", util_dump_blend_factor(key->blend.rt[0].alpha_dst_factor, TRUE));
+ debug_printf("blend.rgb_func = %s\n", util_str_blend_func (key->blend.rt[0].rgb_func, TRUE));
+ debug_printf("blend.rgb_src_factor = %s\n", util_str_blend_factor(key->blend.rt[0].rgb_src_factor, TRUE));
+ debug_printf("blend.rgb_dst_factor = %s\n", util_str_blend_factor(key->blend.rt[0].rgb_dst_factor, TRUE));
+ debug_printf("blend.alpha_func = %s\n", util_str_blend_func (key->blend.rt[0].alpha_func, TRUE));
+ debug_printf("blend.alpha_src_factor = %s\n", util_str_blend_factor(key->blend.rt[0].alpha_src_factor, TRUE));
+ debug_printf("blend.alpha_dst_factor = %s\n", util_str_blend_factor(key->blend.rt[0].alpha_dst_factor, TRUE));
}
debug_printf("blend.colormask = 0x%x\n", key->blend.rt[0].colormask);
if (key->blend.alpha_to_coverage) {
@@ -2721,17 +2742,17 @@
const struct lp_static_sampler_state *sampler = &key->state[i].sampler_state;
debug_printf("sampler[%u] = \n", i);
debug_printf(" .wrap = %s %s %s\n",
- util_dump_tex_wrap(sampler->wrap_s, TRUE),
- util_dump_tex_wrap(sampler->wrap_t, TRUE),
- util_dump_tex_wrap(sampler->wrap_r, TRUE));
+ util_str_tex_wrap(sampler->wrap_s, TRUE),
+ util_str_tex_wrap(sampler->wrap_t, TRUE),
+ util_str_tex_wrap(sampler->wrap_r, TRUE));
debug_printf(" .min_img_filter = %s\n",
- util_dump_tex_filter(sampler->min_img_filter, TRUE));
+ util_str_tex_filter(sampler->min_img_filter, TRUE));
debug_printf(" .min_mip_filter = %s\n",
- util_dump_tex_mipfilter(sampler->min_mip_filter, TRUE));
+ util_str_tex_mipfilter(sampler->min_mip_filter, TRUE));
debug_printf(" .mag_img_filter = %s\n",
- util_dump_tex_filter(sampler->mag_img_filter, TRUE));
+ util_str_tex_filter(sampler->mag_img_filter, TRUE));
if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE)
- debug_printf(" .compare_func = %s\n", util_dump_func(sampler->compare_func, TRUE));
+ debug_printf(" .compare_func = %s\n", util_str_func(sampler->compare_func, TRUE));
debug_printf(" .normalized_coords = %u\n", sampler->normalized_coords);
debug_printf(" .min_max_lod_equal = %u\n", sampler->min_max_lod_equal);
debug_printf(" .lod_bias_non_zero = %u\n", sampler->lod_bias_non_zero);
@@ -2744,7 +2765,7 @@
debug_printf(" .format = %s\n",
util_format_name(texture->format));
debug_printf(" .target = %s\n",
- util_dump_tex_target(texture->target, TRUE));
+ util_str_tex_target(texture->target, TRUE));
debug_printf(" .level_zero_only = %u\n",
texture->level_zero_only);
debug_printf(" .pot = %u %u %u\n",
@@ -2777,7 +2798,7 @@
const struct lp_fragment_shader_variant_key *key)
{
struct lp_fragment_shader_variant *variant;
- const struct util_format_description *cbuf0_format_desc;
+ const struct util_format_description *cbuf0_format_desc = NULL;
boolean fullcolormask;
char module_name[64];
@@ -2818,7 +2839,8 @@
!key->alpha.enabled &&
!key->blend.alpha_to_coverage &&
!key->depth.enabled &&
- !shader->info.base.uses_kill
+ !shader->info.base.uses_kill &&
+ !shader->info.base.writes_samplemask
? TRUE : FALSE;
if ((shader->info.base.num_tokens <= 1) &&
@@ -2993,14 +3015,13 @@
llvmpipe_remove_shader_variant(struct llvmpipe_context *lp,
struct lp_fragment_shader_variant *variant)
{
- if (gallivm_debug & GALLIVM_DEBUG_IR) {
- debug_printf("llvmpipe: del fs #%u var #%u v created #%u v cached"
- " #%u v total cached #%u\n",
- variant->shader->no,
- variant->no,
+ if ((LP_DEBUG & DEBUG_FS) || (gallivm_debug & GALLIVM_DEBUG_IR)) {
+ debug_printf("llvmpipe: del fs #%u var %u v created %u v cached %u "
+ "v total cached %u inst %u total inst %u\n",
+ variant->shader->no, variant->no,
variant->shader->variants_created,
variant->shader->variants_cached,
- lp->nr_fs_variants);
+ lp->nr_fs_variants, variant->nr_instrs, lp->nr_fs_instrs);
}
gallivm_destroy(variant->gallivm);
@@ -3357,7 +3378,7 @@
unsigned i;
unsigned variants_to_cull;
- if (0) {
+ if (LP_DEBUG & DEBUG_FS) {
debug_printf("%u variants,\t%u instrs,\t%u instrs/variant\n",
lp->nr_fs_variants,
lp->nr_fs_instrs,
@@ -3365,14 +3386,22 @@
}
/* First, check if we've exceeded the max number of shader variants.
- * If so, free 25% of them (the least recently used ones).
+ * If so, free 6.25% of them (the least recently used ones).
*/
- variants_to_cull = lp->nr_fs_variants >= LP_MAX_SHADER_VARIANTS ? LP_MAX_SHADER_VARIANTS / 4 : 0;
+ variants_to_cull = lp->nr_fs_variants >= LP_MAX_SHADER_VARIANTS ? LP_MAX_SHADER_VARIANTS / 16 : 0;
if (variants_to_cull ||
lp->nr_fs_instrs >= LP_MAX_SHADER_INSTRUCTIONS) {
struct pipe_context *pipe = &lp->pipe;
+ if (gallivm_debug & GALLIVM_DEBUG_PERF) {
+ debug_printf("Evicting FS: %u fs variants,\t%u total variants,"
+ "\t%u instrs,\t%u instrs/variant\n",
+ shader->variants_cached,
+ lp->nr_fs_variants, lp->nr_fs_instrs,
+ lp->nr_fs_instrs / lp->nr_fs_variants);
+ }
+
/*
* XXX: we need to flush the context until we have some sort of
* reference counting in fragment shaders as they may still be binned
diff -Nru mesa-17.2.4/src/gallium/drivers/llvmpipe/lp_test_blend.c mesa-17.3.3/src/gallium/drivers/llvmpipe/lp_test_blend.c
--- mesa-17.2.4/src/gallium/drivers/llvmpipe/lp_test_blend.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/llvmpipe/lp_test_blend.c 2018-01-18 21:30:28.000000000 +0000
@@ -95,12 +95,12 @@
fprintf(fp,
"%s\t%s\t%s\t%s\t%s\t%s\n",
- util_dump_blend_func(blend->rt[0].rgb_func, TRUE),
- util_dump_blend_factor(blend->rt[0].rgb_src_factor, TRUE),
- util_dump_blend_factor(blend->rt[0].rgb_dst_factor, TRUE),
- util_dump_blend_func(blend->rt[0].alpha_func, TRUE),
- util_dump_blend_factor(blend->rt[0].alpha_src_factor, TRUE),
- util_dump_blend_factor(blend->rt[0].alpha_dst_factor, TRUE));
+ util_str_blend_func(blend->rt[0].rgb_func, TRUE),
+ util_str_blend_factor(blend->rt[0].rgb_src_factor, TRUE),
+ util_str_blend_factor(blend->rt[0].rgb_dst_factor, TRUE),
+ util_str_blend_func(blend->rt[0].alpha_func, TRUE),
+ util_str_blend_factor(blend->rt[0].alpha_src_factor, TRUE),
+ util_str_blend_factor(blend->rt[0].alpha_dst_factor, TRUE));
fflush(fp);
}
@@ -119,12 +119,12 @@
fprintf(fp,
" %s=%s %s=%s %s=%s %s=%s %s=%s %s=%s",
- "rgb_func", util_dump_blend_func(blend->rt[0].rgb_func, TRUE),
- "rgb_src_factor", util_dump_blend_factor(blend->rt[0].rgb_src_factor, TRUE),
- "rgb_dst_factor", util_dump_blend_factor(blend->rt[0].rgb_dst_factor, TRUE),
- "alpha_func", util_dump_blend_func(blend->rt[0].alpha_func, TRUE),
- "alpha_src_factor", util_dump_blend_factor(blend->rt[0].alpha_src_factor, TRUE),
- "alpha_dst_factor", util_dump_blend_factor(blend->rt[0].alpha_dst_factor, TRUE));
+ "rgb_func", util_str_blend_func(blend->rt[0].rgb_func, TRUE),
+ "rgb_src_factor", util_str_blend_factor(blend->rt[0].rgb_src_factor, TRUE),
+ "rgb_dst_factor", util_str_blend_factor(blend->rt[0].rgb_dst_factor, TRUE),
+ "alpha_func", util_str_blend_func(blend->rt[0].alpha_func, TRUE),
+ "alpha_src_factor", util_str_blend_factor(blend->rt[0].alpha_src_factor, TRUE),
+ "alpha_dst_factor", util_str_blend_factor(blend->rt[0].alpha_dst_factor, TRUE));
fprintf(fp, " ...\n");
fflush(fp);
diff -Nru mesa-17.2.4/src/gallium/drivers/llvmpipe/lp_test_format.c mesa-17.3.3/src/gallium/drivers/llvmpipe/lp_test_format.c
--- mesa-17.2.4/src/gallium/drivers/llvmpipe/lp_test_format.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/llvmpipe/lp_test_format.c 2018-01-18 21:30:28.000000000 +0000
@@ -357,8 +357,6 @@
enum pipe_format format;
boolean success = TRUE;
- util_format_s3tc_init();
-
#if USE_TEXTURE_CACHE
cache_ptr = align_malloc(sizeof(struct lp_build_format_cache), 16);
#endif
@@ -383,11 +381,6 @@
if (util_format_is_pure_integer(format))
continue;
- if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC &&
- !util_format_s3tc_enabled) {
- continue;
- }
-
/* only have util fetch func for etc1 */
if (format_desc->layout == UTIL_FORMAT_LAYOUT_ETC &&
format != PIPE_FORMAT_ETC1_RGB8) {
diff -Nru mesa-17.2.4/src/gallium/drivers/llvmpipe/lp_texture.c mesa-17.3.3/src/gallium/drivers/llvmpipe/lp_texture.c
--- mesa-17.2.4/src/gallium/drivers/llvmpipe/lp_texture.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/llvmpipe/lp_texture.c 2018-01-18 21:30:28.000000000 +0000
@@ -303,6 +303,8 @@
FREE(lpr);
return NULL;
}
+
+
static struct pipe_resource *
llvmpipe_resource_create(struct pipe_screen *_screen,
const struct pipe_resource *templat)
@@ -310,6 +312,7 @@
return llvmpipe_resource_create_front(_screen, templat, NULL);
}
+
static void
llvmpipe_resource_destroy(struct pipe_screen *pscreen,
struct pipe_resource *pt)
diff -Nru mesa-17.2.4/src/gallium/drivers/llvmpipe/Makefile.in mesa-17.3.3/src/gallium/drivers/llvmpipe/Makefile.in
--- mesa-17.2.4/src/gallium/drivers/llvmpipe/Makefile.in 2017-10-30 14:49:59.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/llvmpipe/Makefile.in 2018-01-18 21:30:39.000000000 +0000
@@ -127,7 +127,8 @@
lp_test_printf$(EXEEXT)
subdir = src/gallium/drivers/llvmpipe
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_gnu_make.m4 \
+am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_compile_flag.m4 \
+ $(top_srcdir)/m4/ax_check_gnu_make.m4 \
$(top_srcdir)/m4/ax_check_python_mako_module.m4 \
$(top_srcdir)/m4/ax_gcc_builtin.m4 \
$(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
@@ -627,9 +628,9 @@
NVVIEUX_LIBS = @NVVIEUX_LIBS@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
-OMX_CFLAGS = @OMX_CFLAGS@
-OMX_LIBS = @OMX_LIBS@
-OMX_LIB_INSTALL_DIR = @OMX_LIB_INSTALL_DIR@
+OMX_BELLAGIO_CFLAGS = @OMX_BELLAGIO_CFLAGS@
+OMX_BELLAGIO_LIBS = @OMX_BELLAGIO_LIBS@
+OMX_BELLAGIO_LIB_INSTALL_DIR = @OMX_BELLAGIO_LIB_INSTALL_DIR@
OPENCL_LIBNAME = @OPENCL_LIBNAME@
OPENCL_VERSION = @OPENCL_VERSION@
OSMESA_LIB = @OSMESA_LIB@
@@ -683,6 +684,8 @@
VA_LIB_INSTALL_DIR = @VA_LIB_INSTALL_DIR@
VA_MAJOR = @VA_MAJOR@
VA_MINOR = @VA_MINOR@
+VC5_SIMULATOR_CFLAGS = @VC5_SIMULATOR_CFLAGS@
+VC5_SIMULATOR_LIBS = @VC5_SIMULATOR_LIBS@
VDPAU_CFLAGS = @VDPAU_CFLAGS@
VDPAU_LIBS = @VDPAU_LIBS@
VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@
@@ -694,12 +697,15 @@
VL_CFLAGS = @VL_CFLAGS@
VL_LIBS = @VL_LIBS@
VULKAN_ICD_INSTALL_DIR = @VULKAN_ICD_INSTALL_DIR@
-WAYLAND_CFLAGS = @WAYLAND_CFLAGS@
-WAYLAND_LIBS = @WAYLAND_LIBS@
+WAYLAND_CLIENT_CFLAGS = @WAYLAND_CLIENT_CFLAGS@
+WAYLAND_CLIENT_LIBS = @WAYLAND_CLIENT_LIBS@
WAYLAND_PROTOCOLS_DATADIR = @WAYLAND_PROTOCOLS_DATADIR@
WAYLAND_SCANNER = @WAYLAND_SCANNER@
WAYLAND_SCANNER_CFLAGS = @WAYLAND_SCANNER_CFLAGS@
WAYLAND_SCANNER_LIBS = @WAYLAND_SCANNER_LIBS@
+WAYLAND_SERVER_CFLAGS = @WAYLAND_SERVER_CFLAGS@
+WAYLAND_SERVER_LIBS = @WAYLAND_SERVER_LIBS@
+WNO_OVERRIDE_INIT = @WNO_OVERRIDE_INIT@
X11_INCLUDES = @X11_INCLUDES@
XA_MAJOR = @XA_MAJOR@
XA_MINOR = @XA_MINOR@
@@ -897,6 +903,8 @@
-I$(top_srcdir)/src/gallium/auxiliary \
-I$(top_srcdir)/src/gallium/drivers \
-I$(top_srcdir)/src/gallium/winsys \
+ -I$(top_builddir)/src/util/ \
+ -I$(top_builddir)/src/gallium/drivers/ \
$(DEFINES) \
$(PTHREAD_CFLAGS) \
$(LIBDRM_CFLAGS) \
diff -Nru mesa-17.2.4/src/gallium/drivers/llvmpipe/meson.build mesa-17.3.3/src/gallium/drivers/llvmpipe/meson.build
--- mesa-17.2.4/src/gallium/drivers/llvmpipe/meson.build 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/llvmpipe/meson.build 2018-02-01 16:17:32.000000000 +0000
@@ -0,0 +1,116 @@
+# Copyright © 2017 Intel Corporation
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+files_llvmpipe = files(  # sources and headers passed to the 'llvmpipe' static_library() in this file
+ 'lp_bld_alpha.c',
+ 'lp_bld_alpha.h',
+ 'lp_bld_blend_aos.c',
+ 'lp_bld_blend.c',
+ 'lp_bld_blend.h',
+ 'lp_bld_blend_logicop.c',
+ 'lp_bld_depth.c',
+ 'lp_bld_depth.h',
+ 'lp_bld_interp.c',
+ 'lp_bld_interp.h',
+ 'lp_clear.c',
+ 'lp_clear.h',
+ 'lp_context.c',
+ 'lp_context.h',
+ 'lp_debug.h',
+ 'lp_draw_arrays.c',
+ 'lp_fence.c',
+ 'lp_fence.h',
+ 'lp_flush.c',
+ 'lp_flush.h',
+ 'lp_jit.c',
+ 'lp_jit.h',
+ 'lp_limits.h',
+ 'lp_memory.c',
+ 'lp_memory.h',
+ 'lp_perf.c',
+ 'lp_perf.h',
+ 'lp_public.h',
+ 'lp_query.c',
+ 'lp_query.h',
+ 'lp_rast.c',
+ 'lp_rast_debug.c',
+ 'lp_rast.h',
+ 'lp_rast_priv.h',
+ 'lp_rast_tri.c',
+ 'lp_rast_tri_tmp.h',
+ 'lp_scene.c',
+ 'lp_scene.h',
+ 'lp_scene_queue.c',
+ 'lp_scene_queue.h',
+ 'lp_screen.c',
+ 'lp_screen.h',
+ 'lp_setup.c',
+ 'lp_setup_context.h',
+ 'lp_setup.h',
+ 'lp_setup_line.c',
+ 'lp_setup_point.c',
+ 'lp_setup_tri.c',
+ 'lp_setup_vbuf.c',
+ 'lp_state_blend.c',
+ 'lp_state_clip.c',
+ 'lp_state_derived.c',
+ 'lp_state_fs.c',
+ 'lp_state_fs.h',
+ 'lp_state_gs.c',
+ 'lp_state.h',
+ 'lp_state_rasterizer.c',
+ 'lp_state_sampler.c',
+ 'lp_state_setup.c',
+ 'lp_state_setup.h',
+ 'lp_state_so.c',
+ 'lp_state_surface.c',
+ 'lp_state_vertex.c',
+ 'lp_state_vs.c',
+ 'lp_surface.c',
+ 'lp_surface.h',
+ 'lp_tex_sample.c',
+ 'lp_tex_sample.h',
+ 'lp_texture.c',
+ 'lp_texture.h',
+)
+
+libllvmpipe = static_library(  # static library 'llvmpipe' built from files_llvmpipe
+ 'llvmpipe',
+ files_llvmpipe,
+ c_args : [c_vis_args, c_msvc_compat_args],
+ cpp_args : [cpp_vis_args, cpp_msvc_compat_args],
+ include_directories : [inc_gallium, inc_gallium_aux, inc_include, inc_src],
+ dependencies : dep_llvm,
+ build_by_default : false,  # only built when another target needs it (the lp_test_* executables in this file link it)
+)
+
+if with_tests and with_gallium_softpipe and with_llvm  # tests need all three options; NOTE(review): the options are defined outside this file
+ foreach t : ['lp_test_format', 'lp_test_arit', 'lp_test_blend',
+ 'lp_test_conv', 'lp_test_printf']
+ test(t, executable(  # one executable per name in the list, registered as a test of the same name
+ t,
+ ['@0@.c'.format(t), 'lp_test_main.c'],  # <t>.c plus lp_test_main.c, which is compiled into every test
+ dependencies : [dep_llvm, dep_dl, dep_thread, dep_clock],
+ include_directories : [inc_gallium, inc_gallium_aux, inc_include, inc_src],
+ link_with : [libllvmpipe, libgallium, libmesa_util],
+ )
+ )
+ endforeach
+endif
diff -Nru mesa-17.2.4/src/gallium/drivers/noop/Makefile.in mesa-17.3.3/src/gallium/drivers/noop/Makefile.in
--- mesa-17.2.4/src/gallium/drivers/noop/Makefile.in 2017-10-30 14:49:59.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/noop/Makefile.in 2018-01-18 21:30:39.000000000 +0000
@@ -106,7 +106,8 @@
subdir = src/gallium/drivers/noop
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_gnu_make.m4 \
+am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_compile_flag.m4 \
+ $(top_srcdir)/m4/ax_check_gnu_make.m4 \
$(top_srcdir)/m4/ax_check_python_mako_module.m4 \
$(top_srcdir)/m4/ax_gcc_builtin.m4 \
$(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
@@ -333,9 +334,9 @@
NVVIEUX_LIBS = @NVVIEUX_LIBS@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
-OMX_CFLAGS = @OMX_CFLAGS@
-OMX_LIBS = @OMX_LIBS@
-OMX_LIB_INSTALL_DIR = @OMX_LIB_INSTALL_DIR@
+OMX_BELLAGIO_CFLAGS = @OMX_BELLAGIO_CFLAGS@
+OMX_BELLAGIO_LIBS = @OMX_BELLAGIO_LIBS@
+OMX_BELLAGIO_LIB_INSTALL_DIR = @OMX_BELLAGIO_LIB_INSTALL_DIR@
OPENCL_LIBNAME = @OPENCL_LIBNAME@
OPENCL_VERSION = @OPENCL_VERSION@
OSMESA_LIB = @OSMESA_LIB@
@@ -389,6 +390,8 @@
VA_LIB_INSTALL_DIR = @VA_LIB_INSTALL_DIR@
VA_MAJOR = @VA_MAJOR@
VA_MINOR = @VA_MINOR@
+VC5_SIMULATOR_CFLAGS = @VC5_SIMULATOR_CFLAGS@
+VC5_SIMULATOR_LIBS = @VC5_SIMULATOR_LIBS@
VDPAU_CFLAGS = @VDPAU_CFLAGS@
VDPAU_LIBS = @VDPAU_LIBS@
VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@
@@ -400,12 +403,15 @@
VL_CFLAGS = @VL_CFLAGS@
VL_LIBS = @VL_LIBS@
VULKAN_ICD_INSTALL_DIR = @VULKAN_ICD_INSTALL_DIR@
-WAYLAND_CFLAGS = @WAYLAND_CFLAGS@
-WAYLAND_LIBS = @WAYLAND_LIBS@
+WAYLAND_CLIENT_CFLAGS = @WAYLAND_CLIENT_CFLAGS@
+WAYLAND_CLIENT_LIBS = @WAYLAND_CLIENT_LIBS@
WAYLAND_PROTOCOLS_DATADIR = @WAYLAND_PROTOCOLS_DATADIR@
WAYLAND_SCANNER = @WAYLAND_SCANNER@
WAYLAND_SCANNER_CFLAGS = @WAYLAND_SCANNER_CFLAGS@
WAYLAND_SCANNER_LIBS = @WAYLAND_SCANNER_LIBS@
+WAYLAND_SERVER_CFLAGS = @WAYLAND_SERVER_CFLAGS@
+WAYLAND_SERVER_LIBS = @WAYLAND_SERVER_LIBS@
+WNO_OVERRIDE_INIT = @WNO_OVERRIDE_INIT@
X11_INCLUDES = @X11_INCLUDES@
XA_MAJOR = @XA_MAJOR@
XA_MINOR = @XA_MINOR@
@@ -536,6 +542,8 @@
-I$(top_srcdir)/src/gallium/auxiliary \
-I$(top_srcdir)/src/gallium/drivers \
-I$(top_srcdir)/src/gallium/winsys \
+ -I$(top_builddir)/src/util/ \
+ -I$(top_builddir)/src/gallium/drivers/ \
$(DEFINES) \
$(PTHREAD_CFLAGS) \
$(LIBDRM_CFLAGS) \
diff -Nru mesa-17.2.4/src/gallium/drivers/noop/meson.build mesa-17.3.3/src/gallium/drivers/noop/meson.build
--- mesa-17.2.4/src/gallium/drivers/noop/meson.build 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/noop/meson.build 2018-02-01 16:17:32.000000000 +0000
@@ -0,0 +1,27 @@
+# Copyright © 2017 Dylan Baker
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+libnoop = static_library(  # static library 'noop' built from the three noop_* files listed below
+ 'noop',
+ files('noop_pipe.c', 'noop_public.h', 'noop_state.c'),
+ c_args : [c_vis_args],
+ include_directories : [inc_gallium, inc_include, inc_src, inc_gallium_aux],
+ build_by_default : false,  # not part of the default build; built only when another target depends on it
+)
diff -Nru mesa-17.2.4/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp mesa-17.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
--- mesa-17.2.4/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -277,7 +277,6 @@
case TGSI_OPCODE_DP3:
return 0x7;
case TGSI_OPCODE_DP4:
- case TGSI_OPCODE_DPH:
case TGSI_OPCODE_KILL_IF: /* WriteMask ignored */
return 0xf;
case TGSI_OPCODE_DST:
@@ -289,7 +288,6 @@
case TGSI_OPCODE_POW:
case TGSI_OPCODE_RCP:
case TGSI_OPCODE_RSQ:
- case TGSI_OPCODE_SCS:
return 0x1;
case TGSI_OPCODE_IF:
case TGSI_OPCODE_UIF:
@@ -347,14 +345,6 @@
return mask;
case TGSI_OPCODE_TXQ:
return 1;
- case TGSI_OPCODE_XPD:
- {
- unsigned int x = 0;
- if (mask & 1) x |= 0x6;
- if (mask & 2) x |= 0x5;
- if (mask & 4) x |= 0x3;
- return x;
- }
case TGSI_OPCODE_D2I:
case TGSI_OPCODE_D2U:
case TGSI_OPCODE_D2F:
@@ -620,7 +610,6 @@
case TGSI_OPCODE_ISHR:
case TGSI_OPCODE_ISLT:
case TGSI_OPCODE_ISSG:
- case TGSI_OPCODE_SAD: // not sure about SAD, but no one has a float version
case TGSI_OPCODE_MOD:
case TGSI_OPCODE_UARL:
case TGSI_OPCODE_ATOMIMIN:
@@ -845,7 +834,6 @@
NV50_IR_OPCODE_CASE(OR, OR);
NV50_IR_OPCODE_CASE(MOD, MOD);
NV50_IR_OPCODE_CASE(XOR, XOR);
- NV50_IR_OPCODE_CASE(SAD, SAD);
NV50_IR_OPCODE_CASE(TXF, TXF);
NV50_IR_OPCODE_CASE(TXF_LZ, TXF);
NV50_IR_OPCODE_CASE(TXQ, TXQ);
@@ -996,9 +984,6 @@
static uint16_t opcodeToSubOp(uint opcode)
{
switch (opcode) {
- case TGSI_OPCODE_LFENCE: return NV50_IR_SUBOP_MEMBAR(L, GL);
- case TGSI_OPCODE_SFENCE: return NV50_IR_SUBOP_MEMBAR(S, GL);
- case TGSI_OPCODE_MFENCE: return NV50_IR_SUBOP_MEMBAR(M, GL);
case TGSI_OPCODE_ATOMUADD: return NV50_IR_SUBOP_ATOM_ADD;
case TGSI_OPCODE_ATOMXCHG: return NV50_IR_SUBOP_ATOM_EXCH;
case TGSI_OPCODE_ATOMCAS: return NV50_IR_SUBOP_ATOM_CAS;
@@ -3198,7 +3183,6 @@
break;
case TGSI_OPCODE_MAD:
case TGSI_OPCODE_UMAD:
- case TGSI_OPCODE_SAD:
case TGSI_OPCODE_FMA:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
src0 = fetchSrc(0, c);
@@ -3273,19 +3257,6 @@
mkOp1(op, TYPE_F32, dst0[3], val0);
}
break;
- case TGSI_OPCODE_SCS:
- if (mask & 3) {
- val0 = mkOp1v(OP_PRESIN, TYPE_F32, getSSA(), fetchSrc(0, 0));
- if (dst0[0])
- mkOp1(OP_COS, TYPE_F32, dst0[0], val0);
- if (dst0[1])
- mkOp1(OP_SIN, TYPE_F32, dst0[1], val0);
- }
- if (dst0[2])
- loadImm(dst0[2], 0.0f);
- if (dst0[3])
- loadImm(dst0[3], 1.0f);
- break;
case TGSI_OPCODE_EXP:
src0 = fetchSrc(0, 0);
val0 = mkOp1v(OP_FLOOR, TYPE_F32, getSSA(), src0);
@@ -3327,13 +3298,6 @@
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
mkMov(dst0[c], val0);
break;
- case TGSI_OPCODE_DPH:
- val0 = buildDot(3);
- src1 = fetchSrc(1, 3);
- mkOp2(OP_ADD, TYPE_F32, val0, val0, src1);
- FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
- mkMov(dst0[c], val0);
- break;
case TGSI_OPCODE_DST:
if (dst0[0])
loadImm(dst0[0], 1.0f);
@@ -3361,25 +3325,6 @@
case TGSI_OPCODE_LIT:
handleLIT(dst0);
break;
- case TGSI_OPCODE_XPD:
- FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
- if (c < 3) {
- val0 = getSSA();
- src0 = fetchSrc(1, (c + 1) % 3);
- src1 = fetchSrc(0, (c + 2) % 3);
- mkOp2(OP_MUL, TYPE_F32, val0, src0, src1)
- ->dnz = info->io.mul_zero_wins;
- mkOp1(OP_NEG, TYPE_F32, val0, val0);
-
- src0 = fetchSrc(0, (c + 1) % 3);
- src1 = fetchSrc(1, (c + 2) % 3);
- mkOp3(OP_MAD, TYPE_F32, dst0[c], src0, src1, val0)
- ->dnz = info->io.mul_zero_wins;
- } else {
- loadImm(dst0[c], 1.0f);
- }
- }
- break;
case TGSI_OPCODE_ISSG:
case TGSI_OPCODE_SSG:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
@@ -3786,13 +3731,6 @@
geni->fixed = 1;
geni->subOp = NV50_IR_SUBOP_BAR_SYNC;
break;
- case TGSI_OPCODE_MFENCE:
- case TGSI_OPCODE_LFENCE:
- case TGSI_OPCODE_SFENCE:
- geni = mkOp(OP_MEMBAR, TYPE_NONE, NULL);
- geni->fixed = 1;
- geni->subOp = tgsi::opcodeToSubOp(tgsi.getOpcode());
- break;
case TGSI_OPCODE_MEMBAR:
{
uint32_t level = tgsi.getSrc(0).getValueU32(0, info);
diff -Nru mesa-17.2.4/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp mesa-17.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
--- mesa-17.2.4/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -45,11 +45,27 @@
{
FlowInstruction *call;
int builtin;
- Value *def[2];
bld.setPosition(i, false);
- def[0] = bld.mkMovToReg(0, i->getSrc(0))->getDef(0);
- def[1] = bld.mkMovToReg(1, i->getSrc(1))->getDef(0);
+
+ // Generate movs to the input regs for the call we want to generate
+ for (int s = 0; i->srcExists(s); ++s) {
+ Instruction *ld = i->getSrc(s)->getInsn();
+ // ld is tested for NULL below, so do not dereference it before that test; immediates are propagated
+ if (!ld || ld->fixed || (ld->op != OP_LOAD && ld->op != OP_MOV) ||
+ !(ld->src(0).getFile() == FILE_IMMEDIATE))
+ bld.mkMovToReg(s, i->getSrc(s));
+ else {
+ assert(ld->getSrc(0) != NULL);
+ bld.mkMovToReg(s, ld->getSrc(0));
+ // Clear the src, to make code elimination possible here before we
+ // delete the instruction i later
+ i->setSrc(s, NULL);
+ if (ld->isDead())
+ delete_Instruction(prog, ld);
+ }
+ }
+
switch (i->dType) {
case TYPE_U32: builtin = NVC0_BUILTIN_DIV_U32; break;
case TYPE_S32: builtin = NVC0_BUILTIN_DIV_S32; break;
@@ -57,7 +73,7 @@
return;
}
call = bld.mkFlow(OP_CALL, NULL, CC_ALWAYS, NULL);
- bld.mkMov(i->getDef(0), def[(i->op == OP_DIV) ? 0 : 1]);
+ bld.mkMovFromReg(i->getDef(0), i->op == OP_DIV ? 0 : 1);
bld.mkClobber(FILE_GPR, (i->op == OP_DIV) ? 0xe : 0xd, 2);
bld.mkClobber(FILE_PREDICATE, (i->dType == TYPE_S32) ? 0xf : 0x3, 0);
@@ -200,7 +216,7 @@
// Compute LO (all shift values)
bld.mkOp2(op, type, (dst[0] = bld.getSSA()), src[0], shift);
// Compute HI (shift > 32)
- bld.mkOp2(op, type, (hi2 = bld.getSSA()), src[1],
+ bld.mkOp2(op, type, (hi2 = bld.getSSA()), src[0],
bld.mkOp1v(OP_NEG, TYPE_S32, bld.getSSA(), x32_minus_shift))
->setPredicate(CC_NOT_P, pred);
bld.mkOp2(OP_UNION, TYPE_U32, (dst[1] = bld.getSSA()), hi1, hi2);
diff -Nru mesa-17.2.4/src/gallium/drivers/nouveau/Makefile.in mesa-17.3.3/src/gallium/drivers/nouveau/Makefile.in
--- mesa-17.2.4/src/gallium/drivers/nouveau/Makefile.in 2017-10-30 14:49:59.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/nouveau/Makefile.in 2018-01-18 21:30:39.000000000 +0000
@@ -125,7 +125,8 @@
noinst_PROGRAMS = nouveau_compiler$(EXEEXT)
subdir = src/gallium/drivers/nouveau
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_gnu_make.m4 \
+am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_compile_flag.m4 \
+ $(top_srcdir)/m4/ax_check_gnu_make.m4 \
$(top_srcdir)/m4/ax_check_python_mako_module.m4 \
$(top_srcdir)/m4/ax_gcc_builtin.m4 \
$(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
@@ -432,9 +433,9 @@
NVVIEUX_LIBS = @NVVIEUX_LIBS@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
-OMX_CFLAGS = @OMX_CFLAGS@
-OMX_LIBS = @OMX_LIBS@
-OMX_LIB_INSTALL_DIR = @OMX_LIB_INSTALL_DIR@
+OMX_BELLAGIO_CFLAGS = @OMX_BELLAGIO_CFLAGS@
+OMX_BELLAGIO_LIBS = @OMX_BELLAGIO_LIBS@
+OMX_BELLAGIO_LIB_INSTALL_DIR = @OMX_BELLAGIO_LIB_INSTALL_DIR@
OPENCL_LIBNAME = @OPENCL_LIBNAME@
OPENCL_VERSION = @OPENCL_VERSION@
OSMESA_LIB = @OSMESA_LIB@
@@ -488,6 +489,8 @@
VA_LIB_INSTALL_DIR = @VA_LIB_INSTALL_DIR@
VA_MAJOR = @VA_MAJOR@
VA_MINOR = @VA_MINOR@
+VC5_SIMULATOR_CFLAGS = @VC5_SIMULATOR_CFLAGS@
+VC5_SIMULATOR_LIBS = @VC5_SIMULATOR_LIBS@
VDPAU_CFLAGS = @VDPAU_CFLAGS@
VDPAU_LIBS = @VDPAU_LIBS@
VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@
@@ -499,12 +502,15 @@
VL_CFLAGS = @VL_CFLAGS@
VL_LIBS = @VL_LIBS@
VULKAN_ICD_INSTALL_DIR = @VULKAN_ICD_INSTALL_DIR@
-WAYLAND_CFLAGS = @WAYLAND_CFLAGS@
-WAYLAND_LIBS = @WAYLAND_LIBS@
+WAYLAND_CLIENT_CFLAGS = @WAYLAND_CLIENT_CFLAGS@
+WAYLAND_CLIENT_LIBS = @WAYLAND_CLIENT_LIBS@
WAYLAND_PROTOCOLS_DATADIR = @WAYLAND_PROTOCOLS_DATADIR@
WAYLAND_SCANNER = @WAYLAND_SCANNER@
WAYLAND_SCANNER_CFLAGS = @WAYLAND_SCANNER_CFLAGS@
WAYLAND_SCANNER_LIBS = @WAYLAND_SCANNER_LIBS@
+WAYLAND_SERVER_CFLAGS = @WAYLAND_SERVER_CFLAGS@
+WAYLAND_SERVER_LIBS = @WAYLAND_SERVER_LIBS@
+WNO_OVERRIDE_INIT = @WNO_OVERRIDE_INIT@
X11_INCLUDES = @X11_INCLUDES@
XA_MAJOR = @XA_MAJOR@
XA_MINOR = @XA_MINOR@
@@ -825,6 +831,8 @@
-I$(top_srcdir)/src/gallium/auxiliary \
-I$(top_srcdir)/src/gallium/drivers \
-I$(top_srcdir)/src/gallium/winsys \
+ -I$(top_builddir)/src/util/ \
+ -I$(top_builddir)/src/gallium/drivers/ \
$(DEFINES) \
$(PTHREAD_CFLAGS) \
$(LIBDRM_CFLAGS) \
diff -Nru mesa-17.2.4/src/gallium/drivers/nouveau/meson.build mesa-17.3.3/src/gallium/drivers/nouveau/meson.build
--- mesa-17.2.4/src/gallium/drivers/nouveau/meson.build 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/nouveau/meson.build 2018-02-01 16:17:32.000000000 +0000
@@ -0,0 +1,224 @@
+# Copyright © 2017 Dylan Baker
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+files_libnouveau = files(
+ 'nouveau_buffer.c',
+ 'nouveau_buffer.h',
+ 'nouveau_context.h',
+ 'nouveau_debug.h',
+ 'nouveau_fence.c',
+ 'nouveau_fence.h',
+ 'nouveau_gldefs.h',
+ 'nouveau_heap.c',
+ 'nouveau_heap.h',
+ 'nouveau_mm.c',
+ 'nouveau_mm.h',
+ 'nouveau_screen.c',
+ 'nouveau_screen.h',
+ 'nouveau_statebuf.h',
+ 'nouveau_video.c',
+ 'nouveau_video.h',
+ 'nouveau_vp3_video_bsp.c',
+ 'nouveau_vp3_video.c',
+ 'nouveau_vp3_video.h',
+ 'nouveau_vp3_video_vp.c',
+ 'nouveau_winsys.h',
+ 'nv17_mpeg.xml.h',
+ 'nv31_mpeg.xml.h',
+ 'nv_m2mf.xml.h',
+ 'nv_object.xml.h',
+ 'nv30/nv01_2d.xml.h',
+ 'nv30/nv30-40_3d.xml.h',
+ 'nv30/nv30_clear.c',
+ 'nv30/nv30_context.c',
+ 'nv30/nv30_context.h',
+ 'nv30/nv30_draw.c',
+ 'nv30/nv30_format.c',
+ 'nv30/nv30_format.h',
+ 'nv30/nv30_fragprog.c',
+ 'nv30/nv30_fragtex.c',
+ 'nv30/nv30_miptree.c',
+ 'nv30/nv30_push.c',
+ 'nv30/nv30_query.c',
+ 'nv30/nv30_resource.c',
+ 'nv30/nv30_resource.h',
+ 'nv30/nv30_screen.c',
+ 'nv30/nv30_screen.h',
+ 'nv30/nv30_state.c',
+ 'nv30/nv30_state.h',
+ 'nv30/nv30_state_validate.c',
+ 'nv30/nv30_texture.c',
+ 'nv30/nv30_transfer.c',
+ 'nv30/nv30_transfer.h',
+ 'nv30/nv30_vbo.c',
+ 'nv30/nv30_vertprog.c',
+ 'nv30/nv30_vertprog.h',
+ 'nv30/nv30_winsys.h',
+ 'nv30/nv40_vertprog.h',
+ 'nv30/nv40_verttex.c',
+ 'nv30/nvfx_fragprog.c',
+ 'nv30/nvfx_shader.h',
+ 'nv30/nvfx_vertprog.c',
+ 'nv50/g80_defs.xml.h',
+ 'nv50/g80_texture.xml.h',
+ 'nv50/nv50_2d.xml.h',
+ 'nv50/nv50_3ddefs.xml.h',
+ 'nv50/nv50_3d.xml.h',
+ 'nv50/nv50_blit.h',
+ 'nv50/nv50_compute.c',
+ 'nv50/nv50_compute.xml.h',
+ 'nv50/nv50_context.c',
+ 'nv50/nv50_context.h',
+ 'nv50/nv50_formats.c',
+ 'nv50/nv50_miptree.c',
+ 'nv50/nv50_program.c',
+ 'nv50/nv50_program.h',
+ 'nv50/nv50_push.c',
+ 'nv50/nv50_query.c',
+ 'nv50/nv50_query.h',
+ 'nv50/nv50_query_hw.c',
+ 'nv50/nv50_query_hw.h',
+ 'nv50/nv50_query_hw_metric.c',
+ 'nv50/nv50_query_hw_metric.h',
+ 'nv50/nv50_query_hw_sm.c',
+ 'nv50/nv50_query_hw_sm.h',
+ 'nv50/nv50_resource.c',
+ 'nv50/nv50_resource.h',
+ 'nv50/nv50_screen.c',
+ 'nv50/nv50_screen.h',
+ 'nv50/nv50_shader_state.c',
+ 'nv50/nv50_state.c',
+ 'nv50/nv50_stateobj.h',
+ 'nv50/nv50_stateobj_tex.h',
+ 'nv50/nv50_state_validate.c',
+ 'nv50/nv50_surface.c',
+ 'nv50/nv50_tex.c',
+ 'nv50/nv50_transfer.c',
+ 'nv50/nv50_transfer.h',
+ 'nv50/nv50_vbo.c',
+ 'nv50/nv50_winsys.h',
+ 'nv50/nv84_video_bsp.c',
+ 'nv50/nv84_video.c',
+ 'nv50/nv84_video.h',
+ 'nv50/nv84_video_vp.c',
+ 'nv50/nv98_video_bsp.c',
+ 'nv50/nv98_video.c',
+ 'nv50/nv98_video.h',
+ 'nv50/nv98_video_ppp.c',
+ 'nv50/nv98_video_vp.c',
+ 'codegen/nv50_ir.cpp',
+ 'codegen/nv50_ir_bb.cpp',
+ 'codegen/nv50_ir_build_util.cpp',
+ 'codegen/nv50_ir_build_util.h',
+ 'codegen/nv50_ir_driver.h',
+ 'codegen/nv50_ir_emit_nv50.cpp',
+ 'codegen/nv50_ir_from_tgsi.cpp',
+ 'codegen/nv50_ir_graph.cpp',
+ 'codegen/nv50_ir_graph.h',
+ 'codegen/nv50_ir.h',
+ 'codegen/nv50_ir_inlines.h',
+ 'codegen/nv50_ir_lowering_nv50.cpp',
+ 'codegen/nv50_ir_peephole.cpp',
+ 'codegen/nv50_ir_print.cpp',
+ 'codegen/nv50_ir_ra.cpp',
+ 'codegen/nv50_ir_ssa.cpp',
+ 'codegen/nv50_ir_target.cpp',
+ 'codegen/nv50_ir_target.h',
+ 'codegen/nv50_ir_target_nv50.cpp',
+ 'codegen/nv50_ir_target_nv50.h',
+ 'codegen/nv50_ir_util.cpp',
+ 'codegen/nv50_ir_util.h',
+ 'codegen/unordered_set.h',
+ 'codegen/nv50_ir_emit_gk110.cpp',
+ 'codegen/nv50_ir_emit_gm107.cpp',
+ 'codegen/nv50_ir_emit_nvc0.cpp',
+ 'codegen/nv50_ir_lowering_gm107.cpp',
+ 'codegen/nv50_ir_lowering_gm107.h',
+ 'codegen/nv50_ir_lowering_nvc0.cpp',
+ 'codegen/nv50_ir_lowering_nvc0.h',
+ 'codegen/nv50_ir_target_gm107.cpp',
+ 'codegen/nv50_ir_target_gm107.h',
+ 'codegen/nv50_ir_target_nvc0.cpp',
+ 'codegen/nv50_ir_target_nvc0.h',
+ 'nvc0/gm107_texture.xml.h',
+ 'nvc0/nvc0_3d.xml.h',
+ 'nvc0/nvc0_compute.c',
+ 'nvc0/nvc0_compute.xml.h',
+ 'nvc0/nvc0_context.c',
+ 'nvc0/nvc0_context.h',
+ 'nvc0/nvc0_formats.c',
+ 'nvc0/nvc0_m2mf.xml.h',
+ 'nvc0/nvc0_macros.h',
+ 'nvc0/nvc0_miptree.c',
+ 'nvc0/nvc0_program.c',
+ 'nvc0/nvc0_program.h',
+ 'nvc0/nvc0_query.c',
+ 'nvc0/nvc0_query.h',
+ 'nvc0/nvc0_query_hw.c',
+ 'nvc0/nvc0_query_hw.h',
+ 'nvc0/nvc0_query_hw_metric.c',
+ 'nvc0/nvc0_query_hw_metric.h',
+ 'nvc0/nvc0_query_hw_sm.c',
+ 'nvc0/nvc0_query_hw_sm.h',
+ 'nvc0/nvc0_query_sw.c',
+ 'nvc0/nvc0_query_sw.h',
+ 'nvc0/nvc0_resource.c',
+ 'nvc0/nvc0_resource.h',
+ 'nvc0/nvc0_screen.c',
+ 'nvc0/nvc0_screen.h',
+ 'nvc0/nvc0_shader_state.c',
+ 'nvc0/nvc0_state.c',
+ 'nvc0/nvc0_stateobj.h',
+ 'nvc0/nvc0_state_validate.c',
+ 'nvc0/nvc0_surface.c',
+ 'nvc0/nvc0_tex.c',
+ 'nvc0/nvc0_transfer.c',
+ 'nvc0/nvc0_vbo.c',
+ 'nvc0/nvc0_vbo_translate.c',
+ 'nvc0/nvc0_video_bsp.c',
+ 'nvc0/nvc0_video.c',
+ 'nvc0/nvc0_video.h',
+ 'nvc0/nvc0_video_ppp.c',
+ 'nvc0/nvc0_video_vp.c',
+ 'nvc0/nvc0_winsys.h',
+ 'nvc0/nve4_compute.c',
+ 'nvc0/nve4_compute.h',
+ 'nvc0/nve4_compute.xml.h',
+ 'nvc0/nve4_p2mf.xml.h',
+)
+
+libnouveau = static_library(
+ 'nouveau',
+ [files_libnouveau],
+ include_directories : [inc_src, inc_include, inc_gallium, inc_gallium_aux],
+ c_args : [c_vis_args],
+ cpp_args : [cpp_vis_args],
+ dependencies : [dep_libdrm, dep_libdrm_nouveau],
+ build_by_default : false,
+)
+
+nouveau_compiler = executable(
+ 'nouveau_compiler',
+ 'nouveau_compiler.c',
+ include_directories : [inc_src, inc_include, inc_gallium, inc_gallium_aux],
+ dependencies : [dep_libdrm, dep_libdrm_nouveau],
+ link_with : [libnouveau, libgallium, libmesa_util],
+ build_by_default : false,
+)
diff -Nru mesa-17.2.4/src/gallium/drivers/nouveau/nouveau_screen.c mesa-17.3.3/src/gallium/drivers/nouveau/nouveau_screen.c
--- mesa-17.2.4/src/gallium/drivers/nouveau/nouveau_screen.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/nouveau/nouveau_screen.c 2018-01-18 21:30:28.000000000 +0000
@@ -242,8 +242,6 @@
nouveau_disk_cache_create(screen);
- util_format_s3tc_init();
-
screen->lowmem_bindings = PIPE_BIND_GLOBAL; /* gallium limit */
screen->vidmem_bindings =
PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL |
diff -Nru mesa-17.2.4/src/gallium/drivers/nouveau/nv30/nv30_query.c mesa-17.3.3/src/gallium/drivers/nouveau/nv30/nv30_query.c
--- mesa-17.2.4/src/gallium/drivers/nouveau/nv30/nv30_query.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/nouveau/nv30/nv30_query.c 2018-01-18 21:30:28.000000000 +0000
@@ -121,6 +121,7 @@
break;
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
+ case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
q->enable = NV30_3D_QUERY_ENABLE;
q->report = 1;
break;
@@ -228,7 +229,8 @@
nv30_query_object_del(screen, &q->qo[1]);
}
- if (q->type == PIPE_QUERY_OCCLUSION_PREDICATE)
+ if (q->type == PIPE_QUERY_OCCLUSION_PREDICATE ||
+ q->type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE)
result->b = !!q->result;
else
result->u64 = q->result;
diff -Nru mesa-17.2.4/src/gallium/drivers/nouveau/nv30/nv30_screen.c mesa-17.3.3/src/gallium/drivers/nouveau/nv30/nv30_screen.c
--- mesa-17.2.4/src/gallium/drivers/nouveau/nv30/nv30_screen.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/nouveau/nv30/nv30_screen.c 2018-01-18 21:30:28.000000000 +0000
@@ -220,6 +220,12 @@
case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX:
case PIPE_CAP_POST_DEPTH_COVERAGE:
case PIPE_CAP_BINDLESS_TEXTURE:
+ case PIPE_CAP_NIR_SAMPLERS_AS_DEREF:
+ case PIPE_CAP_QUERY_SO_OVERFLOW:
+ case PIPE_CAP_MEMOBJ:
+ case PIPE_CAP_LOAD_CONSTBUF:
+ case PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS:
+ case PIPE_CAP_TILE_RASTER_ORDER:
return 0;
case PIPE_CAP_VENDOR_ID:
@@ -310,8 +316,11 @@
case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
case PIPE_SHADER_CAP_SUBROUTINES:
case PIPE_SHADER_CAP_INTEGERS:
+ case PIPE_SHADER_CAP_INT64_ATOMICS:
+ case PIPE_SHADER_CAP_FP16:
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
@@ -358,8 +367,10 @@
case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
case PIPE_SHADER_CAP_SUBROUTINES:
case PIPE_SHADER_CAP_INTEGERS:
+ case PIPE_SHADER_CAP_FP16:
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
diff -Nru mesa-17.2.4/src/gallium/drivers/nouveau/nv30/nv30_vertprog.h mesa-17.3.3/src/gallium/drivers/nouveau/nv30/nv30_vertprog.h
--- mesa-17.2.4/src/gallium/drivers/nouveau/nv30/nv30_vertprog.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/nouveau/nv30/nv30_vertprog.h 2018-01-18 21:30:28.000000000 +0000
@@ -10,7 +10,6 @@
* POW - EX2 + MUL + LG2
* SUB - ADD, second source negated
* SWZ - MOV
- * XPD -
*
* Register access
* - Only one INPUT can be accessed per-instruction (move extras into TEMPs)
diff -Nru mesa-17.2.4/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c mesa-17.3.3/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c
--- mesa-17.2.4/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c 2018-01-18 21:30:28.000000000 +0000
@@ -591,11 +591,6 @@
case TGSI_OPCODE_DP4:
nvfx_fp_emit(fpc, arith(sat, DP4, dst, mask, src[0], src[1], none));
break;
- case TGSI_OPCODE_DPH:
- tmp = nvfx_src(temp(fpc));
- nvfx_fp_emit(fpc, arith(0, DP3, tmp.reg, NVFX_FP_MASK_X, src[0], src[1], none));
- nvfx_fp_emit(fpc, arith(sat, ADD, dst, mask, swz(tmp, X, X, X, X), swz(src[1], W, W, W, W), none));
- break;
case TGSI_OPCODE_DST:
nvfx_fp_emit(fpc, arith(sat, DST, dst, mask, src[0], src[1], none));
break;
@@ -694,23 +689,6 @@
nvfx_fp_emit(fpc, arith(sat, EX2, dst, mask, neg(swz(tmp, X, X, X, X)), none, none));
}
break;
- case TGSI_OPCODE_SCS:
- /* avoid overwriting the source */
- if(src[0].swz[NVFX_SWZ_X] != NVFX_SWZ_X)
- {
- if (mask & NVFX_FP_MASK_X)
- nvfx_fp_emit(fpc, arith(sat, COS, dst, NVFX_FP_MASK_X, swz(src[0], X, X, X, X), none, none));
- if (mask & NVFX_FP_MASK_Y)
- nvfx_fp_emit(fpc, arith(sat, SIN, dst, NVFX_FP_MASK_Y, swz(src[0], X, X, X, X), none, none));
- }
- else
- {
- if (mask & NVFX_FP_MASK_Y)
- nvfx_fp_emit(fpc, arith(sat, SIN, dst, NVFX_FP_MASK_Y, swz(src[0], X, X, X, X), none, none));
- if (mask & NVFX_FP_MASK_X)
- nvfx_fp_emit(fpc, arith(sat, COS, dst, NVFX_FP_MASK_X, swz(src[0], X, X, X, X), none, none));
- }
- break;
case TGSI_OPCODE_SEQ:
nvfx_fp_emit(fpc, arith(sat, SEQ, dst, mask, src[0], src[1], none));
break;
@@ -779,11 +757,6 @@
case TGSI_OPCODE_TXP:
nvfx_fp_emit(fpc, tex(sat, TXP, unit, dst, mask, src[0], none, none));
break;
- case TGSI_OPCODE_XPD:
- tmp = nvfx_src(temp(fpc));
- nvfx_fp_emit(fpc, arith(0, MUL, tmp.reg, mask, swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none));
- nvfx_fp_emit(fpc, arith(sat, MAD, dst, (mask & ~NVFX_FP_MASK_W), swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), neg(tmp)));
- break;
case TGSI_OPCODE_IF:
// MOVRC0 R31 (TR0.xyzw), R:
diff -Nru mesa-17.2.4/src/gallium/drivers/nouveau/nv30/nvfx_shader.h mesa-17.3.3/src/gallium/drivers/nouveau/nv30/nvfx_shader.h
--- mesa-17.2.4/src/gallium/drivers/nouveau/nv30/nvfx_shader.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/nouveau/nv30/nvfx_shader.h 2018-01-18 21:30:28.000000000 +0000
@@ -163,8 +163,6 @@
* SUB - ADD, negate second source
* RSQ - LG2 + EX2
* POW - LG2 + MUL + EX2
- * SCS - COS + SIN
- * XPD
*
* NV40 Looping
* Loops appear to be fairly expensive on NV40 at least, the proprietary
diff -Nru mesa-17.2.4/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c mesa-17.3.3/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c
--- mesa-17.2.4/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c 2018-01-18 21:30:28.000000000 +0000
@@ -588,9 +588,6 @@
case TGSI_OPCODE_DP4:
nvfx_vp_emit(vpc, arith(sat, VEC, DP4, dst, mask, src[0], src[1], none));
break;
- case TGSI_OPCODE_DPH:
- nvfx_vp_emit(vpc, arith(sat, VEC, DPH, dst, mask, src[0], src[1], none));
- break;
case TGSI_OPCODE_DST:
nvfx_vp_emit(vpc, arith(sat, VEC, DST, dst, mask, src[0], src[1], none));
break;
@@ -686,11 +683,6 @@
insn.cc_test = NVFX_COND_LT;
nvfx_vp_emit(vpc, insn);
break;
- case TGSI_OPCODE_XPD:
- tmp = nvfx_src(temp(vpc));
- nvfx_vp_emit(vpc, arith(0, VEC, MUL, tmp.reg, mask, swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none));
- nvfx_vp_emit(vpc, arith(sat, VEC, MAD, dst, (mask & ~NVFX_VP_MASK_W), swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), neg(tmp)));
- break;
case TGSI_OPCODE_IF:
insn = arith(0, VEC, MOV, none.reg, NVFX_VP_MASK_X, src[0], none, none);
insn.cc_update = 1;
diff -Nru mesa-17.2.4/src/gallium/drivers/nouveau/nv50/nv50_query.c mesa-17.3.3/src/gallium/drivers/nouveau/nv50/nv50_query.c
--- mesa-17.2.4/src/gallium/drivers/nouveau/nv50/nv50_query.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/nouveau/nv50/nv50_query.c 2018-01-18 21:30:28.000000000 +0000
@@ -97,6 +97,7 @@
break;
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
+ case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
if (likely(!condition)) {
if (unlikely(hq->nesting))
cond = wait ? NV50_3D_COND_MODE_NOT_EQUAL :
diff -Nru mesa-17.2.4/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c mesa-17.3.3/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
--- mesa-17.2.4/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c 2018-01-18 21:30:28.000000000 +0000
@@ -157,6 +157,7 @@
switch (q->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
+ case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
hq->nesting = nv50->screen->num_occlusion_queries_active++;
if (hq->nesting) {
nv50_hw_query_get(push, q, 0x10, 0x0100f002);
@@ -215,6 +216,7 @@
switch (q->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
+ case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
nv50_hw_query_get(push, q, 0, 0x0100f002);
if (--nv50->screen->num_occlusion_queries_active == 0) {
PUSH_SPACE(push, 2);
@@ -307,6 +309,7 @@
res64[0] = hq->data[1] - hq->data[5];
break;
case PIPE_QUERY_OCCLUSION_PREDICATE:
+ case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
res8[0] = hq->data[1] != hq->data[5];
break;
case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
@@ -378,6 +381,7 @@
switch (q->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
+ case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
hq->rotate = 32;
break;
case PIPE_QUERY_PRIMITIVES_GENERATED:
diff -Nru mesa-17.2.4/src/gallium/drivers/nouveau/nv50/nv50_screen.c mesa-17.3.3/src/gallium/drivers/nouveau/nv50/nv50_screen.c
--- mesa-17.2.4/src/gallium/drivers/nouveau/nv50/nv50_screen.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/nouveau/nv50/nv50_screen.c 2018-01-18 21:30:28.000000000 +0000
@@ -272,6 +272,12 @@
case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT:
case PIPE_CAP_POST_DEPTH_COVERAGE:
case PIPE_CAP_BINDLESS_TEXTURE:
+ case PIPE_CAP_NIR_SAMPLERS_AS_DEREF:
+ case PIPE_CAP_QUERY_SO_OVERFLOW:
+ case PIPE_CAP_MEMOBJ:
+ case PIPE_CAP_LOAD_CONSTBUF:
+ case PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS:
+ case PIPE_CAP_TILE_RASTER_ORDER:
return 0;
case PIPE_CAP_VENDOR_ID:
@@ -341,6 +347,8 @@
return 1;
case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
return 1;
+ case PIPE_SHADER_CAP_INT64_ATOMICS:
+ case PIPE_SHADER_CAP_FP16:
case PIPE_SHADER_CAP_SUBROUTINES:
return 0; /* please inline, or provide function declarations */
case PIPE_SHADER_CAP_INTEGERS:
@@ -357,6 +365,7 @@
return 32;
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
diff -Nru mesa-17.2.4/src/gallium/drivers/nouveau/nvc0/nvc0_query.c mesa-17.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
--- mesa-17.2.4/src/gallium/drivers/nouveau/nvc0/nvc0_query.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_query.c 2018-01-18 21:30:28.000000000 +0000
@@ -119,6 +119,7 @@
break;
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
+ case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
if (likely(!condition)) {
if (unlikely(hq->nesting))
cond = wait ? NVC0_3D_COND_MODE_NOT_EQUAL :
diff -Nru mesa-17.2.4/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c mesa-17.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
--- mesa-17.2.4/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c 2018-01-18 21:30:28.000000000 +0000
@@ -157,6 +157,7 @@
switch (q->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
+ case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
hq->nesting = nvc0->screen->num_occlusion_queries_active++;
if (hq->nesting) {
nvc0_hw_query_get(push, q, 0x10, 0x0100f002);
@@ -224,6 +225,7 @@
switch (q->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
+ case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
nvc0_hw_query_get(push, q, 0, 0x0100f002);
if (--nvc0->screen->num_occlusion_queries_active == 0) {
PUSH_SPACE(push, 1);
@@ -320,6 +322,7 @@
res64[0] = hq->data[1] - hq->data[5];
break;
case PIPE_QUERY_OCCLUSION_PREDICATE:
+ case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
res8[0] = hq->data[1] != hq->data[5];
break;
case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
@@ -408,7 +411,8 @@
PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
PUSH_REFN (push, buf->bo, buf->domain | NOUVEAU_BO_WR);
BEGIN_1IC0(push, NVC0_3D(MACRO_QUERY_BUFFER_WRITE), 9);
- if (q->type == PIPE_QUERY_OCCLUSION_PREDICATE) /* XXX what if 64-bit? */
+ if (q->type == PIPE_QUERY_OCCLUSION_PREDICATE ||
+ q->type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE) /* XXX what if 64-bit? */
PUSH_DATA(push, 0x00000001);
else if (result_type == PIPE_QUERY_TYPE_I32)
PUSH_DATA(push, 0x7fffffff);
@@ -513,6 +517,7 @@
switch (q->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
+ case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
hq->rotate = 32;
space = NVC0_HW_QUERY_ALLOC_SPACE;
break;
diff -Nru mesa-17.2.4/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c mesa-17.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
--- mesa-17.2.4/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 2018-01-18 21:30:28.000000000 +0000
@@ -301,6 +301,12 @@
case PIPE_CAP_INT64_DIVMOD:
case PIPE_CAP_SPARSE_BUFFER_PAGE_SIZE:
case PIPE_CAP_BINDLESS_TEXTURE:
+ case PIPE_CAP_NIR_SAMPLERS_AS_DEREF:
+ case PIPE_CAP_QUERY_SO_OVERFLOW:
+ case PIPE_CAP_MEMOBJ:
+ case PIPE_CAP_LOAD_CONSTBUF:
+ case PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS:
+ case PIPE_CAP_TILE_RASTER_ORDER:
return 0;
case PIPE_CAP_VENDOR_ID:
@@ -399,8 +405,11 @@
case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS:
return 1;
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD:
+ case PIPE_SHADER_CAP_INT64_ATOMICS:
+ case PIPE_SHADER_CAP_FP16:
return 0;
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
return NVC0_MAX_BUFFERS;
diff -Nru mesa-17.2.4/src/gallium/drivers/pl111/Makefile.in mesa-17.3.3/src/gallium/drivers/pl111/Makefile.in
--- mesa-17.2.4/src/gallium/drivers/pl111/Makefile.in 2017-10-30 14:50:00.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/pl111/Makefile.in 2018-01-18 21:30:39.000000000 +0000
@@ -101,7 +101,8 @@
subdir = src/gallium/drivers/pl111
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_gnu_make.m4 \
+am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_compile_flag.m4 \
+ $(top_srcdir)/m4/ax_check_gnu_make.m4 \
$(top_srcdir)/m4/ax_check_python_mako_module.m4 \
$(top_srcdir)/m4/ax_gcc_builtin.m4 \
$(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
@@ -305,9 +306,9 @@
NVVIEUX_LIBS = @NVVIEUX_LIBS@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
-OMX_CFLAGS = @OMX_CFLAGS@
-OMX_LIBS = @OMX_LIBS@
-OMX_LIB_INSTALL_DIR = @OMX_LIB_INSTALL_DIR@
+OMX_BELLAGIO_CFLAGS = @OMX_BELLAGIO_CFLAGS@
+OMX_BELLAGIO_LIBS = @OMX_BELLAGIO_LIBS@
+OMX_BELLAGIO_LIB_INSTALL_DIR = @OMX_BELLAGIO_LIB_INSTALL_DIR@
OPENCL_LIBNAME = @OPENCL_LIBNAME@
OPENCL_VERSION = @OPENCL_VERSION@
OSMESA_LIB = @OSMESA_LIB@
@@ -361,6 +362,8 @@
VA_LIB_INSTALL_DIR = @VA_LIB_INSTALL_DIR@
VA_MAJOR = @VA_MAJOR@
VA_MINOR = @VA_MINOR@
+VC5_SIMULATOR_CFLAGS = @VC5_SIMULATOR_CFLAGS@
+VC5_SIMULATOR_LIBS = @VC5_SIMULATOR_LIBS@
VDPAU_CFLAGS = @VDPAU_CFLAGS@
VDPAU_LIBS = @VDPAU_LIBS@
VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@
@@ -372,12 +375,15 @@
VL_CFLAGS = @VL_CFLAGS@
VL_LIBS = @VL_LIBS@
VULKAN_ICD_INSTALL_DIR = @VULKAN_ICD_INSTALL_DIR@
-WAYLAND_CFLAGS = @WAYLAND_CFLAGS@
-WAYLAND_LIBS = @WAYLAND_LIBS@
+WAYLAND_CLIENT_CFLAGS = @WAYLAND_CLIENT_CFLAGS@
+WAYLAND_CLIENT_LIBS = @WAYLAND_CLIENT_LIBS@
WAYLAND_PROTOCOLS_DATADIR = @WAYLAND_PROTOCOLS_DATADIR@
WAYLAND_SCANNER = @WAYLAND_SCANNER@
WAYLAND_SCANNER_CFLAGS = @WAYLAND_SCANNER_CFLAGS@
WAYLAND_SCANNER_LIBS = @WAYLAND_SCANNER_LIBS@
+WAYLAND_SERVER_CFLAGS = @WAYLAND_SERVER_CFLAGS@
+WAYLAND_SERVER_LIBS = @WAYLAND_SERVER_LIBS@
+WNO_OVERRIDE_INIT = @WNO_OVERRIDE_INIT@
X11_INCLUDES = @X11_INCLUDES@
XA_MAJOR = @XA_MAJOR@
XA_MINOR = @XA_MINOR@
@@ -503,6 +509,8 @@
-I$(top_srcdir)/src/gallium/auxiliary \
-I$(top_srcdir)/src/gallium/drivers \
-I$(top_srcdir)/src/gallium/winsys \
+ -I$(top_builddir)/src/util/ \
+ -I$(top_builddir)/src/gallium/drivers/ \
$(DEFINES) \
$(PTHREAD_CFLAGS) \
$(LIBDRM_CFLAGS) \
diff -Nru mesa-17.2.4/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.c mesa-17.3.3/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.c
--- mesa-17.2.4/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.c 2018-01-18 21:30:28.000000000 +0000
@@ -35,6 +35,8 @@
#include
+#include "util/macros.h"
+
#include "r300_reg.h"
#include "radeon_compiler.h"
@@ -61,7 +63,7 @@
{MAKE_SWZ3(HALF, HALF, HALF), R300_ALU_ARGC_HALF, 0, 0}
};
-static const int num_native_swizzles = sizeof(native_swizzles)/sizeof(native_swizzles[0]);
+static const int num_native_swizzles = ARRAY_SIZE(native_swizzles);
/**
* Find a native RGB swizzle that matches the given swizzle.
diff -Nru mesa-17.2.4/src/gallium/drivers/r300/Makefile.in mesa-17.3.3/src/gallium/drivers/r300/Makefile.in
--- mesa-17.2.4/src/gallium/drivers/r300/Makefile.in 2017-10-30 14:50:00.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r300/Makefile.in 2018-01-18 21:30:39.000000000 +0000
@@ -103,7 +103,8 @@
TESTS = r300_compiler_tests$(EXEEXT)
subdir = src/gallium/drivers/r300
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_gnu_make.m4 \
+am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_compile_flag.m4 \
+ $(top_srcdir)/m4/ax_check_gnu_make.m4 \
$(top_srcdir)/m4/ax_check_python_mako_module.m4 \
$(top_srcdir)/m4/ax_gcc_builtin.m4 \
$(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
@@ -581,9 +582,9 @@
NVVIEUX_LIBS = @NVVIEUX_LIBS@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
-OMX_CFLAGS = @OMX_CFLAGS@
-OMX_LIBS = @OMX_LIBS@
-OMX_LIB_INSTALL_DIR = @OMX_LIB_INSTALL_DIR@
+OMX_BELLAGIO_CFLAGS = @OMX_BELLAGIO_CFLAGS@
+OMX_BELLAGIO_LIBS = @OMX_BELLAGIO_LIBS@
+OMX_BELLAGIO_LIB_INSTALL_DIR = @OMX_BELLAGIO_LIB_INSTALL_DIR@
OPENCL_LIBNAME = @OPENCL_LIBNAME@
OPENCL_VERSION = @OPENCL_VERSION@
OSMESA_LIB = @OSMESA_LIB@
@@ -637,6 +638,8 @@
VA_LIB_INSTALL_DIR = @VA_LIB_INSTALL_DIR@
VA_MAJOR = @VA_MAJOR@
VA_MINOR = @VA_MINOR@
+VC5_SIMULATOR_CFLAGS = @VC5_SIMULATOR_CFLAGS@
+VC5_SIMULATOR_LIBS = @VC5_SIMULATOR_LIBS@
VDPAU_CFLAGS = @VDPAU_CFLAGS@
VDPAU_LIBS = @VDPAU_LIBS@
VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@
@@ -648,12 +651,15 @@
VL_CFLAGS = @VL_CFLAGS@
VL_LIBS = @VL_LIBS@
VULKAN_ICD_INSTALL_DIR = @VULKAN_ICD_INSTALL_DIR@
-WAYLAND_CFLAGS = @WAYLAND_CFLAGS@
-WAYLAND_LIBS = @WAYLAND_LIBS@
+WAYLAND_CLIENT_CFLAGS = @WAYLAND_CLIENT_CFLAGS@
+WAYLAND_CLIENT_LIBS = @WAYLAND_CLIENT_LIBS@
WAYLAND_PROTOCOLS_DATADIR = @WAYLAND_PROTOCOLS_DATADIR@
WAYLAND_SCANNER = @WAYLAND_SCANNER@
WAYLAND_SCANNER_CFLAGS = @WAYLAND_SCANNER_CFLAGS@
WAYLAND_SCANNER_LIBS = @WAYLAND_SCANNER_LIBS@
+WAYLAND_SERVER_CFLAGS = @WAYLAND_SERVER_CFLAGS@
+WAYLAND_SERVER_LIBS = @WAYLAND_SERVER_LIBS@
+WNO_OVERRIDE_INIT = @WNO_OVERRIDE_INIT@
X11_INCLUDES = @X11_INCLUDES@
XA_MAJOR = @XA_MAJOR@
XA_MINOR = @XA_MINOR@
@@ -890,6 +896,8 @@
-I$(top_srcdir)/src/gallium/auxiliary \
-I$(top_srcdir)/src/gallium/drivers \
-I$(top_srcdir)/src/gallium/winsys \
+ -I$(top_builddir)/src/util/ \
+ -I$(top_builddir)/src/gallium/drivers/ \
$(DEFINES) \
$(PTHREAD_CFLAGS) \
$(LIBDRM_CFLAGS) \
diff -Nru mesa-17.2.4/src/gallium/drivers/r300/r300_context.h mesa-17.3.3/src/gallium/drivers/r300/r300_context.h
--- mesa-17.2.4/src/gallium/drivers/r300/r300_context.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r300/r300_context.h 2018-01-18 21:30:28.000000000 +0000
@@ -743,10 +743,12 @@
/* r300_render.c */
void r500_emit_index_bias(struct r300_context *r300, int index_bias);
void r300_blitter_draw_rectangle(struct blitter_context *blitter,
+ void *vertex_elements_cso,
+ blitter_get_vs_func get_vs,
int x1, int y1, int x2, int y2,
- float depth,
+ float depth, unsigned num_instances,
enum blitter_attrib_type type,
- const union pipe_color_union *attrib);
+ const union blitter_attrib *attrib);
/* r300_state.c */
enum r300_fb_state_change {
diff -Nru mesa-17.2.4/src/gallium/drivers/r300/r300_emit.c mesa-17.3.3/src/gallium/drivers/r300/r300_emit.c
--- mesa-17.2.4/src/gallium/drivers/r300/r300_emit.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r300/r300_emit.c 2018-01-18 21:30:28.000000000 +0000
@@ -25,7 +25,6 @@
#include "util/u_format.h"
#include "util/u_math.h"
-#include "util/u_mm.h"
#include "r300_context.h"
#include "r300_cb.h"
diff -Nru mesa-17.2.4/src/gallium/drivers/r300/r300_hyperz.c mesa-17.3.3/src/gallium/drivers/r300/r300_hyperz.c
--- mesa-17.2.4/src/gallium/drivers/r300/r300_hyperz.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r300/r300_hyperz.c 2018-01-18 21:30:28.000000000 +0000
@@ -26,7 +26,6 @@
#include "r300_fs.h"
#include "util/u_format.h"
-#include "util/u_mm.h"
/*
HiZ rules - taken from various docs
diff -Nru mesa-17.2.4/src/gallium/drivers/r300/r300_public.h mesa-17.3.3/src/gallium/drivers/r300/r300_public.h
--- mesa-17.2.4/src/gallium/drivers/r300/r300_public.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r300/r300_public.h 2018-01-18 21:30:28.000000000 +0000
@@ -7,8 +7,10 @@
#endif
struct radeon_winsys;
+struct pipe_screen_config;
-struct pipe_screen* r300_screen_create(struct radeon_winsys *rws, unsigned flags);
+struct pipe_screen* r300_screen_create(struct radeon_winsys *rws,
+ const struct pipe_screen_config *config);
#ifdef __cplusplus
} // extern "C"
diff -Nru mesa-17.2.4/src/gallium/drivers/r300/r300_query.c mesa-17.3.3/src/gallium/drivers/r300/r300_query.c
--- mesa-17.2.4/src/gallium/drivers/r300/r300_query.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r300/r300_query.c 2018-01-18 21:30:28.000000000 +0000
@@ -39,6 +39,7 @@
if (query_type != PIPE_QUERY_OCCLUSION_COUNTER &&
query_type != PIPE_QUERY_OCCLUSION_PREDICATE &&
+ query_type != PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE &&
query_type != PIPE_QUERY_GPU_FINISHED) {
return NULL;
}
@@ -171,7 +172,8 @@
map++;
}
- if (q->type == PIPE_QUERY_OCCLUSION_PREDICATE) {
+ if (q->type == PIPE_QUERY_OCCLUSION_PREDICATE ||
+ q->type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE) {
vresult->b = temp != 0;
} else {
vresult->u64 = temp;
@@ -195,7 +197,8 @@
mode == PIPE_RENDER_COND_BY_REGION_WAIT;
if (r300_get_query_result(pipe, query, wait, &result)) {
- if (r300_query(query)->type == PIPE_QUERY_OCCLUSION_PREDICATE) {
+ if (r300_query(query)->type == PIPE_QUERY_OCCLUSION_PREDICATE ||
+ r300_query(query)->type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE) {
r300->skip_rendering = condition == result.b;
} else {
r300->skip_rendering = condition == !!result.u64;
diff -Nru mesa-17.2.4/src/gallium/drivers/r300/r300_render.c mesa-17.3.3/src/gallium/drivers/r300/r300_render.c
--- mesa-17.2.4/src/gallium/drivers/r300/r300_render.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r300/r300_render.c 2018-01-18 21:30:28.000000000 +0000
@@ -1113,10 +1113,12 @@
* would be computed and stored twice, which makes the clear/copy codepaths
* somewhat inefficient. Instead we use a rectangular point sprite. */
void r300_blitter_draw_rectangle(struct blitter_context *blitter,
+ void *vertex_elements_cso,
+ blitter_get_vs_func get_vs,
int x1, int y1, int x2, int y2,
- float depth,
+ float depth, unsigned num_instances,
enum blitter_attrib_type type,
- const union pipe_color_union *attrib)
+ const union blitter_attrib *attrib)
{
struct r300_context *r300 = r300_context(util_blitter_get_pipe(blitter));
unsigned last_sprite_coord_enable = r300->sprite_coord_enable;
@@ -1125,21 +1127,28 @@
unsigned vertex_size =
type == UTIL_BLITTER_ATTRIB_COLOR || !r300->draw ? 8 : 4;
unsigned dwords = 13 + vertex_size +
- (type == UTIL_BLITTER_ATTRIB_TEXCOORD ? 7 : 0);
- static const union pipe_color_union zeros;
+ (type == UTIL_BLITTER_ATTRIB_TEXCOORD_XY ? 7 : 0);
+ static const union blitter_attrib zeros;
CS_LOCALS(r300);
/* XXX workaround for a lockup in MSAA resolve on SWTCL chipsets, this
* function most probably doesn't handle type=NONE correctly */
- if (!r300->screen->caps.has_tcl && type == UTIL_BLITTER_ATTRIB_NONE) {
- util_blitter_draw_rectangle(blitter, x1, y1, x2, y2, depth, type, attrib);
+ if ((!r300->screen->caps.has_tcl && type == UTIL_BLITTER_ATTRIB_NONE) ||
+ type == UTIL_BLITTER_ATTRIB_TEXCOORD_XYZW ||
+ num_instances > 1) {
+ util_blitter_draw_rectangle(blitter, vertex_elements_cso, get_vs,
+ x1, y1, x2, y2,
+ depth, num_instances, type, attrib);
return;
}
if (r300->skip_rendering)
return;
- if (type == UTIL_BLITTER_ATTRIB_TEXCOORD)
+ r300->context.bind_vertex_elements_state(&r300->context, vertex_elements_cso);
+ r300->context.bind_vs_state(&r300->context, get_vs(blitter));
+
+ if (type == UTIL_BLITTER_ATTRIB_TEXCOORD_XY)
r300->sprite_coord_enable = 1;
r300_update_derived_state(r300);
@@ -1156,15 +1165,15 @@
/* Set up GA. */
OUT_CS_REG(R300_GA_POINT_SIZE, (height * 6) | ((width * 6) << 16));
- if (type == UTIL_BLITTER_ATTRIB_TEXCOORD) {
+ if (type == UTIL_BLITTER_ATTRIB_TEXCOORD_XY) {
/* Set up the GA to generate texcoords. */
OUT_CS_REG(R300_GB_ENABLE, R300_GB_POINT_STUFF_ENABLE |
(R300_GB_TEX_STR << R300_GB_TEX0_SOURCE_SHIFT));
OUT_CS_REG_SEQ(R300_GA_POINT_S0, 4);
- OUT_CS_32F(attrib->f[0]);
- OUT_CS_32F(attrib->f[3]);
- OUT_CS_32F(attrib->f[2]);
- OUT_CS_32F(attrib->f[1]);
+ OUT_CS_32F(attrib->texcoord.x1);
+ OUT_CS_32F(attrib->texcoord.y2);
+ OUT_CS_32F(attrib->texcoord.x2);
+ OUT_CS_32F(attrib->texcoord.y1);
}
/* Set up VAP controls. */
@@ -1188,7 +1197,7 @@
if (vertex_size == 8) {
if (!attrib)
attrib = &zeros;
- OUT_CS_TABLE(attrib->f, 4);
+ OUT_CS_TABLE(attrib->color, 4);
}
END_CS;
diff -Nru mesa-17.2.4/src/gallium/drivers/r300/r300_screen.c mesa-17.3.3/src/gallium/drivers/r300/r300_screen.c
--- mesa-17.2.4/src/gallium/drivers/r300/r300_screen.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r300/r300_screen.c 2018-01-18 21:30:28.000000000 +0000
@@ -127,7 +127,7 @@
/* r300 cannot do swizzling of compressed textures. Supported otherwise. */
case PIPE_CAP_TEXTURE_SWIZZLE:
- return util_format_s3tc_enabled ? r300screen->caps.dxtc_swizzle : 1;
+ return r300screen->caps.dxtc_swizzle;
/* We don't support color clamping on r500, so that we can use color
* intepolators for generic varyings. */
@@ -242,6 +242,12 @@
case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX:
case PIPE_CAP_POST_DEPTH_COVERAGE:
case PIPE_CAP_BINDLESS_TEXTURE:
+ case PIPE_CAP_NIR_SAMPLERS_AS_DEREF:
+ case PIPE_CAP_QUERY_SO_OVERFLOW:
+ case PIPE_CAP_MEMOBJ:
+ case PIPE_CAP_LOAD_CONSTBUF:
+ case PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS:
+ case PIPE_CAP_TILE_RASTER_ORDER:
return 0;
/* SWTCL-only features. */
@@ -350,8 +356,11 @@
case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
case PIPE_SHADER_CAP_SUBROUTINES:
case PIPE_SHADER_CAP_INTEGERS:
+ case PIPE_SHADER_CAP_INT64_ATOMICS:
+ case PIPE_SHADER_CAP_FP16:
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
@@ -409,10 +418,13 @@
case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
case PIPE_SHADER_CAP_SUBROUTINES:
case PIPE_SHADER_CAP_INTEGERS:
+ case PIPE_SHADER_CAP_FP16:
+ case PIPE_SHADER_CAP_INT64_ATOMICS:
case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
@@ -724,7 +736,8 @@
return rws->fence_wait(rws, fence, timeout);
}
-struct pipe_screen* r300_screen_create(struct radeon_winsys *rws, unsigned flags)
+struct pipe_screen* r300_screen_create(struct radeon_winsys *rws,
+ const struct pipe_screen_config *config)
{
struct r300_screen *r300screen = CALLOC_STRUCT(r300_screen);
@@ -762,7 +775,6 @@
slab_create_parent(&r300screen->pool_transfers, sizeof(struct pipe_transfer), 64);
- util_format_s3tc_init();
(void) mtx_init(&r300screen->cmask_mutex, mtx_plain);
return &r300screen->screen;
diff -Nru mesa-17.2.4/src/gallium/drivers/r300/r300_state.c mesa-17.3.3/src/gallium/drivers/r300/r300_state.c
--- mesa-17.2.4/src/gallium/drivers/r300/r300_state.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r300/r300_state.c 2018-01-18 21:30:28.000000000 +0000
@@ -27,7 +27,6 @@
#include "util/u_half.h"
#include "util/u_helpers.h"
#include "util/u_math.h"
-#include "util/u_mm.h"
#include "util/u_memory.h"
#include "util/u_pack_color.h"
#include "util/u_transfer.h"
diff -Nru mesa-17.2.4/src/gallium/drivers/r300/r300_texture.c mesa-17.3.3/src/gallium/drivers/r300/r300_texture.c
--- mesa-17.2.4/src/gallium/drivers/r300/r300_texture.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r300/r300_texture.c 2018-01-18 21:30:28.000000000 +0000
@@ -34,7 +34,6 @@
#include "util/u_format_s3tc.h"
#include "util/u_math.h"
#include "util/u_memory.h"
-#include "util/u_mm.h"
#include "pipe/p_screen.h"
@@ -251,10 +250,6 @@
/* S3TC formats. */
if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
- if (!util_format_s3tc_enabled) {
- return ~0; /* Unsupported. */
- }
-
switch (format) {
case PIPE_FORMAT_DXT1_RGB:
case PIPE_FORMAT_DXT1_RGBA:
diff -Nru mesa-17.2.4/src/gallium/drivers/r300/r300_tgsi_to_rc.c mesa-17.3.3/src/gallium/drivers/r300/r300_tgsi_to_rc.c
--- mesa-17.2.4/src/gallium/drivers/r300/r300_tgsi_to_rc.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r300/r300_tgsi_to_rc.c 2018-01-18 21:30:28.000000000 +0000
@@ -51,15 +51,12 @@
case TGSI_OPCODE_SGE: return RC_OPCODE_SGE;
case TGSI_OPCODE_MAD: return RC_OPCODE_MAD;
case TGSI_OPCODE_LRP: return RC_OPCODE_LRP;
- /* case TGSI_OPCODE_DP2A: return RC_OPCODE_DP2A; */
case TGSI_OPCODE_FRC: return RC_OPCODE_FRC;
case TGSI_OPCODE_FLR: return RC_OPCODE_FLR;
case TGSI_OPCODE_ROUND: return RC_OPCODE_ROUND;
case TGSI_OPCODE_EX2: return RC_OPCODE_EX2;
case TGSI_OPCODE_LG2: return RC_OPCODE_LG2;
case TGSI_OPCODE_POW: return RC_OPCODE_POW;
- case TGSI_OPCODE_XPD: return RC_OPCODE_XPD;
- case TGSI_OPCODE_DPH: return RC_OPCODE_DPH;
case TGSI_OPCODE_COS: return RC_OPCODE_COS;
case TGSI_OPCODE_DDX: return RC_OPCODE_DDX;
case TGSI_OPCODE_DDY: return RC_OPCODE_DDY;
@@ -85,7 +82,6 @@
/* case TGSI_OPCODE_RET: return RC_OPCODE_RET; */
case TGSI_OPCODE_SSG: return RC_OPCODE_SSG;
case TGSI_OPCODE_CMP: return RC_OPCODE_CMP;
- case TGSI_OPCODE_SCS: return RC_OPCODE_SCS;
case TGSI_OPCODE_TXB: return RC_OPCODE_TXB;
/* case TGSI_OPCODE_DIV: return RC_OPCODE_DIV; */
case TGSI_OPCODE_DP2: return RC_OPCODE_DP2;
@@ -108,7 +104,6 @@
/* case TGSI_OPCODE_OR: return RC_OPCODE_OR; */
/* case TGSI_OPCODE_MOD: return RC_OPCODE_MOD; */
/* case TGSI_OPCODE_XOR: return RC_OPCODE_XOR; */
- /* case TGSI_OPCODE_SAD: return RC_OPCODE_SAD; */
/* case TGSI_OPCODE_TXF: return RC_OPCODE_TXF; */
/* case TGSI_OPCODE_TXQ: return RC_OPCODE_TXQ; */
case TGSI_OPCODE_CONT: return RC_OPCODE_CONT;
@@ -119,8 +114,6 @@
/* case TGSI_OPCODE_ENDLOOP2: return RC_OPCODE_ENDLOOP2; */
/* case TGSI_OPCODE_ENDSUB: return RC_OPCODE_ENDSUB; */
case TGSI_OPCODE_NOP: return RC_OPCODE_NOP;
- /* case TGSI_OPCODE_CALLNZ: return RC_OPCODE_CALLNZ; */
- /* case TGSI_OPCODE_BREAKC: return RC_OPCODE_BREAKC; */
case TGSI_OPCODE_KILL_IF: return RC_OPCODE_KIL;
}
diff -Nru mesa-17.2.4/src/gallium/drivers/r600/Automake.inc mesa-17.3.3/src/gallium/drivers/r600/Automake.inc
--- mesa-17.2.4/src/gallium/drivers/r600/Automake.inc 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r600/Automake.inc 2018-01-18 21:30:28.000000000 +0000
@@ -11,6 +11,4 @@
TARGET_RADEON_WINSYS = \
$(top_builddir)/src/gallium/winsys/radeon/drm/libradeonwinsys.la
-TARGET_RADEON_COMMON = \
- $(top_builddir)/src/gallium/drivers/radeon/libradeon.la
endif
diff -Nru mesa-17.2.4/src/gallium/drivers/r600/cayman_msaa.c mesa-17.3.3/src/gallium/drivers/r600/cayman_msaa.c
--- mesa-17.2.4/src/gallium/drivers/r600/cayman_msaa.c 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r600/cayman_msaa.c 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,270 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Marek Olšák
+ *
+ */
+
+#include "r600_cs.h"
+#include "evergreend.h"
+
+/* 2xMSAA
+ * There are two locations (4, 4), (-4, -4). */
+const uint32_t eg_sample_locs_2x[4] = {
+ FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
+ FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
+ FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
+ FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
+};
+const unsigned eg_max_dist_2x = 4;
+/* 4xMSAA
+ * There are 4 locations: (-2, -6), (6, -2), (-6, 2), (2, 6). */
+const uint32_t eg_sample_locs_4x[4] = {
+ FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
+ FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
+ FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
+ FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
+};
+const unsigned eg_max_dist_4x = 6;
+
+/* Cayman 8xMSAA */
+static const uint32_t cm_sample_locs_8x[] = {
+ FILL_SREG( 1, -3, -1, 3, 5, 1, -3, -5),
+ FILL_SREG( 1, -3, -1, 3, 5, 1, -3, -5),
+ FILL_SREG( 1, -3, -1, 3, 5, 1, -3, -5),
+ FILL_SREG( 1, -3, -1, 3, 5, 1, -3, -5),
+ FILL_SREG(-5, 5, -7, -1, 3, 7, 7, -7),
+ FILL_SREG(-5, 5, -7, -1, 3, 7, 7, -7),
+ FILL_SREG(-5, 5, -7, -1, 3, 7, 7, -7),
+ FILL_SREG(-5, 5, -7, -1, 3, 7, 7, -7),
+};
+static const unsigned cm_max_dist_8x = 8;
+/* Cayman 16xMSAA */
+static const uint32_t cm_sample_locs_16x[] = {
+ FILL_SREG( 1, 1, -1, -3, -3, 2, 4, -1),
+ FILL_SREG( 1, 1, -1, -3, -3, 2, 4, -1),
+ FILL_SREG( 1, 1, -1, -3, -3, 2, 4, -1),
+ FILL_SREG( 1, 1, -1, -3, -3, 2, 4, -1),
+ FILL_SREG(-5, -2, 2, 5, 5, 3, 3, -5),
+ FILL_SREG(-5, -2, 2, 5, 5, 3, 3, -5),
+ FILL_SREG(-5, -2, 2, 5, 5, 3, 3, -5),
+ FILL_SREG(-5, -2, 2, 5, 5, 3, 3, -5),
+ FILL_SREG(-2, 6, 0, -7, -4, -6, -6, 4),
+ FILL_SREG(-2, 6, 0, -7, -4, -6, -6, 4),
+ FILL_SREG(-2, 6, 0, -7, -4, -6, -6, 4),
+ FILL_SREG(-2, 6, 0, -7, -4, -6, -6, 4),
+ FILL_SREG(-8, 0, 7, -4, 6, 7, -7, -8),
+ FILL_SREG(-8, 0, 7, -4, 6, 7, -7, -8),
+ FILL_SREG(-8, 0, 7, -4, 6, 7, -7, -8),
+ FILL_SREG(-8, 0, 7, -4, 6, 7, -7, -8),
+};
+static const unsigned cm_max_dist_16x = 8;
+
+void cayman_get_sample_position(struct pipe_context *ctx, unsigned sample_count,
+ unsigned sample_index, float *out_value)
+{ /* Writes the position of sample_index to out_value[0] and out_value[1]; ctx is not referenced in this body. */
+ int offset, index;
+ struct {
+ int idx:4; /* 4-bit field used to read a packed nibble back as a signed value; NOTE(review): bit-field signedness and out-of-range conversion are implementation-defined - confirm for target compilers */
+ } val;
+ switch (sample_count) {
+ case 1:
+ default: /* any count without a table below is handled like a single sample */
+ out_value[0] = out_value[1] = 0.5;
+ break;
+ case 2:
+ offset = 4 * (sample_index * 2); /* each sample occupies two consecutive 4-bit fields */
+ val.idx = (eg_sample_locs_2x[0] >> offset) & 0xf; /* first nibble of the pair */
+ out_value[0] = (float)(val.idx + 8) / 16.0f; /* add 8, then divide by 16 */
+ val.idx = (eg_sample_locs_2x[0] >> (offset + 4)) & 0xf; /* second nibble of the pair */
+ out_value[1] = (float)(val.idx + 8) / 16.0f;
+ break;
+ case 4:
+ offset = 4 * (sample_index * 2); /* same decoding as the 2x case, from the 4x table */
+ val.idx = (eg_sample_locs_4x[0] >> offset) & 0xf;
+ out_value[0] = (float)(val.idx + 8) / 16.0f;
+ val.idx = (eg_sample_locs_4x[0] >> (offset + 4)) & 0xf;
+ out_value[1] = (float)(val.idx + 8) / 16.0f;
+ break;
+ case 8:
+ offset = 4 * (sample_index % 4 * 2); /* one table word holds four samples */
+ index = (sample_index / 4) * 4; /* table stores each word four times in a row, so step by 4 */
+ val.idx = (cm_sample_locs_8x[index] >> offset) & 0xf;
+ out_value[0] = (float)(val.idx + 8) / 16.0f;
+ val.idx = (cm_sample_locs_8x[index] >> (offset + 4)) & 0xf;
+ out_value[1] = (float)(val.idx + 8) / 16.0f;
+ break;
+ case 16:
+ offset = 4 * (sample_index % 4 * 2); /* same word/offset scheme as the 8x case */
+ index = (sample_index / 4) * 4;
+ val.idx = (cm_sample_locs_16x[index] >> offset) & 0xf;
+ out_value[0] = (float)(val.idx + 8) / 16.0f;
+ val.idx = (cm_sample_locs_16x[index] >> (offset + 4)) & 0xf;
+ out_value[1] = (float)(val.idx + 8) / 16.0f;
+ break;
+ }
+}
+
+void cayman_init_msaa(struct pipe_context *ctx)
+{ /* Fills the sample_locations_* arrays of the context by querying cayman_get_sample_position for every sample. */
+ struct r600_common_context *rctx = (struct r600_common_context*)ctx; /* NOTE(review): assumes ctx is really an r600_common_context - confirm against callers */
+ int i;
+
+ cayman_get_sample_position(ctx, 1, 0, rctx->sample_locations_1x[0]); /* single-sample case has one entry */
+
+ for (i = 0; i < 2; i++) /* one call per sample index for each supported count */
+ cayman_get_sample_position(ctx, 2, i, rctx->sample_locations_2x[i]);
+ for (i = 0; i < 4; i++)
+ cayman_get_sample_position(ctx, 4, i, rctx->sample_locations_4x[i]);
+ for (i = 0; i < 8; i++)
+ cayman_get_sample_position(ctx, 8, i, rctx->sample_locations_8x[i]);
+ for (i = 0; i < 16; i++)
+ cayman_get_sample_position(ctx, 16, i, rctx->sample_locations_16x[i]);
+}
+
+void cayman_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples)
+{ /* Emits the PA_SC_AA_SAMPLE_LOCS_PIXEL_* values for nr_samples into cs. */
+ switch (nr_samples) {
+ default: /* counts without a table fall through to the single-sample case */
+ case 1: /* all four registers are written with 0 */
+ radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 0);
+ radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, 0);
+ radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, 0);
+ radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, 0);
+ break;
+ case 2: /* table entries 0..3 go to the X0Y0, X1Y0, X0Y1 and X1Y1 registers respectively */
+ radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_2x[0]);
+ radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_2x[1]);
+ radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, eg_sample_locs_2x[2]);
+ radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, eg_sample_locs_2x[3]);
+ break;
+ case 4: /* same register layout as the 2x case, using the 4x table */
+ radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_4x[0]);
+ radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_4x[1]);
+ radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, eg_sample_locs_4x[2]);
+ radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, eg_sample_locs_4x[3]);
+ break;
+ case 8: /* a 14-value sequence starting at X0Y0_0; the 14 radeon_emit calls below supply it */
+ radeon_set_context_reg_seq(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 14);
+ radeon_emit(cs, cm_sample_locs_8x[0]); /* each group: entry n, entry n+4, then two zero words */
+ radeon_emit(cs, cm_sample_locs_8x[4]);
+ radeon_emit(cs, 0);
+ radeon_emit(cs, 0);
+ radeon_emit(cs, cm_sample_locs_8x[1]);
+ radeon_emit(cs, cm_sample_locs_8x[5]);
+ radeon_emit(cs, 0);
+ radeon_emit(cs, 0);
+ radeon_emit(cs, cm_sample_locs_8x[2]);
+ radeon_emit(cs, cm_sample_locs_8x[6]);
+ radeon_emit(cs, 0);
+ radeon_emit(cs, 0);
+ radeon_emit(cs, cm_sample_locs_8x[3]);
+ radeon_emit(cs, cm_sample_locs_8x[7]); /* last group has no trailing zero words, hence 14 rather than 16 */
+ break;
+ case 16: /* a 16-value sequence starting at X0Y0_0 */
+ radeon_set_context_reg_seq(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 16);
+ radeon_emit(cs, cm_sample_locs_16x[0]); /* each group: entries n, n+4, n+8, n+12 */
+ radeon_emit(cs, cm_sample_locs_16x[4]);
+ radeon_emit(cs, cm_sample_locs_16x[8]);
+ radeon_emit(cs, cm_sample_locs_16x[12]);
+ radeon_emit(cs, cm_sample_locs_16x[1]);
+ radeon_emit(cs, cm_sample_locs_16x[5]);
+ radeon_emit(cs, cm_sample_locs_16x[9]);
+ radeon_emit(cs, cm_sample_locs_16x[13]);
+ radeon_emit(cs, cm_sample_locs_16x[2]);
+ radeon_emit(cs, cm_sample_locs_16x[6]);
+ radeon_emit(cs, cm_sample_locs_16x[10]);
+ radeon_emit(cs, cm_sample_locs_16x[14]);
+ radeon_emit(cs, cm_sample_locs_16x[3]);
+ radeon_emit(cs, cm_sample_locs_16x[7]);
+ radeon_emit(cs, cm_sample_locs_16x[11]);
+ radeon_emit(cs, cm_sample_locs_16x[15]);
+ break;
+ }
+}
+
+void cayman_emit_msaa_config(struct radeon_winsys_cs *cs, int nr_samples,
+ int ps_iter_samples, int overrast_samples,
+ unsigned sc_mode_cntl_1)
+{ /* Emits PA_SC_LINE_CNTL, PA_SC_AA_CONFIG, DB_EQAA and PA_SC_MODE_CNTL_1 values into cs. */
+ int setup_samples = nr_samples > 1 ? nr_samples :
+ overrast_samples > 1 ? overrast_samples : 0; /* nr_samples takes priority; 0 when neither exceeds 1 */
+ /* Required by OpenGL line rasterization.
+ *
+ * TODO: We should also enable perpendicular endcaps for AA lines,
+ * but that requires implementing line stippling in the pixel
+ * shader. SC can only do line stippling with axis-aligned
+ * endcaps.
+ */
+ unsigned sc_line_cntl = S_028BDC_DX10_DIAMOND_TEST_ENA(1);
+
+ if (setup_samples > 1) {
+ /* indexed by log2(setup_samples), i.e. log_samples below */
+ unsigned max_dist[] = {
+ 0,
+ eg_max_dist_2x,
+ eg_max_dist_4x,
+ cm_max_dist_8x,
+ cm_max_dist_16x
+ };
+ unsigned log_samples = util_logbase2(setup_samples); /* NOTE(review): max_dist has 5 entries, so this assumes setup_samples <= 16 - confirm callers */
+ unsigned log_ps_iter_samples =
+ util_logbase2(util_next_power_of_two(ps_iter_samples));
+
+ radeon_set_context_reg_seq(cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2);
+ radeon_emit(cs, sc_line_cntl |
+ S_028BDC_EXPAND_LINE_WIDTH(1)); /* CM_R_028BDC_PA_SC_LINE_CNTL */
+ radeon_emit(cs, S_028BE0_MSAA_NUM_SAMPLES(log_samples) |
+ S_028BE0_MAX_SAMPLE_DIST(max_dist[log_samples]) |
+ S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples)); /* CM_R_028BE0_PA_SC_AA_CONFIG */
+
+ if (nr_samples > 1) { /* multisampling requested via nr_samples */
+ radeon_set_context_reg(cs, CM_R_028804_DB_EQAA,
+ S_028804_MAX_ANCHOR_SAMPLES(log_samples) |
+ S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) |
+ S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) |
+ S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples) |
+ S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
+ S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
+ radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
+ EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1) |
+ sc_mode_cntl_1); /* PS_ITER_SAMPLE bit is set only when ps_iter_samples > 1 */
+ } else if (overrast_samples > 1) { /* only overrasterization requested */
+ radeon_set_context_reg(cs, CM_R_028804_DB_EQAA,
+ S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
+ S_028804_STATIC_ANCHOR_ASSOCIATIONS(1) |
+ S_028804_OVERRASTERIZATION_AMOUNT(log_samples));
+ radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
+ sc_mode_cntl_1); /* caller-provided value is passed through unchanged */
+ }
+ } else { /* neither sample count exceeds 1: no EXPAND_LINE_WIDTH and a zero AA config */
+ radeon_set_context_reg_seq(cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2);
+ radeon_emit(cs, sc_line_cntl); /* CM_R_028BDC_PA_SC_LINE_CNTL */
+ radeon_emit(cs, 0); /* CM_R_028BE0_PA_SC_AA_CONFIG */
+
+ radeon_set_context_reg(cs, CM_R_028804_DB_EQAA,
+ S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
+ S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
+ radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
+ sc_mode_cntl_1);
+ }
+}
diff -Nru mesa-17.2.4/src/gallium/drivers/r600/eg_asm.c mesa-17.3.3/src/gallium/drivers/r600/eg_asm.c
--- mesa-17.2.4/src/gallium/drivers/r600/eg_asm.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r600/eg_asm.c 2018-01-18 21:30:28.000000000 +0000
@@ -71,9 +71,12 @@
} else if (cfop->flags & CF_CLAUSE) {
/* CF_TEX/VTX (CF_ALU already handled above) */
bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->addr >> 1);
- bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(opcode) |
+ bc->bytecode[id] = S_SQ_CF_WORD1_CF_INST(opcode) |
S_SQ_CF_WORD1_BARRIER(1) |
S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1);
+ if (bc->chip_class == EVERGREEN) /* no EOP on cayman */
+ bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program);
+ id++;
} else if (cfop->flags & CF_EXP) {
/* EXPORT instructions */
bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf->output.gpr) |
@@ -111,12 +114,14 @@
} else {
/* other instructions */
bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1);
- bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(opcode)|
+ bc->bytecode[id] = S_SQ_CF_WORD1_CF_INST(opcode) |
S_SQ_CF_WORD1_BARRIER(1) |
S_SQ_CF_WORD1_COND(cf->cond) |
S_SQ_CF_WORD1_POP_COUNT(cf->pop_count) |
- S_SQ_CF_WORD1_COUNT(cf->count) |
- S_SQ_CF_WORD1_END_OF_PROGRAM(cf->end_of_program);
+ S_SQ_CF_WORD1_COUNT(cf->count);
+ if (bc->chip_class == EVERGREEN) /* no EOP on cayman */
+ bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program);
+ id++;
}
}
return 0;
diff -Nru mesa-17.2.4/src/gallium/drivers/r600/evergreen_compute.c mesa-17.3.3/src/gallium/drivers/r600/evergreen_compute.c
--- mesa-17.2.4/src/gallium/drivers/r600/evergreen_compute.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r600/evergreen_compute.c 2018-01-18 21:30:28.000000000 +0000
@@ -746,8 +746,9 @@
radeon_compute_set_context_reg_seq(cs, R_0288D0_SQ_PGM_START_LS, 3);
radeon_emit(cs, va >> 8); /* R_0288D0_SQ_PGM_START_LS */
radeon_emit(cs, /* R_0288D4_SQ_PGM_RESOURCES_LS */
- S_0288D4_NUM_GPRS(ngpr)
- | S_0288D4_STACK_SIZE(nstack));
+ S_0288D4_NUM_GPRS(ngpr) |
+ S_0288D4_DX10_CLAMP(1) |
+ S_0288D4_STACK_SIZE(nstack));
radeon_emit(cs, 0); /* R_0288D8_SQ_PGM_RESOURCES_LS_2 */
radeon_emit(cs, PKT3C(PKT3_NOP, 0, 0));
diff -Nru mesa-17.2.4/src/gallium/drivers/r600/evergreend.h mesa-17.3.3/src/gallium/drivers/r600/evergreend.h
--- mesa-17.2.4/src/gallium/drivers/r600/evergreend.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r600/evergreend.h 2018-01-18 21:30:28.000000000 +0000
@@ -201,6 +201,116 @@
/* Registers */
#define R_0084FC_CP_STRMOUT_CNTL 0x0084FC
#define S_0084FC_OFFSET_UPDATE_DONE(x) (((unsigned)(x) & 0x1) << 0)
+#define R_028B94_VGT_STRMOUT_CONFIG 0x028B94
+#define S_028B94_STREAMOUT_0_EN(x) (((unsigned)(x) & 0x1) << 0)
+#define G_028B94_STREAMOUT_0_EN(x) (((x) >> 0) & 0x1)
+#define C_028B94_STREAMOUT_0_EN 0xFFFFFFFE
+#define S_028B94_STREAMOUT_1_EN(x) (((unsigned)(x) & 0x1) << 1)
+#define G_028B94_STREAMOUT_1_EN(x) (((x) >> 1) & 0x1)
+#define C_028B94_STREAMOUT_1_EN 0xFFFFFFFD
+#define S_028B94_STREAMOUT_2_EN(x) (((unsigned)(x) & 0x1) << 2)
+#define G_028B94_STREAMOUT_2_EN(x) (((x) >> 2) & 0x1)
+#define C_028B94_STREAMOUT_2_EN 0xFFFFFFFB
+#define S_028B94_STREAMOUT_3_EN(x) (((unsigned)(x) & 0x1) << 3)
+#define G_028B94_STREAMOUT_3_EN(x) (((x) >> 3) & 0x1)
+#define C_028B94_STREAMOUT_3_EN 0xFFFFFFF7
+#define S_028B94_RAST_STREAM(x) (((unsigned)(x) & 0x07) << 4)
+#define G_028B94_RAST_STREAM(x) (((x) >> 4) & 0x07)
+#define C_028B94_RAST_STREAM 0xFFFFFF8F
+#define S_028B94_RAST_STREAM_MASK(x) (((unsigned)(x) & 0x0F) << 8) /* SI+ */
+#define G_028B94_RAST_STREAM_MASK(x) (((x) >> 8) & 0x0F)
+#define C_028B94_RAST_STREAM_MASK 0xFFFFF0FF
+#define S_028B94_USE_RAST_STREAM_MASK(x) (((unsigned)(x) & 0x1) << 31) /* SI+ */
+#define G_028B94_USE_RAST_STREAM_MASK(x) (((x) >> 31) & 0x1)
+#define C_028B94_USE_RAST_STREAM_MASK 0x7FFFFFFF
+#define R_028B98_VGT_STRMOUT_BUFFER_CONFIG 0x028B98
+#define S_028B98_STREAM_0_BUFFER_EN(x) (((unsigned)(x) & 0x0F) << 0)
+#define G_028B98_STREAM_0_BUFFER_EN(x) (((x) >> 0) & 0x0F)
+#define C_028B98_STREAM_0_BUFFER_EN 0xFFFFFFF0
+#define S_028B98_STREAM_1_BUFFER_EN(x) (((unsigned)(x) & 0x0F) << 4)
+#define G_028B98_STREAM_1_BUFFER_EN(x) (((x) >> 4) & 0x0F)
+#define C_028B98_STREAM_1_BUFFER_EN 0xFFFFFF0F
+#define S_028B98_STREAM_2_BUFFER_EN(x) (((unsigned)(x) & 0x0F) << 8)
+#define G_028B98_STREAM_2_BUFFER_EN(x) (((x) >> 8) & 0x0F)
+#define C_028B98_STREAM_2_BUFFER_EN 0xFFFFF0FF
+#define S_028B98_STREAM_3_BUFFER_EN(x) (((unsigned)(x) & 0x0F) << 12)
+#define G_028B98_STREAM_3_BUFFER_EN(x) (((x) >> 12) & 0x0F)
+#define C_028B98_STREAM_3_BUFFER_EN 0xFFFF0FFF
+
+#define EG_R_028A4C_PA_SC_MODE_CNTL_1 0x028A4C
+#define EG_S_028A4C_PS_ITER_SAMPLE(x) (((unsigned)(x) & 0x1) << 16)
+#define EG_S_028A4C_FORCE_EOV_CNTDWN_ENABLE(x) (((unsigned)(x) & 0x1) << 25)
+#define EG_S_028A4C_FORCE_EOV_REZ_ENABLE(x) (((unsigned)(x) & 0x1) << 26)
+#define CM_R_028804_DB_EQAA 0x00028804
+#define S_028804_MAX_ANCHOR_SAMPLES(x) (((unsigned)(x) & 0x07) << 0)
+#define G_028804_MAX_ANCHOR_SAMPLES(x) (((x) >> 0) & 0x07)
+#define C_028804_MAX_ANCHOR_SAMPLES 0xFFFFFFF8
+#define S_028804_PS_ITER_SAMPLES(x) (((unsigned)(x) & 0x07) << 4)
+#define G_028804_PS_ITER_SAMPLES(x) (((x) >> 4) & 0x07)
+#define C_028804_PS_ITER_SAMPLES 0xFFFFFF8F
+#define S_028804_MASK_EXPORT_NUM_SAMPLES(x) (((unsigned)(x) & 0x07) << 8)
+#define G_028804_MASK_EXPORT_NUM_SAMPLES(x) (((x) >> 8) & 0x07)
+#define C_028804_MASK_EXPORT_NUM_SAMPLES 0xFFFFF8FF
+#define S_028804_ALPHA_TO_MASK_NUM_SAMPLES(x) (((unsigned)(x) & 0x07) << 12)
+#define G_028804_ALPHA_TO_MASK_NUM_SAMPLES(x) (((x) >> 12) & 0x07)
+#define C_028804_ALPHA_TO_MASK_NUM_SAMPLES 0xFFFF8FFF
+#define S_028804_HIGH_QUALITY_INTERSECTIONS(x) (((unsigned)(x) & 0x1) << 16)
+#define G_028804_HIGH_QUALITY_INTERSECTIONS(x) (((x) >> 16) & 0x1)
+#define C_028804_HIGH_QUALITY_INTERSECTIONS 0xFFFEFFFF
+#define S_028804_INCOHERENT_EQAA_READS(x) (((unsigned)(x) & 0x1) << 17)
+#define G_028804_INCOHERENT_EQAA_READS(x) (((x) >> 17) & 0x1)
+#define C_028804_INCOHERENT_EQAA_READS 0xFFFDFFFF
+#define S_028804_INTERPOLATE_COMP_Z(x) (((unsigned)(x) & 0x1) << 18)
+#define G_028804_INTERPOLATE_COMP_Z(x) (((x) >> 18) & 0x1)
+#define C_028804_INTERPOLATE_COMP_Z 0xFFFBFFFF
+#define S_028804_INTERPOLATE_SRC_Z(x) (((unsigned)(x) & 0x1) << 19)
+#define G_028804_INTERPOLATE_SRC_Z(x) (((x) >> 19) & 0x1)
+#define C_028804_INTERPOLATE_SRC_Z 0xFFF7FFFF
+#define S_028804_STATIC_ANCHOR_ASSOCIATIONS(x) (((unsigned)(x) & 0x1) << 20)
+#define G_028804_STATIC_ANCHOR_ASSOCIATIONS(x) (((x) >> 20) & 0x1)
+#define C_028804_STATIC_ANCHOR_ASSOCIATIONS 0xFFEFFFFF
+#define S_028804_ALPHA_TO_MASK_EQAA_DISABLE(x) (((unsigned)(x) & 0x1) << 21)
+#define G_028804_ALPHA_TO_MASK_EQAA_DISABLE(x) (((x) >> 21) & 0x1)
+#define C_028804_ALPHA_TO_MASK_EQAA_DISABLE 0xFFDFFFFF
+#define S_028804_OVERRASTERIZATION_AMOUNT(x) (((unsigned)(x) & 0x07) << 24)
+#define G_028804_OVERRASTERIZATION_AMOUNT(x) (((x) >> 24) & 0x07)
+#define C_028804_OVERRASTERIZATION_AMOUNT 0xF8FFFFFF
+#define S_028804_ENABLE_POSTZ_OVERRASTERIZATION(x) (((unsigned)(x) & 0x1) << 27)
+#define G_028804_ENABLE_POSTZ_OVERRASTERIZATION(x) (((x) >> 27) & 0x1)
+#define C_028804_ENABLE_POSTZ_OVERRASTERIZATION 0xF7FFFFFF
+#define CM_R_028BDC_PA_SC_LINE_CNTL 0x28bdc
+#define S_028BDC_EXPAND_LINE_WIDTH(x) (((unsigned)(x) & 0x1) << 9)
+#define G_028BDC_EXPAND_LINE_WIDTH(x) (((x) >> 9) & 0x1)
+#define C_028BDC_EXPAND_LINE_WIDTH 0xFFFFFDFF
+#define S_028BDC_LAST_PIXEL(x) (((unsigned)(x) & 0x1) << 10)
+#define G_028BDC_LAST_PIXEL(x) (((x) >> 10) & 0x1)
+#define C_028BDC_LAST_PIXEL 0xFFFFFBFF
+#define S_028BDC_PERPENDICULAR_ENDCAP_ENA(x) (((unsigned)(x) & 0x1) << 11)
+#define G_028BDC_PERPENDICULAR_ENDCAP_ENA(x) (((x) >> 11) & 0x1)
+#define C_028BDC_PERPENDICULAR_ENDCAP_ENA 0xFFFFF7FF
+#define S_028BDC_DX10_DIAMOND_TEST_ENA(x) (((unsigned)(x) & 0x1) << 12)
+#define G_028BDC_DX10_DIAMOND_TEST_ENA(x) (((x) >> 12) & 0x1)
+#define C_028BDC_DX10_DIAMOND_TEST_ENA 0xFFFFEFFF
+#define CM_R_028BE0_PA_SC_AA_CONFIG 0x28be0
+#define S_028BE0_MSAA_NUM_SAMPLES(x) (((unsigned)(x) & 0x07) << 0)
+#define G_028BE0_MSAA_NUM_SAMPLES(x) (((x) >> 0) & 0x07)
+#define C_028BE0_MSAA_NUM_SAMPLES 0xFFFFFFF8
+#define S_028BE0_AA_MASK_CENTROID_DTMN(x) (((unsigned)(x) & 0x1) << 4)
+#define G_028BE0_AA_MASK_CENTROID_DTMN(x) (((x) >> 4) & 0x1)
+#define C_028BE0_AA_MASK_CENTROID_DTMN 0xFFFFFFEF
+#define S_028BE0_MAX_SAMPLE_DIST(x) (((unsigned)(x) & 0x0F) << 13)
+#define G_028BE0_MAX_SAMPLE_DIST(x) (((x) >> 13) & 0x0F)
+#define C_028BE0_MAX_SAMPLE_DIST 0xFFFE1FFF
+#define S_028BE0_MSAA_EXPOSED_SAMPLES(x) (((unsigned)(x) & 0x07) << 20)
+#define G_028BE0_MSAA_EXPOSED_SAMPLES(x) (((x) >> 20) & 0x07)
+#define C_028BE0_MSAA_EXPOSED_SAMPLES 0xFF8FFFFF
+#define S_028BE0_DETAIL_TO_EXPOSED_MODE(x) (((unsigned)(x) & 0x03) << 24)
+#define G_028BE0_DETAIL_TO_EXPOSED_MODE(x) (((x) >> 24) & 0x03)
+#define C_028BE0_DETAIL_TO_EXPOSED_MODE 0xFCFFFFFF
+#define CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 0x28bf8
+#define CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0 0x28c08
+#define CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0 0x28c18
+#define CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0 0x28c28
#define R_008960_VGT_STRMOUT_BUFFER_FILLED_SIZE_0 0x008960 /* read-only */
#define R_008964_VGT_STRMOUT_BUFFER_FILLED_SIZE_1 0x008964 /* read-only */
#define R_008968_VGT_STRMOUT_BUFFER_FILLED_SIZE_2 0x008968 /* read-only */
diff -Nru mesa-17.2.4/src/gallium/drivers/r600/evergreen_state.c mesa-17.3.3/src/gallium/drivers/r600/evergreen_state.c
--- mesa-17.2.4/src/gallium/drivers/r600/evergreen_state.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r600/evergreen_state.c 2018-01-18 21:30:28.000000000 +0000
@@ -1371,7 +1371,7 @@
surf->db_depth_slice = S_02805C_SLICE_TILE_MAX(levelinfo->nblk_x *
levelinfo->nblk_y / 64 - 1);
- if (rtex->surface.flags & RADEON_SURF_SBUFFER) {
+ if (rtex->surface.has_stencil) {
uint64_t stencil_offset;
unsigned stile_split = rtex->surface.u.legacy.stencil_tile_split;
@@ -1392,8 +1392,7 @@
S_028044_FORMAT(V_028044_STENCIL_8);
}
- /* use htile only for first level */
- if (rtex->htile_offset && !level) {
+ if (r600_htile_enabled(rtex, level)) {
uint64_t va = rtex->resource.gpu_address + rtex->htile_offset;
surf->db_htile_data_base = va >> 8;
surf->db_htile_surface = S_028ABC_HTILE_WIDTH(1) |
@@ -1651,7 +1650,7 @@
S_028C00_EXPAND_LINE_WIDTH(1)); /* R_028C00_PA_SC_LINE_CNTL */
radeon_emit(cs, S_028C04_MSAA_NUM_SAMPLES(util_logbase2(nr_samples)) |
S_028C04_MAX_SAMPLE_DIST(max_dist)); /* R_028C04_PA_SC_AA_CONFIG */
- radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
+ radeon_set_context_reg(cs, R_028A4C_PA_SC_MODE_CNTL_1,
EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1) |
EG_S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
EG_S_028A4C_FORCE_EOV_REZ_ENABLE(1));
@@ -1659,7 +1658,7 @@
radeon_set_context_reg_seq(cs, R_028C00_PA_SC_LINE_CNTL, 2);
radeon_emit(cs, S_028C00_LAST_PIXEL(1)); /* R_028C00_PA_SC_LINE_CNTL */
radeon_emit(cs, 0); /* R_028C04_PA_SC_AA_CONFIG */
- radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
+ radeon_set_context_reg(cs, R_028A4C_PA_SC_MODE_CNTL_1,
EG_S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
EG_S_028A4C_FORCE_EOV_REZ_ENABLE(1));
}
@@ -2298,6 +2297,9 @@
struct r600_cso_state *state = (struct r600_cso_state*)a;
struct r600_fetch_shader *shader = (struct r600_fetch_shader*)state->cso;
+ if (!shader)
+ return;
+
radeon_set_context_reg(cs, R_0288A4_SQ_PGM_START_FS,
(shader->buffer->gpu_address + shader->offset) >> 8);
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
@@ -3230,6 +3232,7 @@
r600_store_value(cb, /* R_028844_SQ_PGM_RESOURCES_PS */
S_028844_NUM_GPRS(rshader->bc.ngpr) |
S_028844_PRIME_CACHE_ON_DRAW(1) |
+ S_028844_DX10_CLAMP(1) |
S_028844_STACK_SIZE(rshader->bc.nstack));
/* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ). */
@@ -3250,6 +3253,7 @@
r600_store_context_reg(cb, R_028890_SQ_PGM_RESOURCES_ES,
S_028890_NUM_GPRS(rshader->bc.ngpr) |
+ S_028890_DX10_CLAMP(1) |
S_028890_STACK_SIZE(rshader->bc.nstack));
r600_store_context_reg(cb, R_02888C_SQ_PGM_START_ES,
shader->bo->gpu_address >> 8);
@@ -3312,6 +3316,7 @@
r600_store_context_reg(cb, R_028878_SQ_PGM_RESOURCES_GS,
S_028878_NUM_GPRS(rshader->bc.ngpr) |
+ S_028878_DX10_CLAMP(1) |
S_028878_STACK_SIZE(rshader->bc.nstack));
r600_store_context_reg(cb, R_028874_SQ_PGM_START_GS,
shader->bo->gpu_address >> 8);
@@ -3352,6 +3357,7 @@
S_0286C4_VS_EXPORT_COUNT(nparams - 1));
r600_store_context_reg(cb, R_028860_SQ_PGM_RESOURCES_VS,
S_028860_NUM_GPRS(rshader->bc.ngpr) |
+ S_028860_DX10_CLAMP(1) |
S_028860_STACK_SIZE(rshader->bc.nstack));
if (rshader->vs_position_window_space) {
r600_store_context_reg(cb, R_028818_PA_CL_VTE_CNTL,
@@ -3386,6 +3392,7 @@
r600_init_command_buffer(cb, 32);
r600_store_context_reg(cb, R_0288BC_SQ_PGM_RESOURCES_HS,
S_0288BC_NUM_GPRS(rshader->bc.ngpr) |
+ S_0288BC_DX10_CLAMP(1) |
S_0288BC_STACK_SIZE(rshader->bc.nstack));
r600_store_context_reg(cb, R_0288B8_SQ_PGM_START_HS,
shader->bo->gpu_address >> 8);
@@ -3399,6 +3406,7 @@
r600_init_command_buffer(cb, 32);
r600_store_context_reg(cb, R_0288D4_SQ_PGM_RESOURCES_LS,
S_0288D4_NUM_GPRS(rshader->bc.ngpr) |
+ S_0288D4_DX10_CLAMP(1) |
S_0288D4_STACK_SIZE(rshader->bc.nstack));
r600_store_context_reg(cb, R_0288D0_SQ_PGM_START_LS,
shader->bo->gpu_address >> 8);
diff -Nru mesa-17.2.4/src/gallium/drivers/r600/Makefile.am mesa-17.3.3/src/gallium/drivers/r600/Makefile.am
--- mesa-17.2.4/src/gallium/drivers/r600/Makefile.am 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r600/Makefile.am 2018-01-18 21:30:28.000000000 +0000
@@ -27,8 +27,7 @@
if HAVE_GALLIUM_LLVM
AM_CFLAGS += \
- $(LLVM_CFLAGS) \
- -I$(top_srcdir)/src/gallium/drivers/radeon/
+ $(LLVM_CFLAGS)
endif
diff -Nru mesa-17.2.4/src/gallium/drivers/r600/Makefile.in mesa-17.3.3/src/gallium/drivers/r600/Makefile.in
--- mesa-17.2.4/src/gallium/drivers/r600/Makefile.in 2017-10-30 14:50:00.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r600/Makefile.in 2018-01-18 21:30:39.000000000 +0000
@@ -100,15 +100,15 @@
@HAVE_DRISW_KMS_TRUE@ $(LIBDRM_LIBS)
@HAVE_GALLIUM_LLVM_TRUE@am__append_4 = \
-@HAVE_GALLIUM_LLVM_TRUE@ $(LLVM_CFLAGS) \
-@HAVE_GALLIUM_LLVM_TRUE@ -I$(top_srcdir)/src/gallium/drivers/radeon/
+@HAVE_GALLIUM_LLVM_TRUE@ $(LLVM_CFLAGS)
@HAVE_GALLIUM_COMPUTE_TRUE@am__append_5 = \
@HAVE_GALLIUM_COMPUTE_TRUE@ -DHAVE_OPENCL
subdir = src/gallium/drivers/r600
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_gnu_make.m4 \
+am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_compile_flag.m4 \
+ $(top_srcdir)/m4/ax_check_gnu_make.m4 \
$(top_srcdir)/m4/ax_check_python_mako_module.m4 \
$(top_srcdir)/m4/ax_gcc_builtin.m4 \
$(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
@@ -130,7 +130,11 @@
evergreen_compute.lo evergreen_hw_context.lo \
evergreen_state.lo r600_asm.lo r600_blit.lo r600_hw_context.lo \
r600_isa.lo r600_pipe.lo r600_shader.lo r600_state.lo \
- r600_state_common.lo r600_uvd.lo r700_asm.lo
+ r600_state_common.lo r600_uvd.lo r700_asm.lo cayman_msaa.lo \
+ r600_buffer_common.lo r600_gpu_load.lo r600_perfcounter.lo \
+ r600_pipe_common.lo r600_query.lo r600_streamout.lo \
+ r600_test_dma.lo r600_texture.lo r600_viewport.lo \
+ radeon_uvd.lo radeon_vce.lo radeon_video.lo
am__dirstamp = $(am__leading_dot)dirstamp
am__objects_2 = sb/sb_bc_builder.lo sb/sb_bc_decoder.lo \
sb/sb_bc_dump.lo sb/sb_bc_finalize.lo sb/sb_bc_parser.lo \
@@ -367,9 +371,9 @@
NVVIEUX_LIBS = @NVVIEUX_LIBS@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
-OMX_CFLAGS = @OMX_CFLAGS@
-OMX_LIBS = @OMX_LIBS@
-OMX_LIB_INSTALL_DIR = @OMX_LIB_INSTALL_DIR@
+OMX_BELLAGIO_CFLAGS = @OMX_BELLAGIO_CFLAGS@
+OMX_BELLAGIO_LIBS = @OMX_BELLAGIO_LIBS@
+OMX_BELLAGIO_LIB_INSTALL_DIR = @OMX_BELLAGIO_LIB_INSTALL_DIR@
OPENCL_LIBNAME = @OPENCL_LIBNAME@
OPENCL_VERSION = @OPENCL_VERSION@
OSMESA_LIB = @OSMESA_LIB@
@@ -423,6 +427,8 @@
VA_LIB_INSTALL_DIR = @VA_LIB_INSTALL_DIR@
VA_MAJOR = @VA_MAJOR@
VA_MINOR = @VA_MINOR@
+VC5_SIMULATOR_CFLAGS = @VC5_SIMULATOR_CFLAGS@
+VC5_SIMULATOR_LIBS = @VC5_SIMULATOR_LIBS@
VDPAU_CFLAGS = @VDPAU_CFLAGS@
VDPAU_LIBS = @VDPAU_LIBS@
VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@
@@ -434,12 +440,15 @@
VL_CFLAGS = @VL_CFLAGS@
VL_LIBS = @VL_LIBS@
VULKAN_ICD_INSTALL_DIR = @VULKAN_ICD_INSTALL_DIR@
-WAYLAND_CFLAGS = @WAYLAND_CFLAGS@
-WAYLAND_LIBS = @WAYLAND_LIBS@
+WAYLAND_CLIENT_CFLAGS = @WAYLAND_CLIENT_CFLAGS@
+WAYLAND_CLIENT_LIBS = @WAYLAND_CLIENT_LIBS@
WAYLAND_PROTOCOLS_DATADIR = @WAYLAND_PROTOCOLS_DATADIR@
WAYLAND_SCANNER = @WAYLAND_SCANNER@
WAYLAND_SCANNER_CFLAGS = @WAYLAND_SCANNER_CFLAGS@
WAYLAND_SCANNER_LIBS = @WAYLAND_SCANNER_LIBS@
+WAYLAND_SERVER_CFLAGS = @WAYLAND_SERVER_CFLAGS@
+WAYLAND_SERVER_LIBS = @WAYLAND_SERVER_LIBS@
+WNO_OVERRIDE_INIT = @WNO_OVERRIDE_INIT@
X11_INCLUDES = @X11_INCLUDES@
XA_MAJOR = @XA_MAJOR@
XA_MINOR = @XA_MINOR@
@@ -524,6 +533,7 @@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
C_SOURCES = \
+ r600d_common.h \
compute_memory_pool.c \
compute_memory_pool.h \
eg_asm.c \
@@ -554,7 +564,26 @@
r600_state_common.c \
r600_uvd.c \
r700_asm.c \
- r700_sq.h
+ r700_sq.h \
+ cayman_msaa.c \
+ r600_buffer_common.c \
+ r600_cs.h \
+ r600_gpu_load.c \
+ r600_perfcounter.c \
+ r600_pipe_common.c \
+ r600_pipe_common.h \
+ r600_query.c \
+ r600_query.h \
+ r600_streamout.c \
+ r600_test_dma.c \
+ r600_texture.c \
+ r600_viewport.c \
+ radeon_uvd.c \
+ radeon_uvd.h \
+ radeon_vce.c \
+ radeon_vce.h \
+ radeon_video.c \
+ radeon_video.h
CXX_SOURCES = \
sb/sb_bc_builder.cpp \
@@ -636,6 +665,8 @@
-I$(top_srcdir)/src/gallium/auxiliary \
-I$(top_srcdir)/src/gallium/drivers \
-I$(top_srcdir)/src/gallium/winsys \
+ -I$(top_builddir)/src/util/ \
+ -I$(top_builddir)/src/gallium/drivers/ \
$(DEFINES) \
$(PTHREAD_CFLAGS) \
$(LIBDRM_CFLAGS) \
@@ -768,6 +799,7 @@
distclean-compile:
-rm -f *.tab.c
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cayman_msaa.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/compute_memory_pool.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/eg_asm.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/eg_debug.Plo@am__quote@
@@ -776,14 +808,26 @@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/evergreen_state.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_asm.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_blit.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_buffer_common.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_gpu_load.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_hw_context.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_isa.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_perfcounter.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_pipe.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_pipe_common.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_query.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_shader.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_state.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_state_common.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_streamout.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_test_dma.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_texture.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_uvd.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_viewport.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r700_asm.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_uvd.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_vce.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_video.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@sb/$(DEPDIR)/sb_bc_builder.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@sb/$(DEPDIR)/sb_bc_decoder.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@sb/$(DEPDIR)/sb_bc_dump.Plo@am__quote@
diff -Nru mesa-17.2.4/src/gallium/drivers/r600/Makefile.sources mesa-17.3.3/src/gallium/drivers/r600/Makefile.sources
--- mesa-17.2.4/src/gallium/drivers/r600/Makefile.sources 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r600/Makefile.sources 2018-01-18 21:30:28.000000000 +0000
@@ -1,4 +1,6 @@
+
C_SOURCES = \
+ r600d_common.h \
compute_memory_pool.c \
compute_memory_pool.h \
eg_asm.c \
@@ -29,7 +31,26 @@
r600_state_common.c \
r600_uvd.c \
r700_asm.c \
- r700_sq.h
+ r700_sq.h \
+ cayman_msaa.c \
+ r600_buffer_common.c \
+ r600_cs.h \
+ r600_gpu_load.c \
+ r600_perfcounter.c \
+ r600_pipe_common.c \
+ r600_pipe_common.h \
+ r600_query.c \
+ r600_query.h \
+ r600_streamout.c \
+ r600_test_dma.c \
+ r600_texture.c \
+ r600_viewport.c \
+ radeon_uvd.c \
+ radeon_uvd.h \
+ radeon_vce.c \
+ radeon_vce.h \
+ radeon_video.c \
+ radeon_video.h
CXX_SOURCES = \
sb/sb_bc_builder.cpp \
diff -Nru mesa-17.2.4/src/gallium/drivers/r600/r600_asm.c mesa-17.3.3/src/gallium/drivers/r600/r600_asm.c
--- mesa-17.2.4/src/gallium/drivers/r600/r600_asm.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r600/r600_asm.c 2018-01-18 21:30:28.000000000 +0000
@@ -43,15 +43,11 @@
return alu->dst.write || alu->is_op3;
}
-static inline unsigned int r600_bytecode_get_num_operands(
- struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
+static inline unsigned int r600_bytecode_get_num_operands(const struct r600_bytecode_alu *alu)
{
return r600_isa_alu(alu->op)->src_count;
}
-int r700_bytecode_alu_build(struct r600_bytecode *bc,
- struct r600_bytecode_alu *alu, unsigned id);
-
static struct r600_bytecode_cf *r600_bytecode_cf(void)
{
struct r600_bytecode_cf *cf = CALLOC_STRUCT(r600_bytecode_cf);
@@ -236,7 +232,7 @@
}
/* alu instructions that can ony exits once per group */
-static int is_alu_once_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
+static int is_alu_once_inst(struct r600_bytecode_alu *alu)
{
return r600_isa_alu(alu->op)->flags & (AF_KILL | AF_PRED) || alu->is_lds_idx_op || alu->op == ALU_OP0_GROUP_BARRIER;
}
@@ -247,14 +243,14 @@
(r600_isa_alu_slots(bc->isa->hw_class, alu->op) == AF_4V);
}
-static int is_alu_mova_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
+static int is_alu_mova_inst(struct r600_bytecode_alu *alu)
{
return r600_isa_alu(alu->op)->flags & AF_MOVA;
}
-static int alu_uses_rel(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
+static int alu_uses_rel(struct r600_bytecode_alu *alu)
{
- unsigned num_src = r600_bytecode_get_num_operands(bc, alu);
+ unsigned num_src = r600_bytecode_get_num_operands(alu);
unsigned src;
if (alu->dst.rel) {
@@ -274,9 +270,9 @@
return sel == EG_V_SQ_ALU_SRC_LDS_OQ_A_POP || sel == EG_V_SQ_ALU_SRC_LDS_OQ_B_POP;
}
-static int alu_uses_lds(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
+static int alu_uses_lds(struct r600_bytecode_alu *alu)
{
- unsigned num_src = r600_bytecode_get_num_operands(bc, alu);
+ unsigned num_src = r600_bytecode_get_num_operands(alu);
unsigned src;
for (src = 0; src < num_src; ++src) {
@@ -287,7 +283,7 @@
return 0;
}
-static int is_alu_64bit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
+static int is_alu_64bit_inst(struct r600_bytecode_alu *alu)
{
const struct alu_op_info *op = r600_isa_alu(alu->op);
return (op->flags & AF_64);
@@ -312,7 +308,7 @@
return slots == AF_VS;
}
-static int is_nop_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
+static int is_nop_inst(struct r600_bytecode_alu *alu)
{
return alu->op == ALU_OP0_NOP;
}
@@ -406,7 +402,8 @@
return 0;
}
-static int reserve_cfile(struct r600_bytecode *bc, struct alu_bank_swizzle *bs, unsigned sel, unsigned chan)
+static int reserve_cfile(const struct r600_bytecode *bc,
+ struct alu_bank_swizzle *bs, unsigned sel, unsigned chan)
{
int res, num_res = 4;
if (bc->chip_class >= R700) {
@@ -448,12 +445,12 @@
sel <= V_SQ_ALU_SRC_LITERAL);
}
-static int check_vector(struct r600_bytecode *bc, struct r600_bytecode_alu *alu,
+static int check_vector(const struct r600_bytecode *bc, const struct r600_bytecode_alu *alu,
struct alu_bank_swizzle *bs, int bank_swizzle)
{
int r, src, num_src, sel, elem, cycle;
- num_src = r600_bytecode_get_num_operands(bc, alu);
+ num_src = r600_bytecode_get_num_operands(alu);
for (src = 0; src < num_src; src++) {
sel = alu->src[src].sel;
elem = alu->src[src].chan;
@@ -478,12 +475,12 @@
return 0;
}
-static int check_scalar(struct r600_bytecode *bc, struct r600_bytecode_alu *alu,
+static int check_scalar(const struct r600_bytecode *bc, const struct r600_bytecode_alu *alu,
struct alu_bank_swizzle *bs, int bank_swizzle)
{
int r, src, num_src, const_count, sel, elem, cycle;
- num_src = r600_bytecode_get_num_operands(bc, alu);
+ num_src = r600_bytecode_get_num_operands(alu);
for (const_count = 0, src = 0; src < num_src; ++src) {
sel = alu->src[src].sel;
elem = alu->src[src].chan;
@@ -524,7 +521,7 @@
return 0;
}
-static int check_and_set_bank_swizzle(struct r600_bytecode *bc,
+static int check_and_set_bank_swizzle(const struct r600_bytecode *bc,
struct r600_bytecode_alu *slots[5])
{
struct alu_bank_swizzle bs;
@@ -618,7 +615,7 @@
for (i = 0; i < max_slots; ++i) {
if (prev[i] && alu_writes(prev[i]) && !prev[i]->dst.rel) {
- if (is_alu_64bit_inst(bc, prev[i])) {
+ if (is_alu_64bit_inst(prev[i])) {
gpr[i] = -1;
continue;
}
@@ -638,9 +635,9 @@
if (!alu)
continue;
- if (is_alu_64bit_inst(bc, alu))
+ if (is_alu_64bit_inst(alu))
continue;
- num_src = r600_bytecode_get_num_operands(bc, alu);
+ num_src = r600_bytecode_get_num_operands(alu);
for (src = 0; src < num_src; ++src) {
if (!is_gpr(alu->src[src].sel) || alu->src[src].rel)
continue;
@@ -703,10 +700,10 @@
}
/* compute how many literal are needed */
-static int r600_bytecode_alu_nliterals(struct r600_bytecode *bc, struct r600_bytecode_alu *alu,
+static int r600_bytecode_alu_nliterals(struct r600_bytecode_alu *alu,
uint32_t literal[4], unsigned *nliteral)
{
- unsigned num_src = r600_bytecode_get_num_operands(bc, alu);
+ unsigned num_src = r600_bytecode_get_num_operands(alu);
unsigned i, j;
for (i = 0; i < num_src; ++i) {
@@ -729,11 +726,10 @@
return 0;
}
-static void r600_bytecode_alu_adjust_literals(struct r600_bytecode *bc,
- struct r600_bytecode_alu *alu,
- uint32_t literal[4], unsigned nliteral)
+static void r600_bytecode_alu_adjust_literals(struct r600_bytecode_alu *alu,
+ uint32_t literal[4], unsigned nliteral)
{
- unsigned num_src = r600_bytecode_get_num_operands(bc, alu);
+ unsigned num_src = r600_bytecode_get_num_operands(alu);
unsigned i, j;
for (i = 0; i < num_src; ++i) {
@@ -771,13 +767,13 @@
if (prev[i]) {
if (prev[i]->pred_sel)
return 0;
- if (is_alu_once_inst(bc, prev[i]))
+ if (is_alu_once_inst(prev[i]))
return 0;
}
if (slots[i]) {
if (slots[i]->pred_sel)
return 0;
- if (is_alu_once_inst(bc, slots[i]))
+ if (is_alu_once_inst(slots[i]))
return 0;
}
}
@@ -790,28 +786,28 @@
/* check number of literals */
if (prev[i]) {
- if (r600_bytecode_alu_nliterals(bc, prev[i], literal, &nliteral))
+ if (r600_bytecode_alu_nliterals(prev[i], literal, &nliteral))
return 0;
- if (r600_bytecode_alu_nliterals(bc, prev[i], prev_literal, &prev_nliteral))
+ if (r600_bytecode_alu_nliterals(prev[i], prev_literal, &prev_nliteral))
return 0;
- if (is_alu_mova_inst(bc, prev[i])) {
+ if (is_alu_mova_inst(prev[i])) {
if (have_rel)
return 0;
have_mova = 1;
}
- if (alu_uses_rel(bc, prev[i])) {
+ if (alu_uses_rel(prev[i])) {
if (have_mova) {
return 0;
}
have_rel = 1;
}
- if (alu_uses_lds(bc, prev[i]))
+ if (alu_uses_lds(prev[i]))
return 0;
- num_once_inst += is_alu_once_inst(bc, prev[i]);
+ num_once_inst += is_alu_once_inst(prev[i]);
}
- if (slots[i] && r600_bytecode_alu_nliterals(bc, slots[i], literal, &nliteral))
+ if (slots[i] && r600_bytecode_alu_nliterals(slots[i], literal, &nliteral))
return 0;
/* Let's check used slots. */
@@ -821,7 +817,7 @@
} else if (prev[i] && slots[i]) {
if (max_slots == 5 && result[4] == NULL && prev[4] == NULL && slots[4] == NULL) {
/* Trans unit is still free try to use it. */
- if (is_alu_any_unit_inst(bc, slots[i]) && !alu_uses_lds(bc, slots[i])) {
+ if (is_alu_any_unit_inst(bc, slots[i]) && !alu_uses_lds(slots[i])) {
result[i] = prev[i];
result[4] = slots[i];
} else if (is_alu_any_unit_inst(bc, prev[i])) {
@@ -850,20 +846,20 @@
}
alu = slots[i];
- num_once_inst += is_alu_once_inst(bc, alu);
+ num_once_inst += is_alu_once_inst(alu);
/* don't reschedule NOPs */
- if (is_nop_inst(bc, alu))
+ if (is_nop_inst(alu))
return 0;
- if (is_alu_mova_inst(bc, alu)) {
+ if (is_alu_mova_inst(alu)) {
if (have_rel) {
return 0;
}
have_mova = 1;
}
- if (alu_uses_rel(bc, alu)) {
+ if (alu_uses_rel(alu)) {
if (have_mova) {
return 0;
}
@@ -875,7 +871,7 @@
return 0; /* data hazard with MOVA */
/* Let's check source gprs */
- num_src = r600_bytecode_get_num_operands(bc, alu);
+ num_src = r600_bytecode_get_num_operands(alu);
for (src = 0; src < num_src; ++src) {
/* Constants don't matter. */
@@ -1021,7 +1017,7 @@
return 0;
}
-static int r600_bytecode_assign_kcache_banks(struct r600_bytecode *bc,
+static int r600_bytecode_assign_kcache_banks(
struct r600_bytecode_alu *alu,
struct r600_bytecode_kcache * kcache)
{
@@ -1283,7 +1279,7 @@
for (i = 0, nliteral = 0; i < max_slots; i++) {
if (slots[i]) {
- r = r600_bytecode_alu_nliterals(bc, slots[i], literal, &nliteral);
+ r = r600_bytecode_alu_nliterals(slots[i], literal, &nliteral);
if (r)
return r;
}
@@ -1629,7 +1625,8 @@
*bytecode++ = S_SQ_CF_WORD0_ADDR(cf->addr >> 1);
*bytecode++ = S_SQ_CF_WORD1_CF_INST(r600_isa_cf_opcode(ISA_CC_R600, cf->op)) |
S_SQ_CF_WORD1_BARRIER(1) |
- S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1);
+ S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1)|
+ S_SQ_CF_WORD1_END_OF_PROGRAM(cf->end_of_program);
}
/* common for r600/r700 - eg in eg_asm.c */
@@ -1748,11 +1745,11 @@
nliteral = 0;
memset(literal, 0, sizeof(literal));
LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
- r = r600_bytecode_alu_nliterals(bc, alu, literal, &nliteral);
+ r = r600_bytecode_alu_nliterals(alu, literal, &nliteral);
if (r)
return r;
- r600_bytecode_alu_adjust_literals(bc, alu, literal, nliteral);
- r600_bytecode_assign_kcache_banks(bc, alu, cf->kcache);
+ r600_bytecode_alu_adjust_literals(alu, literal, nliteral);
+ r600_bytecode_assign_kcache_banks(alu, cf->kcache);
switch(bc->chip_class) {
case R600:
@@ -2092,6 +2089,8 @@
bc->bytecode[id + 1], cfop->name);
fprintf(stderr, "%d @%d ", cf->ndw / 4, cf->addr);
fprintf(stderr, "\n");
+ if (cf->end_of_program)
+ fprintf(stderr, "EOP ");
} else if (cfop->flags & CF_EXP) {
int o = 0;
const char *exp_type[] = {"PIXEL", "POS ", "PARAM"};
@@ -2189,7 +2188,7 @@
const struct alu_op_info *aop = r600_isa_alu(alu->op);
int o = 0;
- r600_bytecode_alu_nliterals(bc, alu, literal, &nliteral);
+ r600_bytecode_alu_nliterals(alu, literal, &nliteral);
o += fprintf(stderr, " %04d %08X %08X ", id, bc->bytecode[id], bc->bytecode[id+1]);
if (last)
o += fprintf(stderr, "%4d ", ++ngr);
@@ -2379,6 +2378,12 @@
*endian = r600_endian_swap(16);
return;
}
+
+ if (pformat == PIPE_FORMAT_B5G5R5A1_UNORM) {
+ *format = FMT_1_5_5_5;
+ *endian = r600_endian_swap(16);
+ return;
+ }
desc = util_format_description(pformat);
if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) {
diff -Nru mesa-17.2.4/src/gallium/drivers/r600/r600_blit.c mesa-17.3.3/src/gallium/drivers/r600/r600_blit.c
--- mesa-17.2.4/src/gallium/drivers/r600/r600_blit.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r600/r600_blit.c 2018-01-18 21:30:28.000000000 +0000
@@ -377,7 +377,7 @@
r600_blit_decompress_depth_in_place(rctx, rtex, false,
level, level,
first_layer, last_layer);
- if (rtex->surface.flags & RADEON_SURF_SBUFFER) {
+ if (rtex->surface.has_stencil) {
r600_blit_decompress_depth_in_place(rctx, rtex, true,
level, level,
first_layer, last_layer);
@@ -443,8 +443,7 @@
* array are clear to different value. To simplify code just
* disable fast clear for texture array.
*/
- /* Only use htile for first level */
- if (rtex->htile_offset && !level &&
+ if (r600_htile_enabled(rtex, level) &&
fb->zsbuf->u.tex.first_layer == 0 &&
fb->zsbuf->u.tex.last_layer == util_max_layer(&rtex->resource.b.b, level)) {
if (rtex->depth_clear_value != depth) {
diff -Nru mesa-17.2.4/src/gallium/drivers/r600/r600_buffer_common.c mesa-17.3.3/src/gallium/drivers/r600/r600_buffer_common.c
--- mesa-17.2.4/src/gallium/drivers/r600/r600_buffer_common.c 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r600/r600_buffer_common.c 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,685 @@
+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Marek Olšák
+ */
+
+#include "r600_cs.h"
+#include "util/u_memory.h"
+#include "util/u_upload_mgr.h"
+#include <inttypes.h>
+#include <stdio.h>
+
+/* Tell whether buf is referenced, for the given usage, by the GFX CS or by
+ * a DMA CS that has had commands emitted into it. */
+bool r600_rings_is_buffer_referenced(struct r600_common_context *ctx,
+ struct pb_buffer *buf,
+ enum radeon_bo_usage usage)
+{
+ bool in_gfx = ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, buf, usage);
+
+ /* The DMA CS is only queried when the GFX CS does not reference buf. */
+ return in_gfx ||
+ (radeon_emitted(ctx->dma.cs, 0) &&
+ ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, buf, usage));
+}
+
+void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
+ struct r600_resource *resource,
+ unsigned usage)
+{
+ enum radeon_bo_usage rusage = RADEON_USAGE_READWRITE;
+ bool busy = false;
+ /* Maps the buffer through the winsys after flushing any ring that still uses it. */
+ assert(!(resource->flags & RADEON_FLAG_SPARSE));
+ /* Unsynchronized mappings skip all of the ring checks below. */
+ if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
+ return ctx->ws->buffer_map(resource->buf, NULL, usage);
+ }
+
+ if (!(usage & PIPE_TRANSFER_WRITE)) {
+ /* Read-only mapping: only the last GPU write has to be waited for. */
+ rusage = RADEON_USAGE_WRITE;
+ }
+ /* GFX ring: flush if newly emitted commands reference the buffer. */
+ if (radeon_emitted(ctx->gfx.cs, ctx->initial_gfx_cs_size) &&
+ ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs,
+ resource->buf, rusage)) {
+ if (usage & PIPE_TRANSFER_DONTBLOCK) {
+ ctx->gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
+ return NULL;
+ } else {
+ ctx->gfx.flush(ctx, 0, NULL);
+ busy = true;
+ }
+ }
+ if (radeon_emitted(ctx->dma.cs, 0) && /* same check for the DMA ring */
+ ctx->ws->cs_is_buffer_referenced(ctx->dma.cs,
+ resource->buf, rusage)) {
+ if (usage & PIPE_TRANSFER_DONTBLOCK) {
+ ctx->dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
+ return NULL;
+ } else {
+ ctx->dma.flush(ctx, 0, NULL);
+ busy = true;
+ }
+ }
+
+ if (busy || !ctx->ws->buffer_wait(resource->buf, 0, rusage)) {
+ if (usage & PIPE_TRANSFER_DONTBLOCK) {
+ return NULL;
+ } else {
+ /* We will be waiting for the GPU. Wait for any offloaded
+ * CS flush to complete to avoid busy-waiting in the winsys. */
+ ctx->ws->cs_sync_flush(ctx->gfx.cs);
+ if (ctx->dma.cs)
+ ctx->ws->cs_sync_flush(ctx->dma.cs);
+ }
+ }
+
+ /* Setting the CS to NULL will prevent doing checks we have done already. */
+ return ctx->ws->buffer_map(resource->buf, NULL, usage);
+}
+
+void r600_init_resource_fields(struct r600_common_screen *rscreen,
+ struct r600_resource *res,
+ uint64_t size, unsigned alignment)
+{
+ struct r600_texture *rtex = (struct r600_texture*)res;
+ /* Fills in size, alignment, domains, winsys flags and usage estimates of res. */
+ res->bo_size = size;
+ res->bo_alignment = alignment;
+ res->flags = 0;
+ res->texture_handle_allocated = false;
+ res->image_handle_allocated = false;
+
+ switch (res->b.b.usage) {
+ case PIPE_USAGE_STREAM:
+ res->flags = RADEON_FLAG_GTT_WC;
+ /* fall through */
+ case PIPE_USAGE_STAGING:
+ /* Transfers are likely to occur more often with these
+ * resources. */
+ res->domains = RADEON_DOMAIN_GTT;
+ break;
+ case PIPE_USAGE_DYNAMIC:
+ /* Older kernels didn't always flush the HDP cache before
+ * CS execution
+ */
+ if (rscreen->info.drm_major == 2 &&
+ rscreen->info.drm_minor < 40) {
+ res->domains = RADEON_DOMAIN_GTT;
+ res->flags |= RADEON_FLAG_GTT_WC;
+ break;
+ }
+ /* fall through */
+ case PIPE_USAGE_DEFAULT:
+ case PIPE_USAGE_IMMUTABLE:
+ default:
+ /* Not listing GTT here improves performance in some
+ * apps. */
+ res->domains = RADEON_DOMAIN_VRAM;
+ res->flags |= RADEON_FLAG_GTT_WC;
+ break;
+ }
+
+ if (res->b.b.target == PIPE_BUFFER &&
+ res->b.b.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
+ PIPE_RESOURCE_FLAG_MAP_COHERENT)) {
+ /* Use GTT for all persistent mappings with older
+ * kernels, because they didn't always flush the HDP
+ * cache before CS execution.
+ *
+ * Write-combined CPU mappings are fine, the kernel
+ * ensures all CPU writes finish before the GPU
+ * executes a command stream.
+ */
+ if (rscreen->info.drm_major == 2 &&
+ rscreen->info.drm_minor < 40)
+ res->domains = RADEON_DOMAIN_GTT;
+ }
+ /* Tiled textures are unmappable. Always put them in VRAM. UNMAPPABLE is a
+ * pipe_resource flag, so test res->b.b.flags (res->flags holds RADEON_FLAG_*). */
+ if ((res->b.b.target != PIPE_BUFFER && !rtex->surface.is_linear) ||
+ res->b.b.flags & R600_RESOURCE_FLAG_UNMAPPABLE) {
+ res->domains = RADEON_DOMAIN_VRAM;
+ res->flags |= RADEON_FLAG_NO_CPU_ACCESS |
+ RADEON_FLAG_GTT_WC;
+ }
+
+ /* Only displayable single-sample textures can be shared between
+ * processes. */
+ if (res->b.b.target == PIPE_BUFFER ||
+ res->b.b.nr_samples >= 2 ||
+ (rtex->surface.micro_tile_mode != RADEON_MICRO_MODE_DISPLAY &&
+ /* Raven doesn't use display micro mode for 32bpp, so check this: */
+ !(res->b.b.bind & PIPE_BIND_SCANOUT)))
+ res->flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
+
+ /* If VRAM is just stolen system memory, allow both VRAM and
+ * GTT, whichever has free space. If a buffer is evicted from
+ * VRAM to GTT, it will stay there.
+ *
+ * DRM 3.6.0 has good BO move throttling, so we can allow VRAM-only
+ * placements even with a low amount of stolen VRAM. The version is
+ * compared as a (major, minor) pair so 4.0 counts as newer than 3.6. */
+ if (!rscreen->info.has_dedicated_vram && res->domains == RADEON_DOMAIN_VRAM &&
+ (rscreen->info.drm_major < 3 ||
+ (rscreen->info.drm_major == 3 && rscreen->info.drm_minor < 6))) {
+ res->domains = RADEON_DOMAIN_VRAM_GTT;
+ res->flags &= ~RADEON_FLAG_NO_CPU_ACCESS; /* disallowed with VRAM_GTT */
+ }
+
+ if (rscreen->debug_flags & DBG_NO_WC)
+ res->flags &= ~RADEON_FLAG_GTT_WC;
+
+ if (res->b.b.bind & PIPE_BIND_SHARED)
+ res->flags |= RADEON_FLAG_NO_SUBALLOC;
+
+ /* Set expected VRAM and GART usage for the buffer. */
+ res->vram_usage = 0;
+ res->gart_usage = 0;
+
+ if (res->domains & RADEON_DOMAIN_VRAM)
+ res->vram_usage = size;
+ else if (res->domains & RADEON_DOMAIN_GTT)
+ res->gart_usage = size;
+}
+
+bool r600_alloc_resource(struct r600_common_screen *rscreen,
+ struct r600_resource *res)
+{
+ struct pb_buffer *fresh, *stale;
+ bool dump_vm;
+
+ /* Get the replacement storage first; on failure res is left untouched. */
+ fresh = rscreen->ws->buffer_create(rscreen->ws, res->bo_size,
+ res->bo_alignment,
+ res->domains, res->flags);
+ if (fresh == NULL)
+ return false;
+
+ /* Swap the pointer so that a previously non-NULL res->buf never becomes
+ * NULL. This should prevent crashes when several contexts use the same
+ * buffer and one of them invalidates it while the others are using it. */
+ stale = res->buf;
+ res->buf = fresh; /* should be atomic */
+
+ /* Without virtual memory the GPU address is recorded as 0. */
+ res->gpu_address = rscreen->info.has_virtual_memory ?
+ rscreen->ws->buffer_get_virtual_address(res->buf) : 0;
+
+ /* Drop the reference to the old storage and reset the valid range. */
+ pb_reference(&stale, NULL);
+ util_range_set_empty(&res->valid_buffer_range);
+
+ /* Print debug information (buffers only, when DBG_VM is set). */
+ dump_vm = (rscreen->debug_flags & DBG_VM) && res->b.b.target == PIPE_BUFFER;
+ if (dump_vm) {
+ fprintf(stderr, "VM start=0x%"PRIX64" end=0x%"PRIX64" | Buffer %"PRIu64" bytes\n",
+ res->gpu_address, res->gpu_address + res->buf->size,
+ res->buf->size);
+ }
+
+ return true;
+}
+
+static void r600_buffer_destroy(struct pipe_screen *screen,
+ struct pipe_resource *buf)
+{
+ struct r600_resource *res = r600_resource(buf);
+ /* Release per-resource state, then the BO reference, then the wrapper. */
+ threaded_resource_deinit(buf);
+ util_range_destroy(&res->valid_buffer_range);
+ pb_reference(&res->buf, NULL);
+ FREE(res);
+}
+
+static bool
+r600_invalidate_buffer(struct r600_common_context *rctx,
+ struct r600_resource *rbuffer)
+{
+ bool gpu_busy;
+
+ /* Three kinds of buffers can't be reallocated:
+ * - shared buffers,
+ * - sparse buffers,
+ * - user-pointer buffers (in AMD_pinned_memory the association only
+ * gets broken when the buffer is explicitly re-allocated).
+ */
+ if (rbuffer->b.is_shared ||
+ (rbuffer->flags & RADEON_FLAG_SPARSE) ||
+ rbuffer->b.is_user_ptr)
+ return false;
+
+ /* Would mapping this buffer cause waiting for the GPU? */
+ gpu_busy = r600_rings_is_buffer_referenced(rctx, rbuffer->buf,
+ RADEON_USAGE_READWRITE) ||
+ !rctx->ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE);
+ /* Busy: call the context's invalidate_buffer hook; idle: only reset the valid range. */
+ if (gpu_busy)
+ rctx->invalidate_buffer(&rctx->b, &rbuffer->b.b);
+ else
+ util_range_set_empty(&rbuffer->valid_buffer_range);
+ return true;
+}
+
+/* Make dst use the storage of src, then have the context rebind dst. */
+void r600_replace_buffer_storage(struct pipe_context *ctx,
+ struct pipe_resource *dst,
+ struct pipe_resource *src)
+{
+ struct r600_common_context *rctx = (struct r600_common_context *)ctx;
+ struct r600_resource *to = r600_resource(dst);
+ struct r600_resource *from = r600_resource(src);
+ const uint64_t prev_va = to->gpu_address;
+
+ /* The fields that are not copied below must already match. */
+ assert(to->vram_usage == from->vram_usage);
+ assert(to->gart_usage == from->gart_usage);
+ assert(to->bo_size == from->bo_size);
+ assert(to->bo_alignment == from->bo_alignment);
+ assert(to->domains == from->domains);
+
+ pb_reference(&to->buf, from->buf);
+ to->gpu_address = from->gpu_address;
+ to->b.b.bind = from->b.b.bind;
+ to->flags = from->flags;
+ rctx->rebind_buffer(ctx, dst, prev_va);
+}
+
+void r600_invalidate_resource(struct pipe_context *ctx,
+ struct pipe_resource *resource)
+{
+ /* We currently only do anything here for buffers. */
+ if (resource->target != PIPE_BUFFER)
+ return;
+
+ (void)r600_invalidate_buffer((struct r600_common_context*)ctx,
+ r600_resource(resource));
+}
+
+static void *r600_buffer_get_transfer(struct pipe_context *ctx,
+ struct pipe_resource *resource,
+ unsigned usage,
+ const struct pipe_box *box,
+ struct pipe_transfer **ptransfer,
+ void *data, struct r600_resource *staging,
+ unsigned offset)
+{
+ struct r600_common_context *rctx = (struct r600_common_context*)ctx;
+ struct r600_transfer *xfer;
+
+ /* TC_TRANSFER_MAP_THREADED_UNSYNC transfers come from a separate slab pool. */
+ xfer = slab_alloc((usage & TC_TRANSFER_MAP_THREADED_UNSYNC) ?
+ &rctx->pool_transfers_unsync : &rctx->pool_transfers);
+ /* Driver-side fields: the caller's staging buffer and offset. */
+ xfer->offset = offset;
+ xfer->staging = staging;
+ xfer->b.staging = NULL;
+ /* pipe_transfer fields; level and both strides are set to zero here. */
+ xfer->b.b.resource = NULL;
+ pipe_resource_reference(&xfer->b.b.resource, resource);
+ xfer->b.b.level = 0;
+ xfer->b.b.usage = usage;
+ xfer->b.b.box = *box;
+ xfer->b.b.stride = 0;
+ xfer->b.b.layer_stride = 0;
+ *ptransfer = &xfer->b.b;
+ return data;
+}
+
+static bool r600_can_dma_copy_buffer(struct r600_common_context *rctx,
+ unsigned dstx, unsigned srcx, unsigned size)
+{
+ /* CP DMA is accepted as-is; the other paths need dword alignment. */
+ if (rctx->screen->has_cp_dma)
+ return true;
+ if ((dstx | srcx | size) & 3)
+ return false;
+ return rctx->dma.cs || rctx->screen->has_streamout;
+}
+
+static void *r600_buffer_transfer_map(struct pipe_context *ctx,
+ struct pipe_resource *resource,
+ unsigned level,
+ unsigned usage,
+ const struct pipe_box *box,
+ struct pipe_transfer **ptransfer)
+{
+ struct r600_common_context *rctx = (struct r600_common_context*)ctx;
+ struct r600_common_screen *rscreen = (struct r600_common_screen*)ctx->screen;
+ struct r600_resource *rbuffer = r600_resource(resource);
+ uint8_t *data;
+
+ assert(box->x + box->width <= resource->width0);
+
+ /* From GL_AMD_pinned_memory issues:
+ *
+ * 4) Is glMapBuffer on a shared buffer guaranteed to return the
+ * same system address which was specified at creation time?
+ *
+ * RESOLVED: NO. The GL implementation might return a different
+ * virtual mapping of that memory, although the same physical
+ * page will be used.
+ *
+ * So don't ever use staging buffers.
+ */
+ if (rbuffer->b.is_user_ptr)
+ usage |= PIPE_TRANSFER_PERSISTENT;
+
+ /* See if the buffer range being mapped has never been initialized,
+ * in which case it can be mapped unsynchronized. */
+ if (!(usage & (PIPE_TRANSFER_UNSYNCHRONIZED |
+ TC_TRANSFER_MAP_NO_INFER_UNSYNCHRONIZED)) &&
+ usage & PIPE_TRANSFER_WRITE &&
+ !rbuffer->b.is_shared &&
+ !util_ranges_intersect(&rbuffer->valid_buffer_range, box->x, box->x + box->width)) {
+ usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
+ }
+
+ /* If discarding the entire range, discard the whole resource instead. */
+ if (usage & PIPE_TRANSFER_DISCARD_RANGE &&
+ box->x == 0 && box->width == resource->width0) {
+ usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE;
+ }
+
+ if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE &&
+ !(usage & (PIPE_TRANSFER_UNSYNCHRONIZED |
+ TC_TRANSFER_MAP_NO_INVALIDATE))) {
+ assert(usage & PIPE_TRANSFER_WRITE);
+
+ if (r600_invalidate_buffer(rctx, rbuffer)) {
+ /* At this point, the buffer is always idle. */
+ usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
+ } else {
+ /* Fall back to a temporary buffer. */
+ usage |= PIPE_TRANSFER_DISCARD_RANGE;
+ }
+ }
+
+ if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
+ !(rscreen->debug_flags & DBG_NO_DISCARD_RANGE) &&
+ ((!(usage & (PIPE_TRANSFER_UNSYNCHRONIZED |
+ PIPE_TRANSFER_PERSISTENT)) &&
+ r600_can_dma_copy_buffer(rctx, box->x, 0, box->width)) ||
+ (rbuffer->flags & RADEON_FLAG_SPARSE))) {
+ assert(usage & PIPE_TRANSFER_WRITE);
+
+ /* Check if mapping this buffer would cause waiting for the GPU.
+ */
+ if (rbuffer->flags & RADEON_FLAG_SPARSE ||
+ r600_rings_is_buffer_referenced(rctx, rbuffer->buf, RADEON_USAGE_READWRITE) ||
+ !rctx->ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE)) {
+ /* Do a wait-free write-only transfer using a temporary buffer. */
+ unsigned offset;
+ struct r600_resource *staging = NULL;
+
+ u_upload_alloc(ctx->stream_uploader, 0,
+ box->width + (box->x % R600_MAP_BUFFER_ALIGNMENT),
+ rctx->screen->info.tcc_cache_line_size,
+ &offset, (struct pipe_resource**)&staging,
+ (void**)&data);
+
+ if (staging) {
+ data += box->x % R600_MAP_BUFFER_ALIGNMENT;
+ return r600_buffer_get_transfer(ctx, resource, usage, box,
+ ptransfer, data, staging, offset);
+ } else if (rbuffer->flags & RADEON_FLAG_SPARSE) {
+ return NULL;
+ }
+ } else {
+ /* At this point, the buffer is always idle (we checked it above). */
+ usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
+ }
+ }
+ /* Use a staging buffer in cached GTT for reads. */
+ else if (((usage & PIPE_TRANSFER_READ) &&
+ !(usage & PIPE_TRANSFER_PERSISTENT) &&
+ (rbuffer->domains & RADEON_DOMAIN_VRAM ||
+ rbuffer->flags & RADEON_FLAG_GTT_WC) &&
+ r600_can_dma_copy_buffer(rctx, 0, box->x, box->width)) ||
+ (rbuffer->flags & RADEON_FLAG_SPARSE)) {
+ struct r600_resource *staging;
+
+ assert(!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC));
+ staging = (struct r600_resource*) pipe_buffer_create(
+ ctx->screen, 0, PIPE_USAGE_STAGING,
+ box->width + (box->x % R600_MAP_BUFFER_ALIGNMENT));
+ if (staging) {
+ /* Copy the VRAM buffer to the staging buffer. */
+ rctx->dma_copy(ctx, &staging->b.b, 0,
+ box->x % R600_MAP_BUFFER_ALIGNMENT,
+ 0, 0, resource, 0, box);
+
+ data = r600_buffer_map_sync_with_rings(rctx, staging,
+ usage & ~PIPE_TRANSFER_UNSYNCHRONIZED);
+ if (!data) {
+ r600_resource_reference(&staging, NULL);
+ return NULL;
+ }
+ data += box->x % R600_MAP_BUFFER_ALIGNMENT;
+
+ return r600_buffer_get_transfer(ctx, resource, usage, box,
+ ptransfer, data, staging, 0);
+ } else if (rbuffer->flags & RADEON_FLAG_SPARSE) {
+ return NULL;
+ }
+ }
+
+ data = r600_buffer_map_sync_with_rings(rctx, rbuffer, usage);
+ if (!data) {
+ return NULL;
+ }
+ data += box->x;
+
+ return r600_buffer_get_transfer(ctx, resource, usage, box,
+ ptransfer, data, NULL, 0);
+}
+
+/**
+ * Commit a written range of a buffer transfer.
+ *
+ * When the transfer was mapped through a staging resource, the bytes covered
+ * by \p box are copied from the staging resource into the real buffer with
+ * resource_copy_region. In all cases the range is then added to the buffer's
+ * valid_buffer_range.
+ *
+ * \param ctx       context used to issue the copy
+ * \param transfer  transfer being flushed (cast to struct r600_transfer)
+ * \param box       range to commit; box->x is an offset into the buffer
+ */
+static void r600_buffer_do_flush_region(struct pipe_context *ctx,
+					struct pipe_transfer *transfer,
+					const struct pipe_box *box)
+{
+	struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
+	struct r600_resource *rbuffer = r600_resource(transfer->resource);
+
+	if (rtransfer->staging) {
+		struct pipe_box staging_box;
+
+		/* Inside the staging resource the data sits at the transfer's
+		 * offset plus the sub-alignment remainder of box->x. */
+		u_box_1d(rtransfer->offset + box->x % R600_MAP_BUFFER_ALIGNMENT,
+			 box->width, &staging_box);
+
+		/* Write the staged bytes back into the original buffer. */
+		ctx->resource_copy_region(ctx, transfer->resource, 0, box->x, 0, 0,
+					  &rtransfer->staging->b.b, 0,
+					  &staging_box);
+	}
+
+	util_range_add(&rbuffer->valid_buffer_range, box->x,
+		       box->x + box->width);
+}
+
+/**
+ * transfer_flush_region hook for buffers.
+ *
+ * Only acts on transfers mapped with both PIPE_TRANSFER_WRITE and
+ * PIPE_TRANSFER_FLUSH_EXPLICIT; anything else is ignored here.
+ *
+ * \param rel_box  range to flush, relative to the start of the mapped box
+ */
+static void r600_buffer_flush_region(struct pipe_context *ctx,
+				     struct pipe_transfer *transfer,
+				     const struct pipe_box *rel_box)
+{
+	const unsigned explicit_write = PIPE_TRANSFER_WRITE |
+					PIPE_TRANSFER_FLUSH_EXPLICIT;
+	struct pipe_box abs_box;
+
+	if ((transfer->usage & explicit_write) != explicit_write)
+		return;
+
+	/* Translate the map-relative range into buffer offsets. */
+	u_box_1d(transfer->box.x + rel_box->x, rel_box->width, &abs_box);
+	r600_buffer_do_flush_region(ctx, transfer, &abs_box);
+}
+
+/**
+ * transfer_unmap hook for buffers.
+ *
+ * Write transfers that were not mapped with PIPE_TRANSFER_FLUSH_EXPLICIT are
+ * flushed in full here. Afterwards the staging and resource references held
+ * by the transfer are dropped and the transfer object is returned to the
+ * context's pool_transfers slab.
+ */
+static void r600_buffer_transfer_unmap(struct pipe_context *ctx,
+				       struct pipe_transfer *transfer)
+{
+	struct r600_common_context *rctx = (struct r600_common_context*)ctx;
+	struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
+	const bool is_write = transfer->usage & PIPE_TRANSFER_WRITE;
+	const bool explicit_flush = transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT;
+
+	/* Explicitly flushed maps were already committed region by region. */
+	if (is_write && !explicit_flush)
+		r600_buffer_do_flush_region(ctx, transfer, &transfer->box);
+
+	r600_resource_reference(&rtransfer->staging, NULL);
+	assert(rtransfer->b.staging == NULL); /* for threaded context only */
+	pipe_resource_reference(&transfer->resource, NULL);
+
+	/* Don't use pool_transfers_unsync. We are always in the driver
+	 * thread. */
+	slab_free(&rctx->pool_transfers, transfer);
+}
+
+/**
+ * Upload \p size bytes from \p data into \p buffer at byte \p offset.
+ *
+ * The range is mapped with PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD_RANGE
+ * plus the caller's \p usage flags, filled with memcpy and unmapped again.
+ * If the map fails the function returns without writing anything.
+ */
+void r600_buffer_subdata(struct pipe_context *ctx,
+			 struct pipe_resource *buffer,
+			 unsigned usage, unsigned offset,
+			 unsigned size, const void *data)
+{
+	const unsigned map_flags = PIPE_TRANSFER_WRITE |
+				   PIPE_TRANSFER_DISCARD_RANGE |
+				   usage;
+	struct pipe_transfer *transfer = NULL;
+	struct pipe_box box;
+	void *dst;
+
+	u_box_1d(offset, size, &box);
+	dst = r600_buffer_transfer_map(ctx, buffer, 0, map_flags,
+				       &box, &transfer);
+	if (dst) {
+		memcpy(dst, data, size);
+		r600_buffer_transfer_unmap(ctx, transfer);
+	}
+}
+
+/* Resource vtable for buffers; r600_alloc_buffer_struct installs it on every
+ * buffer it creates. Entries are positional, so their order must follow the
+ * member order of struct u_resource_vtbl. Buffers supply no get_handle hook. */
+static const struct u_resource_vtbl r600_buffer_vtbl =
+{
+ NULL, /* get_handle */
+ r600_buffer_destroy, /* resource_destroy */
+ r600_buffer_transfer_map, /* transfer_map */
+ r600_buffer_flush_region, /* transfer_flush_region */
+ r600_buffer_transfer_unmap, /* transfer_unmap */
+};
+
+/**
+ * Allocate a struct r600_resource and initialise its CPU-side state.
+ *
+ * The pipe_resource part is copied from \p templ with its reference count set
+ * to one, the buffer vtable is installed and the valid range is initialised.
+ * No winsys buffer is attached: rbuffer->buf is left NULL.
+ *
+ * \param screen  screen that will own the resource
+ * \param templ   template describing the buffer
+ * \return the new resource, or NULL if the allocation failed
+ */
+static struct r600_resource *
+r600_alloc_buffer_struct(struct pipe_screen *screen,
+			 const struct pipe_resource *templ)
+{
+	struct r600_resource *rbuffer;
+
+	rbuffer = MALLOC_STRUCT(r600_resource);
+	/* Report out-of-memory to the caller instead of dereferencing NULL. */
+	if (!rbuffer)
+		return NULL;
+
+	rbuffer->b.b = *templ;
+	rbuffer->b.b.next = NULL;
+	pipe_reference_init(&rbuffer->b.b.reference, 1);
+	rbuffer->b.b.screen = screen;
+
+	rbuffer->b.vtbl = &r600_buffer_vtbl;
+	threaded_resource_init(&rbuffer->b.b);
+
+	rbuffer->buf = NULL;
+	rbuffer->bind_history = 0;
+	util_range_init(&rbuffer->valid_buffer_range);
+	return rbuffer;
+}
+
+/**
+ * Create a buffer resource from a template.
+ *
+ * \param screen     screen to create the buffer on
+ * \param templ      resource template; width0 is used as the buffer size and
+ *                   PIPE_RESOURCE_FLAG_SPARSE selects RADEON_FLAG_SPARSE
+ * \param alignment  alignment passed on to r600_init_resource_fields
+ * \return the new buffer, or NULL if allocating either the resource struct
+ *         or its backing storage failed
+ */
+struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
+					 const struct pipe_resource *templ,
+					 unsigned alignment)
+{
+	struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
+	struct r600_resource *rbuffer = r600_alloc_buffer_struct(screen, templ);
+
+	/* The struct allocation can fail; don't touch a NULL resource. */
+	if (!rbuffer)
+		return NULL;
+
+	r600_init_resource_fields(rscreen, rbuffer, templ->width0, alignment);
+
+	if (templ->flags & PIPE_RESOURCE_FLAG_SPARSE)
+		rbuffer->flags |= RADEON_FLAG_SPARSE;
+
+	if (!r600_alloc_resource(rscreen, rbuffer)) {
+		FREE(rbuffer);
+		return NULL;
+	}
+	return &rbuffer->b.b;
+}
+
+/**
+ * Convenience wrapper around r600_buffer_create.
+ *
+ * Builds a zeroed PIPE_BUFFER template (format R8_UNORM, no bind flags,
+ * height, depth and array size of one) from the scalar arguments and creates
+ * the buffer from it.
+ *
+ * \param flags      value for the template's flags field
+ * \param usage      value for the template's usage field
+ * \param size       buffer size, stored in width0
+ * \param alignment  forwarded unchanged to r600_buffer_create
+ */
+struct pipe_resource *r600_aligned_buffer_create(struct pipe_screen *screen,
+						 unsigned flags,
+						 unsigned usage,
+						 unsigned size,
+						 unsigned alignment)
+{
+	struct pipe_resource templ;
+
+	memset(&templ, 0, sizeof templ);
+
+	/* Fixed properties shared by every buffer. */
+	templ.target = PIPE_BUFFER;
+	templ.format = PIPE_FORMAT_R8_UNORM;
+	templ.bind = 0;
+	templ.height0 = 1;
+	templ.depth0 = 1;
+	templ.array_size = 1;
+
+	/* Caller-selected properties. */
+	templ.usage = usage;
+	templ.flags = flags;
+	templ.width0 = size;
+
+	return r600_buffer_create(screen, &templ, alignment);
+}
+
+/**
+ * Wrap application-owned memory in a buffer resource.
+ *
+ * The resource is placed in the GTT domain with no flags, marked as a user
+ * pointer, and its whole range [0, width0) is recorded as valid in both
+ * valid-range trackers. The winsys converts \p user_memory into a buffer
+ * object via buffer_from_ptr.
+ *
+ * \param screen       screen to create the buffer on
+ * \param templ        resource template; width0 is the size of the memory
+ * \param user_memory  start of the application memory to wrap
+ * \return the new buffer, or NULL if allocating the resource struct or
+ *         converting the pointer failed
+ */
+struct pipe_resource *
+r600_buffer_from_user_memory(struct pipe_screen *screen,
+			     const struct pipe_resource *templ,
+			     void *user_memory)
+{
+	struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
+	struct radeon_winsys *ws = rscreen->ws;
+	struct r600_resource *rbuffer = r600_alloc_buffer_struct(screen, templ);
+
+	/* The struct allocation can fail; don't touch a NULL resource. */
+	if (!rbuffer)
+		return NULL;
+
+	rbuffer->domains = RADEON_DOMAIN_GTT;
+	rbuffer->flags = 0;
+	rbuffer->b.is_user_ptr = true;
+	util_range_add(&rbuffer->valid_buffer_range, 0, templ->width0);
+	util_range_add(&rbuffer->b.valid_buffer_range, 0, templ->width0);
+
+	/* Convert a user pointer to a buffer. */
+	rbuffer->buf = ws->buffer_from_ptr(ws, user_memory, templ->width0);
+	if (!rbuffer->buf) {
+		FREE(rbuffer);
+		return NULL;
+	}
+
+	/* Without virtual memory no GPU address is recorded. */
+	if (rscreen->info.has_virtual_memory)
+		rbuffer->gpu_address =
+			ws->buffer_get_virtual_address(rbuffer->buf);
+	else
+		rbuffer->gpu_address = 0;
+
+	rbuffer->vram_usage = 0;
+	rbuffer->gart_usage = templ->width0;
+
+	return &rbuffer->b.b;
+}
diff -Nru mesa-17.2.4/src/gallium/drivers/r600/r600_cs.h mesa-17.3.3/src/gallium/drivers/r600/r600_cs.h
--- mesa-17.2.4/src/gallium/drivers/r600/r600_cs.h 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r600/r600_cs.h 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,209 @@
+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Marek Olšák
+ */
+
+/**
+ * This file contains helpers for writing commands to command streams.
+ */
+
+#ifndef R600_CS_H
+#define R600_CS_H
+
+#include "r600_pipe_common.h"
+#include "r600d_common.h"
+
+/**
+ * Check whether the memory referenced by a command stream, plus some extra
+ * amount not yet on its buffer list, still fits the memory budget.
+ *
+ * VRAM demand beyond the VRAM size is counted as GTT demand; the result is
+ * true while the GTT demand stays below 70% of the GART size.
+ *
+ * \param vram  VRAM memory size not added to the buffer list yet
+ * \param gtt   GTT memory size not added to the buffer list yet
+ */
+static inline bool
+radeon_cs_memory_below_limit(struct r600_common_screen *screen,
+			     struct radeon_winsys_cs *cs,
+			     uint64_t vram, uint64_t gtt)
+{
+	const uint64_t vram_needed = vram + cs->used_vram;
+	uint64_t gtt_needed = gtt + cs->used_gart;
+
+	/* Whatever does not fit into VRAM has to live in GTT instead. */
+	if (vram_needed > screen->info.vram_size)
+		gtt_needed += vram_needed - screen->info.vram_size;
+
+	/* Only the GTT budget is left to check. */
+	return gtt_needed < screen->info.gart_size * 0.7;
+}
+
+/**
+ * Put a buffer on the buffer list of a ring's command stream (CS).
+ *
+ * Every buffer a CS uses has to be on its list so the kernel driver knows
+ * which buffers the GPU commands touch; buffers not on the list may be
+ * swapped out (inaccessible) while the CS executes. The list is emptied by
+ * each context flush and has to be rebuilt afterwards.
+ *
+ * \p usage must be non-zero; RADEON_USAGE_SYNCHRONIZED is always added to it.
+ *
+ * \return the value returned by the winsys cs_add_buffer hook, times four
+ */
+static inline unsigned radeon_add_to_buffer_list(struct r600_common_context *rctx,
+						 struct r600_ring *ring,
+						 struct r600_resource *rbo,
+						 enum radeon_bo_usage usage,
+						 enum radeon_bo_priority priority)
+{
+	enum radeon_bo_usage sync_usage;
+	unsigned list_index;
+
+	assert(usage);
+	sync_usage = (enum radeon_bo_usage)(usage | RADEON_USAGE_SYNCHRONIZED);
+
+	list_index = rctx->ws->cs_add_buffer(ring->cs, rbo->buf, sync_usage,
+					     rbo->domains, priority);
+	return list_index * 4;
+}
+
+/**
+ * Variant of radeon_add_to_buffer_list that, when \p check_mem is set, first
+ * checks the memory budget and flushes the ring asynchronously if adding the
+ * buffer would exceed it.
+ *
+ * When this SHOULD NOT be used:
+ *
+ * - if r600_context_add_resource_size has been called for the buffer
+ *   followed by *_need_cs_space for checking the memory usage
+ *
+ * - if r600_need_dma_space has been called for the buffer
+ *
+ * - when emitting state packets and draw packets (because preceding packets
+ *   can't be re-emitted at that point)
+ *
+ * - if shader resource "enabled_mask" is not up-to-date or there is
+ *   a different constraint disallowing a context flush
+ */
+static inline unsigned
+radeon_add_to_buffer_list_check_mem(struct r600_common_context *rctx,
+				    struct r600_ring *ring,
+				    struct r600_resource *rbo,
+				    enum radeon_bo_usage usage,
+				    enum radeon_bo_priority priority,
+				    bool check_mem)
+{
+	if (check_mem) {
+		/* Demand tracked by the context plus this buffer's share. */
+		const uint64_t vram = rctx->vram + rbo->vram_usage;
+		const uint64_t gtt = rctx->gtt + rbo->gart_usage;
+
+		if (!radeon_cs_memory_below_limit(rctx->screen, ring->cs,
+						  vram, gtt))
+			ring->flush(rctx, RADEON_FLUSH_ASYNC, NULL);
+	}
+
+	return radeon_add_to_buffer_list(rctx, ring, rbo, usage, priority);
+}
+
+/**
+ * Add \p rbo to the ring's buffer list and, when the screen reports no
+ * virtual memory support, emit a two-dword NOP packet carrying the value
+ * returned by radeon_add_to_buffer_list.
+ */
+static inline void r600_emit_reloc(struct r600_common_context *rctx,
+				   struct r600_ring *ring, struct r600_resource *rbo,
+				   enum radeon_bo_usage usage,
+				   enum radeon_bo_priority priority)
+{
+	struct radeon_winsys_cs *cs = ring->cs;
+	struct r600_common_screen *rscreen =
+		(struct r600_common_screen*)rctx->b.screen;
+	const bool needs_reloc = !rscreen->info.has_virtual_memory;
+	const unsigned reloc = radeon_add_to_buffer_list(rctx, ring, rbo,
+							 usage, priority);
+
+	/* With virtual memory there is nothing to emit. */
+	if (!needs_reloc)
+		return;
+
+	radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
+	radeon_emit(cs, reloc);
+}
+
+/**
+ * Begin a SET_CONFIG_REG packet covering \p num consecutive registers
+ * starting at \p reg.
+ *
+ * Only the two header dwords are emitted (packet header and the register's
+ * dword index relative to R600_CONFIG_REG_OFFSET); the caller must follow up
+ * with \p num register values.
+ *
+ * \param reg  register byte offset inside the config register range
+ * \param num  number of register values that will follow
+ */
+static inline void radeon_set_config_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
+{
+	/* Check the lower bound as well: an offset below the config range
+	 * would make the unsigned subtraction below wrap around. */
+	assert(reg >= R600_CONFIG_REG_OFFSET && reg < R600_CONTEXT_REG_OFFSET);
+	assert(cs->current.cdw + 2 + num <= cs->current.max_dw);
+	radeon_emit(cs, PKT3(PKT3_SET_CONFIG_REG, num, 0));
+	radeon_emit(cs, (reg - R600_CONFIG_REG_OFFSET) >> 2);
+}
+
+/**
+ * Write one config register: a SET_CONFIG_REG header for a single register
+ * followed by \p value.
+ */
+static inline void radeon_set_config_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
+{
+ radeon_set_config_reg_seq(cs, reg, 1);
+ radeon_emit(cs, value);
+}
+
+/**
+ * Begin a SET_CONTEXT_REG packet covering \p num consecutive registers
+ * starting at \p reg.
+ *
+ * Emits the packet header and the register's dword index relative to
+ * R600_CONTEXT_REG_OFFSET; the caller must follow up with \p num values.
+ */
+static inline void radeon_set_context_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
+{
+	const unsigned dw_index = (reg - R600_CONTEXT_REG_OFFSET) >> 2;
+
+	assert(reg >= R600_CONTEXT_REG_OFFSET);
+	assert(cs->current.cdw + 2 + num <= cs->current.max_dw);
+
+	radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, num, 0));
+	radeon_emit(cs, dw_index);
+}
+
+/**
+ * Write one context register: a SET_CONTEXT_REG header for a single register
+ * followed by \p value.
+ */
+static inline void radeon_set_context_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
+{
+ radeon_set_context_reg_seq(cs, reg, 1);
+ radeon_emit(cs, value);
+}
+
+/**
+ * Write one context register with an index field.
+ *
+ * Emits three dwords: the SET_CONTEXT_REG header for one register, the
+ * register's dword index with \p idx placed at bit 28 and up, and \p value.
+ */
+static inline void radeon_set_context_reg_idx(struct radeon_winsys_cs *cs,
+					      unsigned reg, unsigned idx,
+					      unsigned value)
+{
+	const unsigned dw_index = (reg - R600_CONTEXT_REG_OFFSET) >> 2;
+
+	assert(reg >= R600_CONTEXT_REG_OFFSET);
+	assert(cs->current.cdw + 3 <= cs->current.max_dw);
+
+	radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, 1, 0));
+	radeon_emit(cs, dw_index | (idx << 28));
+	radeon_emit(cs, value);
+}
+
+/**
+ * Begin a SET_SH_REG packet covering \p num consecutive registers starting
+ * at \p reg, which must lie in [SI_SH_REG_OFFSET, SI_SH_REG_END).
+ *
+ * Emits the packet header and the register's dword index relative to
+ * SI_SH_REG_OFFSET; the caller must follow up with \p num values.
+ */
+static inline void radeon_set_sh_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
+{
+	const unsigned dw_index = (reg - SI_SH_REG_OFFSET) >> 2;
+
+	assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END);
+	assert(cs->current.cdw + 2 + num <= cs->current.max_dw);
+
+	radeon_emit(cs, PKT3(PKT3_SET_SH_REG, num, 0));
+	radeon_emit(cs, dw_index);
+}
+
+/**
+ * Write one SH register: a SET_SH_REG header for a single register followed
+ * by \p value.
+ */
+static inline void radeon_set_sh_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
+{
+ radeon_set_sh_reg_seq(cs, reg, 1);
+ radeon_emit(cs, value);
+}
+
+/**
+ * Begin a SET_UCONFIG_REG packet covering \p num consecutive registers
+ * starting at \p reg, which must lie in
+ * [CIK_UCONFIG_REG_OFFSET, CIK_UCONFIG_REG_END).
+ *
+ * Emits the packet header and the register's dword index relative to
+ * CIK_UCONFIG_REG_OFFSET; the caller must follow up with \p num values.
+ */
+static inline void radeon_set_uconfig_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
+{
+	const unsigned dw_index = (reg - CIK_UCONFIG_REG_OFFSET) >> 2;
+
+	assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
+	assert(cs->current.cdw + 2 + num <= cs->current.max_dw);
+
+	radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, num, 0));
+	radeon_emit(cs, dw_index);
+}
+
+/**
+ * Write one uconfig register: a SET_UCONFIG_REG header for a single register
+ * followed by \p value.
+ */
+static inline void radeon_set_uconfig_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
+{
+ radeon_set_uconfig_reg_seq(cs, reg, 1);
+ radeon_emit(cs, value);
+}
+
+/**
+ * Write one uconfig register with an index field.
+ *
+ * Emits three dwords: the SET_UCONFIG_REG header for one register, the
+ * register's dword index with \p idx placed at bit 28 and up, and \p value.
+ */
+static inline void radeon_set_uconfig_reg_idx(struct radeon_winsys_cs *cs,
+					      unsigned reg, unsigned idx,
+					      unsigned value)
+{
+	const unsigned dw_index = (reg - CIK_UCONFIG_REG_OFFSET) >> 2;
+
+	assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
+	assert(cs->current.cdw + 3 <= cs->current.max_dw);
+
+	radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, 1, 0));
+	radeon_emit(cs, dw_index | (idx << 28));
+	radeon_emit(cs, value);
+}
+
+#endif
diff -Nru mesa-17.2.4/src/gallium/drivers/r600/r600d_common.h mesa-17.3.3/src/gallium/drivers/r600/r600d_common.h
--- mesa-17.2.4/src/gallium/drivers/r600/r600d_common.h 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r600/r600d_common.h 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,135 @@
+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Marek Olšák
+ */
+
+#ifndef R600D_COMMON_H
+#define R600D_COMMON_H
+
+/* Byte offsets delimiting the register ranges. The radeon_set_*_reg helpers
+ * in r600_cs.h subtract the matching *_OFFSET from a register address to get
+ * the value written into the packet. */
+#define R600_CONFIG_REG_OFFSET 0x08000
+#define R600_CONTEXT_REG_OFFSET 0x28000
+#define SI_SH_REG_OFFSET 0x0000B000
+#define SI_SH_REG_END 0x0000C000
+#define CIK_UCONFIG_REG_OFFSET 0x00030000
+#define CIK_UCONFIG_REG_END 0x00038000
+
+/* Packet header fields: type in bits 31:30, count in bits 29:16, type-3
+ * opcode in bits 15:8 and the predicate flag in bit 0. PKT3() combines them
+ * into one header dword. */
+#define PKT_TYPE_S(x) (((unsigned)(x) & 0x3) << 30)
+#define PKT_COUNT_S(x) (((unsigned)(x) & 0x3FFF) << 16)
+#define PKT3_IT_OPCODE_S(x) (((unsigned)(x) & 0xFF) << 8)
+#define PKT3_PREDICATE(x) (((x) >> 0) & 0x1)
+#define PKT3(op, count, predicate) (PKT_TYPE_S(3) | PKT_COUNT_S(count) | PKT3_IT_OPCODE_S(op) | PKT3_PREDICATE(predicate))
+
+/* Type-3 packet opcodes, each followed by the field encodings used with it. */
+#define PKT3_NOP 0x10
+#define PKT3_SET_PREDICATION 0x20
+#define PKT3_STRMOUT_BUFFER_UPDATE 0x34
+#define STRMOUT_STORE_BUFFER_FILLED_SIZE 1
+#define STRMOUT_OFFSET_SOURCE(x) (((unsigned)(x) & 0x3) << 1)
+#define STRMOUT_OFFSET_FROM_PACKET 0
+#define STRMOUT_OFFSET_FROM_VGT_FILLED_SIZE 1
+#define STRMOUT_OFFSET_FROM_MEM 2
+#define STRMOUT_OFFSET_NONE 3
+#define STRMOUT_SELECT_BUFFER(x) (((unsigned)(x) & 0x3) << 8)
+#define PKT3_WAIT_REG_MEM 0x3C
+#define WAIT_REG_MEM_EQUAL 3
+#define WAIT_REG_MEM_MEM_SPACE(x) (((unsigned)(x) & 0x3) << 4)
+#define PKT3_COPY_DATA 0x40
+#define COPY_DATA_SRC_SEL(x) ((x) & 0xf)
+#define COPY_DATA_REG 0
+#define COPY_DATA_MEM 1
+#define COPY_DATA_PERF 4
+#define COPY_DATA_IMM 5
+#define COPY_DATA_TIMESTAMP 9
+#define COPY_DATA_DST_SEL(x) (((unsigned)(x) & 0xf) << 8)
+#define COPY_DATA_MEM_ASYNC 5
+#define COPY_DATA_COUNT_SEL (1 << 16)
+#define COPY_DATA_WR_CONFIRM (1 << 20)
+#define PKT3_EVENT_WRITE 0x46
+#define PKT3_EVENT_WRITE_EOP 0x47
+#define EOP_INT_SEL(x) ((x) << 24)
+#define EOP_INT_SEL_NONE 0
+#define EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM 3
+#define EOP_DATA_SEL(x) ((x) << 29)
+#define EOP_DATA_SEL_DISCARD 0
+#define EOP_DATA_SEL_VALUE_32BIT 1
+#define EOP_DATA_SEL_VALUE_64BIT 2
+#define EOP_DATA_SEL_TIMESTAMP 3
+#define PKT3_RELEASE_MEM 0x49 /* GFX9+ */
+#define PKT3_SET_CONFIG_REG 0x68
+#define PKT3_SET_CONTEXT_REG 0x69
+#define PKT3_STRMOUT_BASE_UPDATE 0x72 /* r700 only */
+#define PKT3_SURFACE_BASE_UPDATE 0x73 /* r600 only */
+#define SURFACE_BASE_UPDATE_DEPTH (1 << 0)
+#define SURFACE_BASE_UPDATE_COLOR(x) (2 << (x))
+/* Mask with bits 1..x set. The argument is parenthesized so that expressions
+ * using operators of lower precedence than << expand correctly. */
+#define SURFACE_BASE_UPDATE_COLOR_NUM(x) (((1 << (x)) - 1) << 1)
+#define SURFACE_BASE_UPDATE_STRMOUT(x) (0x200 << (x))
+#define PKT3_SET_SH_REG 0x76 /* SI and later */
+#define PKT3_SET_UCONFIG_REG 0x79 /* CIK and later */
+
+/* Event type codes, placed into a packet dword with EVENT_TYPE(). */
+#define EVENT_TYPE_SAMPLE_STREAMOUTSTATS1 0x01 /* EG and later */
+#define EVENT_TYPE_SAMPLE_STREAMOUTSTATS2 0x02 /* EG and later */
+#define EVENT_TYPE_SAMPLE_STREAMOUTSTATS3 0x03 /* EG and later */
+#define EVENT_TYPE_PS_PARTIAL_FLUSH 0x10
+#define EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT 0x14
+#define EVENT_TYPE_ZPASS_DONE 0x15
+#define EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT 0x16
+#define EVENT_TYPE_PERFCOUNTER_START 0x17
+#define EVENT_TYPE_PERFCOUNTER_STOP 0x18
+#define EVENT_TYPE_PIPELINESTAT_START 0x19
+#define EVENT_TYPE_PIPELINESTAT_STOP 0x1a
+#define EVENT_TYPE_PERFCOUNTER_SAMPLE 0x1b
+#define EVENT_TYPE_SAMPLE_PIPELINESTAT 0x1e
+#define EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH 0x1f
+#define EVENT_TYPE_SAMPLE_STREAMOUTSTATS 0x20
+#define EVENT_TYPE_BOTTOM_OF_PIPE_TS 0x28
+#define EVENT_TYPE_FLUSH_AND_INV_DB_META 0x2c /* supported on r700+ */
+#define EVENT_TYPE_FLUSH_AND_INV_CB_META 0x2e /* supported on r700+ */
+#define EVENT_TYPE(x) ((x) << 0)
+#define EVENT_INDEX(x) ((x) << 8)
+ /* EVENT_INDEX values:
+ * 0 - any non-TS event
+ * 1 - ZPASS_DONE
+ * 2 - SAMPLE_PIPELINESTAT
+ * 3 - SAMPLE_STREAMOUTSTAT*
+ * 4 - *S_PARTIAL_FLUSH
+ * 5 - TS events
+ */
+
+/* Predication operations (shifted into place with PRED_OP) and flag bits. */
+#define PREDICATION_OP_CLEAR 0x0
+#define PREDICATION_OP_ZPASS 0x1
+#define PREDICATION_OP_PRIMCOUNT 0x2
+#define PREDICATION_OP_BOOL64 0x3
+#define PRED_OP(x) ((x) << 16)
+/* Bit 31 needs an unsigned constant: shifting a signed 1 into the sign bit
+ * overflows int. */
+#define PREDICATION_CONTINUE (1u << 31)
+#define PREDICATION_HINT_WAIT (0 << 12)
+#define PREDICATION_HINT_NOWAIT_DRAW (1 << 12)
+#define PREDICATION_DRAW_NOT_VISIBLE (0 << 8)
+#define PREDICATION_DRAW_VISIBLE (1 << 8)
+
+/* Component swap selector values.
+ * NOTE(review): the V_0280A0_ prefix presumably names the register these
+ * values belong to - confirm against the register headers. */
+#define V_0280A0_SWAP_STD 0x00000000
+#define V_0280A0_SWAP_ALT 0x00000001
+#define V_0280A0_SWAP_STD_REV 0x00000002
+#define V_0280A0_SWAP_ALT_REV 0x00000003
+
+/* One-bit FAST_CLEAR field: bit 17 in the EG_ variant, bit 13 in the SI_
+ * variant. */
+#define EG_S_028C70_FAST_CLEAR(x) (((unsigned)(x) & 0x1) << 17)
+#define SI_S_028C70_FAST_CLEAR(x) (((unsigned)(x) & 0x1) << 13)
+
+#endif
diff -Nru mesa-17.2.4/src/gallium/drivers/r600/r600_gpu_load.c mesa-17.3.3/src/gallium/drivers/r600/r600_gpu_load.c
--- mesa-17.2.4/src/gallium/drivers/r600/r600_gpu_load.c 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r600/r600_gpu_load.c 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,263 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Marek Olšák
+ *
+ */
+
+/* The GPU load is measured as follows.
+ *
+ * There is a thread which samples the GRBM_STATUS register at a certain
+ * frequency and the "busy" or "idle" counter is incremented based on
+ * whether the GUI_ACTIVE bit is set or not.
+ *
+ * Then, the user can sample the counters twice and calculate the average
+ * GPU load between the two samples.
+ */
+
+#include "r600_pipe_common.h"
+#include "r600_query.h"
+#include "os/os_time.h"
+
+/* For good accuracy at 1000 fps or lower. This will be inaccurate for higher
+ * fps (there are too few samples per frame). */
+#define SAMPLES_PER_SEC 10000
+
+/* GRBM_STATUS register offset; each *_BUSY(x) macro below extracts one status
+ * bit from a value read from that register. */
+#define GRBM_STATUS 0x8010
+#define TA_BUSY(x) (((x) >> 14) & 0x1)
+#define GDS_BUSY(x) (((x) >> 15) & 0x1)
+#define VGT_BUSY(x) (((x) >> 17) & 0x1)
+#define IA_BUSY(x) (((x) >> 19) & 0x1)
+#define SX_BUSY(x) (((x) >> 20) & 0x1)
+#define WD_BUSY(x) (((x) >> 21) & 0x1)
+#define SPI_BUSY(x) (((x) >> 22) & 0x1)
+#define BCI_BUSY(x) (((x) >> 23) & 0x1)
+#define SC_BUSY(x) (((x) >> 24) & 0x1)
+#define PA_BUSY(x) (((x) >> 25) & 0x1)
+#define DB_BUSY(x) (((x) >> 26) & 0x1)
+#define CP_BUSY(x) (((x) >> 29) & 0x1)
+#define CB_BUSY(x) (((x) >> 30) & 0x1)
+#define GUI_ACTIVE(x) (((x) >> 31) & 0x1)
+
+/* NOTE(review): SRBM_STATUS2 and CP_STAT, and the bit extractors that follow
+ * them, are not referenced anywhere else in this file; only GRBM_STATUS is
+ * read. Confirm whether they are still needed. */
+#define SRBM_STATUS2 0x0e4c
+#define SDMA_BUSY(x) (((x) >> 5) & 0x1)
+
+#define CP_STAT 0x8680
+#define PFP_BUSY(x) (((x) >> 15) & 0x1)
+#define MEQ_BUSY(x) (((x) >> 16) & 0x1)
+#define ME_BUSY(x) (((x) >> 17) & 0x1)
+#define SURFACE_SYNC_BUSY(x) (((x) >> 21) & 0x1)
+#define DMA_BUSY(x) (((x) >> 22) & 0x1)
+#define SCRATCH_RAM_BUSY(x) (((x) >> 24) & 0x1)
+
+/* Pass-through "mask" for UPDATE_COUNTER when the value is already a boolean.
+ * The expansion is parenthesized so that it stays a single operand whatever
+ * expression it is used in. */
+#define IDENTITY(x) (x)
+
+/* Increment the busy or the idle counter of counters->named.<field>,
+ * depending on whether mask(value) is non-zero. Relies on variables named
+ * "value" and "counters" being in scope at the point of use. */
+#define UPDATE_COUNTER(field, mask) \
+	do { \
+		if (mask(value)) \
+			p_atomic_inc(&counters->named.field.busy); \
+		else \
+			p_atomic_inc(&counters->named.field.idle); \
+	} while (0)
+
+/**
+ * Take one sample of GRBM_STATUS and bump the busy or idle counter of every
+ * tracked unit in \p counters accordingly.
+ *
+ * The local names "value" and "counters" are used implicitly by the
+ * UPDATE_COUNTER macro and must not be renamed.
+ */
+static void r600_update_mmio_counters(struct r600_common_screen *rscreen,
+ union r600_mmio_counters *counters)
+{
+ uint32_t value = 0;
+ bool gui_busy, sdma_busy = false;
+
+ /* GRBM_STATUS */
+ rscreen->ws->read_registers(rscreen->ws, GRBM_STATUS, 1, &value);
+
+ UPDATE_COUNTER(ta, TA_BUSY);
+ UPDATE_COUNTER(gds, GDS_BUSY);
+ UPDATE_COUNTER(vgt, VGT_BUSY);
+ UPDATE_COUNTER(ia, IA_BUSY);
+ UPDATE_COUNTER(sx, SX_BUSY);
+ UPDATE_COUNTER(wd, WD_BUSY);
+ UPDATE_COUNTER(spi, SPI_BUSY);
+ UPDATE_COUNTER(bci, BCI_BUSY);
+ UPDATE_COUNTER(sc, SC_BUSY);
+ UPDATE_COUNTER(pa, PA_BUSY);
+ UPDATE_COUNTER(db, DB_BUSY);
+ UPDATE_COUNTER(cp, CP_BUSY);
+ UPDATE_COUNTER(cb, CB_BUSY);
+ UPDATE_COUNTER(gui, GUI_ACTIVE);
+ gui_busy = GUI_ACTIVE(value);
+
+ /* sdma_busy is never set in this function, so the overall "gpu" counter
+ * follows GUI_ACTIVE alone. "value" is reused to feed the macro. */
+ value = gui_busy || sdma_busy;
+ UPDATE_COUNTER(gpu, IDENTITY);
+}
+
+#undef UPDATE_COUNTER
+
+/**
+ * Entry point of the sampling thread.
+ *
+ * Until gpu_load_stop_thread becomes non-zero, the loop sleeps, adjusts the
+ * sleep time by one microsecond per iteration based on os_time_timeout (never
+ * below 1 us), and samples the MMIO counters. On exit it decrements
+ * gpu_load_stop_thread again.
+ *
+ * \param param  the struct r600_common_screen owning the counters
+ * \return always 0
+ */
+static int
+r600_gpu_load_thread(void *param)
+{
+	struct r600_common_screen *rscreen = (struct r600_common_screen*)param;
+	const int period_us = 1000000 / SAMPLES_PER_SEC;
+	int sleep_us = period_us;
+	int64_t prev_time = os_time_get();
+
+	for (;;) {
+		int64_t now;
+
+		if (p_atomic_read(&rscreen->gpu_load_stop_thread))
+			break;
+
+		if (sleep_us)
+			os_time_sleep(sleep_us);
+
+		/* Make sure we sleep the ideal amount of time to match
+		 * the expected frequency. */
+		now = os_time_get();
+		sleep_us = os_time_timeout(prev_time, prev_time + period_us, now) ?
+			   MAX2(sleep_us - 1, 1) : sleep_us + 1;
+		prev_time = now;
+
+		/* Update the counters. */
+		r600_update_mmio_counters(rscreen, &rscreen->mmio_counters);
+	}
+
+	p_atomic_dec(&rscreen->gpu_load_stop_thread);
+	return 0;
+}
+
+/**
+ * Stop the sampling thread, if one was started: raise the stop counter, join
+ * the thread and clear the thread handle. Does nothing when no thread exists.
+ */
+void r600_gpu_load_kill_thread(struct r600_common_screen *rscreen)
+{
+	if (rscreen->gpu_load_thread) {
+		p_atomic_inc(&rscreen->gpu_load_stop_thread);
+		thrd_join(rscreen->gpu_load_thread, NULL);
+		rscreen->gpu_load_thread = 0;
+	}
+}
+
+/**
+ * Read one busy/idle counter pair, starting the sampling thread on first use.
+ *
+ * \param busy_index  index of the busy counter in mmio_counters.array; the
+ *                    idle counter is read from the following element
+ * \return the busy count in the low 32 bits and the idle count in the high
+ *         32 bits
+ */
+static uint64_t r600_read_mmio_counter(struct r600_common_screen *rscreen,
+				       unsigned busy_index)
+{
+	unsigned busy, idle;
+
+	/* Start the thread if needed. */
+	if (!rscreen->gpu_load_thread) {
+		mtx_lock(&rscreen->gpu_load_mutex);
+		/* Check again inside the mutex. */
+		if (!rscreen->gpu_load_thread) {
+			rscreen->gpu_load_thread =
+				u_thread_create(r600_gpu_load_thread, rscreen);
+		}
+		mtx_unlock(&rscreen->gpu_load_mutex);
+	}
+
+	busy = p_atomic_read(&rscreen->mmio_counters.array[busy_index]);
+	idle = p_atomic_read(&rscreen->mmio_counters.array[busy_index + 1]);
+
+	return ((uint64_t)idle << 32) | busy;
+}
+
+/**
+ * Compute the busy percentage of one counter pair since \p begin.
+ *
+ * \param begin       packed counters returned earlier by
+ *                    r600_read_mmio_counter (busy low, idle high)
+ * \param busy_index  index of the busy counter in mmio_counters.array
+ * \return percentage (0..100) of samples that were busy; if no samples were
+ *         taken in between, 100 or 0 according to one immediate sample
+ */
+static unsigned r600_end_mmio_counter(struct r600_common_screen *rscreen,
+				      uint64_t begin, unsigned busy_index)
+{
+	uint64_t end = r600_read_mmio_counter(rscreen, busy_index);
+	unsigned busy = (end & 0xffffffff) - (begin & 0xffffffff);
+	unsigned idle = (end >> 32) - (begin >> 32);
+
+	/* Calculate the % of time the busy counter was being incremented.
+	 *
+	 * If no counters were incremented, return the current counter status.
+	 * It's for the case when the load is queried faster than
+	 * the counters are updated.
+	 */
+	if (idle || busy) {
+		/* Use 64-bit arithmetic: in 32 bits busy*100 wraps once busy
+		 * exceeds about 42.9 million samples, and busy + idle can
+		 * wrap as well. The quotient is at most 100. */
+		return (uint64_t)busy * 100 / ((uint64_t)busy + idle);
+	} else {
+		union r600_mmio_counters counters;
+
+		memset(&counters, 0, sizeof(counters));
+		r600_update_mmio_counters(rscreen, &counters);
+		return counters.array[busy_index] ? 100 : 0;
+	}
+}
+
+/* Index of counter <field>'s busy member within mmio_counters.array. The
+ * screen argument is parenthesized so any pointer expression can be passed. */
+#define BUSY_INDEX(rscreen, field) (&(rscreen)->mmio_counters.named.field.busy - \
+	(rscreen)->mmio_counters.array)
+
+/**
+ * Map an R600_QUERY_GPU_* query type to the index of the matching busy
+ * counter in mmio_counters.array (see BUSY_INDEX). Any other type hits
+ * unreachable().
+ *
+ * NOTE(review): r600_update_mmio_counters in this file never updates the
+ * sdma, pfp, meq, me, surf_sync, cp_dma and scratch_ram counters - confirm
+ * whether queries of those types are expected to report anything.
+ */
+static unsigned busy_index_from_type(struct r600_common_screen *rscreen,
+ unsigned type)
+{
+ switch (type) {
+ case R600_QUERY_GPU_LOAD:
+ return BUSY_INDEX(rscreen, gpu);
+ case R600_QUERY_GPU_SHADERS_BUSY:
+ return BUSY_INDEX(rscreen, spi);
+ case R600_QUERY_GPU_TA_BUSY:
+ return BUSY_INDEX(rscreen, ta);
+ case R600_QUERY_GPU_GDS_BUSY:
+ return BUSY_INDEX(rscreen, gds);
+ case R600_QUERY_GPU_VGT_BUSY:
+ return BUSY_INDEX(rscreen, vgt);
+ case R600_QUERY_GPU_IA_BUSY:
+ return BUSY_INDEX(rscreen, ia);
+ case R600_QUERY_GPU_SX_BUSY:
+ return BUSY_INDEX(rscreen, sx);
+ case R600_QUERY_GPU_WD_BUSY:
+ return BUSY_INDEX(rscreen, wd);
+ case R600_QUERY_GPU_BCI_BUSY:
+ return BUSY_INDEX(rscreen, bci);
+ case R600_QUERY_GPU_SC_BUSY:
+ return BUSY_INDEX(rscreen, sc);
+ case R600_QUERY_GPU_PA_BUSY:
+ return BUSY_INDEX(rscreen, pa);
+ case R600_QUERY_GPU_DB_BUSY:
+ return BUSY_INDEX(rscreen, db);
+ case R600_QUERY_GPU_CP_BUSY:
+ return BUSY_INDEX(rscreen, cp);
+ case R600_QUERY_GPU_CB_BUSY:
+ return BUSY_INDEX(rscreen, cb);
+ case R600_QUERY_GPU_SDMA_BUSY:
+ return BUSY_INDEX(rscreen, sdma);
+ case R600_QUERY_GPU_PFP_BUSY:
+ return BUSY_INDEX(rscreen, pfp);
+ case R600_QUERY_GPU_MEQ_BUSY:
+ return BUSY_INDEX(rscreen, meq);
+ case R600_QUERY_GPU_ME_BUSY:
+ return BUSY_INDEX(rscreen, me);
+ case R600_QUERY_GPU_SURF_SYNC_BUSY:
+ return BUSY_INDEX(rscreen, surf_sync);
+ case R600_QUERY_GPU_CP_DMA_BUSY:
+ return BUSY_INDEX(rscreen, cp_dma);
+ case R600_QUERY_GPU_SCRATCH_RAM_BUSY:
+ return BUSY_INDEX(rscreen, scratch_ram);
+ default:
+ unreachable("invalid query type");
+ }
+}
+
+/**
+ * Snapshot the busy/idle counters for query \p type.
+ *
+ * \return packed counters (see r600_read_mmio_counter) to hand to
+ *         r600_end_counter later
+ */
+uint64_t r600_begin_counter(struct r600_common_screen *rscreen, unsigned type)
+{
+	return r600_read_mmio_counter(rscreen,
+				      busy_index_from_type(rscreen, type));
+}
+
+/**
+ * Finish a measurement started with r600_begin_counter.
+ *
+ * \param begin  value returned by r600_begin_counter for the same \p type
+ * \return the result of r600_end_mmio_counter for that counter
+ */
+unsigned r600_end_counter(struct r600_common_screen *rscreen, unsigned type,
+			  uint64_t begin)
+{
+	return r600_end_mmio_counter(rscreen, begin,
+				     busy_index_from_type(rscreen, type));
+}
diff -Nru mesa-17.2.4/src/gallium/drivers/r600/r600_hw_context.c mesa-17.3.3/src/gallium/drivers/r600/r600_hw_context.c
--- mesa-17.2.4/src/gallium/drivers/r600/r600_hw_context.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r600/r600_hw_context.c 2018-01-18 21:30:28.000000000 +0000
@@ -284,7 +284,7 @@
if (ctx->is_debug) {
/* Save the IB for debug contexts. */
radeon_clear_saved_cs(&ctx->last_gfx);
- radeon_save_cs(ws, cs, &ctx->last_gfx);
+ radeon_save_cs(ws, cs, &ctx->last_gfx, true);
r600_resource_reference(&ctx->last_trace_buf, ctx->trace_buf);
r600_resource_reference(&ctx->trace_buf, NULL);
}
diff -Nru mesa-17.2.4/src/gallium/drivers/r600/r600_perfcounter.c mesa-17.3.3/src/gallium/drivers/r600/r600_perfcounter.c
--- mesa-17.2.4/src/gallium/drivers/r600/r600_perfcounter.c 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r600/r600_perfcounter.c 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,649 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Nicolai Hähnle
+ *
+ */
+
+#include "util/u_memory.h"
+#include "r600_query.h"
+#include "r600_pipe_common.h"
+#include "r600d_common.h"
+
+/* Max counters per HW block */
+#define R600_QUERY_MAX_COUNTERS 16
+
+/* Translate a flat counter index into the block that owns it.
+ *
+ * Each block contributes num_groups * num_selectors consecutive indices.
+ * On success *sub_index receives the index relative to the returned block
+ * and *base_gid the number of groups in all preceding blocks.  Returns NULL
+ * when the index lies beyond the last block (*sub_index is then untouched).
+ */
+static struct r600_perfcounter_block *
+lookup_counter(struct r600_perfcounters *pc, unsigned index,
+	       unsigned *base_gid, unsigned *sub_index)
+{
+	unsigned remaining = index;
+	unsigned i;
+
+	*base_gid = 0;
+	for (i = 0; i < pc->num_blocks; i++) {
+		struct r600_perfcounter_block *cur = &pc->blocks[i];
+		unsigned span = cur->num_groups * cur->num_selectors;
+
+		if (remaining >= span) {
+			/* Not in this block: skip its counters and groups. */
+			remaining -= span;
+			*base_gid += cur->num_groups;
+			continue;
+		}
+
+		*sub_index = remaining;
+		return cur;
+	}
+
+	return NULL;
+}
+
+/* Translate a flat group index into the block that owns it.
+ *
+ * *index is reduced in place by the group count of every block that is
+ * skipped, so on success it holds the group index relative to the returned
+ * block.  Returns NULL when the index lies beyond the last block.
+ */
+static struct r600_perfcounter_block *
+lookup_group(struct r600_perfcounters *pc, unsigned *index)
+{
+	unsigned i = 0;
+
+	while (i < pc->num_blocks) {
+		struct r600_perfcounter_block *cur = &pc->blocks[i];
+
+		if (*index < cur->num_groups)
+			return cur;
+
+		*index -= cur->num_groups;
+		i++;
+	}
+
+	return NULL;
+}
+
+/* Per-query state of one selected counter group.  Groups are chained off
+ * r600_query_pc::groups as a singly linked list. */
+struct r600_pc_group {
+ struct r600_pc_group *next;
+ struct r600_perfcounter_block *block;
+ unsigned sub_gid; /* only used during init */
+ unsigned result_base; /* only used during init */
+ int se; /* -1 when no specific SE is selected */
+ int instance; /* -1 when no specific instance is selected */
+ unsigned num_counters; /* number of valid entries in selectors[] */
+ unsigned selectors[R600_QUERY_MAX_COUNTERS];
+};
+
+/* Location of one user-requested counter inside the result buffer, as
+ * consumed by r600_pc_query_add_result(). */
+struct r600_pc_counter {
+ unsigned base; /* index of the first value, in uint64s */
+ unsigned qwords; /* number of values that are summed up */
+ unsigned stride; /* in uint64s */
+};
+
+/* Marker stored in r600_query_pc::shaders when a SHADER_WINDOWED block is
+ * used and no shader selection has been recorded yet. */
+#define R600_PC_SHADERS_WINDOWING (1 << 31)
+
+/* A batch performance-counter query built on top of r600_query_hw. */
+struct r600_query_pc {
+ struct r600_query_hw b;
+
+ unsigned shaders; /* shader mask passed to emit_shaders, 0 = none */
+ unsigned num_counters; /* number of entries in counters[] */
+ struct r600_pc_counter *counters;
+ struct r600_pc_group *groups; /* head of the group list */
+};
+
+/* Release everything owned by a batch query: the group list, the counter
+ * table and finally the underlying hardware query. */
+static void r600_pc_query_destroy(struct r600_common_screen *rscreen,
+				  struct r600_query *rquery)
+{
+	struct r600_query_pc *pcq = (struct r600_query_pc *)rquery;
+	struct r600_pc_group *cur = pcq->groups;
+
+	while (cur) {
+		struct r600_pc_group *following = cur->next;
+
+		FREE(cur);
+		cur = following;
+	}
+	pcq->groups = NULL;
+
+	FREE(pcq->counters);
+
+	r600_query_hw_destroy(rscreen, rquery);
+}
+
+/* prepare_buffer hook of batch_query_hw_ops: performs no work on the buffer
+ * and always reports success. */
+static bool r600_pc_query_prepare_buffer(struct r600_common_screen *screen,
+ struct r600_query_hw *hwquery,
+ struct r600_resource *buffer)
+{
+ /* no-op */
+ return true;
+}
+
+/* emit_start hook: programs the selectors of every group of the query and
+ * then starts the counters through the screen's perfcounter callbacks. */
+static void r600_pc_query_emit_start(struct r600_common_context *ctx,
+ struct r600_query_hw *hwquery,
+ struct r600_resource *buffer, uint64_t va)
+{
+ struct r600_perfcounters *pc = ctx->screen->perfcounters;
+ struct r600_query_pc *query = (struct r600_query_pc *)hwquery;
+ struct r600_pc_group *group;
+ int current_se = -1;
+ int current_instance = -1;
+
+ if (query->shaders)
+ pc->emit_shaders(ctx, query->shaders);
+
+ for (group = query->groups; group; group = group->next) {
+ struct r600_perfcounter_block *block = group->block;
+
+ /* Only re-emit the SE/instance selection when it changes. */
+ if (group->se != current_se || group->instance != current_instance) {
+ current_se = group->se;
+ current_instance = group->instance;
+ pc->emit_instance(ctx, group->se, group->instance);
+ }
+
+ pc->emit_select(ctx, block, group->num_counters, group->selectors);
+ }
+
+ /* Restore the (-1, -1) selection if a specific one was left active. */
+ if (current_se != -1 || current_instance != -1)
+ pc->emit_instance(ctx, -1, -1);
+
+ pc->emit_start(ctx, buffer, va);
+}
+
+/* emit_stop hook: stops the counters, then reads back every group once per
+ * selected SE/instance combination.  Each read advances va by
+ * num_counters uint64 slots. */
+static void r600_pc_query_emit_stop(struct r600_common_context *ctx,
+ struct r600_query_hw *hwquery,
+ struct r600_resource *buffer, uint64_t va)
+{
+ struct r600_perfcounters *pc = ctx->screen->perfcounters;
+ struct r600_query_pc *query = (struct r600_query_pc *)hwquery;
+ struct r600_pc_group *group;
+
+ pc->emit_stop(ctx, buffer, va);
+
+ for (group = query->groups; group; group = group->next) {
+ struct r600_perfcounter_block *block = group->block;
+ unsigned se = group->se >= 0 ? group->se : 0;
+ unsigned se_end = se + 1;
+
+ /* No specific SE on a per-SE block: iterate over all of them. */
+ if ((block->flags & R600_PC_BLOCK_SE) && (group->se < 0))
+ se_end = ctx->screen->info.max_se;
+
+ do {
+ unsigned instance = group->instance >= 0 ? group->instance : 0;
+
+ /* The inner loop only repeats when no specific instance is set. */
+ do {
+ pc->emit_instance(ctx, se, instance);
+ pc->emit_read(ctx, block,
+ group->num_counters, group->selectors,
+ buffer, va);
+ va += sizeof(uint64_t) * group->num_counters;
+ } while (group->instance < 0 && ++instance < block->num_instances);
+ } while (++se < se_end);
+ }
+
+ pc->emit_instance(ctx, -1, -1);
+}
+
+/* clear_result hook: zero one batch entry per counter of the query. */
+static void r600_pc_query_clear_result(struct r600_query_hw *hwquery,
+				       union pipe_query_result *result)
+{
+	struct r600_query_pc *pcq = (struct r600_query_pc *)hwquery;
+	size_t nbytes = sizeof(result->batch[0]) * pcq->num_counters;
+
+	memset(result, 0, nbytes);
+}
+
+/* add_result hook: accumulate the values of one result buffer into the
+ * batch result.  For counter i, qwords values spaced stride uint64s apart
+ * and starting at base are added to result->batch[i].u64.  As in the
+ * original code, only the low 32 bits of every sample are used. */
+static void r600_pc_query_add_result(struct r600_common_screen *rscreen,
+				     struct r600_query_hw *hwquery,
+				     void *buffer,
+				     union pipe_query_result *result)
+{
+	struct r600_query_pc *pcq = (struct r600_query_pc *)hwquery;
+	uint64_t *samples = buffer;
+	unsigned c;
+
+	for (c = 0; c < pcq->num_counters; c++) {
+		struct r600_pc_counter *info = &pcq->counters[c];
+		unsigned idx = info->base;
+		unsigned n;
+
+		for (n = 0; n < info->qwords; n++, idx += info->stride) {
+			uint32_t low = samples[idx];
+
+			result->batch[c].u64 += low;
+		}
+	}
+}
+
+/* Generic query vtable for batch queries: only destroy is specific to
+ * perfcounters, the rest reuses the r600_query_hw implementations. */
+static struct r600_query_ops batch_query_ops = {
+ .destroy = r600_pc_query_destroy,
+ .begin = r600_query_hw_begin,
+ .end = r600_query_hw_end,
+ .get_result = r600_query_hw_get_result
+};
+
+/* Hardware-query hooks used by batch queries. */
+static struct r600_query_hw_ops batch_query_hw_ops = {
+ .prepare_buffer = r600_pc_query_prepare_buffer,
+ .emit_start = r600_pc_query_emit_start,
+ .emit_stop = r600_pc_query_emit_stop,
+ .clear_result = r600_pc_query_clear_result,
+ .add_result = r600_pc_query_add_result,
+};
+
+/* Find the group state for (block, sub_gid) in the query, creating it and
+ * prepending it to query->groups if it does not exist yet.
+ *
+ * For a new group, sub_gid is decoded into shader type, SE and instance
+ * depending on the block flags.  Returns NULL on allocation failure or when
+ * the group's shader type conflicts with one already recorded in
+ * query->shaders.
+ */
+static struct r600_pc_group *get_group_state(struct r600_common_screen *screen,
+ struct r600_query_pc *query,
+ struct r600_perfcounter_block *block,
+ unsigned sub_gid)
+{
+ struct r600_pc_group *group = query->groups;
+
+ /* Reuse an existing group if there is one. */
+ while (group) {
+ if (group->block == block && group->sub_gid == sub_gid)
+ return group;
+ group = group->next;
+ }
+
+ group = CALLOC_STRUCT(r600_pc_group);
+ if (!group)
+ return NULL;
+
+ group->block = block;
+ group->sub_gid = sub_gid;
+
+ if (block->flags & R600_PC_BLOCK_SHADER) {
+ unsigned sub_gids = block->num_instances;
+ unsigned shader_id;
+ unsigned shaders;
+ unsigned query_shaders;
+
+ /* Split off the shader type; the remainder indexes SE/instance. */
+ if (block->flags & R600_PC_BLOCK_SE_GROUPS)
+ sub_gids = sub_gids * screen->info.max_se;
+ shader_id = sub_gid / sub_gids;
+ sub_gid = sub_gid % sub_gids;
+
+ shaders = screen->perfcounters->shader_type_bits[shader_id];
+
+ query_shaders = query->shaders & ~R600_PC_SHADERS_WINDOWING;
+ if (query_shaders && query_shaders != shaders) {
+ fprintf(stderr, "r600_perfcounter: incompatible shader groups\n");
+ FREE(group);
+ return NULL;
+ }
+ query->shaders = shaders;
+ }
+
+ if (block->flags & R600_PC_BLOCK_SHADER_WINDOWED && !query->shaders) {
+ // A non-zero value in query->shaders ensures that the shader
+ // masking is reset unless the user explicitly requests one.
+ query->shaders = R600_PC_SHADERS_WINDOWING;
+ }
+
+ if (block->flags & R600_PC_BLOCK_SE_GROUPS) {
+ group->se = sub_gid / block->num_instances;
+ sub_gid = sub_gid % block->num_instances;
+ } else {
+ group->se = -1;
+ }
+
+ if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) {
+ group->instance = sub_gid;
+ } else {
+ group->instance = -1;
+ }
+
+ /* Prepend to the query's group list. */
+ group->next = query->groups;
+ query->groups = group;
+
+ return group;
+}
+
+/**
+ * Create a batch query covering the given performance counters.
+ *
+ * \param ctx          context whose screen provides the perfcounter tables
+ * \param num_queries  number of entries in query_types
+ * \param query_types  query types; each must be at least
+ *                     R600_QUERY_FIRST_PERFCOUNTER and map to a known counter
+ * \return the new query, or NULL if the screen has no perfcounters, an
+ *         allocation fails, a query type is invalid, a group would need more
+ *         counters than its block provides, or r600_query_hw_init() fails
+ */
+struct pipe_query *r600_create_batch_query(struct pipe_context *ctx,
+					   unsigned num_queries,
+					   unsigned *query_types)
+{
+	struct r600_common_screen *screen =
+		(struct r600_common_screen *)ctx->screen;
+	struct r600_perfcounters *pc = screen->perfcounters;
+	struct r600_perfcounter_block *block;
+	struct r600_pc_group *group;
+	struct r600_query_pc *query;
+	unsigned base_gid, sub_gid, sub_index;
+	unsigned i, j;
+
+	if (!pc)
+		return NULL;
+
+	query = CALLOC_STRUCT(r600_query_pc);
+	if (!query)
+		return NULL;
+
+	query->b.b.ops = &batch_query_ops;
+	query->b.ops = &batch_query_hw_ops;
+
+	query->num_counters = num_queries;
+
+	/* Collect selectors per group */
+	for (i = 0; i < num_queries; ++i) {
+		if (query_types[i] < R600_QUERY_FIRST_PERFCOUNTER)
+			goto error;
+
+		block = lookup_counter(pc, query_types[i] - R600_QUERY_FIRST_PERFCOUNTER,
+				       &base_gid, &sub_index);
+		if (!block)
+			goto error;
+
+		sub_gid = sub_index / block->num_selectors;
+		sub_index = sub_index % block->num_selectors;
+
+		group = get_group_state(screen, query, block, sub_gid);
+		if (!group)
+			goto error;
+
+		if (group->num_counters >= block->num_counters) {
+			fprintf(stderr,
+				"perfcounter group %s: too many selected\n",
+				block->basename);
+			goto error;
+		}
+		group->selectors[group->num_counters] = sub_index;
+		++group->num_counters;
+	}
+
+	/* Compute result bases and CS size per group */
+	query->b.num_cs_dw_begin = pc->num_start_cs_dwords;
+	query->b.num_cs_dw_end = pc->num_stop_cs_dwords;
+
+	query->b.num_cs_dw_begin += pc->num_instance_cs_dwords; /* conservative */
+	query->b.num_cs_dw_end += pc->num_instance_cs_dwords;
+
+	i = 0;
+	for (group = query->groups; group; group = group->next) {
+		struct r600_perfcounter_block *block = group->block;
+		unsigned select_dw, read_dw;
+		unsigned instances = 1;
+
+		/* Groups without a specific SE/instance are read once per
+		 * SE and per instance (see r600_pc_query_emit_stop). */
+		if ((block->flags & R600_PC_BLOCK_SE) && group->se < 0)
+			instances = screen->info.max_se;
+		if (group->instance < 0)
+			instances *= block->num_instances;
+
+		group->result_base = i;
+		query->b.result_size += sizeof(uint64_t) * instances * group->num_counters;
+		i += instances * group->num_counters;
+
+		pc->get_size(block, group->num_counters, group->selectors,
+			     &select_dw, &read_dw);
+		query->b.num_cs_dw_begin += select_dw;
+		query->b.num_cs_dw_end += instances * read_dw;
+		query->b.num_cs_dw_begin += pc->num_instance_cs_dwords; /* conservative */
+		query->b.num_cs_dw_end += instances * pc->num_instance_cs_dwords;
+	}
+
+	if (query->shaders) {
+		if (query->shaders == R600_PC_SHADERS_WINDOWING)
+			query->shaders = 0xffffffff;
+		query->b.num_cs_dw_begin += pc->num_shaders_cs_dwords;
+	}
+
+	/* Map user-supplied query array to result indices */
+	query->counters = CALLOC(num_queries, sizeof(*query->counters));
+	if (!query->counters)
+		goto error; /* the loop below would dereference NULL */
+
+	for (i = 0; i < num_queries; ++i) {
+		struct r600_pc_counter *counter = &query->counters[i];
+		struct r600_perfcounter_block *block;
+
+		/* Cannot fail: the same lookup succeeded in the first loop. */
+		block = lookup_counter(pc, query_types[i] - R600_QUERY_FIRST_PERFCOUNTER,
+				       &base_gid, &sub_index);
+
+		sub_gid = sub_index / block->num_selectors;
+		sub_index = sub_index % block->num_selectors;
+
+		group = get_group_state(screen, query, block, sub_gid);
+		assert(group != NULL);
+
+		for (j = 0; j < group->num_counters; ++j) {
+			if (group->selectors[j] == sub_index)
+				break;
+		}
+
+		counter->base = group->result_base + j;
+		counter->stride = group->num_counters;
+
+		counter->qwords = 1;
+		if ((block->flags & R600_PC_BLOCK_SE) && group->se < 0)
+			counter->qwords = screen->info.max_se;
+		if (group->instance < 0)
+			counter->qwords *= block->num_instances;
+	}
+
+	if (!r600_query_hw_init(screen, &query->b))
+		goto error;
+
+	return (struct pipe_query *)query;
+
+error:
+	r600_pc_query_destroy(screen, &query->b.b);
+	return NULL;
+}
+
+/* Build the group and selector name tables of a block.
+ *
+ * Group names are the block basename followed, depending on the block
+ * flags, by a shader suffix, the SE index and the instance index.  Selector
+ * names are "<group>_<3-digit selector>".  Both tables are flat arrays of
+ * fixed-stride, NUL-terminated strings.
+ *
+ * Returns false on allocation failure.  In that case neither table is left
+ * allocated, so a later retry starts from scratch instead of overwriting
+ * (and leaking) a previously built group table.
+ */
+static bool r600_init_block_names(struct r600_common_screen *screen,
+				  struct r600_perfcounter_block *block)
+{
+	unsigned i, j, k;
+	unsigned groups_shader = 1, groups_se = 1, groups_instance = 1;
+	unsigned namelen;
+	char *groupname;
+	char *p;
+
+	if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS)
+		groups_instance = block->num_instances;
+	if (block->flags & R600_PC_BLOCK_SE_GROUPS)
+		groups_se = screen->info.max_se;
+	if (block->flags & R600_PC_BLOCK_SHADER)
+		groups_shader = screen->perfcounters->num_shader_types;
+
+	/* Stride = basename + NUL, plus room for each optional component. */
+	namelen = strlen(block->basename);
+	block->group_name_stride = namelen + 1;
+	if (block->flags & R600_PC_BLOCK_SHADER)
+		block->group_name_stride += 3;
+	if (block->flags & R600_PC_BLOCK_SE_GROUPS) {
+		assert(groups_se <= 10);
+		block->group_name_stride += 1;
+
+		/* '_' separator between SE and instance index */
+		if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS)
+			block->group_name_stride += 1;
+	}
+	if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) {
+		assert(groups_instance <= 100);
+		block->group_name_stride += 2;
+	}
+
+	block->group_names = MALLOC(block->num_groups * block->group_name_stride);
+	if (!block->group_names)
+		return false;
+
+	groupname = block->group_names;
+	for (i = 0; i < groups_shader; ++i) {
+		const char *shader_suffix = screen->perfcounters->shader_type_suffixes[i];
+		unsigned shaderlen = strlen(shader_suffix);
+		for (j = 0; j < groups_se; ++j) {
+			for (k = 0; k < groups_instance; ++k) {
+				strcpy(groupname, block->basename);
+				p = groupname + namelen;
+
+				if (block->flags & R600_PC_BLOCK_SHADER) {
+					strcpy(p, shader_suffix);
+					p += shaderlen;
+				}
+
+				if (block->flags & R600_PC_BLOCK_SE_GROUPS) {
+					p += sprintf(p, "%d", j);
+					if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS)
+						*p++ = '_';
+				}
+
+				if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS)
+					p += sprintf(p, "%d", k);
+
+				groupname += block->group_name_stride;
+			}
+		}
+	}
+
+	/* "_%03d" adds four characters to the group name. */
+	assert(block->num_selectors <= 1000);
+	block->selector_name_stride = block->group_name_stride + 4;
+	block->selector_names = MALLOC(block->num_groups * block->num_selectors *
+				       block->selector_name_stride);
+	if (!block->selector_names) {
+		/* Callers retry whenever one of the tables is NULL; drop the
+		 * group table so that the retry does not leak it. */
+		FREE(block->group_names);
+		block->group_names = NULL;
+		return false;
+	}
+
+	groupname = block->group_names;
+	p = block->selector_names;
+	for (i = 0; i < block->num_groups; ++i) {
+		for (j = 0; j < block->num_selectors; ++j) {
+			sprintf(p, "%s_%03d", groupname, j);
+			p += block->selector_name_stride;
+		}
+		groupname += block->group_name_stride;
+	}
+
+	return true;
+}
+
+/* Driver-query enumeration for performance counters.
+ *
+ * With info == NULL, returns the total number of counters (selectors times
+ * groups, summed over all blocks).  Otherwise fills *info for the counter
+ * at the given flat index and returns 1, or returns 0 when there are no
+ * perfcounters, the index is out of range or the name tables cannot be
+ * built.
+ */
+int r600_get_perfcounter_info(struct r600_common_screen *screen,
+ unsigned index,
+ struct pipe_driver_query_info *info)
+{
+ struct r600_perfcounters *pc = screen->perfcounters;
+ struct r600_perfcounter_block *block;
+ unsigned base_gid, sub;
+
+ if (!pc)
+ return 0;
+
+ if (!info) {
+ unsigned bid, num_queries = 0;
+
+ for (bid = 0; bid < pc->num_blocks; ++bid) {
+ num_queries += pc->blocks[bid].num_selectors *
+ pc->blocks[bid].num_groups;
+ }
+
+ return num_queries;
+ }
+
+ block = lookup_counter(pc, index, &base_gid, &sub);
+ if (!block)
+ return 0;
+
+ /* Name tables are built lazily on first use. */
+ if (!block->selector_names) {
+ if (!r600_init_block_names(screen, block))
+ return 0;
+ }
+ info->name = block->selector_names + sub * block->selector_name_stride;
+ info->query_type = R600_QUERY_FIRST_PERFCOUNTER + index;
+ info->max_value.u64 = 0;
+ info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
+ info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE;
+ info->group_id = base_gid + sub / block->num_selectors;
+ info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH;
+ /* Every counter of a block except its first and last gets DONT_LIST. */
+ if (sub > 0 && sub + 1 < block->num_selectors * block->num_groups)
+ info->flags |= PIPE_DRIVER_QUERY_FLAG_DONT_LIST;
+ return 1;
+}
+
+/* Driver-query group enumeration for performance counters.
+ *
+ * With info == NULL, returns the total number of groups.  Otherwise fills
+ * *info for the group at the given flat index and returns 1, or returns 0
+ * when there are no perfcounters, the index is out of range or the name
+ * tables cannot be built.
+ */
+int r600_get_perfcounter_group_info(struct r600_common_screen *screen,
+				    unsigned index,
+				    struct pipe_driver_query_group_info *info)
+{
+	struct r600_perfcounters *pc = screen->perfcounters;
+	struct r600_perfcounter_block *owner;
+
+	if (!pc)
+		return 0;
+
+	if (!info)
+		return pc->num_groups;
+
+	/* lookup_group() rewrites index to be relative to the block. */
+	owner = lookup_group(pc, &index);
+	if (!owner)
+		return 0;
+
+	/* Name tables are built lazily on first use. */
+	if (!owner->group_names && !r600_init_block_names(screen, owner))
+		return 0;
+
+	info->name = owner->group_names + index * owner->group_name_stride;
+	info->num_queries = owner->num_selectors;
+	info->max_active_queries = owner->num_counters;
+	return 1;
+}
+
+/* Invoke the perfcounter cleanup callback of the screen, if the screen has
+ * perfcounters at all. */
+void r600_perfcounters_destroy(struct r600_common_screen *rscreen)
+{
+	if (!rscreen->perfcounters)
+		return;
+
+	rscreen->perfcounters->cleanup(rscreen);
+}
+
+/* Allocate the zero-initialized block table for num_blocks blocks and read
+ * the RADEON_PC_SEPARATE_SE / RADEON_PC_SEPARATE_INSTANCE debug options.
+ * Returns false if the allocation fails; the options are then not read. */
+bool r600_perfcounters_init(struct r600_perfcounters *pc,
+			    unsigned num_blocks)
+{
+	pc->blocks = CALLOC(num_blocks, sizeof(struct r600_perfcounter_block));
+	if (pc->blocks) {
+		pc->separate_se =
+			debug_get_bool_option("RADEON_PC_SEPARATE_SE", false);
+		pc->separate_instance =
+			debug_get_bool_option("RADEON_PC_SEPARATE_INSTANCE", false);
+		return true;
+	}
+
+	return false;
+}
+
+/* Append a hardware block description to pc->blocks.
+ *
+ * The block is written to slot pc->num_blocks, which is then incremented;
+ * no bound on the table size is checked here.
+ * NOTE(review): presumably callers never add more blocks than were passed
+ * to r600_perfcounters_init() - confirm.
+ *
+ * The number of groups of the block is derived from its flags: one per
+ * instance with INSTANCE_GROUPS, multiplied by max_se with SE_GROUPS and by
+ * the number of shader types with SHADER.
+ */
+void r600_perfcounters_add_block(struct r600_common_screen *rscreen,
+ struct r600_perfcounters *pc,
+ const char *name, unsigned flags,
+ unsigned counters, unsigned selectors,
+ unsigned instances, void *data)
+{
+ struct r600_perfcounter_block *block = &pc->blocks[pc->num_blocks];
+
+ /* r600_pc_group::selectors has R600_QUERY_MAX_COUNTERS entries. */
+ assert(counters <= R600_QUERY_MAX_COUNTERS);
+
+ block->basename = name;
+ block->flags = flags;
+ block->num_counters = counters;
+ block->num_selectors = selectors;
+ block->num_instances = MAX2(instances, 1);
+ block->data = data;
+
+ /* The debug options set in r600_perfcounters_init() force grouping. */
+ if (pc->separate_se && (block->flags & R600_PC_BLOCK_SE))
+ block->flags |= R600_PC_BLOCK_SE_GROUPS;
+ if (pc->separate_instance && block->num_instances > 1)
+ block->flags |= R600_PC_BLOCK_INSTANCE_GROUPS;
+
+ if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) {
+ block->num_groups = block->num_instances;
+ } else {
+ block->num_groups = 1;
+ }
+
+ if (block->flags & R600_PC_BLOCK_SE_GROUPS)
+ block->num_groups *= rscreen->info.max_se;
+ if (block->flags & R600_PC_BLOCK_SHADER)
+ block->num_groups *= pc->num_shader_types;
+
+ ++pc->num_blocks;
+ pc->num_groups += block->num_groups;
+}
+
+/* Free the name tables of every block, the block table and finally the
+ * r600_perfcounters object itself. */
+void r600_perfcounters_do_destroy(struct r600_perfcounters *pc)
+{
+	unsigned idx = 0;
+
+	while (idx < pc->num_blocks) {
+		struct r600_perfcounter_block *blk = &pc->blocks[idx];
+
+		FREE(blk->group_names);
+		FREE(blk->selector_names);
+		idx++;
+	}
+
+	FREE(pc->blocks);
+	FREE(pc);
+}
diff -Nru mesa-17.2.4/src/gallium/drivers/r600/r600_pipe.c mesa-17.3.3/src/gallium/drivers/r600/r600_pipe.c
--- mesa-17.2.4/src/gallium/drivers/r600/r600_pipe.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r600/r600_pipe.c 2018-01-18 21:30:28.000000000 +0000
@@ -37,8 +37,8 @@
#include "util/u_math.h"
#include "vl/vl_decoder.h"
#include "vl/vl_video_buffer.h"
-#include "radeon/radeon_video.h"
-#include "radeon/radeon_uvd.h"
+#include "radeon_video.h"
+#include "radeon_uvd.h"
#include "os/os_time.h"
static const struct debug_named_value r600_debug_options[] = {
@@ -398,6 +398,12 @@
case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT:
case PIPE_CAP_POST_DEPTH_COVERAGE:
case PIPE_CAP_BINDLESS_TEXTURE:
+ case PIPE_CAP_NIR_SAMPLERS_AS_DEREF:
+ case PIPE_CAP_QUERY_SO_OVERFLOW:
+ case PIPE_CAP_MEMOBJ:
+ case PIPE_CAP_LOAD_CONSTBUF:
+ case PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS:
+ case PIPE_CAP_TILE_RASTER_ORDER:
return 0;
case PIPE_CAP_DOUBLES:
@@ -566,6 +572,8 @@
case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
return 1;
case PIPE_SHADER_CAP_SUBROUTINES:
+ case PIPE_SHADER_CAP_INT64_ATOMICS:
+ case PIPE_SHADER_CAP_FP16:
return 0;
case PIPE_SHADER_CAP_INTEGERS:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
@@ -590,6 +598,7 @@
return 0;
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD:
@@ -632,7 +641,8 @@
return r600_resource_create_common(screen, templ);
}
-struct pipe_screen *r600_screen_create(struct radeon_winsys *ws, unsigned flags)
+struct pipe_screen *r600_screen_create(struct radeon_winsys *ws,
+ const struct pipe_screen_config *config)
{
struct r600_screen *rscreen = CALLOC_STRUCT(r600_screen);
@@ -647,7 +657,7 @@
rscreen->b.b.get_shader_param = r600_get_shader_param;
rscreen->b.b.resource_create = r600_resource_create;
- if (!r600_common_screen_init(&rscreen->b, ws, flags)) {
+ if (!r600_common_screen_init(&rscreen->b, ws)) {
FREE(rscreen);
return NULL;
}
@@ -662,7 +672,7 @@
if (debug_get_bool_option("R600_DEBUG_COMPUTE", FALSE))
rscreen->b.debug_flags |= DBG_COMPUTE;
if (debug_get_bool_option("R600_DUMP_SHADERS", FALSE))
- rscreen->b.debug_flags |= DBG_FS | DBG_VS | DBG_GS | DBG_PS | DBG_CS | DBG_TCS | DBG_TES;
+ rscreen->b.debug_flags |= DBG_ALL_SHADERS | DBG_FS;
if (!debug_get_bool_option("R600_HYPERZ", TRUE))
rscreen->b.debug_flags |= DBG_NO_HYPERZ;
diff -Nru mesa-17.2.4/src/gallium/drivers/r600/r600_pipe_common.c mesa-17.3.3/src/gallium/drivers/r600/r600_pipe_common.c
--- mesa-17.2.4/src/gallium/drivers/r600/r600_pipe_common.c 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r600/r600_pipe_common.c 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,1433 @@
+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Marek Olšák
+ *
+ */
+
+#include "r600_pipe_common.h"
+#include "r600_cs.h"
+#include "tgsi/tgsi_parse.h"
+#include "util/list.h"
+#include "util/u_draw_quad.h"
+#include "util/u_memory.h"
+#include "util/u_format_s3tc.h"
+#include "util/u_upload_mgr.h"
+#include "os/os_time.h"
+#include "vl/vl_decoder.h"
+#include "vl/vl_video_buffer.h"
+#include "radeon_video.h"
+#include
+#include
+
+#ifndef HAVE_LLVM
+#define HAVE_LLVM 0
+#endif
+
+#if HAVE_LLVM
+#include
+#endif
+
+#ifndef MESA_LLVM_VERSION_PATCH
+#define MESA_LLVM_VERSION_PATCH 0
+#endif
+
+/* Reference-counted fence that bundles one fence handle for the gfx ring
+ * and one for the SDMA ring. */
+struct r600_multi_fence {
+ struct pipe_reference reference;
+ struct pipe_fence_handle *gfx;
+ struct pipe_fence_handle *sdma;
+
+ /* If the context wasn't flushed at fence creation, this is non-NULL. */
+ struct {
+ struct r600_common_context *ctx;
+ unsigned ib_index;
+ } gfx_unflushed;
+};
+
+/*
+ * shader binary helpers.
+ */
+/* Reset every field of the shader binary to zero.  b must not be NULL. */
+void radeon_shader_binary_init(struct ac_shader_binary *b)
+{
+ memset(b, 0, sizeof(*b));
+}
+
+/* Free all buffers owned by the shader binary.  A NULL binary is accepted.
+ * The pointers inside *b are not reset and the struct itself is not freed. */
+void radeon_shader_binary_clean(struct ac_shader_binary *b)
+{
+	if (b) {
+		FREE(b->code);
+		FREE(b->config);
+		FREE(b->rodata);
+		FREE(b->global_symbol_offsets);
+		FREE(b->relocs);
+		FREE(b->disasm_string);
+		FREE(b->llvm_ir_string);
+	}
+}
+
+/*
+ * pipe_context
+ */
+
+/**
+ * Write an EOP event.
+ *
+ * \param event EVENT_TYPE_*
+ * \param event_flags Optional cache flush flags (TC)
+ * \param data_sel 1 = fence, 3 = timestamp
+ * \param buf Buffer; if non-NULL a write relocation is emitted for it
+ * \param va GPU address
+ * \param new_fence Fence value to write for this event (immediate data).
+ * \param query_type Not referenced by this implementation.
+ */
+void r600_gfx_write_event_eop(struct r600_common_context *ctx,
+ unsigned event, unsigned event_flags,
+ unsigned data_sel,
+ struct r600_resource *buf, uint64_t va,
+ uint32_t new_fence, unsigned query_type)
+{
+ struct radeon_winsys_cs *cs = ctx->gfx.cs;
+ unsigned op = EVENT_TYPE(event) |
+ EVENT_INDEX(5) |
+ event_flags;
+ unsigned sel = EOP_DATA_SEL(data_sel);
+
+ /* Header plus five payload dwords. */
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
+ radeon_emit(cs, op);
+ radeon_emit(cs, va);
+ radeon_emit(cs, ((va >> 32) & 0xffff) | sel);
+ radeon_emit(cs, new_fence); /* immediate data */
+ radeon_emit(cs, 0); /* unused */
+
+ if (buf)
+ r600_emit_reloc(ctx, &ctx->gfx, buf, RADEON_USAGE_WRITE,
+ RADEON_PRIO_QUERY);
+}
+
+/* Number of command-stream dwords accounted for a fence write: 6, plus 2
+ * more when the device has no virtual memory. */
+unsigned r600_gfx_write_fence_dwords(struct r600_common_screen *screen)
+{
+	return screen->info.has_virtual_memory ? 6 : 6 + 2;
+}
+
+/* Emit a WAIT_REG_MEM packet on the gfx ring that waits on the memory
+ * location va, using an EQUAL comparison against ref with the given mask.
+ *
+ * \param va GPU address of the value to poll
+ * \param ref reference value
+ * \param mask mask emitted alongside the reference value
+ */
+void r600_gfx_wait_fence(struct r600_common_context *ctx,
+ uint64_t va, uint32_t ref, uint32_t mask)
+{
+ struct radeon_winsys_cs *cs = ctx->gfx.cs;
+
+ radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
+ radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1));
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ radeon_emit(cs, ref); /* reference value */
+ radeon_emit(cs, mask); /* mask */
+ radeon_emit(cs, 4); /* poll interval */
+}
+
+/* Blitter callback that draws a rectangle as a 3-vertex rectangle list.
+ *
+ * Binds the given vertex elements and vertex shader, sets an identity
+ * viewport, uploads three vertices of eight floats each (position in
+ * floats 0-3, generic attribute in floats 4-7) and issues an instanced
+ * draw.  Returns without drawing if the upload allocation fails.
+ */
+void r600_draw_rectangle(struct blitter_context *blitter,
+ void *vertex_elements_cso,
+ blitter_get_vs_func get_vs,
+ int x1, int y1, int x2, int y2,
+ float depth, unsigned num_instances,
+ enum blitter_attrib_type type,
+ const union blitter_attrib *attrib)
+{
+ struct r600_common_context *rctx =
+ (struct r600_common_context*)util_blitter_get_pipe(blitter);
+ struct pipe_viewport_state viewport;
+ struct pipe_resource *buf = NULL;
+ unsigned offset = 0;
+ float *vb;
+
+ rctx->b.bind_vertex_elements_state(&rctx->b, vertex_elements_cso);
+ rctx->b.bind_vs_state(&rctx->b, get_vs(blitter));
+
+ /* Some operations (like color resolve on r6xx) don't work
+ * with the conventional primitive types.
+ * One that works is PT_RECTLIST, which we use here. */
+
+ /* setup viewport */
+ viewport.scale[0] = 1.0f;
+ viewport.scale[1] = 1.0f;
+ viewport.scale[2] = 1.0f;
+ viewport.translate[0] = 0.0f;
+ viewport.translate[1] = 0.0f;
+ viewport.translate[2] = 0.0f;
+ rctx->b.set_viewport_states(&rctx->b, 0, 1, &viewport);
+
+ /* Upload vertices. The hw rectangle has only 3 vertices,
+ * The 4th one is derived from the first 3.
+ * The vertex specification should match u_blitter's vertex element state. */
+ u_upload_alloc(rctx->b.stream_uploader, 0, sizeof(float) * 24,
+ rctx->screen->info.tcc_cache_line_size,
+ &offset, &buf, (void**)&vb);
+ if (!buf)
+ return;
+
+ /* Positions: (x1,y1), (x1,y2), (x2,y1), each with z = depth, w = 1. */
+ vb[0] = x1;
+ vb[1] = y1;
+ vb[2] = depth;
+ vb[3] = 1;
+
+ vb[8] = x1;
+ vb[9] = y2;
+ vb[10] = depth;
+ vb[11] = 1;
+
+ vb[16] = x2;
+ vb[17] = y1;
+ vb[18] = depth;
+ vb[19] = 1;
+
+ switch (type) {
+ case UTIL_BLITTER_ATTRIB_COLOR:
+ memcpy(vb+4, attrib->color, sizeof(float)*4);
+ memcpy(vb+12, attrib->color, sizeof(float)*4);
+ memcpy(vb+20, attrib->color, sizeof(float)*4);
+ break;
+ case UTIL_BLITTER_ATTRIB_TEXCOORD_XYZW:
+ case UTIL_BLITTER_ATTRIB_TEXCOORD_XY:
+ vb[6] = vb[14] = vb[22] = attrib->texcoord.z;
+ vb[7] = vb[15] = vb[23] = attrib->texcoord.w;
+ /* No fall-through here: XY and XYZW share this whole body, so z/w
+ * are copied for both types.
+ * NOTE(review): confirm texcoord.z/w are initialized for XY. */
+ vb[4] = attrib->texcoord.x1;
+ vb[5] = attrib->texcoord.y1;
+ vb[12] = attrib->texcoord.x1;
+ vb[13] = attrib->texcoord.y2;
+ vb[20] = attrib->texcoord.x2;
+ vb[21] = attrib->texcoord.y1;
+ break;
+ default:; /* Nothing to do. */
+ }
+
+ /* draw */
+ struct pipe_vertex_buffer vbuffer = {};
+ vbuffer.buffer.resource = buf;
+ vbuffer.stride = 2 * 4 * sizeof(float); /* vertex size */
+ vbuffer.buffer_offset = offset;
+
+ rctx->b.set_vertex_buffers(&rctx->b, blitter->vb_slot, 1, &vbuffer);
+ util_draw_arrays_instanced(&rctx->b, R600_PRIM_RECTANGLE_LIST, 0, 3,
+ 0, num_instances);
+ /* Drop the local reference to the upload buffer. */
+ pipe_resource_reference(&buf, NULL);
+}
+
+/* Emit a single NOP dword into the DMA command stream on Evergreen and
+ * newer; nothing is emitted on older chips. */
+static void r600_dma_emit_wait_idle(struct r600_common_context *rctx)
+{
+	if (rctx->chip_class < EVERGREEN) {
+		/* TODO: R600-R700 should use the FENCE packet.
+		 * CS checker support is required. */
+		return;
+	}
+
+	radeon_emit(rctx->dma.cs, 0xf0000000); /* NOP */
+}
+
+/* Prepare the DMA command stream for an operation of num_dw dwords that
+ * writes dst and reads src (either may be NULL).
+ *
+ * In order: flushes the gfx IB if it references the buffers, flushes the
+ * DMA IB if space or memory limits would be exceeded, emits a wait-for-idle
+ * if the DMA IB already references the buffers, optionally adds the buffers
+ * to the DMA buffer list, and increments ctx->num_dma_calls.
+ */
+void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
+ struct r600_resource *dst, struct r600_resource *src)
+{
+ uint64_t vram = ctx->dma.cs->used_vram;
+ uint64_t gtt = ctx->dma.cs->used_gart;
+
+ /* Memory usage of the IB if this operation were added to it. */
+ if (dst) {
+ vram += dst->vram_usage;
+ gtt += dst->gart_usage;
+ }
+ if (src) {
+ vram += src->vram_usage;
+ gtt += src->gart_usage;
+ }
+
+ /* Flush the GFX IB if DMA depends on it. */
+ if (radeon_emitted(ctx->gfx.cs, ctx->initial_gfx_cs_size) &&
+ ((dst &&
+ ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, dst->buf,
+ RADEON_USAGE_READWRITE)) ||
+ (src &&
+ ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, src->buf,
+ RADEON_USAGE_WRITE))))
+ ctx->gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
+
+ /* Flush if there's not enough space, or if the memory usage per IB
+ * is too large.
+ *
+ * IBs using too little memory are limited by the IB submission overhead.
+ * IBs using too much memory are limited by the kernel/TTM overhead.
+ * Too long IBs create CPU-GPU pipeline bubbles and add latency.
+ *
+ * This heuristic makes sure that DMA requests are executed
+ * very soon after the call is made and lowers memory usage.
+ * It improves texture upload performance by keeping the DMA
+ * engine busy while uploads are being submitted.
+ */
+ num_dw++; /* for emit_wait_idle below */
+ if (!ctx->ws->cs_check_space(ctx->dma.cs, num_dw) ||
+ ctx->dma.cs->used_vram + ctx->dma.cs->used_gart > 64 * 1024 * 1024 ||
+ !radeon_cs_memory_below_limit(ctx->screen, ctx->dma.cs, vram, gtt)) {
+ ctx->dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
+ assert((num_dw + ctx->dma.cs->current.cdw) <= ctx->dma.cs->current.max_dw);
+ }
+
+ /* Wait for idle if either buffer has been used in the IB before to
+ * prevent read-after-write hazards.
+ */
+ if ((dst &&
+ ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, dst->buf,
+ RADEON_USAGE_READWRITE)) ||
+ (src &&
+ ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, src->buf,
+ RADEON_USAGE_WRITE)))
+ r600_dma_emit_wait_idle(ctx);
+
+ /* If GPUVM is not supported, the CS checker needs 2 entries
+ * in the buffer list per packet, which has to be done manually.
+ *
+ * NOTE(review): the condition below adds the buffers here only when
+ * virtual memory IS available; the sentence above describes the
+ * opposite case - presumably handled by the callers, confirm.
+ */
+ if (ctx->screen->info.has_virtual_memory) {
+ if (dst)
+ radeon_add_to_buffer_list(ctx, &ctx->dma, dst,
+ RADEON_USAGE_WRITE,
+ RADEON_PRIO_SDMA_BUFFER);
+ if (src)
+ radeon_add_to_buffer_list(ctx, &ctx->dma, src,
+ RADEON_USAGE_READ,
+ RADEON_PRIO_SDMA_BUFFER);
+ }
+
+ /* this function is called before all DMA calls, so increment this. */
+ ctx->num_dma_calls++;
+}
+
+/* Memory-barrier hook; intentionally empty, both arguments are ignored. */
+static void r600_memory_barrier(struct pipe_context *ctx, unsigned flags)
+{
+}
+
+/* Suspend active queries and end an emitted streamout begin.
+ * streamout.suspended records whether streamout was ended here, so that
+ * r600_postflush_resume_features() can restart it. */
+void r600_preflush_suspend_features(struct r600_common_context *ctx)
+{
+	bool has_active_queries = !LIST_IS_EMPTY(&ctx->active_queries);
+
+	/* suspend queries */
+	if (has_active_queries)
+		r600_suspend_queries(ctx);
+
+	ctx->streamout.suspended = false;
+	if (!ctx->streamout.begin_emitted)
+		return;
+
+	r600_emit_streamout_end(ctx);
+	ctx->streamout.suspended = true;
+}
+
+/* Counterpart of r600_preflush_suspend_features(): mark the streamout
+ * buffers dirty in append mode if streamout was suspended, then resume the
+ * active queries. */
+void r600_postflush_resume_features(struct r600_common_context *ctx)
+{
+	if (ctx->streamout.suspended) {
+		ctx->streamout.append_bitmask = ctx->streamout.enabled_mask;
+		r600_streamout_buffers_dirty(ctx);
+	}
+
+	/* resume queries */
+	if (LIST_IS_EMPTY(&ctx->active_queries))
+		return;
+
+	r600_resume_queries(ctx);
+}
+
+/* Register the fence as a dependency of the DMA command stream (when the
+ * context has one) and of the gfx command stream. */
+static void r600_add_fence_dependency(struct r600_common_context *rctx,
+				      struct pipe_fence_handle *fence)
+{
+	if (rctx->dma.cs)
+		rctx->ws->cs_add_fence_dependency(rctx->dma.cs, fence);
+
+	rctx->ws->cs_add_fence_dependency(rctx->gfx.cs, fence);
+}
+
+/* fence_server_sync hook: make the context's command streams depend on the
+ * given fence.  Does nothing for DRM major version 2 and for fences whose
+ * gfx part is still unflushed. */
+static void r600_fence_server_sync(struct pipe_context *ctx,
+ struct pipe_fence_handle *fence)
+{
+ struct r600_common_context *rctx = (struct r600_common_context *)ctx;
+ struct r600_multi_fence *rfence = (struct r600_multi_fence *)fence;
+
+ /* Only amdgpu needs to handle fence dependencies (for fence imports).
+ * radeon synchronizes all rings by default and will not implement
+ * fence imports.
+ */
+ if (rctx->screen->info.drm_major == 2)
+ return;
+
+ /* Only imported fences need to be handled by fence_server_sync,
+ * because the winsys handles synchronizations automatically for BOs
+ * within the process.
+ *
+ * Simply skip unflushed fences here, and the winsys will drop no-op
+ * dependencies (i.e. dependencies within the same ring).
+ */
+ if (rfence->gfx_unflushed.ctx)
+ return;
+
+ /* All unflushed commands will not start execution before
+ * this fence dependency is signalled.
+ *
+ * Should we flush the context to allow more GPU parallelism?
+ */
+ if (rfence->sdma)
+ r600_add_fence_dependency(rctx, rfence->sdma);
+ if (rfence->gfx)
+ r600_add_fence_dependency(rctx, rfence->gfx);
+}
+
+static void r600_flush_from_st(struct pipe_context *ctx,
+ struct pipe_fence_handle **fence,
+ unsigned flags)
+{ /* pipe_context::flush hook; *fence (if requested) receives a new r600_multi_fence. */
+ struct pipe_screen *screen = ctx->screen;
+ struct r600_common_context *rctx = (struct r600_common_context *)ctx;
+ struct radeon_winsys *ws = rctx->ws;
+ struct pipe_fence_handle *gfx_fence = NULL;
+ struct pipe_fence_handle *sdma_fence = NULL;
+ bool deferred_fence = false;
+ unsigned rflags = RADEON_FLUSH_ASYNC;
+
+ if (flags & PIPE_FLUSH_END_OF_FRAME)
+ rflags |= RADEON_FLUSH_END_OF_FRAME;
+
+ /* DMA IBs are preambles to gfx IBs, therefore must be flushed first. */
+ if (rctx->dma.cs)
+ rctx->dma.flush(rctx, rflags, fence ? &sdma_fence : NULL);
+
+ if (!radeon_emitted(rctx->gfx.cs, rctx->initial_gfx_cs_size)) { /* nothing new in the gfx IB: reuse the last gfx fence */
+ if (fence)
+ ws->fence_reference(&gfx_fence, rctx->last_gfx_fence);
+ if (!(flags & PIPE_FLUSH_DEFERRED))
+ ws->cs_sync_flush(rctx->gfx.cs);
+ } else {
+ /* Instead of flushing, create a deferred fence. Constraints:
+ * - The state tracker must allow a deferred flush.
+ * - The state tracker must request a fence.
+ * Thread safety in fence_finish must be ensured by the state tracker.
+ */
+ if (flags & PIPE_FLUSH_DEFERRED && fence) {
+ gfx_fence = rctx->ws->cs_get_next_fence(rctx->gfx.cs);
+ deferred_fence = true;
+ } else {
+ rctx->gfx.flush(rctx, rflags, fence ? &gfx_fence : NULL);
+ }
+ }
+
+ /* Both engines can signal out of order, so we need to keep both fences. */
+ if (fence) {
+ struct r600_multi_fence *multi_fence =
+ CALLOC_STRUCT(r600_multi_fence);
+ if (!multi_fence) { /* allocation failed: drop both references and leave *fence untouched */
+ ws->fence_reference(&sdma_fence, NULL);
+ ws->fence_reference(&gfx_fence, NULL);
+ goto finish;
+ }
+
+ multi_fence->reference.count = 1;
+ /* If both fences are NULL, fence_finish will always return true. */
+ multi_fence->gfx = gfx_fence;
+ multi_fence->sdma = sdma_fence;
+
+ if (deferred_fence) { /* remember which gfx IB the deferred fence belongs to */
+ multi_fence->gfx_unflushed.ctx = rctx;
+ multi_fence->gfx_unflushed.ib_index = rctx->num_gfx_cs_flushes;
+ }
+
+ screen->fence_reference(screen, fence, NULL); /* release whatever *fence held before */
+ *fence = (struct pipe_fence_handle*)multi_fence;
+ }
+finish:
+ if (!(flags & PIPE_FLUSH_DEFERRED)) { /* non-deferred flush: sync both command streams */
+ if (rctx->dma.cs)
+ ws->cs_sync_flush(rctx->dma.cs);
+ ws->cs_sync_flush(rctx->gfx.cs);
+ }
+}
+
+/* Flush callback of the SDMA command stream: submits the IB and, if \p fence
+ * is non-NULL, stores a reference to the last SDMA fence in it. */
+static void r600_flush_dma_ring(void *ctx, unsigned flags,
+				struct pipe_fence_handle **fence)
+{
+	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
+	struct radeon_winsys *ws = rctx->ws;
+	struct radeon_winsys_cs *cs = rctx->dma.cs;
+	struct radeon_saved_cs saved;
+	bool check_vm = (rctx->screen->debug_flags & DBG_CHECK_VM) &&
+			rctx->check_vm_faults;
+	bool has_work = radeon_emitted(cs, 0);
+
+	if (has_work) {
+		/* Snapshot the IB before submitting it for the VM fault check. */
+		if (check_vm)
+			radeon_save_cs(ws, cs, &saved, true);
+		ws->cs_flush(cs, flags, &rctx->last_sdma_fence);
+	}
+
+	/* Either the fence of this flush or, for an empty IB, the previous one. */
+	if (fence)
+		ws->fence_reference(fence, rctx->last_sdma_fence);
+	if (!has_work || !check_vm)
+		return;
+
+	/* Use conservative timeout 800ms, after which we won't wait any
+	 * longer and assume the GPU is hung.
+	 */
+	ws->fence_wait(ws, rctx->last_sdma_fence, 800*1000*1000);
+	rctx->check_vm_faults(rctx, &saved, RING_DMA);
+	radeon_clear_saved_cs(&saved);
+}
+
+/**
+ * Store a linearized copy of all chunks of \p cs (plus the buffer list, if
+ * \p get_buffer_list) in \p saved; \p saved is zeroed if allocation fails.
+ */
+void radeon_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs,
+		    struct radeon_saved_cs *saved, bool get_buffer_list)
+{
+	uint32_t *buf;
+	unsigned i;
+
+	/* Keep radeon_clear_saved_cs() safe when no buffer list gets saved. */
+	saved->bo_list = NULL;
+	saved->bo_count = 0;
+
+	/* Save the IB chunks. */
+	saved->num_dw = cs->prev_dw + cs->current.cdw;
+	saved->ib = MALLOC(4 * saved->num_dw);
+	if (!saved->ib)
+		goto oom;
+	buf = saved->ib;
+	for (i = 0; i < cs->num_prev; ++i) {
+		memcpy(buf, cs->prev[i].buf, cs->prev[i].cdw * 4);
+		buf += cs->prev[i].cdw;
+	}
+	memcpy(buf, cs->current.buf, cs->current.cdw * 4);
+	if (!get_buffer_list)
+		return;
+
+	/* Save the buffer list: the first call only returns the count. */
+	saved->bo_count = ws->cs_get_buffer_list(cs, NULL);
+	saved->bo_list = CALLOC(saved->bo_count, sizeof(saved->bo_list[0]));
+	if (!saved->bo_list) {
+		FREE(saved->ib);
+		goto oom;
+	}
+	ws->cs_get_buffer_list(cs, saved->bo_list);
+	return;
+
+oom:
+	fprintf(stderr, "%s: out of memory\n", __func__);
+	memset(saved, 0, sizeof(*saved));
+}
+
+void radeon_clear_saved_cs(struct radeon_saved_cs *saved)
+{
+	/* Release both copies, then reset every field of the structure. */
+	FREE(saved->bo_list);
+	FREE(saved->ib);
+	memset(saved, 0, sizeof *saved);
+}
+
+static enum pipe_reset_status r600_get_reset_status(struct pipe_context *ctx)
+{
+	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
+	unsigned counter = rctx->ws->query_value(rctx->ws, RADEON_GPU_RESET_COUNTER);
+	bool was_reset = counter != rctx->gpu_reset_counter;
+
+	/* Remember the value just read, so the same counter value is not
+	 * reported as a reset again (storing an unchanged value is harmless).
+	 */
+	rctx->gpu_reset_counter = counter;
+	return was_reset ? PIPE_UNKNOWN_CONTEXT_RESET : PIPE_NO_RESET;
+}
+
+/* Copy \p cb into the context, or clear the stored callback if it is NULL. */
+static void r600_set_debug_callback(struct pipe_context *ctx,
+				    const struct pipe_debug_callback *cb)
+{
+	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
+	if (!cb)
+		memset(&rctx->debug, 0, sizeof(rctx->debug));
+	else
+		rctx->debug = *cb;
+}
+
+/* Copy \p cb into the context, or clear the stored callback if it is NULL. */
+static void r600_set_device_reset_callback(struct pipe_context *ctx,
+					   const struct pipe_device_reset_callback *cb)
+{
+	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
+	struct pipe_device_reset_callback *stored = &rctx->device_reset_callback;
+	if (!cb)
+		memset(stored, 0, sizeof(*stored));
+	else
+		*stored = *cb;
+}
+
+/**
+ * Poll the device reset status and forward a detected reset to the
+ * registered reset callback.
+ * \return true only if the callback was invoked.
+ */
+bool r600_check_device_reset(struct r600_common_context *rctx)
+{
+	enum pipe_reset_status status;
+
+	if (!rctx->device_reset_callback.reset || !rctx->b.get_device_reset_status)
+		return false;
+
+	status = rctx->b.get_device_reset_status(&rctx->b);
+	if (status != PIPE_NO_RESET)
+		rctx->device_reset_callback.reset(rctx->device_reset_callback.data, status);
+	return status != PIPE_NO_RESET;
+}
+
+/* Fallback: forward to the context's clear_buffer hook (R600_COHERENCY_NONE). */
+static void r600_dma_clear_buffer_fallback(struct pipe_context *ctx,
+					   struct pipe_resource *dst,
+					   uint64_t offset, uint64_t size,
+					   unsigned value)
+{
+	((struct r600_common_context *)ctx)->clear_buffer(ctx, dst, offset, size,
+							  value, R600_COHERENCY_NONE);
+}
+
+/* pipe_context::resource_commit hook: flush and sync pending work, then pass
+ * the range given by box->x and box->width to the winsys buffer_commit. */
+static bool r600_resource_commit(struct pipe_context *pctx,
+				 struct pipe_resource *resource,
+				 unsigned level, struct pipe_box *box,
+				 bool commit)
+{
+	struct r600_common_context *ctx = (struct r600_common_context *)pctx;
+	struct r600_resource *res = r600_resource(resource);
+
+	/*
+	 * Since buffer commitment changes cannot be pipelined, we need to
+	 * (a) flush any pending commands that refer to the buffer we're about
+	 *     to change, and
+	 * (b) wait for threaded submit to finish, including those that were
+	 *     triggered by some other, earlier operation.
+	 */
+	if (radeon_emitted(ctx->gfx.cs, ctx->initial_gfx_cs_size) &&
+	    ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs,
+					     res->buf, RADEON_USAGE_READWRITE))
+		ctx->gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
+	if (radeon_emitted(ctx->dma.cs, 0) &&
+	    ctx->ws->cs_is_buffer_referenced(ctx->dma.cs,
+					     res->buf, RADEON_USAGE_READWRITE))
+		ctx->dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
+
+	/* The SDMA command stream is optional (see r600_common_context_init). */
+	if (ctx->dma.cs)
+		ctx->ws->cs_sync_flush(ctx->dma.cs);
+	ctx->ws->cs_sync_flush(ctx->gfx.cs);
+	assert(resource->target == PIPE_BUFFER);
+	return ctx->ws->buffer_commit(res->buf, box->x, box->width, commit);
+}
+
+bool r600_common_context_init(struct r600_common_context *rctx,
+ struct r600_common_screen *rscreen,
+ unsigned context_flags)
+{ /* Returns false as soon as one allocation fails; nothing already created is released here. */
+ slab_create_child(&rctx->pool_transfers, &rscreen->pool_transfers);
+ slab_create_child(&rctx->pool_transfers_unsync, &rscreen->pool_transfers);
+
+ rctx->screen = rscreen;
+ rctx->ws = rscreen->ws;
+ rctx->family = rscreen->family;
+ rctx->chip_class = rscreen->chip_class;
+
+ rctx->b.invalidate_resource = r600_invalidate_resource;
+ rctx->b.resource_commit = r600_resource_commit;
+ rctx->b.transfer_map = u_transfer_map_vtbl;
+ rctx->b.transfer_flush_region = u_transfer_flush_region_vtbl;
+ rctx->b.transfer_unmap = u_transfer_unmap_vtbl;
+ rctx->b.texture_subdata = u_default_texture_subdata;
+ rctx->b.memory_barrier = r600_memory_barrier;
+ rctx->b.flush = r600_flush_from_st;
+ rctx->b.set_debug_callback = r600_set_debug_callback;
+ rctx->b.fence_server_sync = r600_fence_server_sync;
+ rctx->dma_clear_buffer = r600_dma_clear_buffer_fallback;
+
+ /* evergreen_compute.c has a special codepath for global buffers.
+ * Everything else can use the direct path.
+ */
+ if ((rscreen->chip_class == EVERGREEN || rscreen->chip_class == CAYMAN) &&
+ (context_flags & PIPE_CONTEXT_COMPUTE_ONLY))
+ rctx->b.buffer_subdata = u_default_buffer_subdata;
+ else
+ rctx->b.buffer_subdata = r600_buffer_subdata;
+
+ if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 43) { /* reset status hook only for DRM 2.43 and later 2.x */
+ rctx->b.get_device_reset_status = r600_get_reset_status;
+ rctx->gpu_reset_counter =
+ rctx->ws->query_value(rctx->ws,
+ RADEON_GPU_RESET_COUNTER);
+ }
+
+ rctx->b.set_device_reset_callback = r600_set_device_reset_callback;
+
+ r600_init_context_texture_functions(rctx);
+ r600_init_viewport_functions(rctx);
+ r600_streamout_init(rctx);
+ r600_query_init(rctx);
+ cayman_init_msaa(&rctx->b);
+
+ rctx->allocator_zeroed_memory =
+ u_suballocator_create(&rctx->b, rscreen->info.gart_page_size,
+ 0, PIPE_USAGE_DEFAULT, 0, true);
+ if (!rctx->allocator_zeroed_memory)
+ return false;
+
+ rctx->b.stream_uploader = u_upload_create(&rctx->b, 1024 * 1024,
+ 0, PIPE_USAGE_STREAM);
+ if (!rctx->b.stream_uploader)
+ return false;
+
+ rctx->b.const_uploader = u_upload_create(&rctx->b, 128 * 1024,
+ 0, PIPE_USAGE_DEFAULT);
+ if (!rctx->b.const_uploader)
+ return false;
+
+ rctx->ctx = rctx->ws->ctx_create(rctx->ws);
+ if (!rctx->ctx)
+ return false;
+
+ if (rscreen->info.num_sdma_rings && !(rscreen->debug_flags & DBG_NO_ASYNC_DMA)) { /* otherwise dma.cs stays unset */
+ rctx->dma.cs = rctx->ws->cs_create(rctx->ctx, RING_DMA,
+ r600_flush_dma_ring,
+ rctx);
+ rctx->dma.flush = r600_flush_dma_ring;
+ }
+
+ return true;
+}
+
+/* Tear down the resources held by the common context \p rctx. */
+void r600_common_context_cleanup(struct r600_common_context *rctx)
+{
+	struct radeon_winsys *ws = rctx->ws;
+
+	if (rctx->query_result_shader)
+		rctx->b.delete_compute_state(&rctx->b, rctx->query_result_shader);
+	/* Command streams go first, then the winsys context. */
+	if (rctx->gfx.cs)
+		ws->cs_destroy(rctx->gfx.cs);
+	if (rctx->dma.cs)
+		ws->cs_destroy(rctx->dma.cs);
+	if (rctx->ctx)
+		ws->ctx_destroy(rctx->ctx);
+	if (rctx->b.stream_uploader)
+		u_upload_destroy(rctx->b.stream_uploader);
+	if (rctx->b.const_uploader)
+		u_upload_destroy(rctx->b.const_uploader);
+	slab_destroy_child(&rctx->pool_transfers);
+	slab_destroy_child(&rctx->pool_transfers_unsync);
+	if (rctx->allocator_zeroed_memory)
+		u_suballocator_destroy(rctx->allocator_zeroed_memory);
+
+	ws->fence_reference(&rctx->last_gfx_fence, NULL);
+	ws->fence_reference(&rctx->last_sdma_fence, NULL);
+	r600_resource_reference(&rctx->eop_bug_scratch, NULL);
+}
+
+/*
+ * pipe_screen
+ */
+
+static const struct debug_named_value common_debug_options[] = { /* parsed from the R600_DEBUG option; entries are { name, flag, description } */
+ /* logging */
+ { "tex", DBG_TEX, "Print texture info" },
+ { "nir", DBG_NIR, "Enable experimental NIR shaders" },
+ { "compute", DBG_COMPUTE, "Print compute info" },
+ { "vm", DBG_VM, "Print virtual addresses when creating resources" },
+ { "info", DBG_INFO, "Print driver information" },
+
+ /* shaders */
+ { "fs", DBG_FS, "Print fetch shaders" },
+ { "vs", DBG_VS, "Print vertex shaders" },
+ { "gs", DBG_GS, "Print geometry shaders" },
+ { "ps", DBG_PS, "Print pixel shaders" },
+ { "cs", DBG_CS, "Print compute shaders" },
+ { "tcs", DBG_TCS, "Print tessellation control shaders" },
+ { "tes", DBG_TES, "Print tessellation evaluation shaders" },
+ { "noir", DBG_NO_IR, "Don't print the LLVM IR"},
+ { "notgsi", DBG_NO_TGSI, "Don't print the TGSI"},
+ { "noasm", DBG_NO_ASM, "Don't print disassembled shaders"},
+ { "preoptir", DBG_PREOPT_IR, "Print the LLVM IR before initial optimizations" },
+ { "checkir", DBG_CHECK_IR, "Enable additional sanity checks on shader IR" },
+ { "nooptvariant", DBG_NO_OPT_VARIANT, "Disable compiling optimized shader variants." },
+
+ { "testdma", DBG_TEST_DMA, "Invoke SDMA tests and exit." },
+ { "testvmfaultcp", DBG_TEST_VMFAULT_CP, "Invoke a CP VM fault test and exit." },
+ { "testvmfaultsdma", DBG_TEST_VMFAULT_SDMA, "Invoke a SDMA VM fault test and exit." },
+ { "testvmfaultshader", DBG_TEST_VMFAULT_SHADER, "Invoke a shader VM fault test and exit." },
+
+ /* features */
+ { "nodma", DBG_NO_ASYNC_DMA, "Disable asynchronous DMA" },
+ { "nohyperz", DBG_NO_HYPERZ, "Disable Hyper-Z" },
+ /* GL uses the word INVALIDATE, gallium uses the word DISCARD */
+ { "noinvalrange", DBG_NO_DISCARD_RANGE, "Disable handling of INVALIDATE_RANGE map flags" },
+ { "no2d", DBG_NO_2D_TILING, "Disable 2D tiling" },
+ { "notiling", DBG_NO_TILING, "Disable tiling" },
+ { "switch_on_eop", DBG_SWITCH_ON_EOP, "Program WD/IA to switch on end-of-packet." },
+ { "forcedma", DBG_FORCE_DMA, "Use asynchronous DMA for all operations when possible." },
+ { "precompile", DBG_PRECOMPILE, "Compile one shader variant at shader creation." },
+ { "nowc", DBG_NO_WC, "Disable GTT write combining" },
+ { "check_vm", DBG_CHECK_VM, "Check VM faults and dump debug info." },
+ { "unsafemath", DBG_UNSAFE_MATH, "Enable unsafe math shader optimizations" },
+
+ DEBUG_NAMED_VALUE_END /* must be last */
+};
+
+static const char* r600_get_vendor(struct pipe_screen* pscreen)
+{
+ return "X.Org"; /* constant string; pscreen is not consulted */
+}
+
+static const char* r600_get_device_vendor(struct pipe_screen* pscreen)
+{
+ return "AMD"; /* constant string; pscreen is not consulted */
+}
+
+/* Return what the winsys get_chip_name hook reports, or NULL when the
+ * winsys does not provide that hook. */
+static const char *r600_get_marketing_name(struct radeon_winsys *ws)
+{
+	return ws->get_chip_name ? ws->get_chip_name(ws) : NULL;
+}
+
+static const char *r600_get_family_name(const struct r600_common_screen *rscreen)
+{
+ switch (rscreen->info.family) { /* every returned name starts with "AMD " */
+ case CHIP_R600: return "AMD R600";
+ case CHIP_RV610: return "AMD RV610";
+ case CHIP_RV630: return "AMD RV630";
+ case CHIP_RV670: return "AMD RV670";
+ case CHIP_RV620: return "AMD RV620";
+ case CHIP_RV635: return "AMD RV635";
+ case CHIP_RS780: return "AMD RS780";
+ case CHIP_RS880: return "AMD RS880";
+ case CHIP_RV770: return "AMD RV770";
+ case CHIP_RV730: return "AMD RV730";
+ case CHIP_RV710: return "AMD RV710";
+ case CHIP_RV740: return "AMD RV740";
+ case CHIP_CEDAR: return "AMD CEDAR";
+ case CHIP_REDWOOD: return "AMD REDWOOD";
+ case CHIP_JUNIPER: return "AMD JUNIPER";
+ case CHIP_CYPRESS: return "AMD CYPRESS";
+ case CHIP_HEMLOCK: return "AMD HEMLOCK";
+ case CHIP_PALM: return "AMD PALM";
+ case CHIP_SUMO: return "AMD SUMO";
+ case CHIP_SUMO2: return "AMD SUMO2";
+ case CHIP_BARTS: return "AMD BARTS";
+ case CHIP_TURKS: return "AMD TURKS";
+ case CHIP_CAICOS: return "AMD CAICOS";
+ case CHIP_CAYMAN: return "AMD CAYMAN";
+ case CHIP_ARUBA: return "AMD ARUBA";
+ default: return "AMD unknown"; /* family without an entry above */
+ }
+}
+
+/* Create the disk shader cache of \p rscreen from the chip family name, a
+ * timestamp string and the debug flags that affect shader compilation. */
+static void r600_disk_cache_create(struct r600_common_screen *rscreen)
+{
+	uint32_t mesa_timestamp;
+	char *timestamp_str;
+	uint64_t shader_debug_flags;
+
+	/* Don't use the cache if shader dumping is enabled. */
+	if (rscreen->debug_flags & DBG_ALL_SHADERS)
+		return;
+
+	if (!disk_cache_get_function_timestamp(r600_disk_cache_create,
+					       &mesa_timestamp))
+		return;
+
+	/* asprintf() allocates the string itself; -1 means it failed. */
+	if (asprintf(&timestamp_str, "%u", mesa_timestamp) == -1)
+		return;
+
+	/* These flags affect shader compilation. */
+	shader_debug_flags = rscreen->debug_flags &
+			     (DBG_FS_CORRECT_DERIVS_AFTER_KILL | DBG_UNSAFE_MATH);
+	rscreen->disk_shader_cache = disk_cache_create(r600_get_family_name(rscreen),
+						       timestamp_str,
+						       shader_debug_flags);
+	free(timestamp_str);
+}
+
+/* pipe_screen::get_disk_shader_cache hook: the screen's cache pointer. */
+static struct disk_cache *r600_get_disk_shader_cache(struct pipe_screen *pscreen)
+{
+	return ((struct r600_common_screen *)pscreen)->disk_shader_cache;
+}
+
+/* pipe_screen::get_name hook: returns the renderer string that
+ * r600_common_screen_init() formats into the screen. */
+static const char *r600_get_name(struct pipe_screen *pscreen)
+{
+	return ((struct r600_common_screen *)pscreen)->renderer_string;
+}
+
+/*
+ * pipe_screen::get_paramf hook: floating-point capability limits.
+ * Guard bands, and any cap not listed, report 0.
+ */
+static float r600_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
+{
+	struct r600_common_screen *rscreen = (struct r600_common_screen *)pscreen;
+
+	switch (param) {
+	case PIPE_CAPF_MAX_LINE_WIDTH:
+	case PIPE_CAPF_MAX_LINE_WIDTH_AA:
+	case PIPE_CAPF_MAX_POINT_WIDTH:
+	case PIPE_CAPF_MAX_POINT_WIDTH_AA:
+		/* Families from CHIP_CEDAR upwards get the larger limit. */
+		return rscreen->family >= CHIP_CEDAR ? 16384.0f : 8192.0f;
+	case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
+	case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
+		return 16.0f;
+	case PIPE_CAPF_GUARD_BAND_LEFT:
+	case PIPE_CAPF_GUARD_BAND_TOP:
+	case PIPE_CAPF_GUARD_BAND_RIGHT:
+	case PIPE_CAPF_GUARD_BAND_BOTTOM:
+		break;
+	}
+	return 0.0f;
+}
+
+/* Video capability query installed by r600_common_screen_init() when the
+ * screen reports no hardware decoder; answers come from the vl_* helpers
+ * or are fixed values. */
+static int r600_get_video_param(struct pipe_screen *screen,
+				enum pipe_video_profile profile,
+				enum pipe_video_entrypoint entrypoint,
+				enum pipe_video_cap param)
+{
+	switch (param) {
+	case PIPE_VIDEO_CAP_SUPPORTED:
+		return vl_profile_supported(screen, profile, entrypoint);
+	case PIPE_VIDEO_CAP_MAX_LEVEL:
+		return vl_level_supported(screen, profile);
+	case PIPE_VIDEO_CAP_MAX_WIDTH:
+	case PIPE_VIDEO_CAP_MAX_HEIGHT:
+		return vl_video_buffer_max_size(screen);
+	case PIPE_VIDEO_CAP_PREFERED_FORMAT:
+		return PIPE_FORMAT_NV12;
+	case PIPE_VIDEO_CAP_NPOT_TEXTURES:
+	case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
+		return 1;
+	case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
+	case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
+	default:
+		return 0;
+	}
+}
+
+const char *r600_get_llvm_processor_name(enum radeon_family family)
+{
+ switch (family) { /* several families share one processor name */
+ case CHIP_R600:
+ case CHIP_RV630:
+ case CHIP_RV635:
+ case CHIP_RV670:
+ return "r600";
+ case CHIP_RV610:
+ case CHIP_RV620:
+ case CHIP_RS780:
+ case CHIP_RS880:
+ return "rs880";
+ case CHIP_RV710:
+ return "rv710";
+ case CHIP_RV730:
+ return "rv730";
+ case CHIP_RV740:
+ case CHIP_RV770:
+ return "rv770";
+ case CHIP_PALM:
+ case CHIP_CEDAR:
+ return "cedar";
+ case CHIP_SUMO:
+ case CHIP_SUMO2:
+ return "sumo";
+ case CHIP_REDWOOD:
+ return "redwood";
+ case CHIP_JUNIPER:
+ return "juniper";
+ case CHIP_HEMLOCK:
+ case CHIP_CYPRESS:
+ return "cypress";
+ case CHIP_BARTS:
+ return "barts";
+ case CHIP_TURKS:
+ return "turks";
+ case CHIP_CAICOS:
+ return "caicos";
+ case CHIP_CAYMAN:
+ case CHIP_ARUBA:
+ return "cayman";
+
+ default:
+ return ""; /* family without an entry above: empty name, never NULL */
+ }
+}
+
+static unsigned get_max_threads_per_block(struct r600_common_screen *screen,
+ enum pipe_shader_ir ir_type)
+{
+ return 256; /* fixed value; neither screen nor ir_type is consulted */
+}
+
+static int r600_get_compute_param(struct pipe_screen *screen,
+ enum pipe_shader_ir ir_type,
+ enum pipe_compute_cap param,
+ void *ret)
+{ /* Writes the value of \p param to \p ret when \p ret is non-NULL; returns the value's size in bytes, or 0 if nothing is reported. */
+ struct r600_common_screen *rscreen = (struct r600_common_screen *)screen;
+
+ //TODO: select these params by asic
+ switch (param) {
+ case PIPE_COMPUTE_CAP_IR_TARGET: {
+ const char *gpu;
+ const char *triple = "r600--";
+ gpu = r600_get_llvm_processor_name(rscreen->family);
+ if (ret) {
+ sprintf(ret, "%s-%s", gpu, triple);
+ }
+ /* +2 for dash and terminating NIL byte */
+ return (strlen(triple) + strlen(gpu) + 2) * sizeof(char);
+ }
+ case PIPE_COMPUTE_CAP_GRID_DIMENSION:
+ if (ret) {
+ uint64_t *grid_dimension = ret;
+ grid_dimension[0] = 3;
+ }
+ return 1 * sizeof(uint64_t);
+
+ case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
+ if (ret) {
+ uint64_t *grid_size = ret;
+ grid_size[0] = 65535;
+ grid_size[1] = 65535;
+ grid_size[2] = 65535;
+ }
+ return 3 * sizeof(uint64_t) ;
+
+ case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
+ if (ret) {
+ uint64_t *block_size = ret;
+ unsigned threads_per_block = get_max_threads_per_block(rscreen, ir_type);
+ block_size[0] = threads_per_block;
+ block_size[1] = threads_per_block;
+ block_size[2] = threads_per_block;
+ }
+ return 3 * sizeof(uint64_t);
+
+ case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
+ if (ret) {
+ uint64_t *max_threads_per_block = ret;
+ *max_threads_per_block = get_max_threads_per_block(rscreen, ir_type);
+ }
+ return sizeof(uint64_t);
+ case PIPE_COMPUTE_CAP_ADDRESS_BITS:
+ if (ret) {
+ uint32_t *address_bits = ret;
+ address_bits[0] = 32;
+ }
+ return 1 * sizeof(uint32_t);
+
+ case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
+ if (ret) {
+ uint64_t *max_global_size = ret;
+ uint64_t max_mem_alloc_size;
+
+ r600_get_compute_param(screen, ir_type,
+ PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
+ &max_mem_alloc_size);
+
+ /* In OpenCL, the MAX_MEM_ALLOC_SIZE must be at least
+ * 1/4 of the MAX_GLOBAL_SIZE. Since the
+ * MAX_MEM_ALLOC_SIZE is fixed for older kernels,
+ * make sure we never report more than
+ * 4 * MAX_MEM_ALLOC_SIZE.
+ */
+ *max_global_size = MIN2(4 * max_mem_alloc_size,
+ MAX2(rscreen->info.gart_size,
+ rscreen->info.vram_size));
+ }
+ return sizeof(uint64_t);
+
+ case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
+ if (ret) {
+ uint64_t *max_local_size = ret;
+ /* Value reported by the closed source driver. */
+ *max_local_size = 32768;
+ }
+ return sizeof(uint64_t);
+
+ case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
+ if (ret) {
+ uint64_t *max_input_size = ret;
+ /* Value reported by the closed source driver. */
+ *max_input_size = 1024;
+ }
+ return sizeof(uint64_t);
+
+ case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
+ if (ret) {
+ uint64_t *max_mem_alloc_size = ret;
+
+ *max_mem_alloc_size = rscreen->info.max_alloc_size;
+ }
+ return sizeof(uint64_t);
+
+ case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
+ if (ret) {
+ uint32_t *max_clock_frequency = ret;
+ *max_clock_frequency = rscreen->info.max_shader_clock;
+ }
+ return sizeof(uint32_t);
+
+ case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
+ if (ret) {
+ uint32_t *max_compute_units = ret;
+ *max_compute_units = rscreen->info.num_good_compute_units;
+ }
+ return sizeof(uint32_t);
+
+ case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
+ if (ret) {
+ uint32_t *images_supported = ret;
+ *images_supported = 0;
+ }
+ return sizeof(uint32_t);
+ case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
+ break; /* unused */
+ case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
+ if (ret) {
+ uint32_t *subgroup_size = ret;
+ *subgroup_size = r600_wavefront_size(rscreen->family);
+ }
+ return sizeof(uint32_t);
+ case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
+ if (ret) {
+ uint64_t *max_variable_threads_per_block = ret;
+ *max_variable_threads_per_block = 0;
+ }
+ return sizeof(uint64_t);
+ }
+
+ fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param); /* also reached by the MAX_PRIVATE_SIZE break above */
+ return 0;
+}
+
+static uint64_t r600_get_timestamp(struct pipe_screen *screen)
+{
+	struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
+	uint64_t ticks = rscreen->ws->query_value(rscreen->ws, RADEON_TIMESTAMP);
+	/* Scale the raw counter by 1000000 / clock_crystal_freq. */
+	return 1000000 * ticks / rscreen->info.clock_crystal_freq;
+}
+
+/* Point *dst at src; free the old multi-fence when pipe_reference() says so. */
+static void r600_fence_reference(struct pipe_screen *screen,
+				 struct pipe_fence_handle **dst,
+				 struct pipe_fence_handle *src)
+{
+	struct radeon_winsys *ws = ((struct r600_common_screen*)screen)->ws;
+	struct r600_multi_fence *old = *(struct r600_multi_fence **)dst;
+	struct r600_multi_fence *replacement = (struct r600_multi_fence *)src;
+	if (pipe_reference(&old->reference, &replacement->reference)) {
+		ws->fence_reference(&old->gfx, NULL);
+		ws->fence_reference(&old->sdma, NULL);
+		FREE(old);
+	}
+	*(struct r600_multi_fence **)dst = replacement;
+}
+
+static boolean r600_fence_finish(struct pipe_screen *screen,
+ struct pipe_context *ctx,
+ struct pipe_fence_handle *fence,
+ uint64_t timeout)
+{ /* Waits on the SDMA fence, then the gfx fence, sharing one timeout budget; returns false as soon as a wait fails. */
+ struct radeon_winsys *rws = ((struct r600_common_screen*)screen)->ws;
+ struct r600_multi_fence *rfence = (struct r600_multi_fence *)fence;
+ struct r600_common_context *rctx;
+ int64_t abs_timeout = os_time_get_absolute_timeout(timeout);
+
+ ctx = threaded_context_unwrap_sync(ctx);
+ rctx = ctx ? (struct r600_common_context*)ctx : NULL;
+
+ if (rfence->sdma) {
+ if (!rws->fence_wait(rws, rfence->sdma, timeout))
+ return false;
+
+ /* Recompute the timeout after waiting. */
+ if (timeout && timeout != PIPE_TIMEOUT_INFINITE) {
+ int64_t time = os_time_get_nano();
+ timeout = abs_timeout > time ? abs_timeout - time : 0;
+ }
+ }
+
+ if (!rfence->gfx)
+ return true;
+
+ /* Flush the gfx IB if it hasn't been flushed yet. */
+ if (rctx &&
+ rfence->gfx_unflushed.ctx == rctx &&
+ rfence->gfx_unflushed.ib_index == rctx->num_gfx_cs_flushes) {
+ rctx->gfx.flush(rctx, timeout ? 0 : RADEON_FLUSH_ASYNC, NULL);
+ rfence->gfx_unflushed.ctx = NULL; /* the fence no longer counts as unflushed */
+
+ if (!timeout) /* zero timeout: report "not finished" right after the async flush */
+ return false;
+
+ /* Recompute the timeout after all that. */
+ if (timeout && timeout != PIPE_TIMEOUT_INFINITE) {
+ int64_t time = os_time_get_nano();
+ timeout = abs_timeout > time ? abs_timeout - time : 0;
+ }
+ }
+
+ return rws->fence_wait(rws, rfence->gfx, timeout);
+}
+
+/* pipe_screen::query_memory_info hook; byte counts are divided by 1024. */
+static void r600_query_memory_info(struct pipe_screen *screen,
+				   struct pipe_memory_info *info)
+{
+	struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
+	struct radeon_winsys *ws = rscreen->ws;
+	unsigned vram_kb, gtt_kb;
+
+	info->total_device_memory = rscreen->info.vram_size / 1024;
+	info->total_staging_memory = rscreen->info.gart_size / 1024;
+
+	/* The real TTM memory usage is somewhat random, because:
+	 *
+	 * 1) TTM delays freeing memory, because it can only free it after
+	 *    fences expire.
+	 *
+	 * 2) The memory usage can be really low if big VRAM evictions are
+	 *    taking place, but the real usage is well above the size of VRAM.
+	 *
+	 * Instead, return statistics of this process.
+	 */
+	vram_kb = ws->query_value(ws, RADEON_REQUESTED_VRAM_MEMORY) / 1024;
+	gtt_kb = ws->query_value(ws, RADEON_REQUESTED_GTT_MEMORY) / 1024;
+
+	/* Clamp at zero when the requested amount exceeds the total. */
+	info->avail_device_memory = 0;
+	if (vram_kb <= info->total_device_memory)
+		info->avail_device_memory = info->total_device_memory - vram_kb;
+	info->avail_staging_memory = 0;
+	if (gtt_kb <= info->total_staging_memory)
+		info->avail_staging_memory = info->total_staging_memory - gtt_kb;
+
+	info->device_memory_evicted = ws->query_value(ws, RADEON_NUM_BYTES_MOVED) / 1024;
+
+	/* Without DRM 3.4+, just return the number of evicted 64KB pages. */
+	if (rscreen->info.drm_major == 3 && rscreen->info.drm_minor >= 4)
+		info->nr_device_memory_evictions = ws->query_value(ws, RADEON_NUM_EVICTIONS);
+	else
+		info->nr_device_memory_evictions = info->device_memory_evicted / 64;
+}
+
+/* Create a resource from \p templ: buffers go to r600_buffer_create(), passing
+ * 256 (presumably the alignment), everything else to r600_texture_create(). */
+struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen,
+						  const struct pipe_resource *templ)
+{
+	if (templ->target != PIPE_BUFFER)
+		return r600_texture_create(screen, templ);
+	return r600_buffer_create(screen, templ, 256);
+}
+
+bool r600_common_screen_init(struct r600_common_screen *rscreen,
+ struct radeon_winsys *ws)
+{ /* Fills in the common screen state and hooks; every path shown here returns true. */
+ char family_name[32] = {}, llvm_string[32] = {}, kernel_version[128] = {};
+ struct utsname uname_data;
+ const char *chip_name;
+
+ ws->query_info(ws, &rscreen->info);
+ rscreen->ws = ws;
+
+ if ((chip_name = r600_get_marketing_name(ws)))
+ snprintf(family_name, sizeof(family_name), "%s / ",
+ r600_get_family_name(rscreen) + 4); /* + 4 skips the "AMD " prefix of the family name */
+ else
+ chip_name = r600_get_family_name(rscreen);
+
+ if (uname(&uname_data) == 0)
+ snprintf(kernel_version, sizeof(kernel_version),
+ " / %s", uname_data.release);
+
+ if (HAVE_LLVM > 0) {
+ snprintf(llvm_string, sizeof(llvm_string),
+ ", LLVM %i.%i.%i", (HAVE_LLVM >> 8) & 0xff,
+ HAVE_LLVM & 0xff, MESA_LLVM_VERSION_PATCH);
+ }
+
+ snprintf(rscreen->renderer_string, sizeof(rscreen->renderer_string),
+ "%s (%sDRM %i.%i.%i%s%s)",
+ chip_name, family_name, rscreen->info.drm_major,
+ rscreen->info.drm_minor, rscreen->info.drm_patchlevel,
+ kernel_version, llvm_string);
+
+ rscreen->b.get_name = r600_get_name;
+ rscreen->b.get_vendor = r600_get_vendor;
+ rscreen->b.get_device_vendor = r600_get_device_vendor;
+ rscreen->b.get_disk_shader_cache = r600_get_disk_shader_cache;
+ rscreen->b.get_compute_param = r600_get_compute_param;
+ rscreen->b.get_paramf = r600_get_paramf;
+ rscreen->b.get_timestamp = r600_get_timestamp;
+ rscreen->b.fence_finish = r600_fence_finish;
+ rscreen->b.fence_reference = r600_fence_reference;
+ rscreen->b.resource_destroy = u_resource_destroy_vtbl;
+ rscreen->b.resource_from_user_memory = r600_buffer_from_user_memory;
+ rscreen->b.query_memory_info = r600_query_memory_info;
+
+ if (rscreen->info.has_hw_decode) {
+ rscreen->b.get_video_param = rvid_get_video_param;
+ rscreen->b.is_video_format_supported = rvid_is_format_supported;
+ } else {
+ rscreen->b.get_video_param = r600_get_video_param;
+ rscreen->b.is_video_format_supported = vl_video_buffer_is_format_supported;
+ }
+
+ r600_init_screen_texture_functions(rscreen);
+ r600_init_screen_query_functions(rscreen);
+
+ rscreen->family = rscreen->info.family;
+ rscreen->chip_class = rscreen->info.chip_class;
+ rscreen->debug_flags |= debug_get_flags_option("R600_DEBUG", common_debug_options, 0);
+
+ r600_disk_cache_create(rscreen);
+
+ slab_create_parent(&rscreen->pool_transfers, sizeof(struct r600_transfer), 64);
+
+ rscreen->force_aniso = MIN2(16, debug_get_num_option("R600_TEX_ANISO", -1)); /* default -1 skips the message below */
+ if (rscreen->force_aniso >= 0) {
+ printf("radeon: Forcing anisotropy filter to %ix\n",
+ /* round down to a power of two */
+ 1 << util_logbase2(rscreen->force_aniso));
+ }
+
+ (void) mtx_init(&rscreen->aux_context_lock, mtx_plain);
+ (void) mtx_init(&rscreen->gpu_load_mutex, mtx_plain);
+
+ if (rscreen->debug_flags & DBG_INFO) { /* dump the queried device info to stdout */
+ printf("pci (domain:bus:dev.func): %04x:%02x:%02x.%x\n",
+ rscreen->info.pci_domain, rscreen->info.pci_bus,
+ rscreen->info.pci_dev, rscreen->info.pci_func);
+ printf("pci_id = 0x%x\n", rscreen->info.pci_id);
+ printf("family = %i (%s)\n", rscreen->info.family,
+ r600_get_family_name(rscreen));
+ printf("chip_class = %i\n", rscreen->info.chip_class);
+ printf("pte_fragment_size = %u\n", rscreen->info.pte_fragment_size);
+ printf("gart_page_size = %u\n", rscreen->info.gart_page_size);
+ printf("gart_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.gart_size, 1024*1024));
+ printf("vram_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.vram_size, 1024*1024));
+ printf("vram_vis_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.vram_vis_size, 1024*1024));
+ printf("max_alloc_size = %i MB\n",
+ (int)DIV_ROUND_UP(rscreen->info.max_alloc_size, 1024*1024));
+ printf("min_alloc_size = %u\n", rscreen->info.min_alloc_size);
+ printf("has_dedicated_vram = %u\n", rscreen->info.has_dedicated_vram);
+ printf("has_virtual_memory = %i\n", rscreen->info.has_virtual_memory);
+ printf("gfx_ib_pad_with_type2 = %i\n", rscreen->info.gfx_ib_pad_with_type2);
+ printf("has_hw_decode = %u\n", rscreen->info.has_hw_decode);
+ printf("num_sdma_rings = %i\n", rscreen->info.num_sdma_rings);
+ printf("num_compute_rings = %u\n", rscreen->info.num_compute_rings);
+ printf("uvd_fw_version = %u\n", rscreen->info.uvd_fw_version);
+ printf("vce_fw_version = %u\n", rscreen->info.vce_fw_version);
+ printf("me_fw_version = %i\n", rscreen->info.me_fw_version);
+ printf("pfp_fw_version = %i\n", rscreen->info.pfp_fw_version);
+ printf("ce_fw_version = %i\n", rscreen->info.ce_fw_version);
+ printf("vce_harvest_config = %i\n", rscreen->info.vce_harvest_config);
+ printf("clock_crystal_freq = %i\n", rscreen->info.clock_crystal_freq);
+ printf("tcc_cache_line_size = %u\n", rscreen->info.tcc_cache_line_size);
+ printf("drm = %i.%i.%i\n", rscreen->info.drm_major,
+ rscreen->info.drm_minor, rscreen->info.drm_patchlevel);
+ printf("has_userptr = %i\n", rscreen->info.has_userptr);
+ printf("has_syncobj = %u\n", rscreen->info.has_syncobj);
+
+ printf("r600_max_quad_pipes = %i\n", rscreen->info.r600_max_quad_pipes);
+ printf("max_shader_clock = %i\n", rscreen->info.max_shader_clock);
+ printf("num_good_compute_units = %i\n", rscreen->info.num_good_compute_units);
+ printf("max_se = %i\n", rscreen->info.max_se);
+ printf("max_sh_per_se = %i\n", rscreen->info.max_sh_per_se);
+
+ printf("r600_gb_backend_map = %i\n", rscreen->info.r600_gb_backend_map);
+ printf("r600_gb_backend_map_valid = %i\n", rscreen->info.r600_gb_backend_map_valid);
+ printf("r600_num_banks = %i\n", rscreen->info.r600_num_banks);
+ printf("num_render_backends = %i\n", rscreen->info.num_render_backends);
+ printf("num_tile_pipes = %i\n", rscreen->info.num_tile_pipes);
+ printf("pipe_interleave_bytes = %i\n", rscreen->info.pipe_interleave_bytes);
+ printf("enabled_rb_mask = 0x%x\n", rscreen->info.enabled_rb_mask);
+ printf("max_alignment = %u\n", (unsigned)rscreen->info.max_alignment);
+ }
+ return true;
+}
+
+/* Destroy what \p rscreen owns, then its winsys, then free the screen itself. */
+void r600_destroy_common_screen(struct r600_common_screen *rscreen)
+{
+	struct radeon_winsys *ws = rscreen->ws;
+
+	r600_perfcounters_destroy(rscreen);
+	r600_gpu_load_kill_thread(rscreen);
+	mtx_destroy(&rscreen->gpu_load_mutex);
+	mtx_destroy(&rscreen->aux_context_lock);
+	rscreen->aux_context->destroy(rscreen->aux_context);
+	slab_destroy_parent(&rscreen->pool_transfers);
+	disk_cache_destroy(rscreen->disk_shader_cache);
+	ws->destroy(ws);
+	FREE(rscreen);
+}
+
+/* True if the debug flag bit numbered \p processor is set on \p rscreen. */
+bool r600_can_dump_shader(struct r600_common_screen *rscreen, unsigned processor)
+{
+	return (rscreen->debug_flags & (1 << processor)) != 0;
+}
+
+bool r600_extra_shader_checks(struct r600_common_screen *rscreen, unsigned processor)
+{
+	/* DBG_CHECK_IR forces the checks; otherwise they follow shader dumping. */
+	return (rscreen->debug_flags & DBG_CHECK_IR) ? true : r600_can_dump_shader(rscreen, processor);
+}
+
+void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
+			      uint64_t offset, uint64_t size, unsigned value)
+{
+	struct r600_common_context *aux = (struct r600_common_context *)rscreen->aux_context;
+	/* Clear and flush on the auxiliary context while holding its lock. */
+	mtx_lock(&rscreen->aux_context_lock);
+	aux->dma_clear_buffer(&aux->b, dst, offset, size, value);
+	rscreen->aux_context->flush(rscreen->aux_context, NULL, 0);
+	mtx_unlock(&rscreen->aux_context_lock);
+}
diff -Nru mesa-17.2.4/src/gallium/drivers/r600/r600_pipe_common.h mesa-17.3.3/src/gallium/drivers/r600/r600_pipe_common.h
--- mesa-17.2.4/src/gallium/drivers/r600/r600_pipe_common.h 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r600/r600_pipe_common.h 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,932 @@
+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Marek Olšák
+ *
+ */
+
+/**
+ * This file contains common screen and context structures and functions
+ * for r600g and radeonsi.
+ */
+
+#ifndef R600_PIPE_COMMON_H
+#define R600_PIPE_COMMON_H
+
+#include <stdio.h>
+
+#include "amd/common/ac_binary.h"
+
+#include "radeon/radeon_winsys.h"
+
+#include "util/disk_cache.h"
+#include "util/u_blitter.h"
+#include "util/list.h"
+#include "util/u_range.h"
+#include "util/slab.h"
+#include "util/u_suballoc.h"
+#include "util/u_transfer.h"
+#include "util/u_threaded_context.h"
+
+struct u_log_context;
+
+#define ATI_VENDOR_ID 0x1002
+
+#define R600_RESOURCE_FLAG_TRANSFER (PIPE_RESOURCE_FLAG_DRV_PRIV << 0)
+#define R600_RESOURCE_FLAG_FLUSHED_DEPTH (PIPE_RESOURCE_FLAG_DRV_PRIV << 1)
+#define R600_RESOURCE_FLAG_FORCE_TILING (PIPE_RESOURCE_FLAG_DRV_PRIV << 2)
+#define R600_RESOURCE_FLAG_UNMAPPABLE (PIPE_RESOURCE_FLAG_DRV_PRIV << 4)
+
+#define R600_CONTEXT_STREAMOUT_FLUSH (1u << 0)
+/* Pipeline & streamout query controls. */
+#define R600_CONTEXT_START_PIPELINE_STATS (1u << 1)
+#define R600_CONTEXT_STOP_PIPELINE_STATS (1u << 2)
+#define R600_CONTEXT_FLUSH_FOR_RENDER_COND (1u << 3)
+#define R600_CONTEXT_PRIVATE_FLAG (1u << 4)
+
+/* special primitive types */
+#define R600_PRIM_RECTANGLE_LIST PIPE_PRIM_MAX
+
+#define R600_NOT_QUERY 0xffffffff
+
+/* Debug flags. */
+#define DBG_VS (1 << PIPE_SHADER_VERTEX)
+#define DBG_PS (1 << PIPE_SHADER_FRAGMENT)
+#define DBG_GS (1 << PIPE_SHADER_GEOMETRY)
+#define DBG_TCS (1 << PIPE_SHADER_TESS_CTRL)
+#define DBG_TES (1 << PIPE_SHADER_TESS_EVAL)
+#define DBG_CS (1 << PIPE_SHADER_COMPUTE)
+#define DBG_ALL_SHADERS (DBG_FS - 1)
+#define DBG_FS (1 << 6) /* fetch shader */
+#define DBG_TEX (1 << 7)
+#define DBG_NIR (1 << 8)
+#define DBG_COMPUTE (1 << 9)
+/* gap */
+#define DBG_VM (1 << 11)
+#define DBG_NO_IR (1 << 12)
+#define DBG_NO_TGSI (1 << 13)
+#define DBG_NO_ASM (1 << 14)
+#define DBG_PREOPT_IR (1 << 15)
+#define DBG_CHECK_IR (1 << 16)
+#define DBG_NO_OPT_VARIANT (1 << 17)
+#define DBG_FS_CORRECT_DERIVS_AFTER_KILL (1 << 18)
+/* gaps */
+#define DBG_TEST_DMA (1 << 20)
+/* Bits 21-31 are reserved for the r600g driver. */
+/* features */
+#define DBG_NO_ASYNC_DMA (1ull << 32)
+#define DBG_NO_HYPERZ (1ull << 33)
+#define DBG_NO_DISCARD_RANGE (1ull << 34)
+#define DBG_NO_2D_TILING (1ull << 35)
+#define DBG_NO_TILING (1ull << 36)
+#define DBG_SWITCH_ON_EOP (1ull << 37)
+#define DBG_FORCE_DMA (1ull << 38)
+#define DBG_PRECOMPILE (1ull << 39)
+#define DBG_INFO (1ull << 40)
+#define DBG_NO_WC (1ull << 41)
+#define DBG_CHECK_VM (1ull << 42)
+/* gap */
+#define DBG_UNSAFE_MATH (1ull << 49)
+#define DBG_TEST_VMFAULT_CP (1ull << 51)
+#define DBG_TEST_VMFAULT_SDMA (1ull << 52)
+#define DBG_TEST_VMFAULT_SHADER (1ull << 53)
+
+#define R600_MAP_BUFFER_ALIGNMENT 64
+#define R600_MAX_VIEWPORTS 16
+
+#define SI_MAX_VARIABLE_THREADS_PER_BLOCK 1024
+
+enum r600_coherency {
+ R600_COHERENCY_NONE, /* no cache flushes needed */
+ R600_COHERENCY_SHADER,
+ R600_COHERENCY_CB_META,
+};
+
+#ifdef PIPE_ARCH_BIG_ENDIAN
+#define R600_BIG_ENDIAN 1
+#else
+#define R600_BIG_ENDIAN 0
+#endif
+
+struct r600_common_context;
+struct r600_perfcounters;
+struct tgsi_shader_info;
+struct r600_qbo_state;
+
+void radeon_shader_binary_init(struct ac_shader_binary *b);
+void radeon_shader_binary_clean(struct ac_shader_binary *b);
+
+/* Only 32-bit buffer allocations are supported, gallium doesn't support more
+ * at the moment.
+ */
+struct r600_resource {
+ struct threaded_resource b; /* base; kept first because helpers in this header cast pipe_resource* to r600_resource* */
+
+ /* Winsys objects. */
+ struct pb_buffer *buf;
+ uint64_t gpu_address;
+ /* Memory usage if the buffer placement is optimal. */
+ uint64_t vram_usage; /* added to the context's vram total by r600_context_add_resource_size() */
+ uint64_t gart_usage; /* added to the context's gtt total by r600_context_add_resource_size() */
+
+ /* Resource properties. */
+ uint64_t bo_size;
+ unsigned bo_alignment;
+ enum radeon_bo_domain domains;
+ enum radeon_bo_flag flags;
+ unsigned bind_history;
+
+ /* The buffer range which is initialized (with a write transfer,
+ * streamout, DMA, or as a random access target). The rest of
+ * the buffer is considered invalid and can be mapped unsynchronized.
+ *
+ * This allows unsynchronized mapping of a buffer range which hasn't
+ * been used yet. It's for applications which forget to use
+ * the unsynchronized map flag and expect the driver to figure it out.
+ */
+ struct util_range valid_buffer_range;
+
+ /* Whether the resource has been exported via resource_get_handle. */
+ unsigned external_usage; /* PIPE_HANDLE_USAGE_* */
+
+ /* Whether this resource is referenced by bindless handles. */
+ bool texture_handle_allocated;
+ bool image_handle_allocated;
+};
+
+struct r600_transfer {
+ struct threaded_transfer b;
+ struct r600_resource *staging;
+ unsigned offset;
+};
+
+struct r600_fmask_info {
+ uint64_t offset;
+ uint64_t size;
+ unsigned alignment;
+ unsigned pitch_in_pixels;
+ unsigned bank_height;
+ unsigned slice_tile_max;
+ unsigned tile_mode_index;
+ unsigned tile_swizzle;
+};
+
+struct r600_cmask_info {
+ uint64_t offset;
+ uint64_t size;
+ unsigned alignment;
+ unsigned slice_tile_max;
+ uint64_t base_address_reg;
+};
+
+struct r600_texture {
+ struct r600_resource resource;
+
+ uint64_t size;
+ unsigned num_level0_transfers;
+ enum pipe_format db_render_format;
+ bool is_depth;
+ bool db_compatible;
+ bool can_sample_z;
+ bool can_sample_s;
+ unsigned dirty_level_mask; /* each bit says if that mipmap is compressed */
+ unsigned stencil_dirty_level_mask; /* each bit says if that mipmap is compressed */
+ struct r600_texture *flushed_depth_texture;
+ struct radeon_surf surface;
+
+ /* Colorbuffer compression and fast clear. */
+ struct r600_fmask_info fmask;
+ struct r600_cmask_info cmask;
+ struct r600_resource *cmask_buffer;
+ unsigned cb_color_info; /* fast clear enable bit */
+ unsigned color_clear_value[2];
+ unsigned last_msaa_resolve_target_micro_mode;
+
+ /* Depth buffer compression and fast clear. */
+ uint64_t htile_offset;
+ bool depth_cleared; /* if it was cleared at least once */
+ float depth_clear_value;
+ bool stencil_cleared; /* if it was cleared at least once */
+ uint8_t stencil_clear_value;
+
+ bool non_disp_tiling; /* R600-Cayman only */
+
+ /* Counter that should be non-zero if the texture is bound to a
+ * framebuffer. Implemented in radeonsi only.
+ */
+ uint32_t framebuffers_bound;
+};
+
+struct r600_surface {
+ struct pipe_surface base;
+
+ /* These can vary with block-compressed textures. */
+ unsigned width0;
+ unsigned height0;
+
+ bool color_initialized;
+ bool depth_initialized;
+
+ /* Misc. color flags. */
+ bool alphatest_bypass;
+ bool export_16bpc;
+ bool color_is_int8;
+ bool color_is_int10;
+
+ /* Color registers. */
+ unsigned cb_color_info;
+ unsigned cb_color_base;
+ unsigned cb_color_view;
+ unsigned cb_color_size; /* R600 only */
+ unsigned cb_color_dim; /* EG only */
+ unsigned cb_color_pitch; /* EG and later */
+ unsigned cb_color_slice; /* EG and later */
+ unsigned cb_color_attrib; /* EG and later */
+ unsigned cb_color_fmask; /* CB_COLORn_FMASK (EG and later) or CB_COLORn_FRAG (r600) */
+ unsigned cb_color_fmask_slice; /* EG and later */
+ unsigned cb_color_cmask; /* CB_COLORn_TILE (r600 only) */
+ unsigned cb_color_mask; /* R600 only */
+ struct r600_resource *cb_buffer_fmask; /* Used for FMASK relocations. R600 only */
+ struct r600_resource *cb_buffer_cmask; /* Used for CMASK relocations. R600 only */
+
+ /* DB registers. */
+ uint64_t db_depth_base; /* DB_Z_READ/WRITE_BASE (EG and later) or DB_DEPTH_BASE (r600) */
+ uint64_t db_stencil_base; /* EG and later */
+ uint64_t db_htile_data_base;
+ unsigned db_depth_info; /* R600 only, then SI and later */
+ unsigned db_z_info; /* EG and later */
+ unsigned db_depth_view;
+ unsigned db_depth_size;
+ unsigned db_depth_slice; /* EG and later */
+ unsigned db_stencil_info; /* EG and later */
+ unsigned db_prefetch_limit; /* R600 only */
+ unsigned db_htile_surface;
+ unsigned db_preload_control; /* EG and later */
+};
+
+struct r600_mmio_counter {
+ unsigned busy;
+ unsigned idle;
+};
+
+union r600_mmio_counters { /* the same storage viewed as named busy/idle counters or as a flat array */
+ struct {
+ /* For global GPU load including SDMA. */
+ struct r600_mmio_counter gpu;
+
+ /* GRBM_STATUS */
+ struct r600_mmio_counter spi;
+ struct r600_mmio_counter gui;
+ struct r600_mmio_counter ta;
+ struct r600_mmio_counter gds;
+ struct r600_mmio_counter vgt;
+ struct r600_mmio_counter ia;
+ struct r600_mmio_counter sx;
+ struct r600_mmio_counter wd;
+ struct r600_mmio_counter bci;
+ struct r600_mmio_counter sc;
+ struct r600_mmio_counter pa;
+ struct r600_mmio_counter db;
+ struct r600_mmio_counter cp;
+ struct r600_mmio_counter cb;
+
+ /* SRBM_STATUS2 */
+ struct r600_mmio_counter sdma;
+
+ /* CP_STAT */
+ struct r600_mmio_counter pfp;
+ struct r600_mmio_counter meq;
+ struct r600_mmio_counter me;
+ struct r600_mmio_counter surf_sync;
+ struct r600_mmio_counter cp_dma;
+ struct r600_mmio_counter scratch_ram;
+ } named;
+ unsigned array[0]; /* zero-length array (GNU extension) overlaying `named`; presumably indexed in declaration order, busy before idle - confirm with users */
+};
+
+struct r600_memory_object {
+ struct pipe_memory_object b;
+ struct pb_buffer *buf;
+ uint32_t stride;
+ uint32_t offset;
+};
+
+struct r600_common_screen {
+ struct pipe_screen b;
+ struct radeon_winsys *ws;
+ enum radeon_family family;
+ enum chip_class chip_class;
+ struct radeon_info info;
+ uint64_t debug_flags;
+ bool has_cp_dma;
+ bool has_streamout;
+
+ struct disk_cache *disk_shader_cache;
+
+ struct slab_parent_pool pool_transfers;
+
+ /* Texture filter settings. */
+ int force_aniso; /* -1 = disabled */
+
+ /* Auxiliary context. Mainly used to initialize resources.
+ * It must be locked prior to using and flushed before unlocking. */
+ struct pipe_context *aux_context;
+ mtx_t aux_context_lock;
+
+ /* This must be in the screen, because UE4 uses one context for
+ * compilation and another one for rendering.
+ */
+ unsigned num_compilations;
+ /* Along with ST_DEBUG=precompile, this should show if applications
+ * are loading shaders on demand. This is a monotonic counter.
+ */
+ unsigned num_shaders_created;
+ unsigned num_shader_cache_hits;
+
+ /* GPU load thread. */
+ mtx_t gpu_load_mutex;
+ thrd_t gpu_load_thread;
+ union r600_mmio_counters mmio_counters;
+ volatile unsigned gpu_load_stop_thread; /* bool */
+
+ char renderer_string[100];
+
+ /* Performance counters. */
+ struct r600_perfcounters *perfcounters;
+
+ /* If pipe_screen wants to recompute and re-emit the framebuffer,
+ * sampler, and image states of all contexts, it should atomically
+ * increment this.
+ *
+ * Each context will compare this with its own last known value of
+ * the counter before drawing and re-emit the states accordingly.
+ */
+ unsigned dirty_tex_counter;
+
+ /* Atomically increment this counter when an existing texture's
+ * metadata is enabled or disabled in a way that requires changing
+ * contexts' compressed texture binding masks.
+ */
+ unsigned compressed_colortex_counter;
+
+ struct {
+ /* Context flags to set so that all writes from earlier jobs
+ * in the CP are seen by L2 clients.
+ */
+ unsigned cp_to_L2;
+
+ /* Context flags to set so that all writes from earlier jobs
+ * that end in L2 are seen by CP.
+ */
+ unsigned L2_to_cp;
+
+ /* Context flags to set so that all writes from earlier
+ * compute jobs are seen by L2 clients.
+ */
+ unsigned compute_to_L2;
+ } barrier_flags;
+
+ void (*query_opaque_metadata)(struct r600_common_screen *rscreen,
+ struct r600_texture *rtex,
+ struct radeon_bo_metadata *md);
+
+ void (*apply_opaque_metadata)(struct r600_common_screen *rscreen,
+ struct r600_texture *rtex,
+ struct radeon_bo_metadata *md);
+};
+
+/* This encapsulates a state or an operation which can be emitted into the GPU
+ * command stream. */
+struct r600_atom {
+ void (*emit)(struct r600_common_context *ctx, struct r600_atom *state); /* emission callback; `state` is presumably this atom - confirm */
+ unsigned num_dw; /* presumably the number of command-stream dwords emit() needs - confirm */
+ unsigned short id;
+};
+
+struct r600_so_target {
+ struct pipe_stream_output_target b;
+
+ /* The buffer where BUFFER_FILLED_SIZE is stored. */
+ struct r600_resource *buf_filled_size;
+ unsigned buf_filled_size_offset;
+ bool buf_filled_size_valid;
+
+ unsigned stride_in_dw;
+};
+
+struct r600_streamout {
+ struct r600_atom begin_atom;
+ bool begin_emitted;
+ unsigned num_dw_for_end;
+
+ unsigned enabled_mask;
+ unsigned num_targets;
+ struct r600_so_target *targets[PIPE_MAX_SO_BUFFERS];
+
+ unsigned append_bitmask;
+ bool suspended;
+
+ /* External state which comes from the vertex shader,
+ * it must be set explicitly when binding a shader. */
+ uint16_t *stride_in_dw;
+ unsigned enabled_stream_buffers_mask; /* stream0 buffers0-3 in 4 LSB */
+
+ /* The state of VGT_STRMOUT_BUFFER_(CONFIG|EN). */
+ unsigned hw_enabled_mask;
+
+ /* The state of VGT_STRMOUT_(CONFIG|EN). */
+ struct r600_atom enable_atom;
+ bool streamout_enabled;
+ bool prims_gen_query_enabled;
+ int num_prims_gen_queries;
+};
+
+struct r600_signed_scissor {
+ int minx;
+ int miny;
+ int maxx;
+ int maxy;
+};
+
+struct r600_scissors {
+ struct r600_atom atom;
+ unsigned dirty_mask;
+ struct pipe_scissor_state states[R600_MAX_VIEWPORTS];
+};
+
+struct r600_viewports {
+ struct r600_atom atom;
+ unsigned dirty_mask;
+ unsigned depth_range_dirty_mask;
+ struct pipe_viewport_state states[R600_MAX_VIEWPORTS];
+ struct r600_signed_scissor as_scissor[R600_MAX_VIEWPORTS];
+};
+
+struct r600_ring {
+ struct radeon_winsys_cs *cs;
+ void (*flush)(void *ctx, unsigned flags,
+ struct pipe_fence_handle **fence);
+};
+
+/* Saved CS data for debugging features. */
+struct radeon_saved_cs {
+ uint32_t *ib;
+ unsigned num_dw;
+
+ struct radeon_bo_list_item *bo_list;
+ unsigned bo_count;
+};
+
+struct r600_common_context {
+ struct pipe_context b; /* base class */
+
+ struct r600_common_screen *screen;
+ struct radeon_winsys *ws;
+ struct radeon_winsys_ctx *ctx;
+ enum radeon_family family;
+ enum chip_class chip_class;
+ struct r600_ring gfx;
+ struct r600_ring dma;
+ struct pipe_fence_handle *last_gfx_fence;
+ struct pipe_fence_handle *last_sdma_fence;
+ struct r600_resource *eop_bug_scratch;
+ unsigned num_gfx_cs_flushes;
+ unsigned initial_gfx_cs_size;
+ unsigned gpu_reset_counter;
+ unsigned last_dirty_tex_counter;
+ unsigned last_compressed_colortex_counter;
+ unsigned last_num_draw_calls;
+
+ struct threaded_context *tc;
+ struct u_suballocator *allocator_zeroed_memory;
+ struct slab_child_pool pool_transfers;
+ struct slab_child_pool pool_transfers_unsync; /* for threaded_context */
+
+ /* Current unaccounted memory usage. */
+ uint64_t vram;
+ uint64_t gtt;
+
+ /* States. */
+ struct r600_streamout streamout;
+ struct r600_scissors scissors;
+ struct r600_viewports viewports;
+ bool scissor_enabled;
+ bool clip_halfz;
+ bool vs_writes_viewport_index;
+ bool vs_disables_clipping_viewport;
+
+ /* Additional context states. */
+ unsigned flags; /* flush flags */
+
+ /* Queries. */
+ /* Maintain the list of active queries for pausing between IBs. */
+ int num_occlusion_queries;
+ int num_perfect_occlusion_queries;
+ struct list_head active_queries;
+ unsigned num_cs_dw_queries_suspend;
+ /* Misc stats. */
+ unsigned num_draw_calls;
+ unsigned num_decompress_calls;
+ unsigned num_mrt_draw_calls;
+ unsigned num_prim_restart_calls;
+ unsigned num_spill_draw_calls;
+ unsigned num_compute_calls;
+ unsigned num_spill_compute_calls;
+ unsigned num_dma_calls;
+ unsigned num_cp_dma_calls;
+ unsigned num_vs_flushes;
+ unsigned num_ps_flushes;
+ unsigned num_cs_flushes;
+ unsigned num_cb_cache_flushes;
+ unsigned num_db_cache_flushes;
+ unsigned num_L2_invalidates;
+ unsigned num_L2_writebacks;
+ unsigned num_resident_handles;
+ uint64_t num_alloc_tex_transfer_bytes;
+
+ /* Render condition. */
+ struct r600_atom render_cond_atom;
+ struct pipe_query *render_cond;
+ unsigned render_cond_mode;
+ bool render_cond_invert;
+ bool render_cond_force_off; /* for u_blitter */
+
+ /* MSAA sample locations.
+ * The first index is the sample index.
+ * The second index is the coordinate: X, Y. */
+ float sample_locations_1x[1][2];
+ float sample_locations_2x[2][2];
+ float sample_locations_4x[4][2];
+ float sample_locations_8x[8][2];
+ float sample_locations_16x[16][2];
+
+ struct pipe_debug_callback debug;
+ struct pipe_device_reset_callback device_reset_callback;
+ struct u_log_context *log;
+
+ void *query_result_shader;
+
+ /* Copy one resource to another using async DMA. */
+ void (*dma_copy)(struct pipe_context *ctx,
+ struct pipe_resource *dst,
+ unsigned dst_level,
+ unsigned dst_x, unsigned dst_y, unsigned dst_z,
+ struct pipe_resource *src,
+ unsigned src_level,
+ const struct pipe_box *src_box);
+
+ void (*dma_clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst,
+ uint64_t offset, uint64_t size, unsigned value);
+
+ void (*clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst,
+ uint64_t offset, uint64_t size, unsigned value,
+ enum r600_coherency coher);
+
+ void (*blit_decompress_depth)(struct pipe_context *ctx,
+ struct r600_texture *texture,
+ struct r600_texture *staging,
+ unsigned first_level, unsigned last_level,
+ unsigned first_layer, unsigned last_layer,
+ unsigned first_sample, unsigned last_sample);
+
+ /* Reallocate the buffer and update all resource bindings where
+ * the buffer is bound, including all resource descriptors. */
+ void (*invalidate_buffer)(struct pipe_context *ctx, struct pipe_resource *buf);
+
+ /* Update all resource bindings where the buffer is bound, including
+ * all resource descriptors. This is invalidate_buffer without
+ * the invalidation. */
+ void (*rebind_buffer)(struct pipe_context *ctx, struct pipe_resource *buf,
+ uint64_t old_gpu_address);
+
+ void (*save_qbo_state)(struct pipe_context *ctx, struct r600_qbo_state *st);
+
+ /* This ensures there is enough space in the command stream. */
+ void (*need_gfx_cs_space)(struct pipe_context *ctx, unsigned num_dw,
+ bool include_draw_vbo);
+
+ void (*set_atom_dirty)(struct r600_common_context *ctx,
+ struct r600_atom *atom, bool dirty);
+
+ void (*check_vm_faults)(struct r600_common_context *ctx,
+ struct radeon_saved_cs *saved,
+ enum ring_type ring);
+};
+
+/* r600_buffer_common.c */
+bool r600_rings_is_buffer_referenced(struct r600_common_context *ctx,
+ struct pb_buffer *buf,
+ enum radeon_bo_usage usage);
+void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
+ struct r600_resource *resource,
+ unsigned usage);
+void r600_buffer_subdata(struct pipe_context *ctx,
+ struct pipe_resource *buffer,
+ unsigned usage, unsigned offset,
+ unsigned size, const void *data);
+void r600_init_resource_fields(struct r600_common_screen *rscreen,
+ struct r600_resource *res,
+ uint64_t size, unsigned alignment);
+bool r600_alloc_resource(struct r600_common_screen *rscreen,
+ struct r600_resource *res);
+struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
+ const struct pipe_resource *templ,
+ unsigned alignment);
+struct pipe_resource * r600_aligned_buffer_create(struct pipe_screen *screen,
+ unsigned flags,
+ unsigned usage,
+ unsigned size,
+ unsigned alignment);
+struct pipe_resource *
+r600_buffer_from_user_memory(struct pipe_screen *screen,
+ const struct pipe_resource *templ,
+ void *user_memory);
+void
+r600_invalidate_resource(struct pipe_context *ctx,
+ struct pipe_resource *resource);
+void r600_replace_buffer_storage(struct pipe_context *ctx,
+ struct pipe_resource *dst,
+ struct pipe_resource *src);
+
+/* r600_common_pipe.c */
+void r600_gfx_write_event_eop(struct r600_common_context *ctx,
+ unsigned event, unsigned event_flags,
+ unsigned data_sel,
+ struct r600_resource *buf, uint64_t va,
+ uint32_t new_fence, unsigned query_type);
+unsigned r600_gfx_write_fence_dwords(struct r600_common_screen *screen);
+void r600_gfx_wait_fence(struct r600_common_context *ctx,
+ uint64_t va, uint32_t ref, uint32_t mask);
+void r600_draw_rectangle(struct blitter_context *blitter,
+ void *vertex_elements_cso,
+ blitter_get_vs_func get_vs,
+ int x1, int y1, int x2, int y2,
+ float depth, unsigned num_instances,
+ enum blitter_attrib_type type,
+ const union blitter_attrib *attrib);
+bool r600_common_screen_init(struct r600_common_screen *rscreen,
+ struct radeon_winsys *ws);
+void r600_destroy_common_screen(struct r600_common_screen *rscreen);
+void r600_preflush_suspend_features(struct r600_common_context *ctx);
+void r600_postflush_resume_features(struct r600_common_context *ctx);
+bool r600_common_context_init(struct r600_common_context *rctx,
+ struct r600_common_screen *rscreen,
+ unsigned context_flags);
+void r600_common_context_cleanup(struct r600_common_context *rctx);
+bool r600_can_dump_shader(struct r600_common_screen *rscreen,
+ unsigned processor);
+bool r600_extra_shader_checks(struct r600_common_screen *rscreen,
+ unsigned processor);
+void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
+ uint64_t offset, uint64_t size, unsigned value);
+struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen,
+ const struct pipe_resource *templ);
+const char *r600_get_llvm_processor_name(enum radeon_family family);
+void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
+ struct r600_resource *dst, struct r600_resource *src);
+void radeon_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs,
+ struct radeon_saved_cs *saved, bool get_buffer_list);
+void radeon_clear_saved_cs(struct radeon_saved_cs *saved);
+bool r600_check_device_reset(struct r600_common_context *rctx);
+
+/* r600_gpu_load.c */
+void r600_gpu_load_kill_thread(struct r600_common_screen *rscreen);
+uint64_t r600_begin_counter(struct r600_common_screen *rscreen, unsigned type);
+unsigned r600_end_counter(struct r600_common_screen *rscreen, unsigned type,
+ uint64_t begin);
+
+/* r600_perfcounters.c */
+void r600_perfcounters_destroy(struct r600_common_screen *rscreen);
+
+/* r600_query.c */
+void r600_init_screen_query_functions(struct r600_common_screen *rscreen);
+void r600_query_init(struct r600_common_context *rctx);
+void r600_suspend_queries(struct r600_common_context *ctx);
+void r600_resume_queries(struct r600_common_context *ctx);
+void r600_query_fix_enabled_rb_mask(struct r600_common_screen *rscreen);
+
+/* r600_streamout.c */
+void r600_streamout_buffers_dirty(struct r600_common_context *rctx);
+void r600_set_streamout_targets(struct pipe_context *ctx,
+ unsigned num_targets,
+ struct pipe_stream_output_target **targets,
+ const unsigned *offset);
+void r600_emit_streamout_end(struct r600_common_context *rctx);
+void r600_update_prims_generated_query_state(struct r600_common_context *rctx,
+ unsigned type, int diff);
+void r600_streamout_init(struct r600_common_context *rctx);
+
+/* r600_test_dma.c */
+void r600_test_dma(struct r600_common_screen *rscreen);
+
+/* r600_texture.c */
+bool r600_prepare_for_dma_blit(struct r600_common_context *rctx,
+ struct r600_texture *rdst,
+ unsigned dst_level, unsigned dstx,
+ unsigned dsty, unsigned dstz,
+ struct r600_texture *rsrc,
+ unsigned src_level,
+ const struct pipe_box *src_box);
+void r600_texture_get_fmask_info(struct r600_common_screen *rscreen,
+ struct r600_texture *rtex,
+ unsigned nr_samples,
+ struct r600_fmask_info *out);
+void r600_texture_get_cmask_info(struct r600_common_screen *rscreen,
+ struct r600_texture *rtex,
+ struct r600_cmask_info *out);
+bool r600_init_flushed_depth_texture(struct pipe_context *ctx,
+ struct pipe_resource *texture,
+ struct r600_texture **staging);
+void r600_print_texture_info(struct r600_common_screen *rscreen,
+ struct r600_texture *rtex, struct u_log_context *log);
+struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
+ const struct pipe_resource *templ);
+struct pipe_surface *r600_create_surface_custom(struct pipe_context *pipe,
+ struct pipe_resource *texture,
+ const struct pipe_surface *templ,
+ unsigned width0, unsigned height0,
+ unsigned width, unsigned height);
+unsigned r600_translate_colorswap(enum pipe_format format, bool do_endian_swap);
+void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
+ struct pipe_framebuffer_state *fb,
+ struct r600_atom *fb_state,
+ unsigned *buffers, ubyte *dirty_cbufs,
+ const union pipe_color_union *color);
+void r600_init_screen_texture_functions(struct r600_common_screen *rscreen);
+void r600_init_context_texture_functions(struct r600_common_context *rctx);
+
+/* r600_viewport.c */
+void evergreen_apply_scissor_bug_workaround(struct r600_common_context *rctx,
+ struct pipe_scissor_state *scissor);
+void r600_viewport_set_rast_deps(struct r600_common_context *rctx,
+ bool scissor_enable, bool clip_halfz);
+void r600_update_vs_writes_viewport_index(struct r600_common_context *rctx,
+ struct tgsi_shader_info *info);
+void r600_init_viewport_functions(struct r600_common_context *rctx);
+
+/* cayman_msaa.c */
+extern const uint32_t eg_sample_locs_2x[4];
+extern const unsigned eg_max_dist_2x;
+extern const uint32_t eg_sample_locs_4x[4];
+extern const unsigned eg_max_dist_4x;
+void cayman_get_sample_position(struct pipe_context *ctx, unsigned sample_count,
+ unsigned sample_index, float *out_value);
+void cayman_init_msaa(struct pipe_context *ctx);
+void cayman_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples);
+void cayman_emit_msaa_config(struct radeon_winsys_cs *cs, int nr_samples,
+ int ps_iter_samples, int overrast_samples,
+ unsigned sc_mode_cntl_1);
+
+
+/* Inline helpers. */
+
+static inline struct r600_resource *r600_resource(struct pipe_resource *r)
+{ /* Unchecked pointer cast from the generic resource type to r600_resource. */
+ return (struct r600_resource*)r;
+}
+
+/* Typed wrapper: forwards both pointers, cast, to pipe_resource_reference(). */
+static inline void
+r600_resource_reference(struct r600_resource **ptr, struct r600_resource *res)
+{
+	pipe_resource_reference((struct pipe_resource **)ptr, (struct pipe_resource *)res);
+}
+
+static inline void
+r600_texture_reference(struct r600_texture **ptr, struct r600_texture *res)
+{	/* Cast like r600_resource_reference(): &res->resource.b.b is undefined when res is NULL. */
+	pipe_resource_reference((struct pipe_resource **)ptr, (struct pipe_resource *)res);
+}
+
+/* Adds a resource's vram_usage/gart_usage to the context's running totals,
+ * which are kept for need_gfx_cs_space. A NULL resource is ignored. */
+static inline void
+r600_context_add_resource_size(struct pipe_context *ctx, struct pipe_resource *r)
+{
+	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
+	const struct r600_resource *res = (const struct r600_resource *)r;
+	if (!res)
+		return;
+	rctx->vram += res->vram_usage;
+	rctx->gtt += res->gart_usage;
+}
+
+static inline bool r600_get_strmout_en(struct r600_common_context *rctx)
+{
+	const struct r600_streamout *so = &rctx->streamout;
+	return so->streamout_enabled || so->prims_gen_query_enabled; /* either flag is enough */
+}
+
+#define SQ_TEX_XY_FILTER_POINT 0x00
+#define SQ_TEX_XY_FILTER_BILINEAR 0x01
+#define SQ_TEX_XY_FILTER_ANISO_POINT 0x02
+#define SQ_TEX_XY_FILTER_ANISO_BILINEAR 0x03
+
+/* Picks the SQ_TEX_XY_FILTER_* value: bilinear for PIPE_TEX_FILTER_LINEAR, point for
+ * any other filter; the ANISO variant of either is chosen when max_aniso > 1. */
+static inline unsigned eg_tex_filter(unsigned filter, unsigned max_aniso)
+{
+	const bool linear = filter == PIPE_TEX_FILTER_LINEAR;
+	if (max_aniso > 1)
+		return linear ? SQ_TEX_XY_FILTER_ANISO_BILINEAR : SQ_TEX_XY_FILTER_ANISO_POINT;
+	return linear ? SQ_TEX_XY_FILTER_BILINEAR : SQ_TEX_XY_FILTER_POINT;
+}
+
+/* Maps a value to a 0..4 code: floor(log2(filter)), with 0 and 1 both
+ * giving 0 and everything from 16 upwards clamped to 4. */
+static inline unsigned r600_tex_aniso_filter(unsigned filter)
+{
+	unsigned code = 0;
+
+	/* Count how many of the thresholds 2, 4, 8, 16 the value reaches. */
+	while (code < 4 && filter >= (2u << code))
+		code++;
+
+	return code;
+}
+
+/* Returns the wavefront size for a chip family: 16 or 32 for the families
+ * tested below, and 64 for every other family. */
+static inline unsigned r600_wavefront_size(enum radeon_family family)
+{
+	const bool is_wave16 = family == CHIP_RV610 ||
+			       family == CHIP_RS780 ||
+			       family == CHIP_RV620 ||
+			       family == CHIP_RS880;
+	const bool is_wave32 = family == CHIP_RV630 ||
+			       family == CHIP_RV635 ||
+			       family == CHIP_RV730 ||
+			       family == CHIP_RV710 ||
+			       family == CHIP_PALM ||
+			       family == CHIP_CEDAR;
+
+	if (is_wave16)
+		return 16;
+	return is_wave32 ? 32 : 64;
+}
+
+/* Priority value for a sampler view of res: buffers are tested first, then
+ * textures with nr_samples > 1, then every remaining texture. */
+static inline enum radeon_bo_priority
+r600_get_sampler_view_priority(struct r600_resource *res)
+{
+	const struct pipe_resource *pres = &res->b.b;
+
+	if (pres->target == PIPE_BUFFER)
+		return RADEON_PRIO_SAMPLER_BUFFER;
+	return pres->nr_samples > 1 ? RADEON_PRIO_SAMPLER_TEXTURE_MSAA : RADEON_PRIO_SAMPLER_TEXTURE;
+}
+
+/* Reports can_sample_s for stencil samplers and can_sample_z for all others. */
+static inline bool
+r600_can_sample_zs(struct r600_texture *tex, bool stencil_sampler)
+{
+	return stencil_sampler ? tex->can_sample_s : tex->can_sample_z;
+}
+
+static inline bool r600_htile_enabled(struct r600_texture *tex, unsigned level)
+{
+	/* True only for level 0 of a texture whose htile_offset is non-zero. */
+	return tex->htile_offset != 0 && level == 0;
+}
+
+#define COMPUTE_DBG(rscreen, fmt, args...) /* no trailing ';' here: callers supply it, so if/else stays valid */ \
+	do { \
+		if ((rscreen)->b.debug_flags & DBG_COMPUTE) fprintf(stderr, fmt, ##args); \
+	} while (0)
+
+#define R600_ERR(fmt, args...) /* fmt must be a string literal: it is concatenated onto the "EE file:line func - " prefix */ \
+ fprintf(stderr, "EE %s:%d %s - " fmt, __FILE__, __LINE__, __func__, ##args)
+
+/* For MSAA sample positions: packs eight 4-bit fields, s0x in bits 0-3 up to s3y in bits 28-31. */
+#define FILL_SREG(s0x, s0y, s1x, s1y, s2x, s2y, s3x, s3y) \
+	((((unsigned)(s0x) & 0xf) <<  0) | (((unsigned)(s0y) & 0xf) <<  4) | \
+	 (((unsigned)(s1x) & 0xf) <<  8) | (((unsigned)(s1y) & 0xf) << 12) | \
+	 (((unsigned)(s2x) & 0xf) << 16) | (((unsigned)(s2y) & 0xf) << 20) | \
+	 (((unsigned)(s3x) & 0xf) << 24) | (((unsigned)(s3y) & 0xf) << 28))
+
+static inline int S_FIXED(float value, unsigned frac_bits)
+{	/* Scale by 2^frac_bits; converting the float product to int truncates toward zero. */
+	return (int)(value * (float)(1 << frac_bits));
+}
+
+#endif
diff -Nru mesa-17.2.4/src/gallium/drivers/r600/r600_pipe.h mesa-17.3.3/src/gallium/drivers/r600/r600_pipe.h
--- mesa-17.2.4/src/gallium/drivers/r600/r600_pipe.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r600/r600_pipe.h 2018-01-18 21:30:28.000000000 +0000
@@ -26,8 +26,8 @@
#ifndef R600_PIPE_H
#define R600_PIPE_H
-#include "radeon/r600_pipe_common.h"
-#include "radeon/r600_cs.h"
+#include "r600_pipe_common.h"
+#include "r600_cs.h"
#include "r600_public.h"
#include "pipe/p_defines.h"
diff -Nru mesa-17.2.4/src/gallium/drivers/r600/r600_public.h mesa-17.3.3/src/gallium/drivers/r600/r600_public.h
--- mesa-17.2.4/src/gallium/drivers/r600/r600_public.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r600/r600_public.h 2018-01-18 21:30:28.000000000 +0000
@@ -24,7 +24,9 @@
#define R600_PUBLIC_H
struct radeon_winsys;
+struct pipe_screen_config;
-struct pipe_screen *r600_screen_create(struct radeon_winsys *ws, unsigned flags);
+struct pipe_screen *r600_screen_create(struct radeon_winsys *ws,
+ const struct pipe_screen_config *config);
#endif
diff -Nru mesa-17.2.4/src/gallium/drivers/r600/r600_query.c mesa-17.3.3/src/gallium/drivers/r600/r600_query.c
--- mesa-17.2.4/src/gallium/drivers/r600/r600_query.c 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r600/r600_query.c 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,2126 @@
+/*
+ * Copyright 2010 Jerome Glisse
+ * Copyright 2014 Marek Olšák
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "r600_query.h"
+#include "r600_pipe.h"
+#include "r600_cs.h"
+#include "util/u_memory.h"
+#include "util/u_upload_mgr.h"
+#include "os/os_time.h"
+#include "tgsi/tgsi_text.h"
+
+#define R600_MAX_STREAMS 4
+
+struct r600_hw_query_params { /* per-query-type result layout, filled in by r600_get_hw_query_params() */
+ unsigned start_offset; /* position of the begin value within one result -- presumably a byte offset, TODO confirm */
+ unsigned end_offset; /* position of the end value within one result */
+ unsigned fence_offset; /* position of the fence within one result */
+ unsigned pair_stride; /* distance between begin/end pairs; defaults to 0, 16 for occlusion queries */
+ unsigned pair_count; /* number of begin/end pairs; defaults to 1, one per render backend for occlusion queries */
+};
+
+/* Queries without buffer handling or suspend/resume. */
+struct r600_query_sw {
+ struct r600_query b;
+
+ uint64_t begin_result; /* value sampled by r600_query_sw_begin() */
+ uint64_t end_result; /* value sampled by r600_query_sw_end() */
+
+ uint64_t begin_time; /* divisor sampled at begin; only written for the query types that report a ratio */
+ uint64_t end_time; /* matching divisor sampled at end */
+
+ /* Fence for GPU_FINISHED. */
+ struct pipe_fence_handle *fence;
+};
+
+static void r600_query_sw_destroy(struct r600_common_screen *rscreen,
+				  struct r600_query *rquery)
+{
+	struct pipe_screen *pscreen = &rscreen->b;
+	struct r600_query_sw *sw = (struct r600_query_sw *)rquery;
+	pscreen->fence_reference(pscreen, &sw->fence, NULL); /* let go of the fence kept for GPU_FINISHED */
+	FREE(sw);
+}
+
+static enum radeon_value_id winsys_id_from_type(unsigned type)
+{ /* Translate an R600_QUERY_* type into the radeon_value_id that is handed to the winsys query_value() hook. */
+ switch (type) {
+ case R600_QUERY_REQUESTED_VRAM: return RADEON_REQUESTED_VRAM_MEMORY;
+ case R600_QUERY_REQUESTED_GTT: return RADEON_REQUESTED_GTT_MEMORY;
+ case R600_QUERY_MAPPED_VRAM: return RADEON_MAPPED_VRAM;
+ case R600_QUERY_MAPPED_GTT: return RADEON_MAPPED_GTT;
+ case R600_QUERY_BUFFER_WAIT_TIME: return RADEON_BUFFER_WAIT_TIME_NS;
+ case R600_QUERY_NUM_MAPPED_BUFFERS: return RADEON_NUM_MAPPED_BUFFERS;
+ case R600_QUERY_NUM_GFX_IBS: return RADEON_NUM_GFX_IBS;
+ case R600_QUERY_NUM_SDMA_IBS: return RADEON_NUM_SDMA_IBS;
+ case R600_QUERY_GFX_BO_LIST_SIZE: return RADEON_GFX_BO_LIST_COUNTER;
+ case R600_QUERY_NUM_BYTES_MOVED: return RADEON_NUM_BYTES_MOVED;
+ case R600_QUERY_NUM_EVICTIONS: return RADEON_NUM_EVICTIONS;
+ case R600_QUERY_NUM_VRAM_CPU_PAGE_FAULTS: return RADEON_NUM_VRAM_CPU_PAGE_FAULTS;
+ case R600_QUERY_VRAM_USAGE: return RADEON_VRAM_USAGE;
+ case R600_QUERY_VRAM_VIS_USAGE: return RADEON_VRAM_VIS_USAGE;
+ case R600_QUERY_GTT_USAGE: return RADEON_GTT_USAGE;
+ case R600_QUERY_GPU_TEMPERATURE: return RADEON_GPU_TEMPERATURE;
+ case R600_QUERY_CURRENT_GPU_SCLK: return RADEON_CURRENT_SCLK;
+ case R600_QUERY_CURRENT_GPU_MCLK: return RADEON_CURRENT_MCLK;
+ case R600_QUERY_CS_THREAD_BUSY: return RADEON_CS_THREAD_TIME;
+ default: unreachable("query type does not correspond to winsys id"); /* callers only pass the types listed above */
+ }
+}
+
+static bool r600_query_sw_begin(struct r600_common_context *rctx,
+ struct r600_query *rquery)
+{ /* Take the begin-side sample of a software query; returns true for every handled type. */
+ struct r600_query_sw *query = (struct r600_query_sw *)rquery;
+ enum radeon_value_id ws_id;
+
+ switch(query->b.type) {
+ case PIPE_QUERY_TIMESTAMP_DISJOINT:
+ case PIPE_QUERY_GPU_FINISHED:
+ break; /* nothing to sample at begin */
+ case R600_QUERY_DRAW_CALLS:
+ query->begin_result = rctx->num_draw_calls;
+ break;
+ case R600_QUERY_DECOMPRESS_CALLS:
+ query->begin_result = rctx->num_decompress_calls;
+ break;
+ case R600_QUERY_MRT_DRAW_CALLS:
+ query->begin_result = rctx->num_mrt_draw_calls;
+ break;
+ case R600_QUERY_PRIM_RESTART_CALLS:
+ query->begin_result = rctx->num_prim_restart_calls;
+ break;
+ case R600_QUERY_SPILL_DRAW_CALLS:
+ query->begin_result = rctx->num_spill_draw_calls;
+ break;
+ case R600_QUERY_COMPUTE_CALLS:
+ query->begin_result = rctx->num_compute_calls;
+ break;
+ case R600_QUERY_SPILL_COMPUTE_CALLS:
+ query->begin_result = rctx->num_spill_compute_calls;
+ break;
+ case R600_QUERY_DMA_CALLS:
+ query->begin_result = rctx->num_dma_calls;
+ break;
+ case R600_QUERY_CP_DMA_CALLS:
+ query->begin_result = rctx->num_cp_dma_calls;
+ break;
+ case R600_QUERY_NUM_VS_FLUSHES:
+ query->begin_result = rctx->num_vs_flushes;
+ break;
+ case R600_QUERY_NUM_PS_FLUSHES:
+ query->begin_result = rctx->num_ps_flushes;
+ break;
+ case R600_QUERY_NUM_CS_FLUSHES:
+ query->begin_result = rctx->num_cs_flushes;
+ break;
+ case R600_QUERY_NUM_CB_CACHE_FLUSHES:
+ query->begin_result = rctx->num_cb_cache_flushes;
+ break;
+ case R600_QUERY_NUM_DB_CACHE_FLUSHES:
+ query->begin_result = rctx->num_db_cache_flushes;
+ break;
+ case R600_QUERY_NUM_L2_INVALIDATES:
+ query->begin_result = rctx->num_L2_invalidates;
+ break;
+ case R600_QUERY_NUM_L2_WRITEBACKS:
+ query->begin_result = rctx->num_L2_writebacks;
+ break;
+ case R600_QUERY_NUM_RESIDENT_HANDLES:
+ query->begin_result = rctx->num_resident_handles;
+ break;
+ case R600_QUERY_TC_OFFLOADED_SLOTS:
+ query->begin_result = rctx->tc ? rctx->tc->num_offloaded_slots : 0; /* 0 when rctx->tc is not set */
+ break;
+ case R600_QUERY_TC_DIRECT_SLOTS:
+ query->begin_result = rctx->tc ? rctx->tc->num_direct_slots : 0;
+ break;
+ case R600_QUERY_TC_NUM_SYNCS:
+ query->begin_result = rctx->tc ? rctx->tc->num_syncs : 0;
+ break;
+ case R600_QUERY_REQUESTED_VRAM:
+ case R600_QUERY_REQUESTED_GTT:
+ case R600_QUERY_MAPPED_VRAM:
+ case R600_QUERY_MAPPED_GTT:
+ case R600_QUERY_VRAM_USAGE:
+ case R600_QUERY_VRAM_VIS_USAGE:
+ case R600_QUERY_GTT_USAGE:
+ case R600_QUERY_GPU_TEMPERATURE:
+ case R600_QUERY_CURRENT_GPU_SCLK:
+ case R600_QUERY_CURRENT_GPU_MCLK:
+ case R600_QUERY_NUM_MAPPED_BUFFERS:
+ query->begin_result = 0; /* zero baseline: get_result then reports the value read at end unchanged */
+ break;
+ case R600_QUERY_BUFFER_WAIT_TIME:
+ case R600_QUERY_NUM_GFX_IBS:
+ case R600_QUERY_NUM_SDMA_IBS:
+ case R600_QUERY_NUM_BYTES_MOVED:
+ case R600_QUERY_NUM_EVICTIONS:
+ case R600_QUERY_NUM_VRAM_CPU_PAGE_FAULTS: {
+ enum radeon_value_id ws_id = winsys_id_from_type(query->b.type); /* NOTE(review): shadows the function-scope ws_id declared above */
+ query->begin_result = rctx->ws->query_value(rctx->ws, ws_id);
+ break;
+ }
+ case R600_QUERY_GFX_BO_LIST_SIZE:
+ ws_id = winsys_id_from_type(query->b.type);
+ query->begin_result = rctx->ws->query_value(rctx->ws, ws_id);
+ query->begin_time = rctx->ws->query_value(rctx->ws,
+ RADEON_NUM_GFX_IBS); /* the gfx IB count is the divisor for this query, not a time */
+ break;
+ case R600_QUERY_CS_THREAD_BUSY:
+ ws_id = winsys_id_from_type(query->b.type);
+ query->begin_result = rctx->ws->query_value(rctx->ws, ws_id);
+ query->begin_time = os_time_get_nano(); /* reference clock for the busy percentage */
+ break;
+ case R600_QUERY_GALLIUM_THREAD_BUSY:
+ query->begin_result =
+ rctx->tc ? util_queue_get_thread_time_nano(&rctx->tc->queue, 0) : 0;
+ query->begin_time = os_time_get_nano();
+ break;
+ case R600_QUERY_GPU_LOAD:
+ case R600_QUERY_GPU_SHADERS_BUSY:
+ case R600_QUERY_GPU_TA_BUSY:
+ case R600_QUERY_GPU_GDS_BUSY:
+ case R600_QUERY_GPU_VGT_BUSY:
+ case R600_QUERY_GPU_IA_BUSY:
+ case R600_QUERY_GPU_SX_BUSY:
+ case R600_QUERY_GPU_WD_BUSY:
+ case R600_QUERY_GPU_BCI_BUSY:
+ case R600_QUERY_GPU_SC_BUSY:
+ case R600_QUERY_GPU_PA_BUSY:
+ case R600_QUERY_GPU_DB_BUSY:
+ case R600_QUERY_GPU_CP_BUSY:
+ case R600_QUERY_GPU_CB_BUSY:
+ case R600_QUERY_GPU_SDMA_BUSY:
+ case R600_QUERY_GPU_PFP_BUSY:
+ case R600_QUERY_GPU_MEQ_BUSY:
+ case R600_QUERY_GPU_ME_BUSY:
+ case R600_QUERY_GPU_SURF_SYNC_BUSY:
+ case R600_QUERY_GPU_CP_DMA_BUSY:
+ case R600_QUERY_GPU_SCRATCH_RAM_BUSY:
+ query->begin_result = r600_begin_counter(rctx->screen,
+ query->b.type); /* this value is passed back to r600_end_counter() at end */
+ break;
+ case R600_QUERY_NUM_COMPILATIONS:
+ query->begin_result = p_atomic_read(&rctx->screen->num_compilations);
+ break;
+ case R600_QUERY_NUM_SHADERS_CREATED:
+ query->begin_result = p_atomic_read(&rctx->screen->num_shaders_created);
+ break;
+ case R600_QUERY_NUM_SHADER_CACHE_HITS:
+ query->begin_result =
+ p_atomic_read(&rctx->screen->num_shader_cache_hits);
+ break;
+ case R600_QUERY_GPIN_ASIC_ID:
+ case R600_QUERY_GPIN_NUM_SIMD:
+ case R600_QUERY_GPIN_NUM_RB:
+ case R600_QUERY_GPIN_NUM_SPI:
+ case R600_QUERY_GPIN_NUM_SE:
+ break; /* fixed values, produced directly in get_result */
+ default:
+ unreachable("r600_query_sw_begin: bad query type");
+ }
+
+ return true;
+}
+
+static bool r600_query_sw_end(struct r600_common_context *rctx,
+ struct r600_query *rquery)
+{ /* Take the end-side sample of a software query; returns true for every handled type. */
+ struct r600_query_sw *query = (struct r600_query_sw *)rquery;
+ enum radeon_value_id ws_id;
+
+ switch(query->b.type) {
+ case PIPE_QUERY_TIMESTAMP_DISJOINT:
+ break;
+ case PIPE_QUERY_GPU_FINISHED:
+ rctx->b.flush(&rctx->b, &query->fence, PIPE_FLUSH_DEFERRED); /* stores the fence that get_result later passes to fence_finish() */
+ break;
+ case R600_QUERY_DRAW_CALLS:
+ query->end_result = rctx->num_draw_calls;
+ break;
+ case R600_QUERY_DECOMPRESS_CALLS:
+ query->end_result = rctx->num_decompress_calls;
+ break;
+ case R600_QUERY_MRT_DRAW_CALLS:
+ query->end_result = rctx->num_mrt_draw_calls;
+ break;
+ case R600_QUERY_PRIM_RESTART_CALLS:
+ query->end_result = rctx->num_prim_restart_calls;
+ break;
+ case R600_QUERY_SPILL_DRAW_CALLS:
+ query->end_result = rctx->num_spill_draw_calls;
+ break;
+ case R600_QUERY_COMPUTE_CALLS:
+ query->end_result = rctx->num_compute_calls;
+ break;
+ case R600_QUERY_SPILL_COMPUTE_CALLS:
+ query->end_result = rctx->num_spill_compute_calls;
+ break;
+ case R600_QUERY_DMA_CALLS:
+ query->end_result = rctx->num_dma_calls;
+ break;
+ case R600_QUERY_CP_DMA_CALLS:
+ query->end_result = rctx->num_cp_dma_calls;
+ break;
+ case R600_QUERY_NUM_VS_FLUSHES:
+ query->end_result = rctx->num_vs_flushes;
+ break;
+ case R600_QUERY_NUM_PS_FLUSHES:
+ query->end_result = rctx->num_ps_flushes;
+ break;
+ case R600_QUERY_NUM_CS_FLUSHES:
+ query->end_result = rctx->num_cs_flushes;
+ break;
+ case R600_QUERY_NUM_CB_CACHE_FLUSHES:
+ query->end_result = rctx->num_cb_cache_flushes;
+ break;
+ case R600_QUERY_NUM_DB_CACHE_FLUSHES:
+ query->end_result = rctx->num_db_cache_flushes;
+ break;
+ case R600_QUERY_NUM_L2_INVALIDATES:
+ query->end_result = rctx->num_L2_invalidates;
+ break;
+ case R600_QUERY_NUM_L2_WRITEBACKS:
+ query->end_result = rctx->num_L2_writebacks;
+ break;
+ case R600_QUERY_NUM_RESIDENT_HANDLES:
+ query->end_result = rctx->num_resident_handles;
+ break;
+ case R600_QUERY_TC_OFFLOADED_SLOTS:
+ query->end_result = rctx->tc ? rctx->tc->num_offloaded_slots : 0; /* 0 when rctx->tc is not set */
+ break;
+ case R600_QUERY_TC_DIRECT_SLOTS:
+ query->end_result = rctx->tc ? rctx->tc->num_direct_slots : 0;
+ break;
+ case R600_QUERY_TC_NUM_SYNCS:
+ query->end_result = rctx->tc ? rctx->tc->num_syncs : 0;
+ break;
+ case R600_QUERY_REQUESTED_VRAM:
+ case R600_QUERY_REQUESTED_GTT:
+ case R600_QUERY_MAPPED_VRAM:
+ case R600_QUERY_MAPPED_GTT:
+ case R600_QUERY_VRAM_USAGE:
+ case R600_QUERY_VRAM_VIS_USAGE:
+ case R600_QUERY_GTT_USAGE:
+ case R600_QUERY_GPU_TEMPERATURE:
+ case R600_QUERY_CURRENT_GPU_SCLK:
+ case R600_QUERY_CURRENT_GPU_MCLK:
+ case R600_QUERY_BUFFER_WAIT_TIME:
+ case R600_QUERY_NUM_MAPPED_BUFFERS:
+ case R600_QUERY_NUM_GFX_IBS:
+ case R600_QUERY_NUM_SDMA_IBS:
+ case R600_QUERY_NUM_BYTES_MOVED:
+ case R600_QUERY_NUM_EVICTIONS:
+ case R600_QUERY_NUM_VRAM_CPU_PAGE_FAULTS: {
+ enum radeon_value_id ws_id = winsys_id_from_type(query->b.type); /* NOTE(review): shadows the function-scope ws_id declared above */
+ query->end_result = rctx->ws->query_value(rctx->ws, ws_id);
+ break;
+ }
+ case R600_QUERY_GFX_BO_LIST_SIZE:
+ ws_id = winsys_id_from_type(query->b.type);
+ query->end_result = rctx->ws->query_value(rctx->ws, ws_id);
+ query->end_time = rctx->ws->query_value(rctx->ws,
+ RADEON_NUM_GFX_IBS); /* the gfx IB count is the divisor for this query, not a time */
+ break;
+ case R600_QUERY_CS_THREAD_BUSY:
+ ws_id = winsys_id_from_type(query->b.type);
+ query->end_result = rctx->ws->query_value(rctx->ws, ws_id);
+ query->end_time = os_time_get_nano();
+ break;
+ case R600_QUERY_GALLIUM_THREAD_BUSY:
+ query->end_result =
+ rctx->tc ? util_queue_get_thread_time_nano(&rctx->tc->queue, 0) : 0;
+ query->end_time = os_time_get_nano();
+ break;
+ case R600_QUERY_GPU_LOAD:
+ case R600_QUERY_GPU_SHADERS_BUSY:
+ case R600_QUERY_GPU_TA_BUSY:
+ case R600_QUERY_GPU_GDS_BUSY:
+ case R600_QUERY_GPU_VGT_BUSY:
+ case R600_QUERY_GPU_IA_BUSY:
+ case R600_QUERY_GPU_SX_BUSY:
+ case R600_QUERY_GPU_WD_BUSY:
+ case R600_QUERY_GPU_BCI_BUSY:
+ case R600_QUERY_GPU_SC_BUSY:
+ case R600_QUERY_GPU_PA_BUSY:
+ case R600_QUERY_GPU_DB_BUSY:
+ case R600_QUERY_GPU_CP_BUSY:
+ case R600_QUERY_GPU_CB_BUSY:
+ case R600_QUERY_GPU_SDMA_BUSY:
+ case R600_QUERY_GPU_PFP_BUSY:
+ case R600_QUERY_GPU_MEQ_BUSY:
+ case R600_QUERY_GPU_ME_BUSY:
+ case R600_QUERY_GPU_SURF_SYNC_BUSY:
+ case R600_QUERY_GPU_CP_DMA_BUSY:
+ case R600_QUERY_GPU_SCRATCH_RAM_BUSY:
+ query->end_result = r600_end_counter(rctx->screen,
+ query->b.type,
+ query->begin_result);
+ query->begin_result = 0; /* get_result subtracts begin_result, so zero it to report end_result unchanged */
+ break;
+ case R600_QUERY_NUM_COMPILATIONS:
+ query->end_result = p_atomic_read(&rctx->screen->num_compilations);
+ break;
+ case R600_QUERY_NUM_SHADERS_CREATED:
+ query->end_result = p_atomic_read(&rctx->screen->num_shaders_created);
+ break;
+ case R600_QUERY_NUM_SHADER_CACHE_HITS:
+ query->end_result =
+ p_atomic_read(&rctx->screen->num_shader_cache_hits);
+ break;
+ case R600_QUERY_GPIN_ASIC_ID:
+ case R600_QUERY_GPIN_NUM_SIMD:
+ case R600_QUERY_GPIN_NUM_RB:
+ case R600_QUERY_GPIN_NUM_SPI:
+ case R600_QUERY_GPIN_NUM_SE:
+ break; /* fixed values, produced directly in get_result */
+ default:
+ unreachable("r600_query_sw_end: bad query type");
+ }
+
+ return true;
+}
+
+static bool r600_query_sw_get_result(struct r600_common_context *rctx,
+				     struct r600_query *rquery,
+				     bool wait,
+				     union pipe_query_result *result)
+{ /* Turn the begin/end samples of a software query into *result. Returns true once the result is valid; only GPU_FINISHED can return false. */
+	struct r600_query_sw *query = (struct r600_query_sw *)rquery;
+
+	switch (query->b.type) {
+	case PIPE_QUERY_TIMESTAMP_DISJOINT:
+		/* Convert from cycles per millisecond to cycles per second (Hz). */
+		result->timestamp_disjoint.frequency =
+			(uint64_t)rctx->screen->info.clock_crystal_freq * 1000;
+		result->timestamp_disjoint.disjoint = false;
+		return true;
+	case PIPE_QUERY_GPU_FINISHED: {
+		struct pipe_screen *screen = rctx->b.screen;
+		struct pipe_context *ctx = rquery->b.flushed ? NULL : &rctx->b; /* no context is passed once the query is marked flushed */
+
+		result->b = screen->fence_finish(screen, ctx, query->fence,
+						 wait ? PIPE_TIMEOUT_INFINITE : 0); /* a zero timeout only polls the fence */
+		return result->b;
+	}
+
+	case R600_QUERY_GFX_BO_LIST_SIZE:
+		result->u64 = query->end_time == query->begin_time ? 0 : /* no gfx IB was submitted in between: avoid dividing by zero */
+			(query->end_result - query->begin_result) / (query->end_time - query->begin_time);
+		return true;
+	case R600_QUERY_CS_THREAD_BUSY:
+	case R600_QUERY_GALLIUM_THREAD_BUSY:
+		result->u64 = query->end_time == query->begin_time ? 0 : /* begin and end share a clock sample: avoid dividing by zero */
+			(query->end_result - query->begin_result) * 100 / (query->end_time - query->begin_time);
+		return true;
+	case R600_QUERY_GPIN_ASIC_ID:
+		result->u32 = 0;
+		return true;
+	case R600_QUERY_GPIN_NUM_SIMD:
+		result->u32 = rctx->screen->info.num_good_compute_units;
+		return true;
+	case R600_QUERY_GPIN_NUM_RB:
+		result->u32 = rctx->screen->info.num_render_backends;
+		return true;
+	case R600_QUERY_GPIN_NUM_SPI:
+		result->u32 = 1; /* all supported chips have one SPI per SE */
+		return true;
+	case R600_QUERY_GPIN_NUM_SE:
+		result->u32 = rctx->screen->info.max_se;
+		return true;
+	}
+
+	result->u64 = query->end_result - query->begin_result; /* every remaining type reports the plain difference */
+
+	switch (query->b.type) {
+	case R600_QUERY_BUFFER_WAIT_TIME:
+	case R600_QUERY_GPU_TEMPERATURE:
+		result->u64 /= 1000; /* unit conversion -- presumably ns to us and millidegrees to degrees, TODO confirm */
+		break;
+	case R600_QUERY_CURRENT_GPU_SCLK:
+	case R600_QUERY_CURRENT_GPU_MCLK:
+		result->u64 *= 1000000; /* unit conversion -- presumably MHz to Hz, TODO confirm */
+		break;
+	}
+
+	return true;
+}
+
+
+static struct r600_query_ops sw_query_ops = { /* hooks installed on every query made by r600_query_sw_create() */
+ .destroy = r600_query_sw_destroy,
+ .begin = r600_query_sw_begin,
+ .end = r600_query_sw_end,
+ .get_result = r600_query_sw_get_result,
+ .get_result_resource = NULL /* software queries provide no such hook */
+};
+
+static struct pipe_query *r600_query_sw_create(unsigned query_type)
+{
+	/* Allocate the software query; a NULL result means the allocation failed. */
+	struct r600_query_sw *sw = CALLOC_STRUCT(r600_query_sw);
+
+	if (sw) {
+		sw->b.type = query_type;
+		sw->b.ops = &sw_query_ops;
+	}
+
+	/* An allocation failure reaches the caller as a NULL query. */
+	return (struct pipe_query *)sw;
+}
+
+void r600_query_hw_destroy(struct r600_common_screen *rscreen,
+			   struct r600_query *rquery)
+{
+	struct r600_query_hw *hwq = (struct r600_query_hw *)rquery;
+	struct r600_query_buffer *node = hwq->buffer.previous;
+
+	/* Walk the chain of older buffers, dropping each resource and its list node. */
+	while (node) {
+		struct r600_query_buffer *older = node->previous;
+		r600_resource_reference(&node->buf, NULL);
+		FREE(node);
+		node = older;
+	}
+
+	r600_resource_reference(&hwq->buffer.buf, NULL);
+	r600_resource_reference(&hwq->workaround_buf, NULL);
+	FREE(hwq);
+}
+
+static struct r600_resource *r600_new_query_buffer(struct r600_common_screen *rscreen,
+						   struct r600_query_hw *query)
+{
+	/* Never ask for less than info.min_alloc_size. */
+	unsigned size = MAX2(query->result_size, rscreen->info.min_alloc_size);
+	struct r600_resource *res;
+
+	/* The GPU writes query results and the CPU normally
+	 * reads them back afterwards, so staging is probably
+	 * a good usage pattern.
+	 */
+	res = (struct r600_resource *)pipe_buffer_create(&rscreen->b, 0,
+							 PIPE_USAGE_STAGING, size);
+	if (!res)
+		return NULL;
+
+	/* Run the query's prepare_buffer hook; drop the buffer if it fails. */
+	if (query->ops->prepare_buffer(rscreen, query, res))
+		return res;
+
+	r600_resource_reference(&res, NULL);
+	return NULL;
+}
+
+static bool r600_query_hw_prepare_buffer(struct r600_common_screen *rscreen,
+ struct r600_query_hw *query,
+ struct r600_resource *buffer)
+{
+ /* Callers ensure that the buffer is currently unused by the GPU. */
+ uint32_t *results = rscreen->ws->buffer_map(buffer->buf, NULL,
+ PIPE_TRANSFER_WRITE |
+ PIPE_TRANSFER_UNSYNCHRONIZED);
+ if (!results)
+ return false;
+
+ memset(results, 0, buffer->b.b.width0);
+
+ if (query->b.type == PIPE_QUERY_OCCLUSION_COUNTER ||
+ query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE) {
+ unsigned max_rbs = rscreen->info.num_render_backends;
+ unsigned enabled_rb_mask = rscreen->info.enabled_rb_mask;
+ unsigned num_results;
+ unsigned i, j;
+
+ /* Set top bits for unused backends. */
+ num_results = buffer->b.b.width0 / query->result_size;
+ for (j = 0; j < num_results; j++) {
+ for (i = 0; i < max_rbs; i++) {
+ if (!(enabled_rb_mask & (1<buffer.buf = r600_new_query_buffer(rscreen, query);
+ if (!query->buffer.buf)
+ return false;
+
+ return true;
+}
+
+static struct pipe_query *r600_query_hw_create(struct r600_common_screen *rscreen,
+ unsigned query_type,
+ unsigned index)
+{ /* Allocate a hardware query and set its per-type result size and command-stream dword budgets; returns NULL on failure or for an unknown type. */
+ struct r600_query_hw *query = CALLOC_STRUCT(r600_query_hw);
+ if (!query)
+ return NULL;
+
+ query->b.type = query_type;
+ query->b.ops = &query_hw_ops;
+ query->ops = &query_hw_default_hw_ops;
+
+ switch (query_type) {
+ case PIPE_QUERY_OCCLUSION_COUNTER:
+ case PIPE_QUERY_OCCLUSION_PREDICATE:
+ query->result_size = 16 * rscreen->info.num_render_backends; /* 16 bytes per render backend */
+ query->result_size += 16; /* for the fence + alignment */
+ query->num_cs_dw_begin = 6;
+ query->num_cs_dw_end = 6 + r600_gfx_write_fence_dwords(rscreen);
+ break;
+ case PIPE_QUERY_TIME_ELAPSED:
+ query->result_size = 24;
+ query->num_cs_dw_begin = 8;
+ query->num_cs_dw_end = 8 + r600_gfx_write_fence_dwords(rscreen);
+ break;
+ case PIPE_QUERY_TIMESTAMP:
+ query->result_size = 16;
+ query->num_cs_dw_end = 8 + r600_gfx_write_fence_dwords(rscreen);
+ query->flags = R600_QUERY_HW_FLAG_NO_START; /* end-only: r600_query_hw_begin() rejects queries with this flag */
+ break;
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ case PIPE_QUERY_PRIMITIVES_GENERATED:
+ case PIPE_QUERY_SO_STATISTICS:
+ case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ /* NumPrimitivesWritten, PrimitiveStorageNeeded. */
+ query->result_size = 32;
+ query->num_cs_dw_begin = 6;
+ query->num_cs_dw_end = 6;
+ query->stream = index; /* only these types use the caller's index, as the stream number */
+ break;
+ case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
+ /* NumPrimitivesWritten, PrimitiveStorageNeeded. */
+ query->result_size = 32 * R600_MAX_STREAMS; /* one 32-byte block per stream */
+ query->num_cs_dw_begin = 6 * R600_MAX_STREAMS;
+ query->num_cs_dw_end = 6 * R600_MAX_STREAMS;
+ break;
+ case PIPE_QUERY_PIPELINE_STATISTICS:
+ /* 11 values on EG, 8 on R600. */
+ query->result_size = (rscreen->chip_class >= EVERGREEN ? 11 : 8) * 16;
+ query->result_size += 8; /* for the fence + alignment */
+ query->num_cs_dw_begin = 6;
+ query->num_cs_dw_end = 6 + r600_gfx_write_fence_dwords(rscreen);
+ break;
+ default:
+ assert(0);
+ FREE(query);
+ return NULL;
+ }
+
+ if (!r600_query_hw_init(rscreen, query)) {
+ FREE(query);
+ return NULL;
+ }
+
+ return (struct pipe_query *)query;
+}
+
+static void r600_update_occlusion_query_state(struct r600_common_context *rctx,
+					      unsigned type, int diff)
+{
+	struct r600_context *r600 = (struct r600_context *)rctx;
+	bool had_any, had_perfect;
+
+	/* Only the two occlusion query types affect this state. */
+	if (type != PIPE_QUERY_OCCLUSION_COUNTER &&
+	    type != PIPE_QUERY_OCCLUSION_PREDICATE)
+		return;
+
+	had_any = rctx->num_occlusion_queries != 0;
+	had_perfect = rctx->num_perfect_occlusion_queries != 0;
+	rctx->num_occlusion_queries += diff;
+	assert(rctx->num_occlusion_queries >= 0);
+
+	/* Counter queries are additionally tracked as "perfect" occlusion queries. */
+	if (type == PIPE_QUERY_OCCLUSION_COUNTER) {
+		rctx->num_perfect_occlusion_queries += diff;
+		assert(rctx->num_perfect_occlusion_queries >= 0);
+	}
+
+	/* Mark the DB misc state atom dirty when either count switched between zero and non-zero. */
+	if (had_any != (rctx->num_occlusion_queries != 0) ||
+	    had_perfect != (rctx->num_perfect_occlusion_queries != 0))
+		r600_mark_atom_dirty(r600, &r600->db_misc_state.atom);
+}
+
+static unsigned event_type_for_stream(unsigned stream)
+{
+	switch (stream) {
+	case 1: return EVENT_TYPE_SAMPLE_STREAMOUTSTATS1;
+	case 2: return EVENT_TYPE_SAMPLE_STREAMOUTSTATS2;
+	case 3: return EVENT_TYPE_SAMPLE_STREAMOUTSTATS3;
+	default: /* stream 0, and any index outside 1..3 */
+		return EVENT_TYPE_SAMPLE_STREAMOUTSTATS;
+	}
+}
+
+static void emit_sample_streamout(struct radeon_winsys_cs *cs, uint64_t va,
+ unsigned stream)
+{ /* Emit one EVENT_WRITE packet carrying the streamout-stats sample event for `stream`, with `va` as its address. */
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
+ radeon_emit(cs, EVENT_TYPE(event_type_for_stream(stream)) | EVENT_INDEX(3));
+ radeon_emit(cs, va); /* address, low dword */
+ radeon_emit(cs, va >> 32); /* address, bits 32 and up */
+}
+
+static void r600_query_hw_do_emit_start(struct r600_common_context *ctx,
+ struct r600_query_hw *query,
+ struct r600_resource *buffer,
+ uint64_t va)
+{ /* Emit the begin-of-query packets for `query`, sampling into GPU address `va`. NOTE(review): `buffer` is unused; the reloc below uses query->buffer.buf, which is what r600_query_hw_emit_start() passes. */
+ struct radeon_winsys_cs *cs = ctx->gfx.cs;
+
+ switch (query->b.type) {
+ case PIPE_QUERY_OCCLUSION_COUNTER:
+ case PIPE_QUERY_OCCLUSION_PREDICATE:
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
+ radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ break;
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ case PIPE_QUERY_PRIMITIVES_GENERATED:
+ case PIPE_QUERY_SO_STATISTICS:
+ case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ emit_sample_streamout(cs, va, query->stream);
+ break;
+ case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
+ for (unsigned stream = 0; stream < R600_MAX_STREAMS; ++stream)
+ emit_sample_streamout(cs, va + 32 * stream, stream); /* one 32-byte block per stream */
+ break;
+ case PIPE_QUERY_TIME_ELAPSED:
+ /* Write the timestamp after the last draw is done.
+ * (bottom-of-pipe)
+ */
+ r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS,
+ 0, EOP_DATA_SEL_TIMESTAMP,
+ NULL, va, 0, query->b.type);
+ break;
+ case PIPE_QUERY_PIPELINE_STATISTICS:
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
+ radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ break;
+ default:
+ assert(0); /* PIPE_QUERY_TIMESTAMP is end-only and has no case here */
+ }
+ r600_emit_reloc(ctx, &ctx->gfx, query->buffer.buf, RADEON_USAGE_WRITE,
+ RADEON_PRIO_QUERY);
+}
+
+static void r600_query_hw_emit_start(struct r600_common_context *ctx,
+				     struct r600_query_hw *query)
+{ /* Start (or resume) a hardware query. On any allocation failure query->buffer.buf is left NULL, which callers treat as failure. */
+	uint64_t va;
+
+	if (!query->buffer.buf)
+		return; // previous buffer allocation failure
+
+	r600_update_occlusion_query_state(ctx, query->b.type, 1);
+	r600_update_prims_generated_query_state(ctx, query->b.type, 1);
+
+	ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw_begin + query->num_cs_dw_end, true); /* reserves room for the end packets too */
+
+	/* Get a new query buffer if needed. */
+	if (query->buffer.results_end + query->result_size > query->buffer.buf->b.b.width0) {
+		struct r600_query_buffer *qbuf = MALLOC_STRUCT(r600_query_buffer);
+		if (!qbuf) { r600_resource_reference(&query->buffer.buf, NULL); return; } /* out of memory: fail like a buffer allocation failure */
+		*qbuf = query->buffer; /* the full buffer moves to the head of the "previous" chain */
+		query->buffer.results_end = 0;
+		query->buffer.previous = qbuf;
+		query->buffer.buf = r600_new_query_buffer(ctx->screen, query);
+		if (!query->buffer.buf)
+			return;
+	}
+
+	/* emit begin query */
+	va = query->buffer.buf->gpu_address + query->buffer.results_end;
+
+	query->ops->emit_start(ctx, query, query->buffer.buf, va);
+
+	ctx->num_cs_dw_queries_suspend += query->num_cs_dw_end;
+}
+
+static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx,
+ struct r600_query_hw *query,
+ struct r600_resource *buffer,
+ uint64_t va)
+{ /* Emit the end-of-query packets for the result slot at `va`, then the fence write for types that have one. NOTE(review): `buffer` is unused; query->buffer.buf is used instead. */
+ struct radeon_winsys_cs *cs = ctx->gfx.cs;
+ uint64_t fence_va = 0; /* stays 0 for types without a fence, which skips the fence write below */
+
+ switch (query->b.type) {
+ case PIPE_QUERY_OCCLUSION_COUNTER:
+ case PIPE_QUERY_OCCLUSION_PREDICATE:
+ va += 8; /* the end value goes 8 bytes after the begin value */
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
+ radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+
+ fence_va = va + ctx->screen->info.num_render_backends * 16 - 8; /* -8 undoes the adjustment above: slot start + 16 bytes per render backend */
+ break;
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ case PIPE_QUERY_PRIMITIVES_GENERATED:
+ case PIPE_QUERY_SO_STATISTICS:
+ case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ va += 16; /* the end sample goes 16 bytes after the begin sample */
+ emit_sample_streamout(cs, va, query->stream);
+ break;
+ case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
+ va += 16;
+ for (unsigned stream = 0; stream < R600_MAX_STREAMS; ++stream)
+ emit_sample_streamout(cs, va + 32 * stream, stream);
+ break;
+ case PIPE_QUERY_TIME_ELAPSED:
+ va += 8; /* the end timestamp goes after the begin timestamp */
+ /* fall through */
+ case PIPE_QUERY_TIMESTAMP:
+ r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS,
+ 0, EOP_DATA_SEL_TIMESTAMP, NULL, va,
+ 0, query->b.type);
+ fence_va = va + 8; /* the fence directly follows the timestamp just written */
+ break;
+ case PIPE_QUERY_PIPELINE_STATISTICS: {
+ unsigned sample_size = (query->result_size - 8) / 2; /* result_size is two samples plus 8 bytes, per r600_query_hw_create() */
+
+ va += sample_size;
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
+ radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+
+ fence_va = va + sample_size; /* the fence follows the second sample */
+ break;
+ }
+ default:
+ assert(0);
+ }
+ r600_emit_reloc(ctx, &ctx->gfx, query->buffer.buf, RADEON_USAGE_WRITE,
+ RADEON_PRIO_QUERY);
+
+ if (fence_va)
+ r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS, 0,
+ EOP_DATA_SEL_VALUE_32BIT,
+ query->buffer.buf, fence_va, 0x80000000,
+ query->b.type); /* writes the 32-bit value 0x80000000 at fence_va */
+}
+
+static void r600_query_hw_emit_stop(struct r600_common_context *ctx,
+ struct r600_query_hw *query)
+{ /* Stop (or suspend) a hardware query: emit the end packets and advance to the next result slot. */
+ uint64_t va;
+
+ if (!query->buffer.buf)
+ return; // previous buffer allocation failure
+
+ /* Queries with a begin already reserved CS space for their end packets in r600_query_hw_emit_start(); end-only (NO_START) queries reserve it here. */
+ if (query->flags & R600_QUERY_HW_FLAG_NO_START) {
+ ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw_end, false);
+ }
+
+ /* emit end query */
+ va = query->buffer.buf->gpu_address + query->buffer.results_end;
+
+ query->ops->emit_stop(ctx, query, query->buffer.buf, va);
+
+ query->buffer.results_end += query->result_size; /* this slot is now complete */
+
+ if (!(query->flags & R600_QUERY_HW_FLAG_NO_START))
+ ctx->num_cs_dw_queries_suspend -= query->num_cs_dw_end; /* undoes the addition made in emit_start */
+
+ r600_update_occlusion_query_state(ctx, query->b.type, -1);
+ r600_update_prims_generated_query_state(ctx, query->b.type, -1);
+}
+
+static void emit_set_predicate(struct r600_common_context *ctx,
+ struct r600_resource *buf, uint64_t va,
+ uint32_t op)
+{ /* Emit one SET_PREDICATION packet that reads its data from `va` inside `buf`, with the operation bits in `op`. */
+ struct radeon_winsys_cs *cs = ctx->gfx.cs;
+
+ radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 1, 0));
+ radeon_emit(cs, va); /* address, low dword */
+ radeon_emit(cs, op | ((va >> 32) & 0xFF)); /* address bits 39:32 share this dword with the op bits */
+ r600_emit_reloc(ctx, &ctx->gfx, buf, RADEON_USAGE_READ,
+ RADEON_PRIO_QUERY);
+}
+
+static void r600_emit_query_predication(struct r600_common_context *ctx,
+ struct r600_atom *atom)
+{ /* Emit SET_PREDICATION packets for the query in ctx->render_cond; emits nothing when that is NULL. `atom` is not used. */
+ struct r600_query_hw *query = (struct r600_query_hw *)ctx->render_cond;
+ struct r600_query_buffer *qbuf;
+ uint32_t op;
+ bool flag_wait, invert;
+
+ if (!query)
+ return;
+
+ invert = ctx->render_cond_invert;
+ flag_wait = ctx->render_cond_mode == PIPE_RENDER_COND_WAIT ||
+ ctx->render_cond_mode == PIPE_RENDER_COND_BY_REGION_WAIT;
+
+ if (query->workaround_buf) {
+ op = PRED_OP(PREDICATION_OP_BOOL64);
+ } else {
+ switch (query->b.type) {
+ case PIPE_QUERY_OCCLUSION_COUNTER:
+ case PIPE_QUERY_OCCLUSION_PREDICATE:
+ op = PRED_OP(PREDICATION_OP_ZPASS);
+ break;
+ case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
+ op = PRED_OP(PREDICATION_OP_PRIMCOUNT);
+ invert = !invert; /* the overflow predicates use the opposite sense from the occlusion ones */
+ break;
+ default:
+ assert(0);
+ return;
+ }
+ }
+
+ /* if true then invert, see GL_ARB_conditional_render_inverted */
+ if (invert)
+ op |= PREDICATION_DRAW_NOT_VISIBLE; /* Draw if not visible or overflow */
+ else
+ op |= PREDICATION_DRAW_VISIBLE; /* Draw if visible or no overflow */
+
+ /* Use the value written by compute shader as a workaround. Note that
+ * the wait flag does not apply in this predication mode.
+ *
+ * The shader outputs the result value to L2. Workarounds only affect VI
+ * and later, where the CP reads data from L2, so we don't need an
+ * additional flush.
+ */
+ if (query->workaround_buf) {
+ uint64_t va = query->workaround_buf->gpu_address + query->workaround_offset;
+ emit_set_predicate(ctx, query->workaround_buf, va, op);
+ return;
+ }
+
+ op |= flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW;
+
+ /* emit predicate packets for all data blocks */
+ for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) { /* current buffer first, then the chain of older ones */
+ unsigned results_base = 0;
+ uint64_t va_base = qbuf->buf->gpu_address;
+
+ while (results_base < qbuf->results_end) {
+ uint64_t va = va_base + results_base;
+
+ if (query->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
+ for (unsigned stream = 0; stream < R600_MAX_STREAMS; ++stream) {
+ emit_set_predicate(ctx, qbuf->buf, va + 32 * stream, op); /* one packet per 32-byte stream block */
+
+ /* set CONTINUE bit for all packets except the first */
+ op |= PREDICATION_CONTINUE;
+ }
+ } else {
+ emit_set_predicate(ctx, qbuf->buf, va, op);
+ op |= PREDICATION_CONTINUE; /* same rule: every packet after the first carries CONTINUE */
+ }
+
+ results_base += query->result_size;
+ }
+ }
+}
+
+static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index)
+{
+	/* TIMESTAMP_DISJOINT, GPU_FINISHED and every driver-specific type are software queries. */
+	bool is_sw = query_type == PIPE_QUERY_TIMESTAMP_DISJOINT ||
+		     query_type == PIPE_QUERY_GPU_FINISHED ||
+		     query_type >= PIPE_QUERY_DRIVER_SPECIFIC;
+
+	/* Software queries need no screen; everything else becomes a hardware query. */
+	return is_sw ? r600_query_sw_create(query_type)
+		     : r600_query_hw_create((struct r600_common_screen *)ctx->screen,
+					    query_type, index);
+}
+
+static void r600_destroy_query(struct pipe_context *ctx, struct pipe_query *query)
+{
+	struct r600_query *q = (struct r600_query *)query;
+
+	/* Dispatch to the query's own destroy hook, which takes the screen rather than the context. */
+	q->ops->destroy(((struct r600_common_context *)ctx)->screen, q);
+}
+
+static boolean r600_begin_query(struct pipe_context *ctx,
+				struct pipe_query *query)
+{
+	struct r600_query *q = (struct r600_query *)query;
+
+	/* Forward to the begin hook of the query's implementation. */
+	return q->ops->begin((struct r600_common_context *)ctx, q);
+}
+
+void r600_query_hw_reset_buffers(struct r600_common_context *rctx,
+				 struct r600_query_hw *query)
+{ /* Drop all accumulated results and leave the query with one usable buffer; query->buffer.buf is NULL afterwards if that fails. */
+	struct r600_query_buffer *prev = query->buffer.previous;
+
+	/* Discard the old query buffers. */
+	while (prev) {
+		struct r600_query_buffer *qbuf = prev;
+		prev = prev->previous;
+		r600_resource_reference(&qbuf->buf, NULL);
+		FREE(qbuf);
+	}
+
+	query->buffer.results_end = 0;
+	query->buffer.previous = NULL;
+
+	/* Obtain a new buffer if an earlier allocation failed (buf is NULL) or the current one can't be mapped without a stall. */
+	if (!query->buffer.buf ||
+	    r600_rings_is_buffer_referenced(rctx, query->buffer.buf->buf, RADEON_USAGE_READWRITE) ||
+	    !rctx->ws->buffer_wait(query->buffer.buf->buf, 0, RADEON_USAGE_READWRITE)) {
+		r600_resource_reference(&query->buffer.buf, NULL);
+		query->buffer.buf = r600_new_query_buffer(rctx->screen, query);
+	} else if (!query->ops->prepare_buffer(rctx->screen, query, query->buffer.buf)) {
+		r600_resource_reference(&query->buffer.buf, NULL); /* re-initialising the idle buffer failed */
+	}
+}
+
+bool r600_query_hw_begin(struct r600_common_context *rctx,
+ struct r600_query *rquery)
+{ /* Begin a hardware query; returns false for end-only queries and when no result buffer is available. */
+ struct r600_query_hw *query = (struct r600_query_hw *)rquery;
+
+ if (query->flags & R600_QUERY_HW_FLAG_NO_START) { /* end-only queries (e.g. PIPE_QUERY_TIMESTAMP) must not be begun */
+ assert(0);
+ return false;
+ }
+
+ if (!(query->flags & R600_QUERY_HW_FLAG_BEGIN_RESUMES))
+ r600_query_hw_reset_buffers(rctx, query); /* start from an empty result set */
+
+ r600_resource_reference(&query->workaround_buf, NULL);
+
+ r600_query_hw_emit_start(rctx, query);
+ if (!query->buffer.buf) /* emit_start leaves this NULL when a buffer could not be allocated */
+ return false;
+
+ LIST_ADDTAIL(&query->list, &rctx->active_queries);
+ return true;
+}
+
+/* pipe_context end_query hook: dispatches to the end callback of the
+ * query's ops table and passes its return value through.
+ */
+static bool r600_end_query(struct pipe_context *ctx, struct pipe_query *query)
+{
+ struct r600_query *q = (struct r600_query *)query;
+
+ return q->ops->end((struct r600_common_context *)ctx, q);
+}
+
+/* End callback for hardware queries.
+ *
+ * Queries flagged R600_QUERY_HW_FLAG_NO_START get their buffers reset here
+ * and are never on the active list; all other queries are unlinked from it
+ * after the stop has been emitted. The return value tells whether a result
+ * buffer exists.
+ */
+bool r600_query_hw_end(struct r600_common_context *rctx,
+ struct r600_query *rquery)
+{
+ struct r600_query_hw *query = (struct r600_query_hw *)rquery;
+
+ if ((query->flags & R600_QUERY_HW_FLAG_NO_START) != 0)
+ r600_query_hw_reset_buffers(rctx, query);
+
+ r600_query_hw_emit_stop(rctx, query);
+
+ if ((query->flags & R600_QUERY_HW_FLAG_NO_START) == 0)
+ LIST_DELINIT(&query->list);
+
+ return query->buffer.buf != NULL;
+}
+
+/* Fill params with the layout of one result slot for the type of rquery:
+ * the offsets of the start value, the end value and the fence, plus the
+ * number of start/end pairs (pair_count) and the distance between them
+ * (pair_stride). pair_stride/pair_count default to 0/1.
+ *
+ * index shifts the offsets for PIPE_QUERY_SO_STATISTICS and selects the
+ * table entry for PIPE_QUERY_PIPELINE_STATISTICS; it is not range-checked
+ * here. Unsupported query types hit unreachable().
+ */
+static void r600_get_hw_query_params(struct r600_common_context *rctx,
+ struct r600_query_hw *rquery, int index,
+ struct r600_hw_query_params *params)
+{
+ unsigned max_rbs = rctx->screen->info.num_render_backends;
+
+ params->pair_stride = 0;
+ params->pair_count = 1;
+
+ switch (rquery->b.type) {
+ case PIPE_QUERY_OCCLUSION_COUNTER:
+ case PIPE_QUERY_OCCLUSION_PREDICATE:
+ /* One 16-byte start/end pair per render backend, fence after them. */
+ params->start_offset = 0;
+ params->end_offset = 8;
+ params->fence_offset = max_rbs * 16;
+ params->pair_stride = 16;
+ params->pair_count = max_rbs;
+ break;
+ case PIPE_QUERY_TIME_ELAPSED:
+ params->start_offset = 0;
+ params->end_offset = 8;
+ params->fence_offset = 16;
+ break;
+ case PIPE_QUERY_TIMESTAMP:
+ /* A single value: start and end coincide. */
+ params->start_offset = 0;
+ params->end_offset = 0;
+ params->fence_offset = 8;
+ break;
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ params->start_offset = 8;
+ params->end_offset = 24;
+ params->fence_offset = params->end_offset + 4;
+ break;
+ case PIPE_QUERY_PRIMITIVES_GENERATED:
+ params->start_offset = 0;
+ params->end_offset = 16;
+ params->fence_offset = params->end_offset + 4;
+ break;
+ case PIPE_QUERY_SO_STATISTICS:
+ params->start_offset = 8 - index * 8;
+ params->end_offset = 24 - index * 8;
+ params->fence_offset = params->end_offset + 4;
+ break;
+ case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
+ params->pair_count = R600_MAX_STREAMS;
+ params->pair_stride = 32;
+ /* Intentional fallthrough: the offsets are shared with the
+ * single-stream overflow predicate. */
+ case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ params->start_offset = 0;
+ params->end_offset = 16;
+
+ /* We can re-use the high dword of the last 64-bit value as a
+ * fence: it is initialized as 0, and the high bit is set by
+ * the write of the streamout stats event.
+ */
+ params->fence_offset = rquery->result_size - 4;
+ break;
+ case PIPE_QUERY_PIPELINE_STATISTICS:
+ {
+ /* Offsets apply to EG+ */
+ static const unsigned offsets[] = {56, 48, 24, 32, 40, 16, 8, 0, 64, 72, 80};
+ params->start_offset = offsets[index];
+ params->end_offset = 88 + offsets[index];
+ params->fence_offset = 2 * 88;
+ break;
+ }
+ default:
+ unreachable("r600_get_hw_query_params unsupported");
+ }
+}
+
+/* Read two 64-bit values from map and return end - start.
+ *
+ * map is treated as an array of 32-bit words; each value is assembled from
+ * the dword at the given index (low half) and the following dword (high
+ * half). With test_status_bit set, the difference is only returned when
+ * bit 63 is set in both values; otherwise 0 is returned.
+ *
+ * The full 64-bit difference is returned. The former `unsigned` return type
+ * silently dropped the upper 32 bits before callers accumulated the value
+ * into 64-bit result fields.
+ */
+static uint64_t r600_query_read_result(void *map, unsigned start_index, unsigned end_index,
+ bool test_status_bit)
+{
+ uint32_t *current_result = (uint32_t*)map;
+ uint64_t start, end;
+
+ start = (uint64_t)current_result[start_index] |
+ (uint64_t)current_result[start_index+1] << 32;
+ end = (uint64_t)current_result[end_index] |
+ (uint64_t)current_result[end_index+1] << 32;
+
+ if (!test_status_bit ||
+ ((start & 0x8000000000000000ULL) && (end & 0x8000000000000000ULL))) {
+ return end - start;
+ }
+ return 0;
+}
+
+/* Accumulate one result slot (buffer) of a hardware query into result,
+ * according to the query type.
+ *
+ * Counter-style queries add to the matching u64/statistics fields, predicate
+ * queries OR into result->b, and PIPE_QUERY_TIMESTAMP overwrites
+ * result->u64. The index arguments passed to r600_query_read_result() are
+ * in 32-bit words. Unknown query types trigger assert(0).
+ */
+static void r600_query_hw_add_result(struct r600_common_screen *rscreen,
+ struct r600_query_hw *query,
+ void *buffer,
+ union pipe_query_result *result)
+{
+ unsigned max_rbs = rscreen->info.num_render_backends;
+
+ switch (query->b.type) {
+ case PIPE_QUERY_OCCLUSION_COUNTER: {
+ /* One 16-byte start/end pair per render backend. */
+ for (unsigned i = 0; i < max_rbs; ++i) {
+ unsigned results_base = i * 16;
+ result->u64 +=
+ r600_query_read_result(buffer + results_base, 0, 2, true);
+ }
+ break;
+ }
+ case PIPE_QUERY_OCCLUSION_PREDICATE: {
+ for (unsigned i = 0; i < max_rbs; ++i) {
+ unsigned results_base = i * 16;
+ result->b = result->b ||
+ r600_query_read_result(buffer + results_base, 0, 2, true) != 0;
+ }
+ break;
+ }
+ case PIPE_QUERY_TIME_ELAPSED:
+ result->u64 += r600_query_read_result(buffer, 0, 2, false);
+ break;
+ case PIPE_QUERY_TIMESTAMP:
+ result->u64 = *(uint64_t*)buffer;
+ break;
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ /* SAMPLE_STREAMOUTSTATS stores this structure:
+ * {
+ * u64 NumPrimitivesWritten;
+ * u64 PrimitiveStorageNeeded;
+ * }
+ * We only need NumPrimitivesWritten here. */
+ result->u64 += r600_query_read_result(buffer, 2, 6, true);
+ break;
+ case PIPE_QUERY_PRIMITIVES_GENERATED:
+ /* Here we read PrimitiveStorageNeeded. */
+ result->u64 += r600_query_read_result(buffer, 0, 4, true);
+ break;
+ case PIPE_QUERY_SO_STATISTICS:
+ result->so_statistics.num_primitives_written +=
+ r600_query_read_result(buffer, 2, 6, true);
+ result->so_statistics.primitives_storage_needed +=
+ r600_query_read_result(buffer, 0, 4, true);
+ break;
+ case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ /* True when the two streamout deltas differ. */
+ result->b = result->b ||
+ r600_query_read_result(buffer, 2, 6, true) !=
+ r600_query_read_result(buffer, 0, 4, true);
+ break;
+ case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
+ /* Same test for each stream; streams are 32 bytes apart. */
+ for (unsigned stream = 0; stream < R600_MAX_STREAMS; ++stream) {
+ result->b = result->b ||
+ r600_query_read_result(buffer, 2, 6, true) !=
+ r600_query_read_result(buffer, 0, 4, true);
+ buffer = (char *)buffer + 32;
+ }
+ break;
+ case PIPE_QUERY_PIPELINE_STATISTICS:
+ if (rscreen->chip_class >= EVERGREEN) {
+ /* 11 counters: the end values start 22 dwords after the
+ * start values. */
+ result->pipeline_statistics.ps_invocations +=
+ r600_query_read_result(buffer, 0, 22, false);
+ result->pipeline_statistics.c_primitives +=
+ r600_query_read_result(buffer, 2, 24, false);
+ result->pipeline_statistics.c_invocations +=
+ r600_query_read_result(buffer, 4, 26, false);
+ result->pipeline_statistics.vs_invocations +=
+ r600_query_read_result(buffer, 6, 28, false);
+ result->pipeline_statistics.gs_invocations +=
+ r600_query_read_result(buffer, 8, 30, false);
+ result->pipeline_statistics.gs_primitives +=
+ r600_query_read_result(buffer, 10, 32, false);
+ result->pipeline_statistics.ia_primitives +=
+ r600_query_read_result(buffer, 12, 34, false);
+ result->pipeline_statistics.ia_vertices +=
+ r600_query_read_result(buffer, 14, 36, false);
+ result->pipeline_statistics.hs_invocations +=
+ r600_query_read_result(buffer, 16, 38, false);
+ result->pipeline_statistics.ds_invocations +=
+ r600_query_read_result(buffer, 18, 40, false);
+ result->pipeline_statistics.cs_invocations +=
+ r600_query_read_result(buffer, 20, 42, false);
+ } else {
+ /* 8 counters: the end values start 16 dwords after the
+ * start values. */
+ result->pipeline_statistics.ps_invocations +=
+ r600_query_read_result(buffer, 0, 16, false);
+ result->pipeline_statistics.c_primitives +=
+ r600_query_read_result(buffer, 2, 18, false);
+ result->pipeline_statistics.c_invocations +=
+ r600_query_read_result(buffer, 4, 20, false);
+ result->pipeline_statistics.vs_invocations +=
+ r600_query_read_result(buffer, 6, 22, false);
+ result->pipeline_statistics.gs_invocations +=
+ r600_query_read_result(buffer, 8, 24, false);
+ result->pipeline_statistics.gs_primitives +=
+ r600_query_read_result(buffer, 10, 26, false);
+ result->pipeline_statistics.ia_primitives +=
+ r600_query_read_result(buffer, 12, 28, false);
+ result->pipeline_statistics.ia_vertices +=
+ r600_query_read_result(buffer, 14, 30, false);
+ }
+#if 0 /* for testing */
+ printf("Pipeline stats: IA verts=%llu, IA prims=%llu, VS=%llu, HS=%llu, "
+ "DS=%llu, GS=%llu, GS prims=%llu, Clipper=%llu, "
+ "Clipper prims=%llu, PS=%llu, CS=%llu\n",
+ result->pipeline_statistics.ia_vertices,
+ result->pipeline_statistics.ia_primitives,
+ result->pipeline_statistics.vs_invocations,
+ result->pipeline_statistics.hs_invocations,
+ result->pipeline_statistics.ds_invocations,
+ result->pipeline_statistics.gs_invocations,
+ result->pipeline_statistics.gs_primitives,
+ result->pipeline_statistics.c_invocations,
+ result->pipeline_statistics.c_primitives,
+ result->pipeline_statistics.ps_invocations,
+ result->pipeline_statistics.cs_invocations);
+#endif
+ break;
+ default:
+ assert(0);
+ }
+}
+
+/* pipe_context get_query_result hook: dispatches to the get_result callback
+ * of the query's ops table and passes its return value through.
+ */
+static boolean r600_get_query_result(struct pipe_context *ctx,
+ struct pipe_query *query, boolean wait,
+ union pipe_query_result *result)
+{
+ struct r600_query *q = (struct r600_query *)query;
+
+ return q->ops->get_result((struct r600_common_context *)ctx, q, wait, result);
+}
+
+/* pipe_context get_query_result_resource hook: dispatches to the
+ * get_result_resource callback of the query's ops table, forwarding every
+ * argument unchanged.
+ */
+static void r600_get_query_result_resource(struct pipe_context *ctx,
+ struct pipe_query *query,
+ boolean wait,
+ enum pipe_query_value_type result_type,
+ int index,
+ struct pipe_resource *resource,
+ unsigned offset)
+{
+ struct r600_query *q = (struct r600_query *)query;
+
+ q->ops->get_result_resource((struct r600_common_context *)ctx, q, wait,
+ result_type, index, resource, offset);
+}
+
+/* Reset result to its cleared state for the type of the given hardware
+ * query by delegating to util_query_clear_result().
+ */
+static void r600_query_hw_clear_result(struct r600_query_hw *query,
+ union pipe_query_result *result)
+{
+ util_query_clear_result(result, query->b.type);
+}
+
+/* Read back a hardware query on the CPU.
+ *
+ * result is cleared, then every result slot of every buffer in the query's
+ * buffer chain is mapped and accumulated through the add_result callback.
+ * Without wait the mapping is requested with PIPE_TRANSFER_DONTBLOCK.
+ * Returns false as soon as one buffer cannot be mapped (result may then be
+ * partially accumulated), true otherwise.
+ */
+bool r600_query_hw_get_result(struct r600_common_context *rctx,
+ struct r600_query *rquery,
+ bool wait, union pipe_query_result *result)
+{
+ struct r600_common_screen *rscreen = rctx->screen;
+ struct r600_query_hw *query = (struct r600_query_hw *)rquery;
+ struct r600_query_buffer *qbuf;
+
+ query->ops->clear_result(query, result);
+
+ for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
+ unsigned usage = PIPE_TRANSFER_READ |
+ (wait ? 0 : PIPE_TRANSFER_DONTBLOCK);
+ unsigned results_base = 0;
+ void *map;
+
+ /* A query marked as flushed is mapped directly through the
+ * winsys; otherwise the mapping goes through the helper that
+ * synchronizes with the rings. */
+ if (rquery->b.flushed)
+ map = rctx->ws->buffer_map(qbuf->buf->buf, NULL, usage);
+ else
+ map = r600_buffer_map_sync_with_rings(rctx, qbuf->buf, usage);
+
+ if (!map)
+ return false;
+
+ /* Walk the slots written so far, result_size bytes each.
+ * NOTE(review): map + results_base is arithmetic on void *,
+ * which relies on a compiler extension. */
+ while (results_base != qbuf->results_end) {
+ query->ops->add_result(rscreen, query, map + results_base,
+ result);
+ results_base += query->result_size;
+ }
+ }
+
+ /* Convert the time to expected units. */
+ if (rquery->type == PIPE_QUERY_TIME_ELAPSED ||
+ rquery->type == PIPE_QUERY_TIMESTAMP) {
+ result->u64 = (1000000 * result->u64) / rscreen->info.clock_crystal_freq;
+ }
+ return true;
+}
+
+/* Create the compute shader that is used to collect the results.
+ *
+ * One compute grid with a single thread is launched for every query result
+ * buffer. The thread (optionally) reads a previous summary buffer, then
+ * accumulates data from the query result buffer, and writes the result either
+ * to a summary buffer to be consumed by the next grid invocation or to the
+ * user-supplied buffer.
+ *
+ * Data layout:
+ *
+ * CONST
+ * 0.x = end_offset
+ * 0.y = result_stride
+ * 0.z = result_count
+ * 0.w = bit field:
+ * 1: read previously accumulated values
+ * 2: write accumulated values for chaining
+ * 4: write result available
+ * 8: convert result to boolean (0/1)
+ * 16: only read one dword and use that as result
+ * 32: apply timestamp conversion
+ * 64: store full 64 bits result
+ * 128: store signed 32 bits result
+ * 256: SO_OVERFLOW mode: take the difference of two successive half-pairs
+ * 1.x = fence_offset
+ * 1.y = pair_stride
+ * 1.z = pair_count
+ *
+ * BUFFER[0] = query result buffer
+ * BUFFER[1] = previous summary buffer
+ * BUFFER[2] = next summary buffer or user-supplied buffer
+ *
+ * On success the compute state is stored in rctx->query_result_shader; if
+ * the shader text cannot be translated, that field is left untouched.
+ */
+static void r600_create_query_result_shader(struct r600_common_context *rctx)
+{
+ /* TEMP[0].xy = accumulated result so far
+ * TEMP[0].z = result not available
+ *
+ * TEMP[1].x = current result index
+ * TEMP[1].y = current pair index
+ */
+ static const char text_tmpl[] =
+ "COMP\n"
+ "PROPERTY CS_FIXED_BLOCK_WIDTH 1\n"
+ "PROPERTY CS_FIXED_BLOCK_HEIGHT 1\n"
+ "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"
+ "DCL BUFFER[0]\n"
+ "DCL BUFFER[1]\n"
+ "DCL BUFFER[2]\n"
+ "DCL CONST[0][0..1]\n"
+ "DCL TEMP[0..5]\n"
+ "IMM[0] UINT32 {0, 31, 2147483647, 4294967295}\n"
+ "IMM[1] UINT32 {1, 2, 4, 8}\n"
+ "IMM[2] UINT32 {16, 32, 64, 128}\n"
+ "IMM[3] UINT32 {1000000, 0, %u, 0}\n" /* for timestamp conversion */
+ "IMM[4] UINT32 {256, 0, 0, 0}\n"
+
+ "AND TEMP[5], CONST[0][0].wwww, IMM[2].xxxx\n"
+ "UIF TEMP[5]\n"
+ /* Check result availability. */
+ "LOAD TEMP[1].x, BUFFER[0], CONST[0][1].xxxx\n"
+ "ISHR TEMP[0].z, TEMP[1].xxxx, IMM[0].yyyy\n"
+ "MOV TEMP[1], TEMP[0].zzzz\n"
+ "NOT TEMP[0].z, TEMP[0].zzzz\n"
+
+ /* Load result if available. */
+ "UIF TEMP[1]\n"
+ "LOAD TEMP[0].xy, BUFFER[0], IMM[0].xxxx\n"
+ "ENDIF\n"
+ "ELSE\n"
+ /* Load previously accumulated result if requested. */
+ "MOV TEMP[0], IMM[0].xxxx\n"
+ "AND TEMP[4], CONST[0][0].wwww, IMM[1].xxxx\n"
+ "UIF TEMP[4]\n"
+ "LOAD TEMP[0].xyz, BUFFER[1], IMM[0].xxxx\n"
+ "ENDIF\n"
+
+ "MOV TEMP[1].x, IMM[0].xxxx\n"
+ "BGNLOOP\n"
+ /* Break if accumulated result so far is not available. */
+ "UIF TEMP[0].zzzz\n"
+ "BRK\n"
+ "ENDIF\n"
+
+ /* Break if result_index >= result_count. */
+ "USGE TEMP[5], TEMP[1].xxxx, CONST[0][0].zzzz\n"
+ "UIF TEMP[5]\n"
+ "BRK\n"
+ "ENDIF\n"
+
+ /* Load fence and check result availability */
+ "UMAD TEMP[5].x, TEMP[1].xxxx, CONST[0][0].yyyy, CONST[0][1].xxxx\n"
+ "LOAD TEMP[5].x, BUFFER[0], TEMP[5].xxxx\n"
+ "ISHR TEMP[0].z, TEMP[5].xxxx, IMM[0].yyyy\n"
+ "NOT TEMP[0].z, TEMP[0].zzzz\n"
+ "UIF TEMP[0].zzzz\n"
+ "BRK\n"
+ "ENDIF\n"
+
+ "MOV TEMP[1].y, IMM[0].xxxx\n"
+ "BGNLOOP\n"
+ /* Load start and end. */
+ "UMUL TEMP[5].x, TEMP[1].xxxx, CONST[0][0].yyyy\n"
+ "UMAD TEMP[5].x, TEMP[1].yyyy, CONST[0][1].yyyy, TEMP[5].xxxx\n"
+ "LOAD TEMP[2].xy, BUFFER[0], TEMP[5].xxxx\n"
+
+ "UADD TEMP[5].y, TEMP[5].xxxx, CONST[0][0].xxxx\n"
+ "LOAD TEMP[3].xy, BUFFER[0], TEMP[5].yyyy\n"
+
+ "U64ADD TEMP[4].xy, TEMP[3], -TEMP[2]\n"
+
+ "AND TEMP[5].z, CONST[0][0].wwww, IMM[4].xxxx\n"
+ "UIF TEMP[5].zzzz\n"
+ /* Load second start/end half-pair and
+ * take the difference
+ */
+ "UADD TEMP[5].xy, TEMP[5], IMM[1].wwww\n"
+ "LOAD TEMP[2].xy, BUFFER[0], TEMP[5].xxxx\n"
+ "LOAD TEMP[3].xy, BUFFER[0], TEMP[5].yyyy\n"
+
+ "U64ADD TEMP[3].xy, TEMP[3], -TEMP[2]\n"
+ "U64ADD TEMP[4].xy, TEMP[4], -TEMP[3]\n"
+ "ENDIF\n"
+
+ "U64ADD TEMP[0].xy, TEMP[0], TEMP[4]\n"
+
+ /* Increment pair index */
+ "UADD TEMP[1].y, TEMP[1].yyyy, IMM[1].xxxx\n"
+ "USGE TEMP[5], TEMP[1].yyyy, CONST[0][1].zzzz\n"
+ "UIF TEMP[5]\n"
+ "BRK\n"
+ "ENDIF\n"
+ "ENDLOOP\n"
+
+ /* Increment result index */
+ "UADD TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx\n"
+ "ENDLOOP\n"
+ "ENDIF\n"
+
+ "AND TEMP[4], CONST[0][0].wwww, IMM[1].yyyy\n"
+ "UIF TEMP[4]\n"
+ /* Store accumulated data for chaining. */
+ "STORE BUFFER[2].xyz, IMM[0].xxxx, TEMP[0]\n"
+ "ELSE\n"
+ "AND TEMP[4], CONST[0][0].wwww, IMM[1].zzzz\n"
+ "UIF TEMP[4]\n"
+ /* Store result availability. */
+ "NOT TEMP[0].z, TEMP[0]\n"
+ "AND TEMP[0].z, TEMP[0].zzzz, IMM[1].xxxx\n"
+ "STORE BUFFER[2].x, IMM[0].xxxx, TEMP[0].zzzz\n"
+
+ "AND TEMP[4], CONST[0][0].wwww, IMM[2].zzzz\n"
+ "UIF TEMP[4]\n"
+ "STORE BUFFER[2].y, IMM[0].xxxx, IMM[0].xxxx\n"
+ "ENDIF\n"
+ "ELSE\n"
+ /* Store result if it is available. */
+ "NOT TEMP[4], TEMP[0].zzzz\n"
+ "UIF TEMP[4]\n"
+ /* Apply timestamp conversion */
+ "AND TEMP[4], CONST[0][0].wwww, IMM[2].yyyy\n"
+ "UIF TEMP[4]\n"
+ "U64MUL TEMP[0].xy, TEMP[0], IMM[3].xyxy\n"
+ "U64DIV TEMP[0].xy, TEMP[0], IMM[3].zwzw\n"
+ "ENDIF\n"
+
+ /* Convert to boolean */
+ "AND TEMP[4], CONST[0][0].wwww, IMM[1].wwww\n"
+ "UIF TEMP[4]\n"
+ "U64SNE TEMP[0].x, TEMP[0].xyxy, IMM[4].zwzw\n"
+ "AND TEMP[0].x, TEMP[0].xxxx, IMM[1].xxxx\n"
+ "MOV TEMP[0].y, IMM[0].xxxx\n"
+ "ENDIF\n"
+
+ "AND TEMP[4], CONST[0][0].wwww, IMM[2].zzzz\n"
+ "UIF TEMP[4]\n"
+ "STORE BUFFER[2].xy, IMM[0].xxxx, TEMP[0].xyxy\n"
+ "ELSE\n"
+ /* Clamping */
+ "UIF TEMP[0].yyyy\n"
+ "MOV TEMP[0].x, IMM[0].wwww\n"
+ "ENDIF\n"
+
+ "AND TEMP[4], CONST[0][0].wwww, IMM[2].wwww\n"
+ "UIF TEMP[4]\n"
+ "UMIN TEMP[0].x, TEMP[0].xxxx, IMM[0].zzzz\n"
+ "ENDIF\n"
+
+ "STORE BUFFER[2].x, IMM[0].xxxx, TEMP[0].xxxx\n"
+ "ENDIF\n"
+ "ENDIF\n"
+ "ENDIF\n"
+ "ENDIF\n"
+
+ "END\n";
+
+ /* The template contains a single %u; the extra 32 bytes leave room
+ * for the formatted clock frequency. */
+ char text[sizeof(text_tmpl) + 32];
+ struct tgsi_token tokens[1024];
+ struct pipe_compute_state state = {};
+
+ /* Hard code the frequency into the shader so that the backend can
+ * use the full range of optimizations for divide-by-constant.
+ */
+ snprintf(text, sizeof(text), text_tmpl,
+ rctx->screen->info.clock_crystal_freq);
+
+ /* Translation failure is treated as a programming error. */
+ if (!tgsi_text_translate(text, tokens, ARRAY_SIZE(tokens))) {
+ assert(false);
+ return;
+ }
+
+ state.ir_type = PIPE_SHADER_IR_TGSI;
+ state.prog = tokens;
+
+ rctx->query_result_shader = rctx->b.create_compute_state(&rctx->b, &state);
+}
+
+/* Re-bind the compute state, constant buffer 0 and shader buffers 0..2 that
+ * were saved in st, then drop the references st holds on those buffers.
+ */
+static void r600_restore_qbo_state(struct r600_common_context *rctx,
+ struct r600_qbo_state *st)
+{
+ struct pipe_context *pipe = &rctx->b;
+ unsigned slot;
+
+ pipe->bind_compute_state(pipe, st->saved_compute);
+
+ pipe->set_constant_buffer(pipe, PIPE_SHADER_COMPUTE, 0, &st->saved_const0);
+ pipe_resource_reference(&st->saved_const0.buffer, NULL);
+
+ pipe->set_shader_buffers(pipe, PIPE_SHADER_COMPUTE, 0, 3, st->saved_ssbo);
+ for (slot = 0; slot < 3; ++slot)
+ pipe_resource_reference(&st->saved_ssbo[slot].buffer, NULL);
+}
+
+/* Write the result of a hardware query into a buffer resource on the GPU.
+ *
+ * The result shader is created on first use. One single-thread compute grid
+ * is launched per buffer in the query's chain; intermediate sums are passed
+ * through a 16-byte temporary buffer and the last launch stores into
+ * resource at offset. A negative index requests the availability flag
+ * instead of the value. The caller's compute state, constant buffer 0 and
+ * shader buffers 0..2 are saved beforehand and restored afterwards.
+ *
+ * The function returns without writing anything when the shader or the
+ * temporary buffer cannot be obtained.
+ */
+static void r600_query_hw_get_result_resource(struct r600_common_context *rctx,
+ struct r600_query *rquery,
+ bool wait,
+ enum pipe_query_value_type result_type,
+ int index,
+ struct pipe_resource *resource,
+ unsigned offset)
+{
+ struct r600_query_hw *query = (struct r600_query_hw *)rquery;
+ struct r600_query_buffer *qbuf;
+ struct r600_query_buffer *qbuf_prev;
+ struct pipe_resource *tmp_buffer = NULL;
+ unsigned tmp_buffer_offset = 0;
+ struct r600_qbo_state saved_state = {};
+ struct pipe_grid_info grid = {};
+ struct pipe_constant_buffer constant_buffer = {};
+ struct pipe_shader_buffer ssbo[3];
+ struct r600_hw_query_params params;
+ /* Mirrors the CONST layout read by the result shader. */
+ struct {
+ uint32_t end_offset;
+ uint32_t result_stride;
+ uint32_t result_count;
+ uint32_t config;
+ uint32_t fence_offset;
+ uint32_t pair_stride;
+ uint32_t pair_count;
+ } consts;
+
+ if (!rctx->query_result_shader) {
+ r600_create_query_result_shader(rctx);
+ if (!rctx->query_result_shader)
+ return;
+ }
+
+ /* A summary buffer is only needed when results span several buffers. */
+ if (query->buffer.previous) {
+ u_suballocator_alloc(rctx->allocator_zeroed_memory, 16, 16,
+ &tmp_buffer_offset, &tmp_buffer);
+ if (!tmp_buffer)
+ return;
+ }
+
+ rctx->save_qbo_state(&rctx->b, &saved_state);
+
+ /* Offsets handed to the shader are relative to start_offset, which
+ * becomes the offset of the BUFFER[0] binding below. */
+ r600_get_hw_query_params(rctx, query, index >= 0 ? index : 0, &params);
+ consts.end_offset = params.end_offset - params.start_offset;
+ consts.fence_offset = params.fence_offset - params.start_offset;
+ consts.result_stride = query->result_size;
+ consts.pair_stride = params.pair_stride;
+ consts.pair_count = params.pair_count;
+
+ constant_buffer.buffer_size = sizeof(consts);
+ constant_buffer.user_buffer = &consts;
+
+ ssbo[1].buffer = tmp_buffer;
+ ssbo[1].buffer_offset = tmp_buffer_offset;
+ ssbo[1].buffer_size = 16;
+
+ ssbo[2] = ssbo[1];
+
+ rctx->b.bind_compute_state(&rctx->b, rctx->query_result_shader);
+
+ grid.block[0] = 1;
+ grid.block[1] = 1;
+ grid.block[2] = 1;
+ grid.grid[0] = 1;
+ grid.grid[1] = 1;
+ grid.grid[2] = 1;
+
+ /* Bit values match the 0.w bit field of the result shader. */
+ consts.config = 0;
+ if (index < 0)
+ consts.config |= 4;
+ if (query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE)
+ consts.config |= 8;
+ else if (query->b.type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
+ query->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
+ consts.config |= 8 | 256;
+ else if (query->b.type == PIPE_QUERY_TIMESTAMP ||
+ query->b.type == PIPE_QUERY_TIME_ELAPSED)
+ consts.config |= 32;
+
+ switch (result_type) {
+ case PIPE_QUERY_TYPE_U64:
+ case PIPE_QUERY_TYPE_I64:
+ consts.config |= 64;
+ break;
+ case PIPE_QUERY_TYPE_I32:
+ consts.config |= 128;
+ break;
+ case PIPE_QUERY_TYPE_U32:
+ break;
+ }
+
+ rctx->flags |= rctx->screen->barrier_flags.cp_to_L2;
+
+ for (qbuf = &query->buffer; qbuf; qbuf = qbuf_prev) {
+ if (query->b.type != PIPE_QUERY_TIMESTAMP) {
+ qbuf_prev = qbuf->previous;
+ consts.result_count = qbuf->results_end / query->result_size;
+ /* Chaining bits: 1 = read previous sum, 2 = write sum. */
+ consts.config &= ~3;
+ if (qbuf != &query->buffer)
+ consts.config |= 1;
+ if (qbuf->previous)
+ consts.config |= 2;
+ } else {
+ /* Only read the last timestamp. */
+ qbuf_prev = NULL;
+ consts.result_count = 0;
+ consts.config |= 16;
+ params.start_offset += qbuf->results_end - query->result_size;
+ }
+
+ rctx->b.set_constant_buffer(&rctx->b, PIPE_SHADER_COMPUTE, 0, &constant_buffer);
+
+ ssbo[0].buffer = &qbuf->buf->b.b;
+ ssbo[0].buffer_offset = params.start_offset;
+ ssbo[0].buffer_size = qbuf->results_end - params.start_offset;
+
+ /* The last buffer of the chain writes to the user resource. */
+ if (!qbuf->previous) {
+ ssbo[2].buffer = resource;
+ ssbo[2].buffer_offset = offset;
+ ssbo[2].buffer_size = 8;
+
+ }
+
+ rctx->b.set_shader_buffers(&rctx->b, PIPE_SHADER_COMPUTE, 0, 3, ssbo);
+
+ if (wait && qbuf == &query->buffer) {
+ uint64_t va;
+
+ /* Wait for result availability. Wait only for readiness
+ * of the last entry, since the fence writes should be
+ * serialized in the CP.
+ */
+ va = qbuf->buf->gpu_address + qbuf->results_end - query->result_size;
+ va += params.fence_offset;
+
+ r600_gfx_wait_fence(rctx, va, 0x80000000, 0x80000000);
+ }
+
+ rctx->b.launch_grid(&rctx->b, &grid);
+ rctx->flags |= rctx->screen->barrier_flags.compute_to_L2;
+ }
+
+ r600_restore_qbo_state(rctx, &saved_state);
+ pipe_resource_reference(&tmp_buffer, NULL);
+}
+
+/* pipe_context render_condition hook: record the query, condition and mode
+ * for conditional rendering and size the predication atom accordingly.
+ *
+ * query may be NULL, which disables the condition: the atom size is 0 and
+ * the atom is marked not dirty.
+ */
+static void r600_render_condition(struct pipe_context *ctx,
+ struct pipe_query *query,
+ boolean condition,
+ enum pipe_render_cond_flag mode)
+{
+ struct r600_common_context *rctx = (struct r600_common_context *)ctx;
+ /* Only dereferenced below when query is non-NULL. */
+ struct r600_query_hw *rquery = (struct r600_query_hw *)query;
+ struct r600_query_buffer *qbuf;
+ struct r600_atom *atom = &rctx->render_cond_atom;
+
+ /* Compute the size of SET_PREDICATION packets. */
+ atom->num_dw = 0;
+ if (query) {
+ /* 5 dwords per result slot in every buffer of the chain. */
+ for (qbuf = &rquery->buffer; qbuf; qbuf = qbuf->previous)
+ atom->num_dw += (qbuf->results_end / rquery->result_size) * 5;
+
+ if (rquery->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
+ atom->num_dw *= R600_MAX_STREAMS;
+ }
+
+ rctx->render_cond = query;
+ rctx->render_cond_invert = condition;
+ rctx->render_cond_mode = mode;
+
+ rctx->set_atom_dirty(rctx, atom, query != NULL);
+}
+
+/* Emit a stop for every query on the context's active list. Afterwards the
+ * suspend dword counter is expected to be back at zero.
+ */
+void r600_suspend_queries(struct r600_common_context *ctx)
+{
+ struct r600_query_hw *active;
+
+ LIST_FOR_EACH_ENTRY(active, &ctx->active_queries, list)
+ r600_query_hw_emit_stop(ctx, active);
+
+ assert(ctx->num_cs_dw_queries_suspend == 0);
+}
+
+/* Estimate how many command-stream dwords are needed to resume every query
+ * on query_list.
+ */
+static unsigned r600_queries_num_cs_dw_for_resuming(struct r600_common_context *ctx,
+ struct list_head *query_list)
+{
+ struct r600_query_hw *q;
+ unsigned total = 0;
+
+ LIST_FOR_EACH_ENTRY(q, query_list, list) {
+ /* begin + end, and the end size once more as a workaround
+ * for the fact that num_cs_dw_nontimer_queries_suspend is
+ * incremented for every resumed query, which raises the bar
+ * in need_cs_space for queries about to be resumed.
+ */
+ total += q->num_cs_dw_begin + q->num_cs_dw_end;
+ total += q->num_cs_dw_end;
+ }
+
+ /* primitives generated query */
+ total += ctx->streamout.enable_atom.num_dw;
+ /* guess for ZPASS enable or PERFECT_ZPASS_COUNT enable updates */
+ total += 13;
+
+ return total;
+}
+
+/* Emit a start for every query on the context's active list, after making
+ * sure the command stream has room for all of them.
+ */
+void r600_resume_queries(struct r600_common_context *ctx)
+{
+ struct r600_query_hw *active;
+ unsigned needed_dw = r600_queries_num_cs_dw_for_resuming(ctx, &ctx->active_queries);
+
+ assert(ctx->num_cs_dw_queries_suspend == 0);
+
+ /* Check CS space here. Resuming must not be interrupted by flushes. */
+ ctx->need_gfx_cs_space(&ctx->b, needed_dw, true);
+
+ LIST_FOR_EACH_ENTRY(active, &ctx->active_queries, list)
+ r600_query_hw_emit_start(ctx, active);
+}
+
+/* Fix radeon_info::enabled_rb_mask for R600, R700, EVERGREEN, NI.
+ *
+ * When the kernel reports a valid backend map, the mask is built from the
+ * per-tile-pipe entries of that map. Otherwise a ZPASS_DONE event is
+ * written into a staging buffer through the aux context and every render
+ * backend whose result slot is non-zero is considered enabled.
+ * enabled_rb_mask is only overwritten when a non-zero mask was found.
+ */
+void r600_query_fix_enabled_rb_mask(struct r600_common_screen *rscreen)
+{
+ struct r600_common_context *ctx =
+ (struct r600_common_context*)rscreen->aux_context;
+ struct radeon_winsys_cs *cs = ctx->gfx.cs;
+ struct r600_resource *buffer;
+ uint32_t *results;
+ unsigned i, mask = 0;
+ unsigned max_rbs = ctx->screen->info.num_render_backends;
+
+ assert(rscreen->chip_class <= CAYMAN);
+
+ /* if backend_map query is supported by the kernel */
+ if (rscreen->info.r600_gb_backend_map_valid) {
+ unsigned num_tile_pipes = rscreen->info.num_tile_pipes;
+ unsigned backend_map = rscreen->info.r600_gb_backend_map;
+ unsigned item_width, item_mask;
+
+ if (ctx->chip_class >= EVERGREEN) {
+ item_width = 4;
+ item_mask = 0x7;
+ } else {
+ item_width = 2;
+ item_mask = 0x3;
+ }
+
+ /* Each tile pipe owns one item_width-bit field naming its
+ * render backend. */
+ while (num_tile_pipes--) {
+ i = backend_map & item_mask;
+ mask |= (1<<i);
+ backend_map >>= item_width;
+ }
+ if (mask != 0) {
+ rscreen->info.enabled_rb_mask = mask;
+ return;
+ }
+ }
+
+ /* otherwise backup path for older kernels */
+
+ /* create buffer for event data */
+ buffer = (struct r600_resource*)
+ pipe_buffer_create(ctx->b.screen, 0,
+ PIPE_USAGE_STAGING, max_rbs * 16);
+ if (!buffer)
+ return;
+
+ /* initialize buffer with zeroes */
+ results = r600_buffer_map_sync_with_rings(ctx, buffer, PIPE_TRANSFER_WRITE);
+ if (results) {
+ memset(results, 0, max_rbs * 4 * 4);
+
+ /* emit EVENT_WRITE for ZPASS_DONE */
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
+ radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
+ radeon_emit(cs, buffer->gpu_address);
+ radeon_emit(cs, buffer->gpu_address >> 32);
+
+ r600_emit_reloc(ctx, &ctx->gfx, buffer,
+ RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
+
+ /* analyze results */
+ results = r600_buffer_map_sync_with_rings(ctx, buffer, PIPE_TRANSFER_READ);
+ if (results) {
+ for(i = 0; i < max_rbs; i++) {
+ /* at least highest bit will be set if backend is used */
+ if (results[i*4 + 1])
+ mask |= (1<<i);
+ }
+ }
+ }
+
+ /* Release the staging buffer on every path past its creation. */
+ r600_resource_reference(&buffer, NULL);
+
+ if (mask)
+ rscreen->info.enabled_rb_mask = mask;
+}
+
+/* XFULL builds one pipe_driver_query_info initializer; the query, value and
+ * result type arguments are pasted onto the R600_QUERY_,
+ * PIPE_DRIVER_QUERY_TYPE_ and PIPE_DRIVER_QUERY_RESULT_TYPE_ prefixes.
+ */
+#define XFULL(name_, query_type_, type_, result_type_, group_id_) \
+ { \
+ .name = name_, \
+ .query_type = R600_QUERY_##query_type_, \
+ .type = PIPE_DRIVER_QUERY_TYPE_##type_, \
+ .result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_##result_type_, \
+ .group_id = group_id_ \
+ }
+
+/* Entry without a group: group_id is ~0. */
+#define X(name_, query_type_, type_, result_type_) \
+ XFULL(name_, query_type_, type_, result_type_, ~(unsigned)0)
+
+/* Entry belonging to the software query group R600_QUERY_GROUP_<group_>. */
+#define XG(group_, name_, query_type_, type_, result_type_) \
+ XFULL(name_, query_type_, type_, result_type_, R600_QUERY_GROUP_##group_)
+
+/* Driver-specific queries. The order matters: r600_get_num_queries() hides
+ * a fixed number of entries at the end of this list on older kernels.
+ */
+static struct pipe_driver_query_info r600_driver_query_list[] = {
+ X("num-compilations", NUM_COMPILATIONS, UINT64, CUMULATIVE),
+ X("num-shaders-created", NUM_SHADERS_CREATED, UINT64, CUMULATIVE),
+ X("num-shader-cache-hits", NUM_SHADER_CACHE_HITS, UINT64, CUMULATIVE),
+ X("draw-calls", DRAW_CALLS, UINT64, AVERAGE),
+ X("decompress-calls", DECOMPRESS_CALLS, UINT64, AVERAGE),
+ X("MRT-draw-calls", MRT_DRAW_CALLS, UINT64, AVERAGE),
+ X("prim-restart-calls", PRIM_RESTART_CALLS, UINT64, AVERAGE),
+ X("spill-draw-calls", SPILL_DRAW_CALLS, UINT64, AVERAGE),
+ X("compute-calls", COMPUTE_CALLS, UINT64, AVERAGE),
+ X("spill-compute-calls", SPILL_COMPUTE_CALLS, UINT64, AVERAGE),
+ X("dma-calls", DMA_CALLS, UINT64, AVERAGE),
+ X("cp-dma-calls", CP_DMA_CALLS, UINT64, AVERAGE),
+ X("num-vs-flushes", NUM_VS_FLUSHES, UINT64, AVERAGE),
+ X("num-ps-flushes", NUM_PS_FLUSHES, UINT64, AVERAGE),
+ X("num-cs-flushes", NUM_CS_FLUSHES, UINT64, AVERAGE),
+ X("num-CB-cache-flushes", NUM_CB_CACHE_FLUSHES, UINT64, AVERAGE),
+ X("num-DB-cache-flushes", NUM_DB_CACHE_FLUSHES, UINT64, AVERAGE),
+ X("num-L2-invalidates", NUM_L2_INVALIDATES, UINT64, AVERAGE),
+ X("num-L2-writebacks", NUM_L2_WRITEBACKS, UINT64, AVERAGE),
+ X("num-resident-handles", NUM_RESIDENT_HANDLES, UINT64, AVERAGE),
+ X("tc-offloaded-slots", TC_OFFLOADED_SLOTS, UINT64, AVERAGE),
+ X("tc-direct-slots", TC_DIRECT_SLOTS, UINT64, AVERAGE),
+ X("tc-num-syncs", TC_NUM_SYNCS, UINT64, AVERAGE),
+ X("CS-thread-busy", CS_THREAD_BUSY, UINT64, AVERAGE),
+ X("gallium-thread-busy", GALLIUM_THREAD_BUSY, UINT64, AVERAGE),
+ X("requested-VRAM", REQUESTED_VRAM, BYTES, AVERAGE),
+ X("requested-GTT", REQUESTED_GTT, BYTES, AVERAGE),
+ X("mapped-VRAM", MAPPED_VRAM, BYTES, AVERAGE),
+ X("mapped-GTT", MAPPED_GTT, BYTES, AVERAGE),
+ X("buffer-wait-time", BUFFER_WAIT_TIME, MICROSECONDS, CUMULATIVE),
+ X("num-mapped-buffers", NUM_MAPPED_BUFFERS, UINT64, AVERAGE),
+ X("num-GFX-IBs", NUM_GFX_IBS, UINT64, AVERAGE),
+ X("num-SDMA-IBs", NUM_SDMA_IBS, UINT64, AVERAGE),
+ X("GFX-BO-list-size", GFX_BO_LIST_SIZE, UINT64, AVERAGE),
+ X("num-bytes-moved", NUM_BYTES_MOVED, BYTES, CUMULATIVE),
+ X("num-evictions", NUM_EVICTIONS, UINT64, CUMULATIVE),
+ X("VRAM-CPU-page-faults", NUM_VRAM_CPU_PAGE_FAULTS, UINT64, CUMULATIVE),
+ X("VRAM-usage", VRAM_USAGE, BYTES, AVERAGE),
+ X("VRAM-vis-usage", VRAM_VIS_USAGE, BYTES, AVERAGE),
+ X("GTT-usage", GTT_USAGE, BYTES, AVERAGE),
+
+ /* GPIN queries are for the benefit of old versions of GPUPerfStudio,
+ * which use it as a fallback path to detect the GPU type.
+ *
+ * Note: The names of these queries are significant for GPUPerfStudio
+ * (and possibly their order as well). */
+ XG(GPIN, "GPIN_000", GPIN_ASIC_ID, UINT, AVERAGE),
+ XG(GPIN, "GPIN_001", GPIN_NUM_SIMD, UINT, AVERAGE),
+ XG(GPIN, "GPIN_002", GPIN_NUM_RB, UINT, AVERAGE),
+ XG(GPIN, "GPIN_003", GPIN_NUM_SPI, UINT, AVERAGE),
+ XG(GPIN, "GPIN_004", GPIN_NUM_SE, UINT, AVERAGE),
+
+ X("temperature", GPU_TEMPERATURE, UINT64, AVERAGE),
+ X("shader-clock", CURRENT_GPU_SCLK, HZ, AVERAGE),
+ X("memory-clock", CURRENT_GPU_MCLK, HZ, AVERAGE),
+
+ /* The following queries must be at the end of the list because their
+ * availability is adjusted dynamically based on the DRM version. */
+ X("GPU-load", GPU_LOAD, UINT64, AVERAGE),
+ X("GPU-shaders-busy", GPU_SHADERS_BUSY, UINT64, AVERAGE),
+ X("GPU-ta-busy", GPU_TA_BUSY, UINT64, AVERAGE),
+ X("GPU-gds-busy", GPU_GDS_BUSY, UINT64, AVERAGE),
+ X("GPU-vgt-busy", GPU_VGT_BUSY, UINT64, AVERAGE),
+ X("GPU-ia-busy", GPU_IA_BUSY, UINT64, AVERAGE),
+ X("GPU-sx-busy", GPU_SX_BUSY, UINT64, AVERAGE),
+ X("GPU-wd-busy", GPU_WD_BUSY, UINT64, AVERAGE),
+ X("GPU-bci-busy", GPU_BCI_BUSY, UINT64, AVERAGE),
+ X("GPU-sc-busy", GPU_SC_BUSY, UINT64, AVERAGE),
+ X("GPU-pa-busy", GPU_PA_BUSY, UINT64, AVERAGE),
+ X("GPU-db-busy", GPU_DB_BUSY, UINT64, AVERAGE),
+ X("GPU-cp-busy", GPU_CP_BUSY, UINT64, AVERAGE),
+ X("GPU-cb-busy", GPU_CB_BUSY, UINT64, AVERAGE),
+ X("GPU-sdma-busy", GPU_SDMA_BUSY, UINT64, AVERAGE),
+ X("GPU-pfp-busy", GPU_PFP_BUSY, UINT64, AVERAGE),
+ X("GPU-meq-busy", GPU_MEQ_BUSY, UINT64, AVERAGE),
+ X("GPU-me-busy", GPU_ME_BUSY, UINT64, AVERAGE),
+ X("GPU-surf-sync-busy", GPU_SURF_SYNC_BUSY, UINT64, AVERAGE),
+ X("GPU-cp-dma-busy", GPU_CP_DMA_BUSY, UINT64, AVERAGE),
+ X("GPU-scratch-ram-busy", GPU_SCRATCH_RAM_BUSY, UINT64, AVERAGE),
+};
+
+#undef X
+#undef XG
+#undef XFULL
+
+/* Number of entries of r600_driver_query_list exposed on this kernel.
+ *
+ * The whole list is available when the DRM major version is 2 and the minor
+ * version is at least 42. Otherwise the trailing entries are hidden: the 21
+ * "GPU-*" queries plus "temperature", "shader-clock" and "memory-clock",
+ * i.e. 24 entries. Subtracting 25, as was done before, also cut off
+ * "GPIN_004" although the GPIN group always reports five queries.
+ */
+static unsigned r600_get_num_queries(struct r600_common_screen *rscreen)
+{
+ /* Must match the number of list entries following the GPIN block. */
+ const unsigned num_hidden_queries = 24;
+
+ if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 42)
+ return ARRAY_SIZE(r600_driver_query_list);
+
+ return ARRAY_SIZE(r600_driver_query_list) - num_hidden_queries;
+}
+
+/* pipe_screen get_driver_query_info hook.
+ *
+ * With info == NULL, returns the total number of driver queries (list
+ * entries exposed on this kernel plus perfcounters). Otherwise fills info
+ * for the query at index: indices past the list are forwarded to
+ * r600_get_perfcounter_info(), list entries are copied and return 1.
+ */
+static int r600_get_driver_query_info(struct pipe_screen *screen,
+ unsigned index,
+ struct pipe_driver_query_info *info)
+{
+ struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
+ unsigned num_queries = r600_get_num_queries(rscreen);
+
+ if (!info) {
+ unsigned num_perfcounters =
+ r600_get_perfcounter_info(rscreen, 0, NULL);
+
+ return num_queries + num_perfcounters;
+ }
+
+ if (index >= num_queries)
+ return r600_get_perfcounter_info(rscreen, index - num_queries, info);
+
+ *info = r600_driver_query_list[index];
+
+ /* Fill in maxima that depend on the actual device. */
+ switch (info->query_type) {
+ case R600_QUERY_REQUESTED_VRAM:
+ case R600_QUERY_VRAM_USAGE:
+ case R600_QUERY_MAPPED_VRAM:
+ info->max_value.u64 = rscreen->info.vram_size;
+ break;
+ case R600_QUERY_REQUESTED_GTT:
+ case R600_QUERY_GTT_USAGE:
+ case R600_QUERY_MAPPED_GTT:
+ info->max_value.u64 = rscreen->info.gart_size;
+ break;
+ case R600_QUERY_GPU_TEMPERATURE:
+ info->max_value.u64 = 125;
+ break;
+ case R600_QUERY_VRAM_VIS_USAGE:
+ info->max_value.u64 = rscreen->info.vram_vis_size;
+ break;
+ }
+
+ /* Software groups are numbered after the perfcounter groups, so
+ * grouped entries are shifted by the number of perfcounter groups. */
+ if (info->group_id != ~(unsigned)0 && rscreen->perfcounters)
+ info->group_id += rscreen->perfcounters->num_groups;
+
+ return 1;
+}
+
+/* pipe_screen get_driver_query_group_info hook.
+ *
+ * With info == NULL, returns the number of groups: the perfcounter groups
+ * (if any) plus R600_NUM_SW_QUERY_GROUPS. Otherwise perfcounter groups come
+ * first and are described by r600_get_perfcounter_group_info(); the
+ * following software group is reported as "GPIN" with five queries. Returns
+ * 0 for an index past the last group and 1 after filling in a software group.
+ *
+ * Note: Unfortunately, GPUPerfStudio hardcodes the order of hardware
+ * performance counter groups, so be careful when changing this and related
+ * functions.
+ */
+static int r600_get_driver_query_group_info(struct pipe_screen *screen,
+ unsigned index,
+ struct pipe_driver_query_group_info *info)
+{
+ struct r600_common_screen *rscreen = (struct r600_common_screen *)screen;
+ unsigned num_pc_groups = 0;
+
+ if (rscreen->perfcounters)
+ num_pc_groups = rscreen->perfcounters->num_groups;
+
+ if (!info)
+ return num_pc_groups + R600_NUM_SW_QUERY_GROUPS;
+
+ if (index < num_pc_groups)
+ return r600_get_perfcounter_group_info(rscreen, index, info);
+
+ /* Make index relative to the first software group. */
+ index -= num_pc_groups;
+ if (index >= R600_NUM_SW_QUERY_GROUPS)
+ return 0;
+
+ info->name = "GPIN";
+ info->max_active_queries = 5;
+ info->num_queries = 5;
+ return 1;
+}
+
+/* Install the query-related pipe_context hooks and the predication atom
+ * emitter, and start with an empty list of active queries.
+ * render_condition is only installed when the screen reports at least one
+ * render backend.
+ */
+void r600_query_init(struct r600_common_context *rctx)
+{
+ struct pipe_context *pipe = &rctx->b;
+ struct r600_common_screen *rscreen = (struct r600_common_screen*)pipe->screen;
+
+ LIST_INITHEAD(&rctx->active_queries);
+
+ pipe->create_query = r600_create_query;
+ pipe->create_batch_query = r600_create_batch_query;
+ pipe->destroy_query = r600_destroy_query;
+ pipe->begin_query = r600_begin_query;
+ pipe->end_query = r600_end_query;
+ pipe->get_query_result = r600_get_query_result;
+ pipe->get_query_result_resource = r600_get_query_result_resource;
+ rctx->render_cond_atom.emit = r600_emit_query_predication;
+
+ if (rscreen->info.num_render_backends > 0)
+ pipe->render_condition = r600_render_condition;
+}
+
+/* Install the driver-query enumeration hooks on the screen. */
+void r600_init_screen_query_functions(struct r600_common_screen *rscreen)
+{
+ struct pipe_screen *pscreen = &rscreen->b;
+
+ pscreen->get_driver_query_group_info = r600_get_driver_query_group_info;
+ pscreen->get_driver_query_info = r600_get_driver_query_info;
+}
diff -Nru mesa-17.2.4/src/gallium/drivers/r600/r600_query.h mesa-17.3.3/src/gallium/drivers/r600/r600_query.h
--- mesa-17.2.4/src/gallium/drivers/r600/r600_query.h 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r600/r600_query.h 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,326 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Nicolai Hähnle
+ *
+ */
+
+#ifndef R600_QUERY_H
+#define R600_QUERY_H
+
+#include "util/u_threaded_context.h"
+
+struct pipe_context;
+struct pipe_query;
+struct pipe_resource;
+
+struct r600_common_context;
+struct r600_common_screen;
+struct r600_query;
+struct r600_query_hw;
+struct r600_resource;
+
+enum { /* Driver-specific query types, numbered upward from PIPE_QUERY_DRIVER_SPECIFIC. */
+ R600_QUERY_DRAW_CALLS = PIPE_QUERY_DRIVER_SPECIFIC,
+ R600_QUERY_DECOMPRESS_CALLS,
+ R600_QUERY_MRT_DRAW_CALLS,
+ R600_QUERY_PRIM_RESTART_CALLS,
+ R600_QUERY_SPILL_DRAW_CALLS,
+ R600_QUERY_COMPUTE_CALLS,
+ R600_QUERY_SPILL_COMPUTE_CALLS,
+ R600_QUERY_DMA_CALLS,
+ R600_QUERY_CP_DMA_CALLS,
+ R600_QUERY_NUM_VS_FLUSHES,
+ R600_QUERY_NUM_PS_FLUSHES,
+ R600_QUERY_NUM_CS_FLUSHES,
+ R600_QUERY_NUM_CB_CACHE_FLUSHES,
+ R600_QUERY_NUM_DB_CACHE_FLUSHES,
+ R600_QUERY_NUM_L2_INVALIDATES,
+ R600_QUERY_NUM_L2_WRITEBACKS,
+ R600_QUERY_NUM_RESIDENT_HANDLES,
+ R600_QUERY_TC_OFFLOADED_SLOTS,
+ R600_QUERY_TC_DIRECT_SLOTS,
+ R600_QUERY_TC_NUM_SYNCS,
+ R600_QUERY_CS_THREAD_BUSY,
+ R600_QUERY_GALLIUM_THREAD_BUSY,
+ R600_QUERY_REQUESTED_VRAM,
+ R600_QUERY_REQUESTED_GTT,
+ R600_QUERY_MAPPED_VRAM,
+ R600_QUERY_MAPPED_GTT,
+ R600_QUERY_BUFFER_WAIT_TIME,
+ R600_QUERY_NUM_MAPPED_BUFFERS,
+ R600_QUERY_NUM_GFX_IBS,
+ R600_QUERY_NUM_SDMA_IBS,
+ R600_QUERY_GFX_BO_LIST_SIZE,
+ R600_QUERY_NUM_BYTES_MOVED,
+ R600_QUERY_NUM_EVICTIONS,
+ R600_QUERY_NUM_VRAM_CPU_PAGE_FAULTS,
+ R600_QUERY_VRAM_USAGE,
+ R600_QUERY_VRAM_VIS_USAGE,
+ R600_QUERY_GTT_USAGE,
+ R600_QUERY_GPU_TEMPERATURE,
+ R600_QUERY_CURRENT_GPU_SCLK,
+ R600_QUERY_CURRENT_GPU_MCLK,
+ R600_QUERY_GPU_LOAD,
+ R600_QUERY_GPU_SHADERS_BUSY,
+ R600_QUERY_GPU_TA_BUSY,
+ R600_QUERY_GPU_GDS_BUSY,
+ R600_QUERY_GPU_VGT_BUSY,
+ R600_QUERY_GPU_IA_BUSY,
+ R600_QUERY_GPU_SX_BUSY,
+ R600_QUERY_GPU_WD_BUSY,
+ R600_QUERY_GPU_BCI_BUSY,
+ R600_QUERY_GPU_SC_BUSY,
+ R600_QUERY_GPU_PA_BUSY,
+ R600_QUERY_GPU_DB_BUSY,
+ R600_QUERY_GPU_CP_BUSY,
+ R600_QUERY_GPU_CB_BUSY,
+ R600_QUERY_GPU_SDMA_BUSY,
+ R600_QUERY_GPU_PFP_BUSY,
+ R600_QUERY_GPU_MEQ_BUSY,
+ R600_QUERY_GPU_ME_BUSY,
+ R600_QUERY_GPU_SURF_SYNC_BUSY,
+ R600_QUERY_GPU_CP_DMA_BUSY,
+ R600_QUERY_GPU_SCRATCH_RAM_BUSY,
+ R600_QUERY_NUM_COMPILATIONS,
+ R600_QUERY_NUM_SHADERS_CREATED,
+ R600_QUERY_NUM_SHADER_CACHE_HITS,
+ R600_QUERY_GPIN_ASIC_ID,
+ R600_QUERY_GPIN_NUM_SIMD,
+ R600_QUERY_GPIN_NUM_RB,
+ R600_QUERY_GPIN_NUM_SPI,
+ R600_QUERY_GPIN_NUM_SE,
+ /* Fixed base at offset 100; NOTE(review): entries above must stay below it to avoid colliding. */
+ R600_QUERY_FIRST_PERFCOUNTER = PIPE_QUERY_DRIVER_SPECIFIC + 100,
+};
+
+enum { /* Query groups reported after the hardware perfcounter groups. */
+ R600_QUERY_GROUP_GPIN = 0,
+ R600_NUM_SW_QUERY_GROUPS /* number of groups listed above */
+};
+
+struct r600_query_ops { /* Callback table pointed to by r600_query::ops. */
+ void (*destroy)(struct r600_common_screen *, struct r600_query *);
+ bool (*begin)(struct r600_common_context *, struct r600_query *);
+ bool (*end)(struct r600_common_context *, struct r600_query *);
+ bool (*get_result)(struct r600_common_context *, /* result is returned through `result` */
+ struct r600_query *, bool wait,
+ union pipe_query_result *result);
+ void (*get_result_resource)(struct r600_common_context *, /* result goes to `resource` at `offset` (presumably; confirm in r600_query.c) */
+ struct r600_query *, bool wait,
+ enum pipe_query_value_type result_type,
+ int index,
+ struct pipe_resource *resource,
+ unsigned offset);
+};
+
+struct r600_query { /* Base query object; r600_query_hw embeds it as its first member. */
+ struct threaded_query b;
+ struct r600_query_ops *ops; /* callbacks for this query */
+
+ /* The type of query */
+ unsigned type;
+};
+
+enum { /* Bit flags; presumably the values stored in r600_query_hw::flags (confirm in r600_query.c). */
+ R600_QUERY_HW_FLAG_NO_START = (1 << 0), /* NOTE(review): meaning not visible in this header */
+ /* gap: bit 1 is not assigned here */
+ /* whether begin_query doesn't clear the result */
+ R600_QUERY_HW_FLAG_BEGIN_RESUMES = (1 << 2),
+};
+
+struct r600_query_hw_ops { /* Callback table pointed to by r600_query_hw::ops. */
+ bool (*prepare_buffer)(struct r600_common_screen *,
+ struct r600_query_hw *,
+ struct r600_resource *);
+ void (*emit_start)(struct r600_common_context *, /* takes the target buffer and a 64-bit address `va` */
+ struct r600_query_hw *,
+ struct r600_resource *buffer, uint64_t va);
+ void (*emit_stop)(struct r600_common_context *, /* same parameters as emit_start */
+ struct r600_query_hw *,
+ struct r600_resource *buffer, uint64_t va);
+ void (*clear_result)(struct r600_query_hw *, union pipe_query_result *);
+ void (*add_result)(struct r600_common_screen *screen, /* presumably accumulates one sample from `buffer` into `result` */
+ struct r600_query_hw *, void *buffer,
+ union pipe_query_result *result);
+};
+
+struct r600_query_buffer { /* One link in a query's chain of result buffers. */
+ /* The buffer where query results are stored. */
+ struct r600_resource *buf;
+ /* Offset of the next free result after current query data */
+ unsigned results_end;
+ /* If a query buffer is full, a new buffer is created and the old one
+ * is put in here. When we calculate the result, we sum up the samples
+ * from all buffers. */
+ struct r600_query_buffer *previous;
+};
+
+struct r600_query_hw { /* Query built on r600_query (member `b`) with its own result buffers. */
+ struct r600_query b;
+ struct r600_query_hw_ops *ops;
+ unsigned flags;
+
+ /* The query buffer and how many results are in it. */
+ struct r600_query_buffer buffer;
+ /* Size of the result in memory for both begin_query and end_query.
+ * This can be one or two numbers, or even the size of a structure. */
+ unsigned result_size;
+ /* The number of dwords for begin_query or end_query. */
+ unsigned num_cs_dw_begin;
+ unsigned num_cs_dw_end;
+ /* Linked list of queries */
+ struct list_head list;
+ /* For transform feedback: which stream the query is for */
+ unsigned stream;
+
+ /* Workaround via compute shader */
+ struct r600_resource *workaround_buf;
+ unsigned workaround_offset;
+};
+/* r600_query_hw helpers; destroy/begin/end/get_result match the r600_query_ops signatures. */
+bool r600_query_hw_init(struct r600_common_screen *rscreen,
+ struct r600_query_hw *query);
+void r600_query_hw_destroy(struct r600_common_screen *rscreen,
+ struct r600_query *rquery);
+bool r600_query_hw_begin(struct r600_common_context *rctx,
+ struct r600_query *rquery);
+bool r600_query_hw_end(struct r600_common_context *rctx,
+ struct r600_query *rquery);
+bool r600_query_hw_get_result(struct r600_common_context *rctx,
+ struct r600_query *rquery,
+ bool wait,
+ union pipe_query_result *result);
+
+/* Performance counters: bit flags describing a counter block (presumably r600_perfcounter_block::flags). */
+enum {
+ /* This block is part of the shader engine */
+ R600_PC_BLOCK_SE = (1 << 0),
+
+ /* Expose per-instance groups instead of summing all instances (within
+ * an SE). */
+ R600_PC_BLOCK_INSTANCE_GROUPS = (1 << 1),
+
+ /* Expose per-SE groups instead of summing instances across SEs. */
+ R600_PC_BLOCK_SE_GROUPS = (1 << 2),
+
+ /* Shader block */
+ R600_PC_BLOCK_SHADER = (1 << 3),
+
+ /* Non-shader block with perfcounters windowed by shaders. */
+ R600_PC_BLOCK_SHADER_WINDOWED = (1 << 4),
+};
+
+/* Describes a hardware block with performance counters. Multiple instances of
+ * each block, possibly per-SE, may exist on the chip. Depending on the block
+ * and on the user's configuration, we either
+ * (a) expose every instance as a performance counter group,
+ * (b) expose a single performance counter group that reports the sum over all
+ * instances, or
+ * (c) expose one performance counter group per instance, but summed over all
+ * shader engines.
+ */
+struct r600_perfcounter_block {
+ const char *basename;
+ unsigned flags; /* presumably R600_PC_BLOCK_* bits; confirm against callers */
+ unsigned num_counters;
+ unsigned num_selectors;
+ unsigned num_instances;
+
+ unsigned num_groups; /* number of groups this block exposes, per (a)-(c) above (presumably) */
+ char *group_names;
+ unsigned group_name_stride; /* presumably the spacing between names in group_names; verify */
+
+ char *selector_names;
+ unsigned selector_name_stride; /* presumably the spacing between names in selector_names; verify */
+
+ void *data; /* opaque pointer supplied via r600_perfcounters_add_block (presumably) */
+};
+
+struct r600_perfcounters { /* Perfcounter state reached through r600_common_screen::perfcounters. */
+ unsigned num_groups; /* group count reported by r600_get_driver_query_group_info */
+ unsigned num_blocks;
+ struct r600_perfcounter_block *blocks;
+
+ unsigned num_start_cs_dwords;
+ unsigned num_stop_cs_dwords;
+ unsigned num_instance_cs_dwords;
+ unsigned num_shaders_cs_dwords;
+
+ unsigned num_shader_types;
+ const char * const *shader_type_suffixes;
+ const unsigned *shader_type_bits;
+
+ void (*get_size)(struct r600_perfcounter_block *, /* outputs via num_select_dw and num_read_dw */
+ unsigned count, unsigned *selectors,
+ unsigned *num_select_dw, unsigned *num_read_dw);
+
+ void (*emit_instance)(struct r600_common_context *,
+ int se, int instance);
+ void (*emit_shaders)(struct r600_common_context *, unsigned shaders);
+ void (*emit_select)(struct r600_common_context *,
+ struct r600_perfcounter_block *,
+ unsigned count, unsigned *selectors);
+ void (*emit_start)(struct r600_common_context *,
+ struct r600_resource *buffer, uint64_t va);
+ void (*emit_stop)(struct r600_common_context *,
+ struct r600_resource *buffer, uint64_t va);
+ void (*emit_read)(struct r600_common_context *,
+ struct r600_perfcounter_block *,
+ unsigned count, unsigned *selectors,
+ struct r600_resource *buffer, uint64_t va);
+
+ void (*cleanup)(struct r600_common_screen *);
+
+ bool separate_se; /* NOTE(review): likely selects the per-SE grouping mode; confirm */
+ bool separate_instance; /* NOTE(review): likely selects the per-instance grouping mode; confirm */
+};
+/* Perfcounter and batch-query entry points; definitions are not in this header. */
+struct pipe_query *r600_create_batch_query(struct pipe_context *ctx,
+ unsigned num_queries,
+ unsigned *query_types);
+
+int r600_get_perfcounter_info(struct r600_common_screen *,
+ unsigned index,
+ struct pipe_driver_query_info *info);
+int r600_get_perfcounter_group_info(struct r600_common_screen *,
+ unsigned index,
+ struct pipe_driver_query_group_info *info);
+
+bool r600_perfcounters_init(struct r600_perfcounters *, unsigned num_blocks);
+void r600_perfcounters_add_block(struct r600_common_screen *,
+ struct r600_perfcounters *,
+ const char *name, unsigned flags,
+ unsigned counters, unsigned selectors,
+ unsigned instances, void *data);
+void r600_perfcounters_do_destroy(struct r600_perfcounters *);
+void r600_query_hw_reset_buffers(struct r600_common_context *rctx,
+ struct r600_query_hw *query);
+
+struct r600_qbo_state { /* NOTE(review): presumably pipeline state saved around a query-buffer operation; confirm at use sites */
+ void *saved_compute;
+ struct pipe_constant_buffer saved_const0;
+ struct pipe_shader_buffer saved_ssbo[3];
+};
+
+#endif /* R600_QUERY_H */
diff -Nru mesa-17.2.4/src/gallium/drivers/r600/r600_shader.c mesa-17.3.3/src/gallium/drivers/r600/r600_shader.c
--- mesa-17.2.4/src/gallium/drivers/r600/r600_shader.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r600/r600_shader.c 2018-01-18 21:30:28.000000000 +0000
@@ -2870,6 +2870,13 @@
int out_idx = i >= outer_comps ? tessinner_idx : tessouter_idx;
int out_comp = i >= outer_comps ? i - outer_comps : i;
+ if (ctx->shader->tcs_prim_mode == PIPE_PRIM_LINES) {
+ if (out_comp == 1)
+ out_comp = 0;
+ else if (out_comp == 0)
+ out_comp = 1;
+ }
+
r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
treg[i / 2], (2 * (i % 2)),
temp_reg, 0,
@@ -3031,9 +3038,11 @@
ctx.file_offset[i] = 0;
}
- if (ctx.type == PIPE_SHADER_VERTEX) {
+ if (ctx.type == PIPE_SHADER_VERTEX) {
+
ctx.file_offset[TGSI_FILE_INPUT] = 1;
- r600_bytecode_add_cfinst(ctx.bc, CF_OP_CALL_FS);
+ if (ctx.info.num_inputs)
+ r600_bytecode_add_cfinst(ctx.bc, CF_OP_CALL_FS);
}
if (ctx.type == PIPE_SHADER_FRAGMENT) {
if (ctx.bc->chip_class >= EVERGREEN)
@@ -3651,7 +3660,7 @@
last = r600_isa_cf(ctx.bc->cf_last->op);
/* alu clause instructions don't have EOP bit, so add NOP */
- if (!last || last->flags & CF_ALU || ctx.bc->cf_last->op == CF_OP_LOOP_END || ctx.bc->cf_last->op == CF_OP_CALL_FS || ctx.bc->cf_last->op == CF_OP_POP || ctx.bc->cf_last->op == CF_OP_GDS)
+ if (!last || last->flags & CF_ALU)
r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP);
ctx.bc->cf_last->end_of_program = 1;
@@ -4045,7 +4054,6 @@
struct r600_bytecode_alu alu;
unsigned write_mask = inst->Dst[0].Register.WriteMask;
int i, j, r;
- int firsti = write_mask == 0xc ? 2 : 0;
for (i = 0; i <= 3; i++) {
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
@@ -4066,15 +4074,18 @@
return r;
}
- /* MOV first two channels to writemask dst0 */
- for (i = 0; i <= 1; i++) {
+ /* Replicate significand result across channels. */
+ for (i = 0; i <= 3; i++) {
+ if (!(write_mask & (1 << i)))
+ continue;
+
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = ALU_OP1_MOV;
- alu.src[0].chan = i + 2;
+ alu.src[0].chan = (i & 1) + 2;
alu.src[0].sel = ctx->temp_reg;
- tgsi_dst(ctx, &inst->Dst[0], firsti + i, &alu.dst);
- alu.dst.write = (inst->Dst[0].Register.WriteMask >> (firsti + i)) & 1;
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ alu.dst.write = 1;
alu.last = 1;
r = r600_bytecode_add_alu(ctx->bc, &alu);
if (r)
@@ -4602,127 +4613,6 @@
return 0;
}
-static int tgsi_scs(struct r600_shader_ctx *ctx)
-{
- struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bytecode_alu alu;
- int i, r;
-
- /* We'll only need the trig stuff if we are going to write to the
- * X or Y components of the destination vector.
- */
- if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
- r = tgsi_setup_trig(ctx);
- if (r)
- return r;
- }
-
- /* dst.x = COS */
- if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
- if (ctx->bc->chip_class == CAYMAN) {
- for (i = 0 ; i < 3; i++) {
- memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.op = ALU_OP1_COS;
- tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
-
- if (i == 0)
- alu.dst.write = 1;
- else
- alu.dst.write = 0;
- alu.src[0].sel = ctx->temp_reg;
- alu.src[0].chan = 0;
- if (i == 2)
- alu.last = 1;
- r = r600_bytecode_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- }
- } else {
- memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.op = ALU_OP1_COS;
- tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
-
- alu.src[0].sel = ctx->temp_reg;
- alu.src[0].chan = 0;
- alu.last = 1;
- r = r600_bytecode_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- }
- }
-
- /* dst.y = SIN */
- if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
- if (ctx->bc->chip_class == CAYMAN) {
- for (i = 0 ; i < 3; i++) {
- memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.op = ALU_OP1_SIN;
- tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (i == 1)
- alu.dst.write = 1;
- else
- alu.dst.write = 0;
- alu.src[0].sel = ctx->temp_reg;
- alu.src[0].chan = 0;
- if (i == 2)
- alu.last = 1;
- r = r600_bytecode_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- }
- } else {
- memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.op = ALU_OP1_SIN;
- tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
-
- alu.src[0].sel = ctx->temp_reg;
- alu.src[0].chan = 0;
- alu.last = 1;
- r = r600_bytecode_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- }
- }
-
- /* dst.z = 0.0; */
- if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
- memset(&alu, 0, sizeof(struct r600_bytecode_alu));
-
- alu.op = ALU_OP1_MOV;
-
- tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
-
- alu.src[0].sel = V_SQ_ALU_SRC_0;
- alu.src[0].chan = 0;
-
- alu.last = 1;
-
- r = r600_bytecode_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- }
-
- /* dst.w = 1.0; */
- if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
- memset(&alu, 0, sizeof(struct r600_bytecode_alu));
-
- alu.op = ALU_OP1_MOV;
-
- tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
-
- alu.src[0].sel = V_SQ_ALU_SRC_1;
- alu.src[0].chan = 0;
-
- alu.last = 1;
-
- r = r600_bytecode_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- }
-
- return 0;
-}
-
static int tgsi_kill(struct r600_shader_ctx *ctx)
{
const struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
@@ -6617,13 +6507,6 @@
alu.src[0].chan = alu.src[1].chan = 0;
}
break;
- case TGSI_OPCODE_DPH:
- if (i == 3) {
- alu.src[0].sel = V_SQ_ALU_SRC_1;
- alu.src[0].chan = 0;
- alu.src[0].neg = 0;
- }
- break;
default:
break;
}
@@ -6797,13 +6680,12 @@
/* Texture fetch instructions can only use gprs as source.
* Also they cannot negate the source or take the absolute value */
- const boolean src_requires_loading = (inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ &&
- inst->Instruction.Opcode != TGSI_OPCODE_TXQS &&
+ const boolean src_requires_loading = (inst->Instruction.Opcode != TGSI_OPCODE_TXQS &&
tgsi_tex_src_requires_loading(ctx, 0)) ||
read_compressed_msaa || txf_add_offsets;
boolean src_loaded = FALSE;
- unsigned sampler_src_reg = inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ ? 0 : 1;
+ unsigned sampler_src_reg = 1;
int8_t offset_x = 0, offset_y = 0, offset_z = 0;
boolean has_txq_cube_array_z = false;
unsigned sampler_index_mode;
@@ -6911,8 +6793,7 @@
inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) &&
- inst->Instruction.Opcode != TGSI_OPCODE_TXQ &&
- inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) {
+ inst->Instruction.Opcode != TGSI_OPCODE_TXQ) {
static const unsigned src0_swizzle[] = {2, 2, 0, 1};
static const unsigned src1_swizzle[] = {1, 0, 2, 2};
@@ -7555,8 +7436,7 @@
}
- if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ ||
- inst->Instruction.Opcode == TGSI_OPCODE_TXQS) {
+ if (inst->Instruction.Opcode == TGSI_OPCODE_TXQS) {
tex.src_sel_x = 4;
tex.src_sel_y = 4;
tex.src_sel_z = 4;
@@ -7854,78 +7734,6 @@
return 0;
}
-static int tgsi_xpd(struct r600_shader_ctx *ctx)
-{
- struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- static const unsigned int src0_swizzle[] = {2, 0, 1};
- static const unsigned int src1_swizzle[] = {1, 2, 0};
- struct r600_bytecode_alu alu;
- uint32_t use_temp = 0;
- int i, r;
-
- if (inst->Dst[0].Register.WriteMask != 0xf)
- use_temp = 1;
-
- for (i = 0; i < 4; i++) {
- memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.op = ALU_OP2_MUL;
- if (i < 3) {
- r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
- r600_bytecode_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]);
- } else {
- alu.src[0].sel = V_SQ_ALU_SRC_0;
- alu.src[0].chan = i;
- alu.src[1].sel = V_SQ_ALU_SRC_0;
- alu.src[1].chan = i;
- }
-
- alu.dst.sel = ctx->temp_reg;
- alu.dst.chan = i;
- alu.dst.write = 1;
-
- if (i == 3)
- alu.last = 1;
- r = r600_bytecode_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- }
-
- for (i = 0; i < 4; i++) {
- memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.op = ALU_OP3_MULADD;
-
- if (i < 3) {
- r600_bytecode_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]);
- r600_bytecode_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]);
- } else {
- alu.src[0].sel = V_SQ_ALU_SRC_0;
- alu.src[0].chan = i;
- alu.src[1].sel = V_SQ_ALU_SRC_0;
- alu.src[1].chan = i;
- }
-
- alu.src[2].sel = ctx->temp_reg;
- alu.src[2].neg = 1;
- alu.src[2].chan = i;
-
- if (use_temp)
- alu.dst.sel = ctx->temp_reg;
- else
- tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- alu.dst.chan = i;
- alu.dst.write = 1;
- alu.is_op3 = 1;
- if (i == 3)
- alu.last = 1;
- r = r600_bytecode_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- }
- if (use_temp)
- return tgsi_helper_copy(ctx, inst);
- return 0;
-}
-
static int tgsi_exp(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
@@ -8815,45 +8623,6 @@
return 0;
}
-static int tgsi_loop_breakc(struct r600_shader_ctx *ctx)
-{
- int r;
- unsigned int fscp;
-
- for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
- {
- if (FC_LOOP == ctx->bc->fc_stack[fscp - 1].type)
- break;
- }
- if (fscp == 0) {
- R600_ERR("BREAKC not inside loop/endloop pair\n");
- return -EINVAL;
- }
-
- if (ctx->bc->chip_class == EVERGREEN &&
- ctx->bc->family != CHIP_CYPRESS &&
- ctx->bc->family != CHIP_JUNIPER) {
- /* HW bug: ALU_BREAK does not save the active mask correctly */
- r = tgsi_uif(ctx);
- if (r)
- return r;
-
- r = r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_BREAK);
- if (r)
- return r;
- fc_set_mid(ctx, fscp - 1);
-
- return tgsi_endif(ctx);
- } else {
- r = emit_logic_pred(ctx, ALU_OP2_PRED_SETE_INT, CF_OP_ALU_BREAK);
- if (r)
- return r;
- fc_set_mid(ctx, fscp - 1);
- }
-
- return 0;
-}
-
static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
{
unsigned int fscp;
@@ -9084,15 +8853,16 @@
[TGSI_OPCODE_DP3] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_DP4] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst},
- [TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2},
- [TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2},
+ /* MIN_DX10 returns non-nan result if one src is NaN, MIN returns NaN */
+ [TGSI_OPCODE_MIN] = { ALU_OP2_MIN_DX10, tgsi_op2},
+ [TGSI_OPCODE_MAX] = { ALU_OP2_MAX_DX10, tgsi_op2},
[TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap},
[TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2},
[TGSI_OPCODE_MAD] = { ALU_OP3_MULADD_IEEE, tgsi_op3},
[TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp},
[TGSI_OPCODE_FMA] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate},
- [TGSI_OPCODE_DP2A] = { ALU_OP0_NOP, tgsi_unsupported},
+ [21] = { ALU_OP0_NOP, tgsi_unsupported},
[22] = { ALU_OP0_NOP, tgsi_unsupported},
[23] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2},
@@ -9102,11 +8872,11 @@
[TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate},
[TGSI_OPCODE_LG2] = { ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate},
[TGSI_OPCODE_POW] = { ALU_OP0_NOP, tgsi_pow},
- [TGSI_OPCODE_XPD] = { ALU_OP0_NOP, tgsi_xpd},
+ [31] = { ALU_OP0_NOP, tgsi_unsupported},
[32] = { ALU_OP0_NOP, tgsi_unsupported},
[33] = { ALU_OP0_NOP, tgsi_unsupported},
[34] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_DPH] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
+ [35] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_COS] = { ALU_OP1_COS, tgsi_trig},
[TGSI_OPCODE_DDX] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
[TGSI_OPCODE_DDY] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
@@ -9138,7 +8908,7 @@
[TGSI_OPCODE_RET] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_SSG] = { ALU_OP0_NOP, tgsi_ssg},
[TGSI_OPCODE_CMP] = { ALU_OP0_NOP, tgsi_cmp},
- [TGSI_OPCODE_SCS] = { ALU_OP0_NOP, tgsi_scs},
+ [67] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_TXB] = { FETCH_OP_SAMPLE_LB, tgsi_tex},
[69] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_DIV] = { ALU_OP0_NOP, tgsi_unsupported},
@@ -9152,8 +8922,8 @@
[TGSI_OPCODE_ENDIF] = { ALU_OP0_NOP, tgsi_endif},
[TGSI_OPCODE_DDX_FINE] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_DDY_FINE] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_PUSHA] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_POPA] = { ALU_OP0_NOP, tgsi_unsupported},
+ [81] = { ALU_OP0_NOP, tgsi_unsupported},
+ [82] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_CEIL] = { ALU_OP1_CEIL, tgsi_op2},
[TGSI_OPCODE_I2F] = { ALU_OP1_INT_TO_FLT, tgsi_op2_trans},
[TGSI_OPCODE_NOT] = { ALU_OP1_NOT_INT, tgsi_op2},
@@ -9164,7 +8934,7 @@
[TGSI_OPCODE_OR] = { ALU_OP2_OR_INT, tgsi_op2},
[TGSI_OPCODE_MOD] = { ALU_OP0_NOP, tgsi_imod},
[TGSI_OPCODE_XOR] = { ALU_OP2_XOR_INT, tgsi_op2},
- [TGSI_OPCODE_SAD] = { ALU_OP0_NOP, tgsi_unsupported},
+ [93] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_TXF] = { FETCH_OP_LD, tgsi_tex},
[TGSI_OPCODE_TXQ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
[TGSI_OPCODE_CONT] = { CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont},
@@ -9174,7 +8944,7 @@
[TGSI_OPCODE_BGNSUB] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_ENDLOOP] = { ALU_OP0_NOP, tgsi_endloop},
[TGSI_OPCODE_ENDSUB] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_TXQ_LZ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
+ [103] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
[TGSI_OPCODE_TXQS] = { FETCH_OP_GET_NUMBER_OF_SAMPLES, tgsi_tex},
[TGSI_OPCODE_RESQ] = { ALU_OP0_NOP, tgsi_unsupported},
[106] = { ALU_OP0_NOP, tgsi_unsupported},
@@ -9184,9 +8954,9 @@
[TGSI_OPCODE_FSLT] = { ALU_OP2_SETGT_DX10, tgsi_op2_swap},
[TGSI_OPCODE_FSNE] = { ALU_OP2_SETNE_DX10, tgsi_op2_swap},
[TGSI_OPCODE_MEMBAR] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_CALLNZ] = { ALU_OP0_NOP, tgsi_unsupported},
+ [113] = { ALU_OP0_NOP, tgsi_unsupported},
[114] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_BREAKC] = { ALU_OP0_NOP, tgsi_loop_breakc},
+ [115] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_KILL_IF] = { ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */
[TGSI_OPCODE_END] = { ALU_OP0_NOP, tgsi_end}, /* aka HALT */
[TGSI_OPCODE_DFMA] = { ALU_OP0_NOP, tgsi_unsupported},
@@ -9234,9 +9004,9 @@
[TGSI_OPCODE_ISSG] = { 0, tgsi_issg},
[TGSI_OPCODE_LOAD] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_STORE] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_MFENCE] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_LFENCE] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_SFENCE] = { ALU_OP0_NOP, tgsi_unsupported},
+ [163] = { ALU_OP0_NOP, tgsi_unsupported},
+ [164] = { ALU_OP0_NOP, tgsi_unsupported},
+ [165] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_BARRIER] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_ATOMUADD] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_ATOMXCHG] = { ALU_OP0_NOP, tgsi_unsupported},
@@ -9282,15 +9052,15 @@
[TGSI_OPCODE_DP3] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_DP4] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst},
- [TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2},
- [TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2},
+ [TGSI_OPCODE_MIN] = { ALU_OP2_MIN_DX10, tgsi_op2},
+ [TGSI_OPCODE_MAX] = { ALU_OP2_MAX_DX10, tgsi_op2},
[TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap},
[TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2},
[TGSI_OPCODE_MAD] = { ALU_OP3_MULADD_IEEE, tgsi_op3},
[TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp},
[TGSI_OPCODE_FMA] = { ALU_OP3_FMA, tgsi_op3},
[TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate},
- [TGSI_OPCODE_DP2A] = { ALU_OP0_NOP, tgsi_unsupported},
+ [21] = { ALU_OP0_NOP, tgsi_unsupported},
[22] = { ALU_OP0_NOP, tgsi_unsupported},
[23] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2},
@@ -9300,11 +9070,11 @@
[TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate},
[TGSI_OPCODE_LG2] = { ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate},
[TGSI_OPCODE_POW] = { ALU_OP0_NOP, tgsi_pow},
- [TGSI_OPCODE_XPD] = { ALU_OP0_NOP, tgsi_xpd},
+ [31] = { ALU_OP0_NOP, tgsi_unsupported},
[32] = { ALU_OP0_NOP, tgsi_unsupported},
[33] = { ALU_OP0_NOP, tgsi_unsupported},
[34] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_DPH] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
+ [35] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_COS] = { ALU_OP1_COS, tgsi_trig},
[TGSI_OPCODE_DDX] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
[TGSI_OPCODE_DDY] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
@@ -9336,7 +9106,7 @@
[TGSI_OPCODE_RET] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_SSG] = { ALU_OP0_NOP, tgsi_ssg},
[TGSI_OPCODE_CMP] = { ALU_OP0_NOP, tgsi_cmp},
- [TGSI_OPCODE_SCS] = { ALU_OP0_NOP, tgsi_scs},
+ [67] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_TXB] = { FETCH_OP_SAMPLE_LB, tgsi_tex},
[69] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_DIV] = { ALU_OP0_NOP, tgsi_unsupported},
@@ -9350,8 +9120,8 @@
[TGSI_OPCODE_ENDIF] = { ALU_OP0_NOP, tgsi_endif},
[TGSI_OPCODE_DDX_FINE] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
[TGSI_OPCODE_DDY_FINE] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
- [TGSI_OPCODE_PUSHA] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_POPA] = { ALU_OP0_NOP, tgsi_unsupported},
+ [81] = { ALU_OP0_NOP, tgsi_unsupported}, /* retired slot, was TGSI_OPCODE_PUSHA */
+ [82] = { ALU_OP0_NOP, tgsi_unsupported}, /* retired slot, was TGSI_OPCODE_POPA */
[TGSI_OPCODE_CEIL] = { ALU_OP1_CEIL, tgsi_op2},
[TGSI_OPCODE_I2F] = { ALU_OP1_INT_TO_FLT, tgsi_op2_trans},
[TGSI_OPCODE_NOT] = { ALU_OP1_NOT_INT, tgsi_op2},
@@ -9362,7 +9132,7 @@
[TGSI_OPCODE_OR] = { ALU_OP2_OR_INT, tgsi_op2},
[TGSI_OPCODE_MOD] = { ALU_OP0_NOP, tgsi_imod},
[TGSI_OPCODE_XOR] = { ALU_OP2_XOR_INT, tgsi_op2},
- [TGSI_OPCODE_SAD] = { ALU_OP0_NOP, tgsi_unsupported},
+ [93] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_TXF] = { FETCH_OP_LD, tgsi_tex},
[TGSI_OPCODE_TXQ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
[TGSI_OPCODE_CONT] = { CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont},
@@ -9372,7 +9142,7 @@
[TGSI_OPCODE_BGNSUB] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_ENDLOOP] = { ALU_OP0_NOP, tgsi_endloop},
[TGSI_OPCODE_ENDSUB] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_TXQ_LZ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
+ [103] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
[TGSI_OPCODE_TXQS] = { FETCH_OP_GET_NUMBER_OF_SAMPLES, tgsi_tex},
[TGSI_OPCODE_RESQ] = { ALU_OP0_NOP, tgsi_unsupported},
[106] = { ALU_OP0_NOP, tgsi_unsupported},
@@ -9382,9 +9152,9 @@
[TGSI_OPCODE_FSLT] = { ALU_OP2_SETGT_DX10, tgsi_op2_swap},
[TGSI_OPCODE_FSNE] = { ALU_OP2_SETNE_DX10, tgsi_op2_swap},
[TGSI_OPCODE_MEMBAR] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_CALLNZ] = { ALU_OP0_NOP, tgsi_unsupported},
+ [113] = { ALU_OP0_NOP, tgsi_unsupported},
[114] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_BREAKC] = { ALU_OP0_NOP, tgsi_unsupported},
+ [115] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_KILL_IF] = { ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */
[TGSI_OPCODE_END] = { ALU_OP0_NOP, tgsi_end}, /* aka HALT */
/* Refer below for TGSI_OPCODE_DFMA */
@@ -9432,9 +9202,9 @@
[TGSI_OPCODE_ISSG] = { 0, tgsi_issg},
[TGSI_OPCODE_LOAD] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_STORE] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_MFENCE] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_LFENCE] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_SFENCE] = { ALU_OP0_NOP, tgsi_unsupported},
+ [163] = { ALU_OP0_NOP, tgsi_unsupported},
+ [164] = { ALU_OP0_NOP, tgsi_unsupported},
+ [165] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_BARRIER] = { ALU_OP0_GROUP_BARRIER, tgsi_barrier},
[TGSI_OPCODE_ATOMUADD] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_ATOMXCHG] = { ALU_OP0_NOP, tgsi_unsupported},
@@ -9505,15 +9275,15 @@
[TGSI_OPCODE_DP3] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_DP4] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst},
- [TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2},
- [TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2},
+ [TGSI_OPCODE_MIN] = { ALU_OP2_MIN_DX10, tgsi_op2},
+ [TGSI_OPCODE_MAX] = { ALU_OP2_MAX_DX10, tgsi_op2},
[TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap},
[TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2},
[TGSI_OPCODE_MAD] = { ALU_OP3_MULADD_IEEE, tgsi_op3},
[TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp},
[TGSI_OPCODE_FMA] = { ALU_OP3_FMA, tgsi_op3},
[TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, cayman_emit_float_instr},
- [TGSI_OPCODE_DP2A] = { ALU_OP0_NOP, tgsi_unsupported},
+ [21] = { ALU_OP0_NOP, tgsi_unsupported},
[22] = { ALU_OP0_NOP, tgsi_unsupported},
[23] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2},
@@ -9523,11 +9293,11 @@
[TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, cayman_emit_float_instr},
[TGSI_OPCODE_LG2] = { ALU_OP1_LOG_IEEE, cayman_emit_float_instr},
[TGSI_OPCODE_POW] = { ALU_OP0_NOP, cayman_pow},
- [TGSI_OPCODE_XPD] = { ALU_OP0_NOP, tgsi_xpd},
+ [31] = { ALU_OP0_NOP, tgsi_unsupported},
[32] = { ALU_OP0_NOP, tgsi_unsupported},
[33] = { ALU_OP0_NOP, tgsi_unsupported},
[34] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_DPH] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
+ [35] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_COS] = { ALU_OP1_COS, cayman_trig},
[TGSI_OPCODE_DDX] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
[TGSI_OPCODE_DDY] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
@@ -9559,7 +9329,7 @@
[TGSI_OPCODE_RET] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_SSG] = { ALU_OP0_NOP, tgsi_ssg},
[TGSI_OPCODE_CMP] = { ALU_OP0_NOP, tgsi_cmp},
- [TGSI_OPCODE_SCS] = { ALU_OP0_NOP, tgsi_scs},
+ [67] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_TXB] = { FETCH_OP_SAMPLE_LB, tgsi_tex},
[69] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_DIV] = { ALU_OP0_NOP, tgsi_unsupported},
@@ -9573,8 +9343,8 @@
[TGSI_OPCODE_ENDIF] = { ALU_OP0_NOP, tgsi_endif},
[TGSI_OPCODE_DDX_FINE] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
[TGSI_OPCODE_DDY_FINE] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
- [TGSI_OPCODE_PUSHA] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_POPA] = { ALU_OP0_NOP, tgsi_unsupported},
+ [81] = { ALU_OP0_NOP, tgsi_unsupported}, /* retired slot, was TGSI_OPCODE_PUSHA */
+ [82] = { ALU_OP0_NOP, tgsi_unsupported}, /* retired slot, was TGSI_OPCODE_POPA */
[TGSI_OPCODE_CEIL] = { ALU_OP1_CEIL, tgsi_op2},
[TGSI_OPCODE_I2F] = { ALU_OP1_INT_TO_FLT, tgsi_op2},
[TGSI_OPCODE_NOT] = { ALU_OP1_NOT_INT, tgsi_op2},
@@ -9585,7 +9355,7 @@
[TGSI_OPCODE_OR] = { ALU_OP2_OR_INT, tgsi_op2},
[TGSI_OPCODE_MOD] = { ALU_OP0_NOP, tgsi_imod},
[TGSI_OPCODE_XOR] = { ALU_OP2_XOR_INT, tgsi_op2},
- [TGSI_OPCODE_SAD] = { ALU_OP0_NOP, tgsi_unsupported},
+ [93] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_TXF] = { FETCH_OP_LD, tgsi_tex},
[TGSI_OPCODE_TXQ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
[TGSI_OPCODE_CONT] = { CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont},
@@ -9595,7 +9365,7 @@
[TGSI_OPCODE_BGNSUB] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_ENDLOOP] = { ALU_OP0_NOP, tgsi_endloop},
[TGSI_OPCODE_ENDSUB] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_TXQ_LZ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
+ [103] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
[TGSI_OPCODE_TXQS] = { FETCH_OP_GET_NUMBER_OF_SAMPLES, tgsi_tex},
[TGSI_OPCODE_RESQ] = { ALU_OP0_NOP, tgsi_unsupported},
[106] = { ALU_OP0_NOP, tgsi_unsupported},
@@ -9605,9 +9375,9 @@
[TGSI_OPCODE_FSLT] = { ALU_OP2_SETGT_DX10, tgsi_op2_swap},
[TGSI_OPCODE_FSNE] = { ALU_OP2_SETNE_DX10, tgsi_op2_swap},
[TGSI_OPCODE_MEMBAR] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_CALLNZ] = { ALU_OP0_NOP, tgsi_unsupported},
+ [113] = { ALU_OP0_NOP, tgsi_unsupported},
[114] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_BREAKC] = { ALU_OP0_NOP, tgsi_unsupported},
+ [115] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_KILL_IF] = { ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */
[TGSI_OPCODE_END] = { ALU_OP0_NOP, tgsi_end}, /* aka HALT */
/* Refer below for TGSI_OPCODE_DFMA */
@@ -9655,9 +9425,9 @@
[TGSI_OPCODE_ISSG] = { 0, tgsi_issg},
[TGSI_OPCODE_LOAD] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_STORE] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_MFENCE] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_LFENCE] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_SFENCE] = { ALU_OP0_NOP, tgsi_unsupported},
+ [163] = { ALU_OP0_NOP, tgsi_unsupported},
+ [164] = { ALU_OP0_NOP, tgsi_unsupported},
+ [165] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_BARRIER] = { ALU_OP0_GROUP_BARRIER, tgsi_barrier},
[TGSI_OPCODE_ATOMUADD] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_ATOMXCHG] = { ALU_OP0_NOP, tgsi_unsupported},
diff -Nru mesa-17.2.4/src/gallium/drivers/r600/r600_state.c mesa-17.3.3/src/gallium/drivers/r600/r600_state.c
--- mesa-17.2.4/src/gallium/drivers/r600/r600_state.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r600/r600_state.c 2018-01-18 21:30:28.000000000 +0000
@@ -1060,8 +1060,7 @@
surf->db_depth_size = S_028000_PITCH_TILE_MAX(pitch) | S_028000_SLICE_TILE_MAX(slice);
surf->db_prefetch_limit = (rtex->surface.u.legacy.level[level].nblk_y / 8) - 1;
- /* use htile only for first level */
- if (rtex->htile_offset && !level) {
+ if (r600_htile_enabled(rtex, level)) {
surf->db_htile_data_base = rtex->htile_offset >> 8;
surf->db_htile_surface = S_028D24_HTILE_WIDTH(1) |
S_028D24_HTILE_HEIGHT(1) |
@@ -1898,6 +1897,9 @@
struct r600_cso_state *state = (struct r600_cso_state*)a;
struct r600_fetch_shader *shader = (struct r600_fetch_shader*)state->cso;
+ if (!shader)
+ return;
+
radeon_set_context_reg(cs, R_028894_SQ_PGM_START_FS, shader->offset >> 8);
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, shader->buffer,
@@ -2546,6 +2548,12 @@
r600_store_context_reg_seq(cb, R_028850_SQ_PGM_RESOURCES_PS, 2);
r600_store_value(cb, /* R_028850_SQ_PGM_RESOURCES_PS*/
S_028850_NUM_GPRS(rshader->bc.ngpr) |
+ /*
+ * docs are misleading about the dx10_clamp bit. This only affects
+ * instructions using CLAMP dst modifier, in which case they will
+ * return 0 with this set for a NaN (otherwise NaN).
+ */
+ S_028850_DX10_CLAMP(1) |
S_028850_STACK_SIZE(rshader->bc.nstack) |
S_028850_UNCACHED_FIRST_INST(ufi));
r600_store_value(cb, exports_ps); /* R_028854_SQ_PGM_EXPORTS_PS */
@@ -2595,6 +2603,7 @@
S_0286C4_VS_EXPORT_COUNT(nparams - 1));
r600_store_context_reg(cb, R_028868_SQ_PGM_RESOURCES_VS,
S_028868_NUM_GPRS(rshader->bc.ngpr) |
+ S_028868_DX10_CLAMP(1) |
S_028868_STACK_SIZE(rshader->bc.nstack));
if (rshader->vs_position_window_space) {
r600_store_context_reg(cb, R_028818_PA_CL_VTE_CNTL,
@@ -2679,6 +2688,7 @@
r600_store_context_reg(cb, R_02887C_SQ_PGM_RESOURCES_GS,
S_02887C_NUM_GPRS(rshader->bc.ngpr) |
+ S_02887C_DX10_CLAMP(1) |
S_02887C_STACK_SIZE(rshader->bc.nstack));
r600_store_context_reg(cb, R_02886C_SQ_PGM_START_GS, 0);
/* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ). */
@@ -2693,6 +2703,7 @@
r600_store_context_reg(cb, R_028890_SQ_PGM_RESOURCES_ES,
S_028890_NUM_GPRS(rshader->bc.ngpr) |
+ S_028890_DX10_CLAMP(1) |
S_028890_STACK_SIZE(rshader->bc.nstack));
r600_store_context_reg(cb, R_028880_SQ_PGM_START_ES, 0);
/* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ). */
diff -Nru mesa-17.2.4/src/gallium/drivers/r600/r600_state_common.c mesa-17.3.3/src/gallium/drivers/r600/r600_state_common.c
--- mesa-17.2.4/src/gallium/drivers/r600/r600_state_common.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r600/r600_state_common.c 2018-01-18 21:30:28.000000000 +0000
@@ -2030,7 +2030,7 @@
rtex->dirty_level_mask |= 1 << surf->u.tex.level;
- if (rtex->surface.flags & RADEON_SURF_SBUFFER)
+ if (rtex->surface.has_stencil)
rtex->stencil_dirty_level_mask |= 1 << surf->u.tex.level;
}
if (rctx->framebuffer.compressed_cb_mask) {
@@ -2284,6 +2284,8 @@
format = PIPE_FORMAT_A4R4_UNORM;
desc = util_format_description(format);
+ if (!desc)
+ goto out_unknown;
/* Depth and stencil swizzling is handled separately. */
if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) {
@@ -2403,10 +2405,6 @@
}
if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
- if (!util_format_s3tc_enabled) {
- goto out_unknown;
- }
-
switch (format) {
case PIPE_FORMAT_DXT1_RGB:
case PIPE_FORMAT_DXT1_RGBA:
@@ -2650,6 +2648,8 @@
const struct util_format_description *desc = util_format_description(format);
int channel = util_format_get_first_non_void_channel(format);
bool is_float;
+ if (!desc)
+ return ~0U;
#define HAS_SIZE(x,y,z,w) \
(desc->channel[0].size == (x) && desc->channel[1].size == (y) && \
@@ -2910,13 +2910,6 @@
}
}
-static void r600_set_occlusion_query_state(struct pipe_context *ctx, bool enable)
-{
- struct r600_context *rctx = (struct r600_context*)ctx;
-
- r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
-}
-
static void r600_need_gfx_cs_space(struct pipe_context *ctx, unsigned num_dw,
bool include_draw_vbo)
{
@@ -2965,6 +2958,5 @@
rctx->b.b.set_active_query_state = r600_set_active_query_state;
rctx->b.b.draw_vbo = r600_draw_vbo;
rctx->b.invalidate_buffer = r600_invalidate_buffer;
- rctx->b.set_occlusion_query_state = r600_set_occlusion_query_state;
rctx->b.need_gfx_cs_space = r600_need_gfx_cs_space;
}
diff -Nru mesa-17.2.4/src/gallium/drivers/r600/r600_streamout.c mesa-17.3.3/src/gallium/drivers/r600/r600_streamout.c
--- mesa-17.2.4/src/gallium/drivers/r600/r600_streamout.c 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r600/r600_streamout.c 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,365 @@
+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Marek Olšák
+ *
+ */
+
+#include "r600_pipe_common.h"
+#include "r600_cs.h"
+
+#include "util/u_memory.h"
+#include "evergreend.h"
+
+#define R_008490_CP_STRMOUT_CNTL 0x008490
+#define R_028AB0_VGT_STRMOUT_EN 0x028AB0
+#define R_028B20_VGT_STRMOUT_BUFFER_EN 0x028B20
+
+static void r600_set_streamout_enable(struct r600_common_context *rctx, bool enable);
+/* Create a stream-output target for buffer_size bytes of buffer starting at buffer_offset; returns NULL if an allocation fails. */
+static struct pipe_stream_output_target *
+r600_create_so_target(struct pipe_context *ctx,
+ struct pipe_resource *buffer,
+ unsigned buffer_offset,
+ unsigned buffer_size)
+{
+ struct r600_common_context *rctx = (struct r600_common_context *)ctx;
+ struct r600_so_target *t;
+ struct r600_resource *rbuffer = (struct r600_resource*)buffer;
+
+ t = CALLOC_STRUCT(r600_so_target);
+ if (!t) {
+ return NULL;
+ }
+ /* Reserve the small buffer that r600_emit_streamout_end stores this target's filled size into. */
+ u_suballocator_alloc(rctx->allocator_zeroed_memory, 4, 4,
+ &t->buf_filled_size_offset,
+ (struct pipe_resource**)&t->buf_filled_size);
+ if (!t->buf_filled_size) {
+ FREE(t);
+ return NULL;
+ }
+
+ t->b.reference.count = 1;
+ t->b.context = ctx;
+ pipe_resource_reference(&t->b.buffer, buffer);
+ t->b.buffer_offset = buffer_offset;
+ t->b.buffer_size = buffer_size;
+ /* Add the bound byte range to the buffer's valid range. */
+ util_range_add(&rbuffer->valid_buffer_range, buffer_offset,
+ buffer_offset + buffer_size);
+ return &t->b;
+}
+/* Destroy a stream-output target: drop its references to the bound buffer and the filled-size buffer, then free it. */
+static void r600_so_target_destroy(struct pipe_context *ctx,
+ struct pipe_stream_output_target *target)
+{
+ struct r600_so_target *t = (struct r600_so_target*)target;
+ pipe_resource_reference(&t->b.buffer, NULL);
+ r600_resource_reference(&t->buf_filled_size, NULL);
+ FREE(t);
+}
+/* Recompute the dword budgets of the streamout begin/end packets for the enabled buffers, dirty the begin atom and enable streamout. */
+void r600_streamout_buffers_dirty(struct r600_common_context *rctx)
+{
+ struct r600_atom *begin = &rctx->streamout.begin_atom;
+ unsigned num_bufs = util_bitcount(rctx->streamout.enabled_mask);
+ unsigned num_bufs_appended = util_bitcount(rctx->streamout.enabled_mask &
+ rctx->streamout.append_bitmask);
+
+ if (!num_bufs)
+ return;
+ /* Dwords reserved for the matching r600_emit_streamout_end. */
+ rctx->streamout.num_dw_for_end =
+ 12 + /* flush_vgt_streamout */
+ num_bufs * 11; /* STRMOUT_BUFFER_UPDATE, BUFFER_SIZE */
+ /* Dword budget of the begin atom; the family ranges below must mirror r600_emit_streamout_begin. */
+ begin->num_dw = 12; /* flush_vgt_streamout */
+
+ begin->num_dw += num_bufs * 7; /* SET_CONTEXT_REG */
+
+ if (rctx->family >= CHIP_RS780 && rctx->family <= CHIP_RV740)
+ begin->num_dw += num_bufs * 5; /* STRMOUT_BASE_UPDATE */
+ /* SURFACE_BASE_UPDATE is emitted for CHIP_R600 < family < CHIP_RV770, so reserve its 2 dwords for that same range. */
+ begin->num_dw +=
+ num_bufs_appended * 8 + /* STRMOUT_BUFFER_UPDATE */
+ (num_bufs - num_bufs_appended) * 6 + /* STRMOUT_BUFFER_UPDATE */
+ (rctx->family > CHIP_R600 && rctx->family < CHIP_RV770 ? 2 : 0); /* SURFACE_BASE_UPDATE */
+
+ rctx->set_atom_dirty(rctx, begin, true);
+
+ r600_set_streamout_enable(rctx, true);
+}
+/* Bind num_targets stream-output targets, ending any streamout in progress and releasing slots that are no longer used. */
+void r600_set_streamout_targets(struct pipe_context *ctx,
+ unsigned num_targets,
+ struct pipe_stream_output_target **targets,
+ const unsigned *offsets)
+{
+ struct r600_common_context *rctx = (struct r600_common_context *)ctx;
+ unsigned i;
+ unsigned enabled_mask = 0, append_bitmask = 0;
+
+ /* Stop streamout. */
+ if (rctx->streamout.num_targets && rctx->streamout.begin_emitted) {
+ r600_emit_streamout_end(rctx);
+ }
+
+ /* Set the new targets. */
+ for (i = 0; i < num_targets; i++) {
+ pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->streamout.targets[i], targets[i]);
+ if (!targets[i])
+ continue;
+
+ r600_context_add_resource_size(ctx, targets[i]->buffer);
+ enabled_mask |= 1 << i;
+ if (offsets[i] == ((unsigned)-1)) /* an offset of -1 marks the target as "append" */
+ append_bitmask |= 1 << i;
+ }
+ for (; i < rctx->streamout.num_targets; i++) { /* unbind slots beyond the new count */
+ pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->streamout.targets[i], NULL);
+ }
+
+ rctx->streamout.enabled_mask = enabled_mask;
+
+ rctx->streamout.num_targets = num_targets;
+ rctx->streamout.append_bitmask = append_bitmask;
+ /* With targets bound, recompute the begin atom; otherwise clear its dirty state and disable streamout. */
+ if (num_targets) {
+ r600_streamout_buffers_dirty(rctx);
+ } else {
+ rctx->set_atom_dirty(rctx, &rctx->streamout.begin_atom, false);
+ r600_set_streamout_enable(rctx, false);
+ }
+}
+/* Emit a VGT streamout flush event and wait until CP_STRMOUT_CNTL reports OFFSET_UPDATE_DONE. */
+static void r600_flush_vgt_streamout(struct r600_common_context *rctx)
+{
+ struct radeon_winsys_cs *cs = rctx->gfx.cs;
+ unsigned reg_strmout_cntl;
+
+ /* The register is at different places on different ASICs. */
+ if (rctx->chip_class >= EVERGREEN) {
+ reg_strmout_cntl = R_0084FC_CP_STRMOUT_CNTL;
+ } else {
+ reg_strmout_cntl = R_008490_CP_STRMOUT_CNTL;
+ }
+ /* Zero the register first, presumably so the wait below only observes the completion of this flush. */
+ radeon_set_config_reg(cs, reg_strmout_cntl, 0);
+
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH) | EVENT_INDEX(0));
+
+ radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
+ radeon_emit(cs, WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */
+ radeon_emit(cs, reg_strmout_cntl >> 2); /* register */
+ radeon_emit(cs, 0);
+ radeon_emit(cs, S_0084FC_OFFSET_UPDATE_DONE(1)); /* reference value */
+ radeon_emit(cs, S_0084FC_OFFSET_UPDATE_DONE(1)); /* mask */
+ radeon_emit(cs, 4); /* poll interval */
+}
+/* Emit the begin-streamout packets: flush, then program size/stride/base and the starting offset of every bound target. */
+static void r600_emit_streamout_begin(struct r600_common_context *rctx, struct r600_atom *atom)
+{
+ struct radeon_winsys_cs *cs = rctx->gfx.cs;
+ struct r600_so_target **t = rctx->streamout.targets;
+ uint16_t *stride_in_dw = rctx->streamout.stride_in_dw;
+ unsigned i, update_flags = 0;
+
+ r600_flush_vgt_streamout(rctx);
+
+ for (i = 0; i < rctx->streamout.num_targets; i++) {
+ if (!t[i])
+ continue;
+ /* Record the current per-buffer stride on the target itself. */
+ t[i]->stride_in_dw = stride_in_dw[i];
+
+ uint64_t va = r600_resource(t[i]->b.buffer)->gpu_address;
+
+ update_flags |= SURFACE_BASE_UPDATE_STRMOUT(i);
+
+ radeon_set_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 3);
+ radeon_emit(cs, (t[i]->b.buffer_offset +
+ t[i]->b.buffer_size) >> 2); /* BUFFER_SIZE (in DW) */
+ radeon_emit(cs, stride_in_dw[i]); /* VTX_STRIDE (in DW) */
+ radeon_emit(cs, va >> 8); /* BUFFER_BASE */
+
+ r600_emit_reloc(rctx, &rctx->gfx, r600_resource(t[i]->b.buffer),
+ RADEON_USAGE_WRITE, RADEON_PRIO_SHADER_RW_BUFFER);
+
+ /* R7xx requires this packet after updating BUFFER_BASE.
+ * Without this, R7xx locks up. */
+ if (rctx->family >= CHIP_RS780 && rctx->family <= CHIP_RV740) {
+ radeon_emit(cs, PKT3(PKT3_STRMOUT_BASE_UPDATE, 1, 0));
+ radeon_emit(cs, i);
+ radeon_emit(cs, va >> 8);
+
+ r600_emit_reloc(rctx, &rctx->gfx, r600_resource(t[i]->b.buffer),
+ RADEON_USAGE_WRITE, RADEON_PRIO_SHADER_RW_BUFFER);
+ }
+ /* Append only when a previous r600_emit_streamout_end has stored a filled size for this target. */
+ if (rctx->streamout.append_bitmask & (1 << i) && t[i]->buf_filled_size_valid) {
+ uint64_t va = t[i]->buf_filled_size->gpu_address +
+ t[i]->buf_filled_size_offset;
+ /* NOTE: this va shadows the buffer address declared earlier in the loop body. */
+ /* Append. */
+ radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
+ radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) |
+ STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_MEM)); /* control */
+ radeon_emit(cs, 0); /* unused */
+ radeon_emit(cs, 0); /* unused */
+ radeon_emit(cs, va); /* src address lo */
+ radeon_emit(cs, va >> 32); /* src address hi */
+
+ r600_emit_reloc(rctx, &rctx->gfx, t[i]->buf_filled_size,
+ RADEON_USAGE_READ, RADEON_PRIO_SO_FILLED_SIZE);
+ } else {
+ /* Start from the beginning. */
+ radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
+ radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) |
+ STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_PACKET)); /* control */
+ radeon_emit(cs, 0); /* unused */
+ radeon_emit(cs, 0); /* unused */
+ radeon_emit(cs, t[i]->b.buffer_offset >> 2); /* buffer offset in DW */
+ radeon_emit(cs, 0); /* unused */
+ }
+ }
+ /* Families strictly between CHIP_R600 and CHIP_RV770 also get SURFACE_BASE_UPDATE for the buffers touched above. */
+ if (rctx->family > CHIP_R600 && rctx->family < CHIP_RV770) {
+ radeon_emit(cs, PKT3(PKT3_SURFACE_BASE_UPDATE, 0, 0));
+ radeon_emit(cs, update_flags);
+ }
+ rctx->streamout.begin_emitted = true;
+}
+/* Emit the end-streamout packets: flush, store each bound target's filled size, and zero its hardware buffer size. */
+void r600_emit_streamout_end(struct r600_common_context *rctx)
+{
+ struct radeon_winsys_cs *cs = rctx->gfx.cs;
+ struct r600_so_target **t = rctx->streamout.targets;
+ unsigned i;
+ uint64_t va;
+
+ r600_flush_vgt_streamout(rctx);
+
+ for (i = 0; i < rctx->streamout.num_targets; i++) {
+ if (!t[i])
+ continue;
+
+ va = t[i]->buf_filled_size->gpu_address + t[i]->buf_filled_size_offset;
+ radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
+ radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) |
+ STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE) |
+ STRMOUT_STORE_BUFFER_FILLED_SIZE); /* control */
+ radeon_emit(cs, va); /* dst address lo */
+ radeon_emit(cs, va >> 32); /* dst address hi */
+ radeon_emit(cs, 0); /* unused */
+ radeon_emit(cs, 0); /* unused */
+
+ r600_emit_reloc(rctx, &rctx->gfx, t[i]->buf_filled_size,
+ RADEON_USAGE_WRITE, RADEON_PRIO_SO_FILLED_SIZE);
+
+ /* Zero the buffer size. The counters (primitives generated,
+ * primitives emitted) may be enabled even if there is no
+ * buffer bound. This ensures that the primitives-emitted query
+ * won't increment. */
+ radeon_set_context_reg(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 0);
+ /* The stored filled size may now be used by a later append in r600_emit_streamout_begin. */
+ t[i]->buf_filled_size_valid = true;
+ }
+
+ rctx->streamout.begin_emitted = false;
+ rctx->flags |= R600_CONTEXT_STREAMOUT_FLUSH;
+}
+
+/* STREAMOUT CONFIG DERIVED STATE
+ *
+ * Streamout must be enabled for the PRIMITIVES_GENERATED query to work.
+ * The buffer mask is an independent state, so no writes occur if there
+ * are no buffers bound.
+ */
+/* Emit the streamout buffer-enable mask register and the streamout enable/config register. */
+static void r600_emit_streamout_enable(struct r600_common_context *rctx,
+ struct r600_atom *atom)
+{
+ unsigned strmout_config_reg = R_028AB0_VGT_STRMOUT_EN;
+ unsigned strmout_config_val = S_028B94_STREAMOUT_0_EN(r600_get_strmout_en(rctx));
+ unsigned strmout_buffer_reg = R_028B20_VGT_STRMOUT_BUFFER_EN;
+ unsigned strmout_buffer_val = rctx->streamout.hw_enabled_mask &
+ rctx->streamout.enabled_stream_buffers_mask;
+ /* Evergreen and later use different registers and additionally set the STREAMOUT_1..3 enable fields. */
+ if (rctx->chip_class >= EVERGREEN) {
+ strmout_buffer_reg = R_028B98_VGT_STRMOUT_BUFFER_CONFIG;
+
+ strmout_config_reg = R_028B94_VGT_STRMOUT_CONFIG;
+ strmout_config_val |=
+ S_028B94_STREAMOUT_1_EN(r600_get_strmout_en(rctx)) |
+ S_028B94_STREAMOUT_2_EN(r600_get_strmout_en(rctx)) |
+ S_028B94_STREAMOUT_3_EN(r600_get_strmout_en(rctx));
+ }
+ radeon_set_context_reg(rctx->gfx.cs, strmout_buffer_reg, strmout_buffer_val);
+ radeon_set_context_reg(rctx->gfx.cs, strmout_config_reg, strmout_config_val);
+}
+/* Update the streamout enable flag and the derived hardware buffer mask; dirty the enable atom if either effective value changed. */
+static void r600_set_streamout_enable(struct r600_common_context *rctx, bool enable)
+{
+ bool old_strmout_en = r600_get_strmout_en(rctx);
+ unsigned old_hw_enabled_mask = rctx->streamout.hw_enabled_mask;
+
+ rctx->streamout.streamout_enabled = enable;
+ /* Replicate the buffer mask into four 4-bit groups (presumably one group per stream — confirm). */
+ rctx->streamout.hw_enabled_mask = rctx->streamout.enabled_mask |
+ (rctx->streamout.enabled_mask << 4) |
+ (rctx->streamout.enabled_mask << 8) |
+ (rctx->streamout.enabled_mask << 12);
+
+ if ((old_strmout_en != r600_get_strmout_en(rctx)) ||
+ (old_hw_enabled_mask != rctx->streamout.hw_enabled_mask)) {
+ rctx->set_atom_dirty(rctx, &rctx->streamout.enable_atom, true);
+ }
+}
+/* Adjust the count of active PRIMITIVES_GENERATED queries by diff and dirty the enable atom if the streamout-enable state changed. */
+void r600_update_prims_generated_query_state(struct r600_common_context *rctx,
+ unsigned type, int diff)
+{
+ if (type == PIPE_QUERY_PRIMITIVES_GENERATED) {
+ bool old_strmout_en = r600_get_strmout_en(rctx);
+
+ rctx->streamout.num_prims_gen_queries += diff;
+ assert(rctx->streamout.num_prims_gen_queries >= 0);
+
+ rctx->streamout.prims_gen_query_enabled =
+ rctx->streamout.num_prims_gen_queries != 0;
+
+ if (old_strmout_en != r600_get_strmout_en(rctx)) {
+ rctx->set_atom_dirty(rctx, &rctx->streamout.enable_atom, true);
+ }
+ }
+}
+/* Install the stream-output target callbacks and the emit functions of the streamout begin/enable atoms. */
+void r600_streamout_init(struct r600_common_context *rctx)
+{
+ rctx->b.create_stream_output_target = r600_create_so_target;
+ rctx->b.stream_output_target_destroy = r600_so_target_destroy;
+ rctx->streamout.begin_atom.emit = r600_emit_streamout_begin;
+ rctx->streamout.enable_atom.emit = r600_emit_streamout_enable;
+ rctx->streamout.enable_atom.num_dw = 6; /* budget for the two context-register writes in r600_emit_streamout_enable */
+}
diff -Nru mesa-17.2.4/src/gallium/drivers/r600/r600_test_dma.c mesa-17.3.3/src/gallium/drivers/r600/r600_test_dma.c
--- mesa-17.2.4/src/gallium/drivers/r600/r600_test_dma.c 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r600/r600_test_dma.c 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,398 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+/* This file implements randomized SDMA texture blit tests. */
+
+#include "r600_pipe_common.h"
+#include "util/u_surface.h"
+#include "util/rand_xor.h"
+
+static uint64_t seed_xorshift128plus[2];
+
+#define RAND_NUM_SIZE 8
+
+/* The GPU blits are emulated on the CPU using these CPU textures. */
+
+struct cpu_texture {
+ uint8_t *ptr; /* malloc'ed pixel storage (see alloc_cpu_texture) */
+ uint64_t size; /* total bytes: layer_stride * array_size */
+ uint64_t layer_stride; /* bytes per layer: stride * height0 */
+ unsigned stride; /* bytes per row: width0 * bpp aligned to RAND_NUM_SIZE */
+};
+/* Compute the row/layer strides for templ at bpp bytes per pixel and malloc the CPU-side storage. */
+static void alloc_cpu_texture(struct cpu_texture *tex,
+ struct pipe_resource *templ, int bpp)
+{
+ tex->stride = align(templ->width0 * bpp, RAND_NUM_SIZE);
+ tex->layer_stride = (uint64_t)tex->stride * templ->height0;
+ tex->size = tex->layer_stride * templ->array_size;
+ tex->ptr = malloc(tex->size);
+ assert(tex->ptr); /* NOTE(review): allocation failure is only caught in assert-enabled builds */
+}
+/* Fill level 0 of tex and its CPU mirror with the same pseudo-random 64-bit words. */
+static void set_random_pixels(struct pipe_context *ctx,
+ struct pipe_resource *tex,
+ struct cpu_texture *cpu)
+{
+ struct pipe_transfer *t;
+ uint8_t *map;
+ int x,y,z;
+
+ map = pipe_transfer_map_3d(ctx, tex, 0, PIPE_TRANSFER_WRITE,
+ 0, 0, 0, tex->width0, tex->height0,
+ tex->array_size, &t);
+ assert(map);
+
+ for (z = 0; z < tex->array_size; z++) {
+ for (y = 0; y < tex->height0; y++) {
+ uint64_t *ptr = (uint64_t*)
+ (map + t->layer_stride*z + t->stride*y);
+ uint64_t *ptr_cpu = (uint64_t*)
+ (cpu->ptr + cpu->layer_stride*z + cpu->stride*y);
+ unsigned size = cpu->stride / RAND_NUM_SIZE;
+ /* Each row gets cpu->stride / RAND_NUM_SIZE words in both copies; assumes a mapped row holds at least cpu->stride bytes — TODO confirm. */
+ assert(t->stride % RAND_NUM_SIZE == 0);
+ assert(cpu->stride % RAND_NUM_SIZE == 0);
+
+ for (x = 0; x < size; x++) {
+ *ptr++ = *ptr_cpu++ =
+ rand_xorshift128plus(seed_xorshift128plus);
+ }
+ }
+ }
+
+ pipe_transfer_unmap(ctx, t);
+}
+/* Map level 0 of tex for reading and compare it row by row with the CPU mirror; returns true when every row matches. */
+static bool compare_textures(struct pipe_context *ctx,
+ struct pipe_resource *tex,
+ struct cpu_texture *cpu, int bpp)
+{
+ struct pipe_transfer *t;
+ uint8_t *map;
+ int y,z;
+ bool pass = true;
+
+ map = pipe_transfer_map_3d(ctx, tex, 0, PIPE_TRANSFER_READ,
+ 0, 0, 0, tex->width0, tex->height0,
+ tex->array_size, &t);
+ assert(map);
+
+ for (z = 0; z < tex->array_size; z++) {
+ for (y = 0; y < tex->height0; y++) {
+ uint8_t *ptr = map + t->layer_stride*z + t->stride*y;
+ uint8_t *cpu_ptr = cpu->ptr +
+ cpu->layer_stride*z + cpu->stride*y;
+ /* Only the width0 * bpp payload bytes of a row are compared; any row padding is ignored. */
+ if (memcmp(ptr, cpu_ptr, tex->width0 * bpp)) {
+ pass = false;
+ goto done;
+ }
+ }
+ }
+done:
+ pipe_transfer_unmap(ctx, t);
+ return pass;
+}
+/* Map a bytes-per-pixel value (1, 2, 4, 8 or 16) to a UINT format of that size; other values assert and yield PIPE_FORMAT_NONE. */
+static enum pipe_format get_format_from_bpp(int bpp)
+{
+ switch (bpp) {
+ case 1:
+ return PIPE_FORMAT_R8_UINT;
+ case 2:
+ return PIPE_FORMAT_R16_UINT;
+ case 4:
+ return PIPE_FORMAT_R32_UINT;
+ case 8:
+ return PIPE_FORMAT_R32G32_UINT;
+ case 16:
+ return PIPE_FORMAT_R32G32B32A32_UINT;
+ default:
+ assert(0);
+ return PIPE_FORMAT_NONE;
+ }
+}
+/* Return a printable name for the tiling mode of level 0 of surf (used in the per-test log line). */
+static const char *array_mode_to_string(struct r600_common_screen *rscreen,
+ struct radeon_surf *surf)
+{
+ if (rscreen->chip_class >= GFX9) {
+ /* TODO */
+ return " UNKNOWN";
+ } else {
+ switch (surf->u.legacy.level[0].mode) {
+ case RADEON_SURF_MODE_LINEAR_ALIGNED:
+ return "LINEAR_ALIGNED";
+ case RADEON_SURF_MODE_1D:
+ return "1D_TILED_THIN1";
+ case RADEON_SURF_MODE_2D:
+ return "2D_TILED_THIN1";
+ default:
+ assert(0);
+ return " UNKNOWN";
+ }
+ }
+}
+/* Randomly pick the upper bound for a texture side: max_tex_side, 128 or 2048 (chosen via rand() % 4). */
+static unsigned generate_max_tex_side(unsigned max_tex_side)
+{
+ switch (rand() % 4) {
+ case 0:
+ /* Try to hit large sizes in 1/4 of the cases. */
+ return max_tex_side;
+ case 1:
+ /* Try to hit 1D tiling in 1/4 of the cases. */
+ return 128;
+ default:
+ /* Try to hit common sizes in 2/4 of the cases. */
+ return 2048;
+ }
+}
+/* Run randomized DMA texture-copy tests against a CPU reference, printing pass/fail per iteration; ends with exit(0) and does not return. */
+void r600_test_dma(struct r600_common_screen *rscreen)
+{
+ struct pipe_screen *screen = &rscreen->b;
+ struct pipe_context *ctx = screen->context_create(screen, NULL, 0);
+ struct r600_common_context *rctx = (struct r600_common_context*)ctx;
+ uint64_t max_alloc_size;
+ unsigned i, iterations, num_partial_copies, max_levels, max_tex_side;
+ unsigned num_pass = 0, num_fail = 0;
+
+ max_levels = screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS);
+ max_tex_side = 1 << (max_levels - 1);
+
+ /* Max 128 MB allowed for both textures. */
+ max_alloc_size = 128 * 1024 * 1024;
+
+ /* the seed for random test parameters */
+ srand(0x9b47d95b);
+ /* the seed for random pixel data */
+ s_rand_xorshift128plus(seed_xorshift128plus, false);
+
+ iterations = 1000000000; /* just kill it when you are bored */
+ num_partial_copies = 30;
+
+ /* These parameters are randomly generated per test:
+ * - whether to do one whole-surface copy or N partial copies per test
+ * - which tiling modes to use (LINEAR_ALIGNED, 1D, 2D)
+ * - which texture dimensions to use
+ * - whether to use VRAM (all tiling modes) and GTT (staging, linear
+ * only) allocations
+ * - random initial pixels in src
+ * - generate random subrectangle copies for partial blits
+ */
+ for (i = 0; i < iterations; i++) {
+ struct pipe_resource tsrc = {}, tdst = {}, *src, *dst;
+ struct r600_texture *rdst;
+ struct r600_texture *rsrc;
+ struct cpu_texture src_cpu, dst_cpu;
+ unsigned bpp, max_width, max_height, max_depth, j, num;
+ unsigned gfx_blits = 0, dma_blits = 0, max_tex_side_gen;
+ unsigned max_tex_layers;
+ bool pass;
+ bool do_partial_copies = rand() & 1;
+
+ /* generate a random test case */
+ tsrc.target = tdst.target = PIPE_TEXTURE_2D_ARRAY;
+ tsrc.depth0 = tdst.depth0 = 1;
+
+ bpp = 1 << (rand() % 5);
+ tsrc.format = tdst.format = get_format_from_bpp(bpp);
+
+ max_tex_side_gen = generate_max_tex_side(max_tex_side);
+ max_tex_layers = rand() % 4 ? 1 : 5;
+
+ tsrc.width0 = (rand() % max_tex_side_gen) + 1;
+ tsrc.height0 = (rand() % max_tex_side_gen) + 1;
+ tsrc.array_size = (rand() % max_tex_layers) + 1;
+
+ /* Have a 1/4 chance of getting power-of-two dimensions. */
+ if (rand() % 4 == 0) {
+ tsrc.width0 = util_next_power_of_two(tsrc.width0);
+ tsrc.height0 = util_next_power_of_two(tsrc.height0);
+ }
+
+ if (!do_partial_copies) {
+ /* whole-surface copies only, same dimensions */
+ tdst = tsrc;
+ } else {
+ max_tex_side_gen = generate_max_tex_side(max_tex_side);
+ max_tex_layers = rand() % 4 ? 1 : 5;
+
+ /* many partial copies, dimensions can be different */
+ tdst.width0 = (rand() % max_tex_side_gen) + 1;
+ tdst.height0 = (rand() % max_tex_side_gen) + 1;
+ tdst.array_size = (rand() % max_tex_layers) + 1;
+
+ /* Have a 1/4 chance of getting power-of-two dimensions. */
+ if (rand() % 4 == 0) {
+ tdst.width0 = util_next_power_of_two(tdst.width0);
+ tdst.height0 = util_next_power_of_two(tdst.height0);
+ }
+ }
+
+ /* check texture sizes */
+ if ((uint64_t)tsrc.width0 * tsrc.height0 * tsrc.array_size * bpp +
+ (uint64_t)tdst.width0 * tdst.height0 * tdst.array_size * bpp >
+ max_alloc_size) {
+ /* too large, try again (i-- cancels the loop increment; the unsigned wrap at i == 0 is well-defined) */
+ i--;
+ continue;
+ }
+
+ /* VRAM + the tiling mode depends on dimensions (3/4 of cases),
+ * or GTT + linear only (1/4 of cases)
+ */
+ tsrc.usage = rand() % 4 ? PIPE_USAGE_DEFAULT : PIPE_USAGE_STAGING;
+ tdst.usage = rand() % 4 ? PIPE_USAGE_DEFAULT : PIPE_USAGE_STAGING;
+
+ /* Allocate textures (both the GPU and CPU copies).
+ * The CPU will emulate what the GPU should be doing.
+ */
+ src = screen->resource_create(screen, &tsrc);
+ dst = screen->resource_create(screen, &tdst);
+ assert(src);
+ assert(dst);
+ rdst = (struct r600_texture*)dst;
+ rsrc = (struct r600_texture*)src;
+ alloc_cpu_texture(&src_cpu, &tsrc, bpp);
+ alloc_cpu_texture(&dst_cpu, &tdst, bpp);
+
+ printf("%4u: dst = (%5u x %5u x %u, %s), "
+ " src = (%5u x %5u x %u, %s), bpp = %2u, ",
+ i, tdst.width0, tdst.height0, tdst.array_size,
+ array_mode_to_string(rscreen, &rdst->surface),
+ tsrc.width0, tsrc.height0, tsrc.array_size,
+ array_mode_to_string(rscreen, &rsrc->surface), bpp);
+ fflush(stdout);
+
+ /* set src pixels */
+ set_random_pixels(ctx, src, &src_cpu);
+
+ /* clear dst pixels */
+ rctx->clear_buffer(ctx, dst, 0, rdst->surface.surf_size, 0, true);
+ memset(dst_cpu.ptr, 0, dst_cpu.layer_stride * tdst.array_size);
+
+ /* preparation */
+ max_width = MIN2(tsrc.width0, tdst.width0);
+ max_height = MIN2(tsrc.height0, tdst.height0);
+ max_depth = MIN2(tsrc.array_size, tdst.array_size);
+
+ num = do_partial_copies ? num_partial_copies : 1;
+ for (j = 0; j < num; j++) {
+ int width, height, depth;
+ int srcx, srcy, srcz, dstx, dsty, dstz;
+ struct pipe_box box;
+ unsigned old_num_draw_calls = rctx->num_draw_calls;
+ unsigned old_num_dma_calls = rctx->num_dma_calls;
+
+ if (!do_partial_copies) {
+ /* copy whole src to dst */
+ width = max_width;
+ height = max_height;
+ depth = max_depth;
+
+ srcx = srcy = srcz = dstx = dsty = dstz = 0;
+ } else {
+ /* random sub-rectangle copies from src to dst */
+ depth = (rand() % max_depth) + 1;
+ srcz = rand() % (tsrc.array_size - depth + 1);
+ dstz = rand() % (tdst.array_size - depth + 1);
+
+ /* special code path to hit the tiled partial copies */
+ if (!rsrc->surface.is_linear &&
+ !rdst->surface.is_linear &&
+ rand() & 1) {
+ if (max_width < 8 || max_height < 8)
+ continue; /* skips this partial copy entirely */
+ width = ((rand() % (max_width / 8)) + 1) * 8;
+ height = ((rand() % (max_height / 8)) + 1) * 8;
+
+ srcx = rand() % (tsrc.width0 - width + 1) & ~0x7;
+ srcy = rand() % (tsrc.height0 - height + 1) & ~0x7;
+
+ dstx = rand() % (tdst.width0 - width + 1) & ~0x7;
+ dsty = rand() % (tdst.height0 - height + 1) & ~0x7;
+ } else {
+ /* just make sure that it doesn't divide by zero */
+ assert(max_width > 0 && max_height > 0);
+
+ width = (rand() % max_width) + 1;
+ height = (rand() % max_height) + 1;
+
+ srcx = rand() % (tsrc.width0 - width + 1);
+ srcy = rand() % (tsrc.height0 - height + 1);
+
+ dstx = rand() % (tdst.width0 - width + 1);
+ dsty = rand() % (tdst.height0 - height + 1);
+ }
+
+ /* special code path to hit out-of-bounds reads in L2T */
+ if (rsrc->surface.is_linear &&
+ !rdst->surface.is_linear &&
+ rand() % 4 == 0) {
+ srcx = 0;
+ srcy = 0;
+ srcz = 0;
+ }
+ }
+
+ /* GPU copy */
+ u_box_3d(srcx, srcy, srcz, width, height, depth, &box);
+ rctx->dma_copy(ctx, dst, 0, dstx, dsty, dstz, src, 0, &box);
+
+ /* See which engine was used. */
+ gfx_blits += rctx->num_draw_calls > old_num_draw_calls;
+ dma_blits += rctx->num_dma_calls > old_num_dma_calls;
+
+ /* CPU copy */
+ util_copy_box(dst_cpu.ptr, tdst.format, dst_cpu.stride,
+ dst_cpu.layer_stride,
+ dstx, dsty, dstz, width, height, depth,
+ src_cpu.ptr, src_cpu.stride,
+ src_cpu.layer_stride,
+ srcx, srcy, srcz);
+ }
+
+ pass = compare_textures(ctx, dst, &dst_cpu, bpp);
+ if (pass)
+ num_pass++;
+ else
+ num_fail++;
+
+ printf("BLITs: GFX = %2u, DMA = %2u, %s [%u/%u]\n",
+ gfx_blits, dma_blits, pass ? "pass" : "fail",
+ num_pass, num_pass+num_fail);
+
+ /* cleanup */
+ pipe_resource_reference(&src, NULL);
+ pipe_resource_reference(&dst, NULL);
+ free(src_cpu.ptr);
+ free(dst_cpu.ptr);
+ }
+
+ ctx->destroy(ctx);
+ exit(0);
+}
diff -Nru mesa-17.2.4/src/gallium/drivers/r600/r600_texture.c mesa-17.3.3/src/gallium/drivers/r600/r600_texture.c
--- mesa-17.2.4/src/gallium/drivers/r600/r600_texture.c 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r600/r600_texture.c 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,1953 @@
+/*
+ * Copyright 2010 Jerome Glisse
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Jerome Glisse
+ * Corbin Simpson
+ */
+#include "r600_pipe_common.h"
+#include "r600_cs.h"
+#include "r600_query.h"
+#include "util/u_format.h"
+#include "util/u_log.h"
+#include "util/u_memory.h"
+#include "util/u_pack_color.h"
+#include "util/u_surface.h"
+#include "os/os_time.h"
+#include <errno.h>
+#include <inttypes.h>
+
+static void r600_texture_discard_cmask(struct r600_common_screen *rscreen,
+ struct r600_texture *rtex);
+static enum radeon_surf_mode
+r600_choose_tiling(struct r600_common_screen *rscreen,
+ const struct pipe_resource *templ);
+
+/* Check whether a copy may use the DMA ring and prepare both textures for it. Returns false when there is no DMA CS, bpe differs, either side is MSAA or depth, or dst has dirty CMASK that is only partially overwritten; returns true otherwise. */
+bool r600_prepare_for_dma_blit(struct r600_common_context *rctx,
+ struct r600_texture *rdst,
+ unsigned dst_level, unsigned dstx,
+ unsigned dsty, unsigned dstz,
+ struct r600_texture *rsrc,
+ unsigned src_level,
+ const struct pipe_box *src_box)
+{
+ if (!rctx->dma.cs)
+ return false;
+
+ if (rdst->surface.bpe != rsrc->surface.bpe)
+ return false;
+
+ /* MSAA: Blits don't exist in the real world, so reject the DMA path. */
+ if (rsrc->resource.b.b.nr_samples > 1 ||
+ rdst->resource.b.b.nr_samples > 1)
+ return false;
+
+ /* Depth-stencil surfaces:
+ * When dst is linear, the DB->CB copy preserves HTILE.
+ * When dst is tiled, the 3D path must be used to update HTILE.
+ */
+ if (rsrc->is_depth || rdst->is_depth)
+ return false;
+
+ /* CMASK as:
+ * src: Both texture and SDMA paths need decompression. Use SDMA.
+ * dst: If overwriting the whole texture, discard CMASK and use
+ * SDMA. Otherwise, use the 3D path.
+ */
+ if (rdst->cmask.size && rdst->dirty_level_mask & (1 << dst_level)) {
+ /* The CMASK clear is only enabled for the first level. */
+ assert(dst_level == 0);
+ if (!util_texrange_covers_whole_level(&rdst->resource.b.b, dst_level,
+ dstx, dsty, dstz, src_box->width,
+ src_box->height, src_box->depth))
+ return false;
+
+ r600_texture_discard_cmask(rctx->screen, rdst);
+ }
+
+ /* All requirements are met. Prepare textures for SDMA: flush a source whose level is still dirty. */
+ if (rsrc->cmask.size && rsrc->dirty_level_mask & (1 << src_level))
+ rctx->b.flush_resource(&rctx->b, &rsrc->resource.b.b);
+
+ assert(!(rsrc->dirty_level_mask & (1 << src_level)));
+ assert(!(rdst->dirty_level_mask & (1 << dst_level)));
+
+ return true;
+}
+
+/* Same as resource_copy_region, except that both upsampling and downsampling are allowed: builds a nearest-filtered pipe_blit_info with a dst box the size of src_box and calls pipe->blit(), unless the src and dst format masks have no bit in common. */
+static void r600_copy_region_with_blit(struct pipe_context *pipe,
+ struct pipe_resource *dst,
+ unsigned dst_level,
+ unsigned dstx, unsigned dsty, unsigned dstz,
+ struct pipe_resource *src,
+ unsigned src_level,
+ const struct pipe_box *src_box)
+{
+ struct pipe_blit_info blit;
+
+ memset(&blit, 0, sizeof(blit));
+ blit.src.resource = src;
+ blit.src.format = src->format;
+ blit.src.level = src_level;
+ blit.src.box = *src_box;
+ blit.dst.resource = dst;
+ blit.dst.format = dst->format;
+ blit.dst.level = dst_level;
+ blit.dst.box.x = dstx;
+ blit.dst.box.y = dsty;
+ blit.dst.box.z = dstz;
+ blit.dst.box.width = src_box->width;
+ blit.dst.box.height = src_box->height;
+ blit.dst.box.depth = src_box->depth;
+ blit.mask = util_format_get_mask(src->format) &
+ util_format_get_mask(dst->format);
+ blit.filter = PIPE_TEX_FILTER_NEAREST;
+
+ if (blit.mask) {
+ pipe->blit(pipe, &blit);
+ }
+}
+
+/* Copy transfer->box of the full GPU texture into the transfer's staging texture at level 0, origin (0,0,0). MSAA sources go through r600_copy_region_with_blit(); everything else uses rctx->dma_copy(). */
+static void r600_copy_to_staging_texture(struct pipe_context *ctx, struct r600_transfer *rtransfer)
+{
+ struct r600_common_context *rctx = (struct r600_common_context*)ctx;
+ struct pipe_transfer *transfer = (struct pipe_transfer*)rtransfer;
+ struct pipe_resource *dst = &rtransfer->staging->b.b;
+ struct pipe_resource *src = transfer->resource;
+
+ if (src->nr_samples > 1) {
+ r600_copy_region_with_blit(ctx, dst, 0, 0, 0, 0,
+ src, transfer->level, &transfer->box);
+ return;
+ }
+
+ rctx->dma_copy(ctx, dst, 0, 0, 0, 0, src, transfer->level,
+ &transfer->box);
+}
+
+/* Copy the transfer's staging texture (level 0, box anchored at the origin) back into transfer->box of the full GPU texture. MSAA destinations go through r600_copy_region_with_blit(); everything else uses rctx->dma_copy(). */
+static void r600_copy_from_staging_texture(struct pipe_context *ctx, struct r600_transfer *rtransfer)
+{
+ struct r600_common_context *rctx = (struct r600_common_context*)ctx;
+ struct pipe_transfer *transfer = (struct pipe_transfer*)rtransfer;
+ struct pipe_resource *dst = transfer->resource;
+ struct pipe_resource *src = &rtransfer->staging->b.b;
+ struct pipe_box sbox;
+
+ u_box_3d(0, 0, 0, transfer->box.width, transfer->box.height, transfer->box.depth, &sbox);
+
+ if (dst->nr_samples > 1) {
+ r600_copy_region_with_blit(ctx, dst, transfer->level,
+ transfer->box.x, transfer->box.y, transfer->box.z,
+ src, 0, &sbox);
+ return;
+ }
+
+ rctx->dma_copy(ctx, dst, transfer->level,
+ transfer->box.x, transfer->box.y, transfer->box.z,
+ src, 0, &sbox);
+}
+/* Return the offset of `box`'s origin inside mip `level` (or the level's own offset when box is NULL). Also writes nblk_x * bpe to *stride and the level's slice_size to *layer_stride. rscreen is not used here. */
+static unsigned r600_texture_get_offset(struct r600_common_screen *rscreen,
+ struct r600_texture *rtex, unsigned level,
+ const struct pipe_box *box,
+ unsigned *stride,
+ unsigned *layer_stride)
+{
+ *stride = rtex->surface.u.legacy.level[level].nblk_x *
+ rtex->surface.bpe;
+ *layer_stride = rtex->surface.u.legacy.level[level].slice_size;
+
+ if (!box)
+ return rtex->surface.u.legacy.level[level].offset;
+
+ /* Each texture is an array of mipmap levels. Each level is
+ * an array of slices. x/y are converted from pixels to blocks (blk_w/blk_h) before scaling by bpe. */
+ return rtex->surface.u.legacy.level[level].offset +
+ box->z * rtex->surface.u.legacy.level[level].slice_size +
+ (box->y / rtex->surface.blk_h *
+ rtex->surface.u.legacy.level[level].nblk_x +
+ box->x / rtex->surface.blk_w) * rtex->surface.bpe;
+}
+/* Compute the radeon_surf layout for `ptex` through ws->surface_init(): derives bpe and RADEON_SURF_* flags from the format, bind flags and the is_* arguments, then applies the optional level-0 pitch override and adds `offset` to every level. Returns 0 on success or the surface_init() result. */
+static int r600_init_surface(struct r600_common_screen *rscreen,
+ struct radeon_surf *surface,
+ const struct pipe_resource *ptex,
+ enum radeon_surf_mode array_mode,
+ unsigned pitch_in_bytes_override,
+ unsigned offset,
+ bool is_imported,
+ bool is_scanout,
+ bool is_flushed_depth)
+{
+ const struct util_format_description *desc =
+ util_format_description(ptex->format);
+ bool is_depth, is_stencil;
+ int r;
+ unsigned i, bpe, flags = 0;
+
+ is_depth = util_format_has_depth(desc);
+ is_stencil = util_format_has_stencil(desc);
+
+ if (rscreen->chip_class >= EVERGREEN && !is_flushed_depth &&
+ ptex->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
+ bpe = 4; /* stencil is allocated separately on evergreen */
+ } else {
+ bpe = util_format_get_blocksize(ptex->format);
+ assert(util_is_power_of_two(bpe));
+ }
+
+ if (!is_flushed_depth && is_depth) {
+ flags |= RADEON_SURF_ZBUFFER;
+
+ if (is_stencil)
+ flags |= RADEON_SURF_SBUFFER;
+ }
+
+ if (ptex->bind & PIPE_BIND_SCANOUT || is_scanout) {
+ /* This should catch bugs in gallium users setting incorrect flags. */
+ assert(ptex->nr_samples <= 1 &&
+ ptex->array_size == 1 &&
+ ptex->depth0 == 1 &&
+ ptex->last_level == 0 &&
+ !(flags & RADEON_SURF_Z_OR_SBUFFER));
+
+ flags |= RADEON_SURF_SCANOUT;
+ }
+
+ if (ptex->bind & PIPE_BIND_SHARED)
+ flags |= RADEON_SURF_SHAREABLE;
+ if (is_imported)
+ flags |= RADEON_SURF_IMPORTED | RADEON_SURF_SHAREABLE;
+ if (!(ptex->flags & R600_RESOURCE_FLAG_FORCE_TILING))
+ flags |= RADEON_SURF_OPTIMIZE_FOR_SPACE;
+
+ r = rscreen->ws->surface_init(rscreen->ws, ptex, flags, bpe,
+ array_mode, surface);
+ if (r) {
+ return r;
+ }
+
+ if (pitch_in_bytes_override &&
+ pitch_in_bytes_override != surface->u.legacy.level[0].nblk_x * bpe) {
+ /* An old DDX on Evergreen overestimates the alignment for 1D tiling;
+ * those surfaces have only one level, so only level 0 is patched.
+ */
+ surface->u.legacy.level[0].nblk_x = pitch_in_bytes_override / bpe;
+ surface->u.legacy.level[0].slice_size = pitch_in_bytes_override *
+ surface->u.legacy.level[0].nblk_y;
+ }
+
+ if (offset) {
+ for (i = 0; i < ARRAY_SIZE(surface->u.legacy.level); ++i)
+ surface->u.legacy.level[i].offset += offset;
+ }
+
+ return 0;
+}
+/* Zero *metadata and fill its legacy tiling fields (micro/macro tile layout, bank and pipe parameters, level-0 stride as nblk_x * bpe, scanout flag) from rtex->surface. rscreen is not used here. */
+static void r600_texture_init_metadata(struct r600_common_screen *rscreen,
+ struct r600_texture *rtex,
+ struct radeon_bo_metadata *metadata)
+{
+ struct radeon_surf *surface = &rtex->surface;
+
+ memset(metadata, 0, sizeof(*metadata));
+
+ metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D ?
+ RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
+ metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D ?
+ RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
+ metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config;
+ metadata->u.legacy.bankw = surface->u.legacy.bankw;
+ metadata->u.legacy.bankh = surface->u.legacy.bankh;
+ metadata->u.legacy.tile_split = surface->u.legacy.tile_split;
+ metadata->u.legacy.mtilea = surface->u.legacy.mtilea;
+ metadata->u.legacy.num_banks = surface->u.legacy.num_banks;
+ metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe;
+ metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
+}
+/* Copy the legacy tiling parameters from *metadata into *surf and report, through the out-pointers, the array mode (2D if macrotiled, 1D if only microtiled, otherwise linear-aligned) and the scanout flag. rscreen is not used here. */
+static void r600_surface_import_metadata(struct r600_common_screen *rscreen,
+ struct radeon_surf *surf,
+ struct radeon_bo_metadata *metadata,
+ enum radeon_surf_mode *array_mode,
+ bool *is_scanout)
+{
+ surf->u.legacy.pipe_config = metadata->u.legacy.pipe_config;
+ surf->u.legacy.bankw = metadata->u.legacy.bankw;
+ surf->u.legacy.bankh = metadata->u.legacy.bankh;
+ surf->u.legacy.tile_split = metadata->u.legacy.tile_split;
+ surf->u.legacy.mtilea = metadata->u.legacy.mtilea;
+ surf->u.legacy.num_banks = metadata->u.legacy.num_banks;
+
+ if (metadata->u.legacy.macrotile == RADEON_LAYOUT_TILED)
+ *array_mode = RADEON_SURF_MODE_2D;
+ else if (metadata->u.legacy.microtile == RADEON_LAYOUT_TILED)
+ *array_mode = RADEON_SURF_MODE_1D;
+ else
+ *array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
+
+ *is_scanout = metadata->u.legacy.scanout;
+}
+/* Call ctx->flush_resource() on rtex and then flush the context. When rctx is the screen's aux_context, both calls are made with aux_context_lock held. */
+static void r600_eliminate_fast_color_clear(struct r600_common_context *rctx,
+ struct r600_texture *rtex)
+{
+ struct r600_common_screen *rscreen = rctx->screen;
+ struct pipe_context *ctx = &rctx->b;
+
+ if (ctx == rscreen->aux_context)
+ mtx_lock(&rscreen->aux_context_lock);
+
+ ctx->flush_resource(ctx, &rtex->resource.b.b);
+ ctx->flush(ctx, NULL, 0);
+
+ if (ctx == rscreen->aux_context)
+ mtx_unlock(&rscreen->aux_context_lock);
+}
+/* Drop CMASK from a texture: zero the cmask info, point base_address_reg at the texture's own address, clear dirty_level_mask and the FAST_CLEAR bit, release a separately allocated cmask buffer, and bump the screen counters. No-op when cmask.size is 0. */
+static void r600_texture_discard_cmask(struct r600_common_screen *rscreen,
+ struct r600_texture *rtex)
+{
+ if (!rtex->cmask.size)
+ return;
+
+ assert(rtex->resource.b.b.nr_samples <= 1);
+
+ /* Disable CMASK. */
+ memset(&rtex->cmask, 0, sizeof(rtex->cmask));
+ rtex->cmask.base_address_reg = rtex->resource.gpu_address >> 8;
+ rtex->dirty_level_mask = 0;
+
+ rtex->cb_color_info &= ~EG_S_028C70_FAST_CLEAR(1);
+
+ if (rtex->cmask_buffer != &rtex->resource)
+ r600_resource_reference(&rtex->cmask_buffer, NULL);
+
+ /* Notify all contexts about the change. */
+ p_atomic_inc(&rscreen->dirty_tex_counter);
+ p_atomic_inc(&rscreen->compressed_colortex_counter);
+}
+/* Re-create rtex's storage with new_bind_flag added to its bind flags, copy every mip level over unless invalidate_storage is set, then move the new texture's buffer and layout fields into rtex. Returns without changing rtex on chips older than SI, for shared textures, when a linear layout is not possible, or when allocation fails. */
+static void r600_reallocate_texture_inplace(struct r600_common_context *rctx,
+ struct r600_texture *rtex,
+ unsigned new_bind_flag,
+ bool invalidate_storage)
+{
+ struct pipe_screen *screen = rctx->b.screen;
+ struct r600_texture *new_tex;
+ struct pipe_resource templ = rtex->resource.b.b;
+ unsigned i;
+
+ templ.bind |= new_bind_flag;
+
+ /* r600g doesn't react to dirty_tex_descriptor_counter */
+ if (rctx->chip_class < SI)
+ return;
+
+ if (rtex->resource.b.is_shared)
+ return;
+
+ if (new_bind_flag == PIPE_BIND_LINEAR) {
+ if (rtex->surface.is_linear)
+ return;
+
+ /* This fails with MSAA, depth, and compressed textures. */
+ if (r600_choose_tiling(rctx->screen, &templ) !=
+ RADEON_SURF_MODE_LINEAR_ALIGNED)
+ return;
+ }
+
+ new_tex = (struct r600_texture*)screen->resource_create(screen, &templ);
+ if (!new_tex)
+ return;
+
+ /* Copy the pixels to the new texture. */
+ if (!invalidate_storage) {
+ for (i = 0; i <= templ.last_level; i++) {
+ struct pipe_box box;
+
+ u_box_3d(0, 0, 0,
+ u_minify(templ.width0, i), u_minify(templ.height0, i),
+ util_max_layer(&templ, i) + 1, &box);
+
+ rctx->dma_copy(&rctx->b, &new_tex->resource.b.b, i, 0, 0, 0,
+ &rtex->resource.b.b, i, &box);
+ }
+ }
+
+ if (new_bind_flag == PIPE_BIND_LINEAR) {
+ r600_texture_discard_cmask(rctx->screen, rtex);
+ }
+
+ /* Replace the structure fields of rtex. */
+ rtex->resource.b.b.bind = templ.bind;
+ pb_reference(&rtex->resource.buf, new_tex->resource.buf);
+ rtex->resource.gpu_address = new_tex->resource.gpu_address;
+ rtex->resource.vram_usage = new_tex->resource.vram_usage;
+ rtex->resource.gart_usage = new_tex->resource.gart_usage;
+ rtex->resource.bo_size = new_tex->resource.bo_size;
+ rtex->resource.bo_alignment = new_tex->resource.bo_alignment;
+ rtex->resource.domains = new_tex->resource.domains;
+ rtex->resource.flags = new_tex->resource.flags;
+ rtex->size = new_tex->size;
+ rtex->db_render_format = new_tex->db_render_format;
+ rtex->db_compatible = new_tex->db_compatible;
+ rtex->can_sample_z = new_tex->can_sample_z;
+ rtex->can_sample_s = new_tex->can_sample_s;
+ rtex->surface = new_tex->surface;
+ rtex->fmask = new_tex->fmask;
+ rtex->cmask = new_tex->cmask;
+ rtex->cb_color_info = new_tex->cb_color_info;
+ rtex->last_msaa_resolve_target_micro_mode = new_tex->last_msaa_resolve_target_micro_mode;
+ rtex->htile_offset = new_tex->htile_offset;
+ rtex->depth_cleared = new_tex->depth_cleared;
+ rtex->stencil_cleared = new_tex->stencil_cleared;
+ rtex->non_disp_tiling = new_tex->non_disp_tiling;
+ rtex->framebuffers_bound = new_tex->framebuffers_bound;
+
+ if (new_bind_flag == PIPE_BIND_LINEAR) {
+ assert(!rtex->htile_offset);
+ assert(!rtex->cmask.size);
+ assert(!rtex->fmask.size);
+ assert(!rtex->is_depth);
+ }
+
+ r600_texture_reference(&new_tex, NULL);
+
+ p_atomic_inc(&rctx->screen->dirty_tex_counter);
+}
+/* Export `resource` (texture or buffer) as a winsys handle. Suballocated storage is first moved into its own allocation; for textures, CMASK is eliminated unless the caller promises explicit flushes, and BO metadata is set on first export. Returns false for MSAA/depth textures or when a replacement buffer cannot be created, otherwise the result of ws->buffer_get_handle(). */
+static boolean r600_texture_get_handle(struct pipe_screen* screen,
+ struct pipe_context *ctx,
+ struct pipe_resource *resource,
+ struct winsys_handle *whandle,
+ unsigned usage)
+{
+ struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
+ struct r600_common_context *rctx;
+ struct r600_resource *res = (struct r600_resource*)resource;
+ struct r600_texture *rtex = (struct r600_texture*)resource;
+ struct radeon_bo_metadata metadata;
+ bool update_metadata = false; /* never set to true anywhere in this function */
+ unsigned stride, offset, slice_size;
+
+ ctx = threaded_context_unwrap_sync(ctx);
+ rctx = (struct r600_common_context*)(ctx ? ctx : rscreen->aux_context);
+
+ if (resource->target != PIPE_BUFFER) {
+ /* This is not supported now, but it might be required for OpenCL
+ * interop in the future.
+ */
+ if (resource->nr_samples > 1 || rtex->is_depth)
+ return false;
+
+ /* Move a suballocated texture into a non-suballocated allocation. */
+ if (rscreen->ws->buffer_is_suballocated(res->buf) ||
+ rtex->surface.tile_swizzle) {
+ assert(!res->b.is_shared);
+ r600_reallocate_texture_inplace(rctx, rtex,
+ PIPE_BIND_SHARED, false);
+ rctx->b.flush(&rctx->b, NULL, 0);
+ assert(res->b.b.bind & PIPE_BIND_SHARED);
+ assert(res->flags & RADEON_FLAG_NO_SUBALLOC);
+ assert(rtex->surface.tile_swizzle == 0);
+ }
+
+ if (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) &&
+ rtex->cmask.size) {
+ /* Eliminate fast clear (CMASK) */
+ r600_eliminate_fast_color_clear(rctx, rtex);
+
+ /* Disable CMASK if flush_resource isn't going
+ * to be called.
+ */
+ if (rtex->cmask.size)
+ r600_texture_discard_cmask(rscreen, rtex);
+ }
+
+ /* Set metadata. */
+ if (!res->b.is_shared || update_metadata) {
+ r600_texture_init_metadata(rscreen, rtex, &metadata);
+ if (rscreen->query_opaque_metadata)
+ rscreen->query_opaque_metadata(rscreen, rtex,
+ &metadata);
+
+ rscreen->ws->buffer_set_metadata(res->buf, &metadata);
+ }
+
+ offset = rtex->surface.u.legacy.level[0].offset;
+ stride = rtex->surface.u.legacy.level[0].nblk_x *
+ rtex->surface.bpe;
+ slice_size = rtex->surface.u.legacy.level[0].slice_size;
+ } else {
+ /* Move a suballocated buffer into a non-suballocated allocation. */
+ if (rscreen->ws->buffer_is_suballocated(res->buf)) {
+ assert(!res->b.is_shared);
+
+ /* Allocate a new buffer with PIPE_BIND_SHARED. */
+ struct pipe_resource templ = res->b.b;
+ templ.bind |= PIPE_BIND_SHARED;
+
+ struct pipe_resource *newb =
+ screen->resource_create(screen, &templ);
+ if (!newb)
+ return false;
+
+ /* Copy the old buffer contents to the new one. */
+ struct pipe_box box;
+ u_box_1d(0, newb->width0, &box);
+ rctx->b.resource_copy_region(&rctx->b, newb, 0, 0, 0, 0,
+ &res->b.b, 0, &box);
+ /* Move the new buffer storage to the old pipe_resource. */
+ r600_replace_buffer_storage(&rctx->b, &res->b.b, newb);
+ pipe_resource_reference(&newb, NULL);
+
+ assert(res->b.b.bind & PIPE_BIND_SHARED);
+ assert(res->flags & RADEON_FLAG_NO_SUBALLOC);
+ }
+
+ /* Buffers have no layout to describe: export zero offset, stride and slice size. */
+ offset = 0;
+ stride = 0;
+ slice_size = 0;
+ }
+
+ if (res->b.is_shared) {
+ /* USAGE_EXPLICIT_FLUSH must be cleared if at least one user
+ * doesn't set it.
+ */
+ res->external_usage |= usage & ~PIPE_HANDLE_USAGE_EXPLICIT_FLUSH;
+ if (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH))
+ res->external_usage &= ~PIPE_HANDLE_USAGE_EXPLICIT_FLUSH;
+ } else {
+ res->b.is_shared = true;
+ res->external_usage = usage;
+ }
+
+ return rscreen->ws->buffer_get_handle(res->buf, stride, offset,
+ slice_size, whandle);
+}
+/* Release the flushed-depth texture reference, a separately allocated CMASK buffer and the backing buffer, then free the r600_texture itself. `screen` is not used here. */
+static void r600_texture_destroy(struct pipe_screen *screen,
+ struct pipe_resource *ptex)
+{
+ struct r600_texture *rtex = (struct r600_texture*)ptex;
+ struct r600_resource *resource = &rtex->resource;
+
+ r600_texture_reference(&rtex->flushed_depth_texture, NULL);
+
+ if (rtex->cmask_buffer != &rtex->resource) {
+ r600_resource_reference(&rtex->cmask_buffer, NULL);
+ }
+ pb_reference(&resource->buf, NULL);
+ FREE(rtex);
+}
+
+static const struct u_resource_vtbl r600_texture_vtbl;
+
+/* Compute the FMASK layout for rtex at nr_samples into *out. The number of samples can be specified independently of the texture. *out is zeroed first and stays zeroed when nr_samples is not 2, 4 or 8 or when surface_init() fails. */
+void r600_texture_get_fmask_info(struct r600_common_screen *rscreen,
+ struct r600_texture *rtex,
+ unsigned nr_samples,
+ struct r600_fmask_info *out)
+{
+ /* FMASK is allocated like an ordinary texture. */
+ struct pipe_resource templ = rtex->resource.b.b;
+ struct radeon_surf fmask = {};
+ unsigned flags, bpe;
+
+ memset(out, 0, sizeof(*out));
+
+ templ.nr_samples = 1;
+ flags = rtex->surface.flags | RADEON_SURF_FMASK;
+
+ /* Use the same parameters and tile mode. */
+ fmask.u.legacy.bankw = rtex->surface.u.legacy.bankw;
+ fmask.u.legacy.bankh = rtex->surface.u.legacy.bankh;
+ fmask.u.legacy.mtilea = rtex->surface.u.legacy.mtilea;
+ fmask.u.legacy.tile_split = rtex->surface.u.legacy.tile_split;
+
+ if (nr_samples <= 4)
+ fmask.u.legacy.bankh = 4;
+
+ switch (nr_samples) {
+ case 2:
+ case 4:
+ bpe = 1;
+ break;
+ case 8:
+ bpe = 4;
+ break;
+ default:
+ R600_ERR("Invalid sample count for FMASK allocation.\n");
+ return;
+ }
+
+ /* Overallocate FMASK on R600-R700 to fix colorbuffer corruption.
+ * This can be fixed by writing a separate FMASK allocator specifically
+ * for R600-R700 asics. */
+ if (rscreen->chip_class <= R700) {
+ bpe *= 2;
+ }
+
+ if (rscreen->ws->surface_init(rscreen->ws, &templ, flags, bpe,
+ RADEON_SURF_MODE_2D, &fmask)) {
+ R600_ERR("Got error in surface_init while allocating FMASK.\n");
+ return;
+ }
+
+ assert(fmask.u.legacy.level[0].mode == RADEON_SURF_MODE_2D);
+
+ out->slice_tile_max = (fmask.u.legacy.level[0].nblk_x * fmask.u.legacy.level[0].nblk_y) / 64;
+ if (out->slice_tile_max)
+ out->slice_tile_max -= 1;
+
+ out->tile_mode_index = fmask.u.legacy.tiling_index[0];
+ out->pitch_in_pixels = fmask.u.legacy.level[0].nblk_x;
+ out->bank_height = fmask.u.legacy.bankh;
+ out->tile_swizzle = fmask.tile_swizzle;
+ out->alignment = MAX2(256, fmask.surf_alignment);
+ out->size = fmask.surf_size;
+}
+/* Append FMASK to the texture allocation: compute its info for the texture's own sample count, place it at the next aligned offset past rtex->size, and grow rtex->size to cover it. */
+static void r600_texture_allocate_fmask(struct r600_common_screen *rscreen,
+ struct r600_texture *rtex)
+{
+ r600_texture_get_fmask_info(rscreen, rtex,
+ rtex->resource.b.b.nr_samples, &rtex->fmask);
+
+ rtex->fmask.offset = align64(rtex->size, rtex->fmask.alignment);
+ rtex->size = rtex->fmask.offset + rtex->fmask.size;
+}
+/* Compute CMASK slice_tile_max, alignment and size for rtex into *out, based on the screen's tile pipe count and pipe interleave size. Only those three fields of *out are written here. */
+void r600_texture_get_cmask_info(struct r600_common_screen *rscreen,
+ struct r600_texture *rtex,
+ struct r600_cmask_info *out)
+{
+ unsigned cmask_tile_width = 8;
+ unsigned cmask_tile_height = 8;
+ unsigned cmask_tile_elements = cmask_tile_width * cmask_tile_height;
+ unsigned element_bits = 4;
+ unsigned cmask_cache_bits = 1024;
+ unsigned num_pipes = rscreen->info.num_tile_pipes;
+ unsigned pipe_interleave_bytes = rscreen->info.pipe_interleave_bytes;
+
+ unsigned elements_per_macro_tile = (cmask_cache_bits / element_bits) * num_pipes;
+ unsigned pixels_per_macro_tile = elements_per_macro_tile * cmask_tile_elements;
+ unsigned sqrt_pixels_per_macro_tile = sqrt(pixels_per_macro_tile);
+ unsigned macro_tile_width = util_next_power_of_two(sqrt_pixels_per_macro_tile);
+ unsigned macro_tile_height = pixels_per_macro_tile / macro_tile_width;
+
+ unsigned pitch_elements = align(rtex->resource.b.b.width0, macro_tile_width);
+ unsigned height = align(rtex->resource.b.b.height0, macro_tile_height);
+
+ unsigned base_align = num_pipes * pipe_interleave_bytes;
+ unsigned slice_bytes =
+ ((pitch_elements * height * element_bits + 7) / 8) / cmask_tile_elements;
+
+ assert(macro_tile_width % 128 == 0);
+ assert(macro_tile_height % 128 == 0);
+
+ out->slice_tile_max = ((pitch_elements * height) / (128*128)) - 1;
+ out->alignment = MAX2(256, base_align);
+ out->size = (util_max_layer(&rtex->resource.b.b, 0) + 1) *
+ align(slice_bytes, base_align);
+}
+/* Append CMASK to the texture allocation: compute its info, place it at the next aligned offset past rtex->size, grow rtex->size to cover it, and set the FAST_CLEAR bit in cb_color_info. */
+static void r600_texture_allocate_cmask(struct r600_common_screen *rscreen,
+ struct r600_texture *rtex)
+{
+ r600_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
+
+ rtex->cmask.offset = align64(rtex->size, rtex->cmask.alignment);
+ rtex->size = rtex->cmask.offset + rtex->cmask.size;
+
+ rtex->cb_color_info |= EG_S_028C70_FAST_CLEAR(1);
+}
+/* Give rtex a CMASK stored in its own buffer. No-op when cmask_buffer is already set; when the buffer cannot be created, cmask.size is reset to 0 and nothing else changes. */
+static void r600_texture_alloc_cmask_separate(struct r600_common_screen *rscreen,
+ struct r600_texture *rtex)
+{
+ if (rtex->cmask_buffer)
+ return;
+
+ assert(rtex->cmask.size == 0);
+
+ r600_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
+
+ rtex->cmask_buffer = (struct r600_resource *)
+ r600_aligned_buffer_create(&rscreen->b,
+ R600_RESOURCE_FLAG_UNMAPPABLE,
+ PIPE_USAGE_DEFAULT,
+ rtex->cmask.size,
+ rtex->cmask.alignment);
+ if (rtex->cmask_buffer == NULL) {
+ rtex->cmask.size = 0;
+ return;
+ }
+
+ /* update colorbuffer state bits */
+ rtex->cmask.base_address_reg = rtex->cmask_buffer->gpu_address >> 8;
+
+ rtex->cb_color_info |= EG_S_028C70_FAST_CLEAR(1);
+
+ p_atomic_inc(&rscreen->compressed_colortex_counter);
+}
+/* Compute surface.htile_size and surface.htile_alignment for rtex. htile_size is left at 0 on DRM 2.x with minor < 26 for chips up to Evergreen, for R600 textures wider or taller than 7680, and for pipe counts other than 1, 2, 4, 8 or 16. */
+static void r600_texture_get_htile_size(struct r600_common_screen *rscreen,
+ struct r600_texture *rtex)
+{
+ unsigned cl_width, cl_height, width, height;
+ unsigned slice_elements, slice_bytes, pipe_interleave_bytes, base_align;
+ unsigned num_pipes = rscreen->info.num_tile_pipes;
+
+ rtex->surface.htile_size = 0;
+
+ if (rscreen->chip_class <= EVERGREEN &&
+ rscreen->info.drm_major == 2 && rscreen->info.drm_minor < 26)
+ return;
+
+ /* HW bug on R6xx. */
+ if (rscreen->chip_class == R600 &&
+ (rtex->resource.b.b.width0 > 7680 ||
+ rtex->resource.b.b.height0 > 7680))
+ return;
+
+ switch (num_pipes) {
+ case 1:
+ cl_width = 32;
+ cl_height = 16;
+ break;
+ case 2:
+ cl_width = 32;
+ cl_height = 32;
+ break;
+ case 4:
+ cl_width = 64;
+ cl_height = 32;
+ break;
+ case 8:
+ cl_width = 64;
+ cl_height = 64;
+ break;
+ case 16:
+ cl_width = 128;
+ cl_height = 64;
+ break;
+ default:
+ assert(0);
+ return;
+ }
+
+ width = align(rtex->resource.b.b.width0, cl_width * 8);
+ height = align(rtex->resource.b.b.height0, cl_height * 8);
+
+ slice_elements = (width * height) / (8 * 8);
+ slice_bytes = slice_elements * 4;
+
+ pipe_interleave_bytes = rscreen->info.pipe_interleave_bytes;
+ base_align = num_pipes * pipe_interleave_bytes;
+
+ rtex->surface.htile_alignment = base_align;
+ rtex->surface.htile_size =
+ (util_max_layer(&rtex->resource.b.b, 0) + 1) *
+ align(slice_bytes, base_align);
+}
+/* Append HTILE to the texture allocation: compute its size, and if it is non-zero place HTILE at the next aligned offset past rtex->size and grow rtex->size to cover it. Leaves rtex untouched apart from surface.htile_size when HTILE is unavailable. */
+static void r600_texture_allocate_htile(struct r600_common_screen *rscreen,
+ struct r600_texture *rtex)
+{
+ r600_texture_get_htile_size(rscreen, rtex);
+
+ if (!rtex->surface.htile_size)
+ return;
+ /* rtex->size is aligned with align64() by the FMASK/CMASK allocators; use it here too so a large size is not truncated by the 32-bit align(). */
+ rtex->htile_offset = align64(rtex->size, rtex->surface.htile_alignment);
+ rtex->size = rtex->htile_offset + rtex->surface.htile_size;
+}
+/* Write a description of rtex to `log`: common parameters, layout, FMASK/CMASK/HTILE info when present, every mip level, and the stencil levels when the surface has stencil. rscreen is not used here. */
+void r600_print_texture_info(struct r600_common_screen *rscreen,
+ struct r600_texture *rtex, struct u_log_context *log)
+{
+ int i;
+
+ /* Common parameters. */
+ u_log_printf(log, " Info: npix_x=%u, npix_y=%u, npix_z=%u, blk_w=%u, "
+ "blk_h=%u, array_size=%u, last_level=%u, "
+ "bpe=%u, nsamples=%u, flags=0x%x, %s\n",
+ rtex->resource.b.b.width0, rtex->resource.b.b.height0,
+ rtex->resource.b.b.depth0, rtex->surface.blk_w,
+ rtex->surface.blk_h,
+ rtex->resource.b.b.array_size, rtex->resource.b.b.last_level,
+ rtex->surface.bpe, rtex->resource.b.b.nr_samples,
+ rtex->surface.flags, util_format_short_name(rtex->resource.b.b.format));
+
+ u_log_printf(log, " Layout: size=%"PRIu64", alignment=%u, bankw=%u, "
+ "bankh=%u, nbanks=%u, mtilea=%u, tilesplit=%u, pipeconfig=%u, scanout=%u\n",
+ rtex->surface.surf_size, rtex->surface.surf_alignment, rtex->surface.u.legacy.bankw,
+ rtex->surface.u.legacy.bankh, rtex->surface.u.legacy.num_banks, rtex->surface.u.legacy.mtilea,
+ rtex->surface.u.legacy.tile_split, rtex->surface.u.legacy.pipe_config,
+ (rtex->surface.flags & RADEON_SURF_SCANOUT) != 0);
+
+ if (rtex->fmask.size)
+ u_log_printf(log, " FMask: offset=%"PRIu64", size=%"PRIu64", alignment=%u, pitch_in_pixels=%u, "
+ "bankh=%u, slice_tile_max=%u, tile_mode_index=%u\n",
+ rtex->fmask.offset, rtex->fmask.size, rtex->fmask.alignment,
+ rtex->fmask.pitch_in_pixels, rtex->fmask.bank_height,
+ rtex->fmask.slice_tile_max, rtex->fmask.tile_mode_index);
+
+ if (rtex->cmask.size)
+ u_log_printf(log, " CMask: offset=%"PRIu64", size=%"PRIu64", alignment=%u, "
+ "slice_tile_max=%u\n",
+ rtex->cmask.offset, rtex->cmask.size, rtex->cmask.alignment,
+ rtex->cmask.slice_tile_max);
+
+ if (rtex->htile_offset)
+ u_log_printf(log, " HTile: offset=%"PRIu64", size=%"PRIu64", "
+ "alignment=%u\n",
+ rtex->htile_offset, rtex->surface.htile_size,
+ rtex->surface.htile_alignment);
+
+ for (i = 0; i <= rtex->resource.b.b.last_level; i++)
+ u_log_printf(log, " Level[%i]: offset=%"PRIu64", slice_size=%"PRIu64", "
+ "npix_x=%u, npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, "
+ "mode=%u, tiling_index = %u\n",
+ i, rtex->surface.u.legacy.level[i].offset,
+ rtex->surface.u.legacy.level[i].slice_size,
+ u_minify(rtex->resource.b.b.width0, i),
+ u_minify(rtex->resource.b.b.height0, i),
+ u_minify(rtex->resource.b.b.depth0, i),
+ rtex->surface.u.legacy.level[i].nblk_x,
+ rtex->surface.u.legacy.level[i].nblk_y,
+ rtex->surface.u.legacy.level[i].mode,
+ rtex->surface.u.legacy.tiling_index[i]);
+
+ if (rtex->surface.has_stencil) {
+ u_log_printf(log, " StencilLayout: tilesplit=%u\n",
+ rtex->surface.u.legacy.stencil_tile_split);
+ for (i = 0; i <= rtex->resource.b.b.last_level; i++) {
+ u_log_printf(log, " StencilLevel[%i]: offset=%"PRIu64", "
+ "slice_size=%"PRIu64", npix_x=%u, "
+ "npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, "
+ "mode=%u, tiling_index = %u\n",
+ i, rtex->surface.u.legacy.stencil_level[i].offset,
+ rtex->surface.u.legacy.stencil_level[i].slice_size,
+ u_minify(rtex->resource.b.b.width0, i),
+ u_minify(rtex->resource.b.b.height0, i),
+ u_minify(rtex->resource.b.b.depth0, i),
+ rtex->surface.u.legacy.stencil_level[i].nblk_x,
+ rtex->surface.u.legacy.stencil_level[i].nblk_y,
+ rtex->surface.u.legacy.stencil_level[i].mode,
+ rtex->surface.u.legacy.stencil_tiling_index[i]);
+ }
+ }
+}
+
+/* Common processing for r600_texture_create and r600_texture_from_handle: allocate an r600_texture from `base` and `surface`, add HTILE or FMASK/CMASK where applicable, then allocate a new backing buffer (buf == NULL) or use `buf` as the backing buffer. Returns NULL on failure. */
+static struct r600_texture *
+r600_texture_create_object(struct pipe_screen *screen,
+ const struct pipe_resource *base,
+ struct pb_buffer *buf,
+ struct radeon_surf *surface)
+{
+ struct r600_texture *rtex;
+ struct r600_resource *resource;
+ struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
+
+ rtex = CALLOC_STRUCT(r600_texture);
+ if (!rtex)
+ return NULL;
+
+ resource = &rtex->resource;
+ resource->b.b = *base;
+ resource->b.b.next = NULL;
+ resource->b.vtbl = &r600_texture_vtbl;
+ pipe_reference_init(&resource->b.b.reference, 1);
+ resource->b.b.screen = screen;
+
+ /* don't include stencil-only formats which we don't support for rendering */
+ rtex->is_depth = util_format_has_depth(util_format_description(rtex->resource.b.b.format));
+
+ rtex->surface = *surface;
+ rtex->size = rtex->surface.surf_size;
+ rtex->db_render_format = base->format;
+
+ /* Tiled depth textures utilize the non-displayable tile order.
+ * This must be done after r600_setup_surface.
+ * Applies to R600-Cayman. */
+ rtex->non_disp_tiling = rtex->is_depth && rtex->surface.u.legacy.level[0].mode >= RADEON_SURF_MODE_1D;
+ /* Applies to GCN. */
+ rtex->last_msaa_resolve_target_micro_mode = rtex->surface.micro_tile_mode;
+
+ if (rtex->is_depth) {
+ if (base->flags & (R600_RESOURCE_FLAG_TRANSFER |
+ R600_RESOURCE_FLAG_FLUSHED_DEPTH) ||
+ rscreen->chip_class >= EVERGREEN) {
+ rtex->can_sample_z = !rtex->surface.u.legacy.depth_adjusted;
+ rtex->can_sample_s = !rtex->surface.u.legacy.stencil_adjusted;
+ } else {
+ if (rtex->resource.b.b.nr_samples <= 1 &&
+ (rtex->resource.b.b.format == PIPE_FORMAT_Z16_UNORM ||
+ rtex->resource.b.b.format == PIPE_FORMAT_Z32_FLOAT))
+ rtex->can_sample_z = true;
+ }
+
+ if (!(base->flags & (R600_RESOURCE_FLAG_TRANSFER |
+ R600_RESOURCE_FLAG_FLUSHED_DEPTH))) {
+ rtex->db_compatible = true;
+
+ if (!(rscreen->debug_flags & DBG_NO_HYPERZ))
+ r600_texture_allocate_htile(rscreen, rtex);
+ }
+ } else {
+ if (base->nr_samples > 1) {
+ if (!buf) {
+ r600_texture_allocate_fmask(rscreen, rtex);
+ r600_texture_allocate_cmask(rscreen, rtex);
+ rtex->cmask_buffer = &rtex->resource;
+ }
+ if (!rtex->fmask.size || !rtex->cmask.size) { /* also taken for MSAA with buf != NULL, where neither was allocated above */
+ FREE(rtex);
+ return NULL;
+ }
+ }
+ }
+
+ /* Now create the backing buffer. */
+ if (!buf) {
+ r600_init_resource_fields(rscreen, resource, rtex->size,
+ rtex->surface.surf_alignment);
+
+ /* Displayable surfaces are not suballocated. */
+ if (resource->b.b.bind & PIPE_BIND_SCANOUT)
+ resource->flags |= RADEON_FLAG_NO_SUBALLOC;
+
+ if (!r600_alloc_resource(rscreen, resource)) {
+ FREE(rtex);
+ return NULL;
+ }
+ } else {
+ resource->buf = buf;
+ resource->gpu_address = rscreen->ws->buffer_get_virtual_address(resource->buf);
+ resource->bo_size = buf->size;
+ resource->bo_alignment = buf->alignment;
+ resource->domains = rscreen->ws->buffer_get_initial_domain(resource->buf);
+ if (resource->domains & RADEON_DOMAIN_VRAM)
+ resource->vram_usage = buf->size;
+ else if (resource->domains & RADEON_DOMAIN_GTT)
+ resource->gart_usage = buf->size;
+ }
+
+ if (rtex->cmask.size) {
+ /* Initialize the cmask to 0xCC (= compressed state). */
+ r600_screen_clear_buffer(rscreen, &rtex->cmask_buffer->b.b,
+ rtex->cmask.offset, rtex->cmask.size,
+ 0xCCCCCCCC);
+ }
+ if (rtex->htile_offset) {
+ uint32_t clear_value = 0;
+
+ r600_screen_clear_buffer(rscreen, &rtex->resource.b.b,
+ rtex->htile_offset,
+ rtex->surface.htile_size,
+ clear_value);
+ }
+
+ /* Initialize the CMASK base register value. */
+ rtex->cmask.base_address_reg =
+ (rtex->resource.gpu_address + rtex->cmask.offset) >> 8;
+
+ if (rscreen->debug_flags & DBG_VM) {
+ fprintf(stderr, "VM start=0x%"PRIX64" end=0x%"PRIX64" | Texture %ix%ix%i, %i levels, %i samples, %s\n",
+ rtex->resource.gpu_address,
+ rtex->resource.gpu_address + rtex->resource.buf->size,
+ base->width0, base->height0, util_max_layer(base, 0)+1, base->last_level+1,
+ base->nr_samples ? base->nr_samples : 1, util_format_short_name(base->format));
+ }
+
+ if (rscreen->debug_flags & DBG_TEX) {
+ puts("Texture:");
+ struct u_log_context log;
+ u_log_context_init(&log);
+ r600_print_texture_info(rscreen, rtex, &log);
+ u_log_new_page_print(&log, stdout);
+ fflush(stdout);
+ u_log_context_destroy(&log);
+ }
+
+ return rtex;
+}
+
+static enum radeon_surf_mode
+r600_choose_tiling(struct r600_common_screen *rscreen,
+ const struct pipe_resource *templ)
+{ /* Picks the tiling mode for a new texture described by templ; the first matching rule below wins. */
+ const struct util_format_description *desc = util_format_description(templ->format);
+ bool force_tiling = templ->flags & R600_RESOURCE_FLAG_FORCE_TILING;
+ bool is_depth_stencil = util_format_is_depth_or_stencil(templ->format) &&
+ !(templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH); /* flushed-depth textures do not count as depth/stencil here */
+
+ /* MSAA resources must be 2D tiled. */
+ if (templ->nr_samples > 1)
+ return RADEON_SURF_MODE_2D;
+
+ /* Transfer resources should be linear. */
+ if (templ->flags & R600_RESOURCE_FLAG_TRANSFER)
+ return RADEON_SURF_MODE_LINEAR_ALIGNED;
+
+ /* r600g: force tiling on TEXTURE_2D and TEXTURE_3D compute resources. */
+ if (rscreen->chip_class >= R600 && rscreen->chip_class <= CAYMAN &&
+ (templ->bind & PIPE_BIND_COMPUTE_RESOURCE) &&
+ (templ->target == PIPE_TEXTURE_2D ||
+ templ->target == PIPE_TEXTURE_3D))
+ force_tiling = true;
+
+ /* Handle common candidates for the linear mode.
+ * Compressed textures and DB surfaces must always be tiled.
+ */
+ if (!force_tiling &&
+ !is_depth_stencil &&
+ !util_format_is_compressed(templ->format)) {
+ if (rscreen->debug_flags & DBG_NO_TILING)
+ return RADEON_SURF_MODE_LINEAR_ALIGNED;
+
+ /* Tiling doesn't work with the 422 (SUBSAMPLED) formats on R600+. */
+ if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED)
+ return RADEON_SURF_MODE_LINEAR_ALIGNED;
+
+ if (templ->bind & PIPE_BIND_LINEAR) /* the caller asked for a linear layout */
+ return RADEON_SURF_MODE_LINEAR_ALIGNED;
+
+ /* Textures with a very small height are recommended to be linear. */
+ if (templ->target == PIPE_TEXTURE_1D ||
+ templ->target == PIPE_TEXTURE_1D_ARRAY ||
+ /* Only very thin and long 2D textures should benefit from
+ * linear_aligned. */
+ (templ->width0 > 8 && templ->height0 <= 2))
+ return RADEON_SURF_MODE_LINEAR_ALIGNED;
+
+ /* Textures likely to be mapped often. */
+ if (templ->usage == PIPE_USAGE_STAGING ||
+ templ->usage == PIPE_USAGE_STREAM)
+ return RADEON_SURF_MODE_LINEAR_ALIGNED;
+ }
+
+ /* Make small textures (width or height <= 16) 1D tiled; DBG_NO_2D_TILING forces 1D as well. */
+ if (templ->width0 <= 16 || templ->height0 <= 16 ||
+ (rscreen->debug_flags & DBG_NO_2D_TILING))
+ return RADEON_SURF_MODE_1D;
+
+ /* The allocator will switch to 1D if needed. */
+ return RADEON_SURF_MODE_2D;
+}
+
+struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
+					  const struct pipe_resource *templ)
+{
+	/* Create a driver-allocated texture matching templ; NULL on failure. */
+	struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
+	const bool flushed_depth =
+		(templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH) != 0;
+	const enum radeon_surf_mode mode = r600_choose_tiling(rscreen, templ);
+	struct radeon_surf surf = {0};
+	struct r600_texture *rtex;
+
+	if (r600_init_surface(rscreen, &surf, templ, mode, 0, 0,
+			      false, false, flushed_depth) != 0)
+		return NULL;
+
+	rtex = r600_texture_create_object(screen, templ, NULL, &surf);
+	return (struct pipe_resource *)rtex;
+}
+
+static struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen,
+						      const struct pipe_resource *templ,
+						      struct winsys_handle *whandle,
+						      unsigned usage)
+{ /* Imports the buffer named by "whandle" as a texture shaped like "templ"; returns NULL on failure. */
+	struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
+	struct pb_buffer *buf = NULL;
+	unsigned stride = 0, offset = 0;
+	enum radeon_surf_mode array_mode;
+	struct radeon_surf surface = {};
+	int r;
+	struct radeon_bo_metadata metadata = {};
+	struct r600_texture *rtex;
+	bool is_scanout;
+
+	/* Support only 2D textures without mipmaps */
+	if ((templ->target != PIPE_TEXTURE_2D && templ->target != PIPE_TEXTURE_RECT) ||
+	    templ->depth0 != 1 || templ->last_level != 0)
+		return NULL;
+
+	buf = rscreen->ws->buffer_from_handle(rscreen->ws, whandle, &stride, &offset);
+	if (!buf)
+		return NULL;
+
+	rscreen->ws->buffer_get_metadata(buf, &metadata);
+	r600_surface_import_metadata(rscreen, &surface, &metadata,
+				     &array_mode, &is_scanout);
+
+	r = r600_init_surface(rscreen, &surface, templ, array_mode, stride,
+			      offset, true, is_scanout, false);
+	rtex = r ? NULL :
+	       r600_texture_create_object(screen, templ, buf, &surface);
+	if (!rtex) {
+		/* Neither failure adopted buf: drop the imported reference. */
+		pb_reference(&buf, NULL);
+		return NULL;
+	}
+
+	rtex->resource.b.is_shared = true;
+	rtex->resource.external_usage = usage; /* remembered for later fast-clear decisions */
+
+	if (rscreen->apply_opaque_metadata)
+		rscreen->apply_opaque_metadata(rscreen, rtex, &metadata);
+
+	assert(rtex->surface.tile_swizzle == 0);
+	return &rtex->resource.b.b;
+}
+
+bool r600_init_flushed_depth_texture(struct pipe_context *ctx,
+ struct pipe_resource *texture, /* texture whose size, format and layout the new texture copies */
+ struct r600_texture **staging) /* NULL: result is cached in texture's flushed_depth_texture; else receives a new TRANSFER texture */
+{ /* Creates the texture that holds flushed depth data; returns true on success or if the cached one already exists. */
+ struct r600_texture *rtex = (struct r600_texture*)texture;
+ struct pipe_resource resource;
+ struct r600_texture **flushed_depth_texture = staging ?
+ staging : &rtex->flushed_depth_texture;
+ enum pipe_format pipe_format = texture->format;
+
+ if (!staging) { /* the format is only narrowed for the cached (non-staging) copy */
+ if (rtex->flushed_depth_texture)
+ return true; /* it's ready */
+
+ if (!rtex->can_sample_z && rtex->can_sample_s) {
+ switch (pipe_format) {
+ case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+ /* Save memory by not allocating the S plane. */
+ pipe_format = PIPE_FORMAT_Z32_FLOAT;
+ break;
+ case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+ case PIPE_FORMAT_S8_UINT_Z24_UNORM:
+ /* Save memory bandwidth by not copying the
+ * stencil part during flush.
+ *
+ * This potentially increases memory bandwidth
+ * if an application uses both Z and S texturing
+ * simultaneously (a flushed Z24S8 texture
+ * would be stored compactly), but how often
+ * does that really happen?
+ */
+ pipe_format = PIPE_FORMAT_Z24X8_UNORM;
+ break;
+ default:; /* every other format is kept as is */
+ }
+ } else if (!rtex->can_sample_s && rtex->can_sample_z) {
+ assert(util_format_has_stencil(util_format_description(pipe_format)));
+
+ /* DB->CB copies to an 8bpp surface don't work. */
+ pipe_format = PIPE_FORMAT_X24S8_UINT;
+ }
+ }
+
+ memset(&resource, 0, sizeof(resource)); /* fields not assigned below stay zero */
+ resource.target = texture->target;
+ resource.format = pipe_format;
+ resource.width0 = texture->width0;
+ resource.height0 = texture->height0;
+ resource.depth0 = texture->depth0;
+ resource.array_size = texture->array_size;
+ resource.last_level = texture->last_level;
+ resource.nr_samples = texture->nr_samples;
+ resource.usage = staging ? PIPE_USAGE_STAGING : PIPE_USAGE_DEFAULT;
+ resource.bind = texture->bind & ~PIPE_BIND_DEPTH_STENCIL; /* the new texture is not bound as depth/stencil */
+ resource.flags = texture->flags | R600_RESOURCE_FLAG_FLUSHED_DEPTH;
+
+ if (staging)
+ resource.flags |= R600_RESOURCE_FLAG_TRANSFER;
+
+ *flushed_depth_texture = (struct r600_texture *)ctx->screen->resource_create(ctx->screen, &resource);
+ if (*flushed_depth_texture == NULL) {
+ R600_ERR("failed to create temporary texture to hold flushed depth\n");
+ return false;
+ }
+
+ (*flushed_depth_texture)->non_disp_tiling = false;
+ return true;
+}
+
+/**
+ * Fill in "res" as the template of a temporary texture as large as "box",
+ * a region of mip level "level" of "orig". A box spanning several layers
+ * yields a 2D array; TRANSFER temporaries get staging usage.
+ */
+static void r600_init_temp_resource_from_box(struct pipe_resource *res,
+					     struct pipe_resource *orig,
+					     const struct pipe_box *box,
+					     unsigned level, unsigned flags)
+{
+	bool multi_layer;
+
+	memset(res, 0, sizeof(*res));
+	res->format = orig->format;
+	res->width0 = box->width;
+	res->height0 = box->height;
+	res->depth0 = 1;
+	res->array_size = 1;
+	res->usage = (flags & R600_RESOURCE_FLAG_TRANSFER) ? PIPE_USAGE_STAGING : PIPE_USAGE_DEFAULT;
+	res->flags = flags;
+
+	/* A 3D box needs the matching texture target and layer count. */
+	multi_layer = box->depth > 1 && util_max_layer(orig, level) > 0;
+	res->target = multi_layer ? PIPE_TEXTURE_2D_ARRAY : PIPE_TEXTURE_2D;
+	if (multi_layer)
+		res->array_size = box->depth;
+}
+
+static bool r600_can_invalidate_texture(struct r600_common_screen *rscreen,
+					struct r600_texture *rtex,
+					unsigned transfer_usage,
+					const struct pipe_box *box)
+{
+	/* True when a map of "box" may replace the texture storage outright. */
+	struct pipe_resource *tex = &rtex->resource.b.b;
+
+	/* r600g doesn't react to dirty_tex_descriptor_counter */
+	if (rscreen->chip_class < SI || rtex->resource.b.is_shared ||
+	    (transfer_usage & PIPE_TRANSFER_READ) || tex->last_level != 0)
+		return false;
+	return util_texrange_covers_whole_level(tex, 0, box->x, box->y, box->z,
+						box->width, box->height, box->depth);
+}
+
+static void r600_texture_invalidate_storage(struct r600_common_context *rctx,
+ struct r600_texture *rtex)
+{ /* Gives rtex a freshly allocated backing buffer and bumps the screen's dirty_tex_counter. */
+ struct r600_common_screen *rscreen = rctx->screen;
+
+ /* There is no point in discarding depth and tiled buffers. */
+ assert(!rtex->is_depth);
+ assert(rtex->surface.is_linear);
+
+ /* Reallocate the buffer in the same pipe_resource. */
+ r600_alloc_resource(rscreen, &rtex->resource); /* NOTE(review): the return value is not checked here */
+
+ /* Initialize the CMASK base address (needed even without CMASK). */
+ rtex->cmask.base_address_reg =
+ (rtex->resource.gpu_address + rtex->cmask.offset) >> 8;
+
+ p_atomic_inc(&rscreen->dirty_tex_counter);
+
+ rctx->num_alloc_tex_transfer_bytes += rtex->size; /* feeds the flush heuristic in r600_texture_transfer_unmap */
+}
+
+static void *r600_texture_transfer_map(struct pipe_context *ctx,
+				       struct pipe_resource *texture,
+				       unsigned level,
+				       unsigned usage,
+				       const struct pipe_box *box,
+				       struct pipe_transfer **ptransfer)
+{ /* Maps "box" of mip "level" for CPU access and stores the transfer in *ptransfer; NULL on failure, with every reference released. */
+	struct r600_common_context *rctx = (struct r600_common_context*)ctx;
+	struct r600_texture *rtex = (struct r600_texture*)texture;
+	struct r600_transfer *trans;
+	struct r600_resource *buf;
+	unsigned offset = 0;
+	char *map;
+	bool use_staging_texture = false;
+
+	assert(!(texture->flags & R600_RESOURCE_FLAG_TRANSFER));
+	assert(box->width && box->height && box->depth);
+
+	/* Depth textures use staging unconditionally. */
+	if (!rtex->is_depth) {
+		/* Degrade the tile mode if we get too many transfers on APUs.
+		 * On dGPUs, the staging texture is always faster.
+		 * Only count uploads that are at least 4x4 pixels large.
+		 */
+		if (!rctx->screen->info.has_dedicated_vram &&
+		    level == 0 &&
+		    box->width >= 4 && box->height >= 4 &&
+		    p_atomic_inc_return(&rtex->num_level0_transfers) == 10) {
+			bool can_invalidate =
+				r600_can_invalidate_texture(rctx->screen, rtex,
+							    usage, box);
+
+			r600_reallocate_texture_inplace(rctx, rtex,
+							PIPE_BIND_LINEAR,
+							can_invalidate);
+		}
+
+		/* Tiled textures need to be converted into a linear texture for CPU
+		 * access. The staging texture is always linear and is placed in GART.
+		 *
+		 * Reading from VRAM or GTT WC is slow, always use the staging
+		 * texture in this case.
+		 *
+		 * Use the staging texture for uploads if the underlying BO
+		 * is busy.
+		 */
+		if (!rtex->surface.is_linear)
+			use_staging_texture = true;
+		else if (usage & PIPE_TRANSFER_READ)
+			use_staging_texture =
+				rtex->resource.domains & RADEON_DOMAIN_VRAM ||
+				rtex->resource.flags & RADEON_FLAG_GTT_WC;
+		/* Write & linear only: */
+		else if (r600_rings_is_buffer_referenced(rctx, rtex->resource.buf,
+							 RADEON_USAGE_READWRITE) ||
+			 !rctx->ws->buffer_wait(rtex->resource.buf, 0,
+						RADEON_USAGE_READWRITE)) {
+			/* It's busy. */
+			if (r600_can_invalidate_texture(rctx->screen, rtex,
+							usage, box))
+				r600_texture_invalidate_storage(rctx, rtex);
+			else
+				use_staging_texture = true;
+		}
+	}
+
+	trans = CALLOC_STRUCT(r600_transfer);
+	if (!trans)
+		return NULL;
+	pipe_resource_reference(&trans->b.b.resource, texture); /* released in unmap, or at fail_trans */
+	trans->b.b.level = level;
+	trans->b.b.usage = usage;
+	trans->b.b.box = *box;
+
+	if (rtex->is_depth) {
+		struct r600_texture *staging_depth;
+
+		if (rtex->resource.b.b.nr_samples > 1) {
+			/* MSAA depth buffers need to be converted to single sample buffers.
+			 *
+			 * Mapping MSAA depth buffers can occur if ReadPixels is called
+			 * with a multisample GLX visual.
+			 *
+			 * First downsample the depth buffer to a temporary texture,
+			 * then decompress the temporary one to staging.
+			 *
+			 * Only the region being mapped is transferred.
+			 */
+			struct pipe_resource resource;
+
+			r600_init_temp_resource_from_box(&resource, texture, box, level, 0);
+
+			if (!r600_init_flushed_depth_texture(ctx, &resource, &staging_depth)) {
+				R600_ERR("failed to create temporary texture to hold untiled copy\n");
+				goto fail_trans;
+			}
+			trans->staging = &staging_depth->resource; /* owned by trans from here, so fail_trans frees it */
+
+			if (usage & PIPE_TRANSFER_READ) {
+				struct pipe_resource *temp = ctx->screen->resource_create(ctx->screen, &resource);
+				if (!temp) {
+					R600_ERR("failed to create a temporary depth texture\n");
+					goto fail_trans;
+				}
+
+				r600_copy_region_with_blit(ctx, temp, 0, 0, 0, 0, texture, level, box);
+				rctx->blit_decompress_depth(ctx, (struct r600_texture*)temp, staging_depth,
+							    0, 0, 0, box->depth, 0, 0);
+				pipe_resource_reference(&temp, NULL);
+			}
+
+			/* Just get the strides. */
+			r600_texture_get_offset(rctx->screen, staging_depth, level, NULL,
+						&trans->b.b.stride,
+						&trans->b.b.layer_stride);
+		} else {
+			/* XXX: only readback the rectangle which is being mapped? */
+			/* XXX: when discard is true, no need to read back from depth texture */
+			if (!r600_init_flushed_depth_texture(ctx, texture, &staging_depth)) {
+				R600_ERR("failed to create temporary texture to hold untiled copy\n");
+				goto fail_trans;
+			}
+			trans->staging = &staging_depth->resource; /* owned by trans from here */
+
+			rctx->blit_decompress_depth(ctx, rtex, staging_depth,
+						    level, level,
+						    box->z, box->z + box->depth - 1,
+						    0, 0);
+
+			offset = r600_texture_get_offset(rctx->screen, staging_depth,
+							 level, box,
+							 &trans->b.b.stride,
+							 &trans->b.b.layer_stride);
+		}
+
+		buf = trans->staging;
+	} else if (use_staging_texture) {
+		struct pipe_resource resource;
+		struct r600_texture *staging;
+
+		r600_init_temp_resource_from_box(&resource, texture, box, level,
+						 R600_RESOURCE_FLAG_TRANSFER);
+		resource.usage = (usage & PIPE_TRANSFER_READ) ?
+			PIPE_USAGE_STAGING : PIPE_USAGE_STREAM;
+
+		/* Create the temporary texture. */
+		staging = (struct r600_texture*)ctx->screen->resource_create(ctx->screen, &resource);
+		if (!staging) {
+			R600_ERR("failed to create temporary texture to hold untiled copy\n");
+			goto fail_trans;
+		}
+		trans->staging = &staging->resource;
+
+		/* Just get the strides. */
+		r600_texture_get_offset(rctx->screen, staging, 0, NULL,
+					&trans->b.b.stride,
+					&trans->b.b.layer_stride);
+
+		if (usage & PIPE_TRANSFER_READ)
+			r600_copy_to_staging_texture(ctx, trans);
+		else
+			usage |= PIPE_TRANSFER_UNSYNCHRONIZED; /* nothing else uses the new staging texture yet */
+
+		buf = trans->staging;
+	} else {
+		/* the resource is mapped directly */
+		offset = r600_texture_get_offset(rctx->screen, rtex, level, box,
+						 &trans->b.b.stride,
+						 &trans->b.b.layer_stride);
+		buf = &rtex->resource;
+	}
+
+	if (!(map = r600_buffer_map_sync_with_rings(rctx, buf, usage)))
+		goto fail_trans;
+
+	*ptransfer = &trans->b.b;
+	return map + offset;
+
+fail_trans: /* single exit for failures: drop the staging texture and the texture reference */
+	r600_resource_reference(&trans->staging, NULL);
+	pipe_resource_reference(&trans->b.b.resource, NULL);
+	FREE(trans);
+	return NULL;
+}
+
+static void r600_texture_transfer_unmap(struct pipe_context *ctx,
+ struct pipe_transfer* transfer)
+{ /* Ends a transfer: copies staged writes back to the texture, then releases the staging texture and the transfer. */
+ struct r600_common_context *rctx = (struct r600_common_context*)ctx;
+ struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
+ struct pipe_resource *texture = transfer->resource;
+ struct r600_texture *rtex = (struct r600_texture*)texture;
+
+ if ((transfer->usage & PIPE_TRANSFER_WRITE) && rtransfer->staging) { /* staged writes must reach the real texture */
+ if (rtex->is_depth && rtex->resource.b.b.nr_samples <= 1) { /* single-sample depth: same level and box on both sides */
+ ctx->resource_copy_region(ctx, texture, transfer->level,
+ transfer->box.x, transfer->box.y, transfer->box.z,
+ &rtransfer->staging->b.b, transfer->level,
+ &transfer->box);
+ } else {
+ r600_copy_from_staging_texture(ctx, rtransfer);
+ }
+ }
+
+ if (rtransfer->staging) {
+ rctx->num_alloc_tex_transfer_bytes += rtransfer->staging->buf->size; /* counted before the reference is dropped */
+ r600_resource_reference(&rtransfer->staging, NULL);
+ }
+
+ /* Heuristic for {upload, draw, upload, draw, ..}:
+ *
+ * Flush the gfx IB if we've allocated too much texture storage.
+ *
+ * The idea is that we don't want to build IBs that use too much
+ * memory and put pressure on the kernel memory manager and we also
+ * want to make temporary and invalidated buffers go idle ASAP to
+ * decrease the total memory usage or make them reusable. The memory
+ * usage will be slightly higher than given here because of the buffer
+ * cache in the winsys.
+ *
+ * The result is that the kernel memory manager is never a bottleneck.
+ */
+ if (rctx->num_alloc_tex_transfer_bytes > rctx->screen->info.gart_size / 4) { /* threshold: a quarter of gart_size */
+ rctx->gfx.flush(rctx, RADEON_FLUSH_ASYNC, NULL);
+ rctx->num_alloc_tex_transfer_bytes = 0;
+ }
+
+ pipe_resource_reference(&transfer->resource, NULL); /* drops the reference taken in r600_texture_transfer_map */
+ FREE(transfer);
+}
+
+static const struct u_resource_vtbl r600_texture_vtbl = /* per-resource callbacks for textures; entries are positional */
+{
+ NULL, /* get_handle; r600_init_screen_texture_functions sets resource_get_handle on the screen instead */
+ r600_texture_destroy, /* resource_destroy */
+ r600_texture_transfer_map, /* transfer_map */
+ u_default_transfer_flush_region, /* transfer_flush_region */
+ r600_texture_transfer_unmap, /* transfer_unmap */
+};
+
+struct pipe_surface *r600_create_surface_custom(struct pipe_context *pipe,
+						struct pipe_resource *texture,
+						const struct pipe_surface *templ,
+						unsigned width0, unsigned height0,
+						unsigned width, unsigned height)
+{
+	/* Wrap "texture" in a new r600_surface with caller-supplied sizes. */
+	struct r600_surface *rsurf = CALLOC_STRUCT(r600_surface);
+	struct pipe_surface *base;
+
+	if (rsurf == NULL)
+		return NULL;
+	base = &rsurf->base;
+	assert(templ->u.tex.first_layer <= util_max_layer(texture, templ->u.tex.level));
+	assert(templ->u.tex.last_layer <= util_max_layer(texture, templ->u.tex.level));
+
+	pipe_reference_init(&base->reference, 1);
+	pipe_resource_reference(&base->texture, texture);
+	base->context = pipe;
+	base->format = templ->format;
+	base->width = width;
+	base->height = height;
+	base->u = templ->u;
+	rsurf->width0 = width0;
+	rsurf->height0 = height0;
+	return base;
+}
+
+static struct pipe_surface *r600_create_surface(struct pipe_context *pipe,
+ struct pipe_resource *tex,
+ const struct pipe_surface *templ)
+{ /* Creates a surface for mip level templ->u.tex.level of tex, resizing it when templ->format has a different block size. */
+ unsigned level = templ->u.tex.level;
+ unsigned width = u_minify(tex->width0, level); /* size of the selected mip level */
+ unsigned height = u_minify(tex->height0, level);
+ unsigned width0 = tex->width0; /* size of level 0 */
+ unsigned height0 = tex->height0;
+
+ if (tex->target != PIPE_BUFFER && templ->format != tex->format) {
+ const struct util_format_description *tex_desc
+ = util_format_description(tex->format);
+ const struct util_format_description *templ_desc
+ = util_format_description(templ->format);
+
+ assert(tex_desc->block.bits == templ_desc->block.bits); /* a reinterpretation must keep the bits per block */
+
+ /* Adjust size of surface if and only if the block width or
+ * height is changed. */
+ if (tex_desc->block.width != templ_desc->block.width ||
+ tex_desc->block.height != templ_desc->block.height) {
+ unsigned nblks_x = util_format_get_nblocksx(tex->format, width);
+ unsigned nblks_y = util_format_get_nblocksy(tex->format, height);
+
+ width = nblks_x * templ_desc->block.width; /* same block count, measured in the view format's blocks */
+ height = nblks_y * templ_desc->block.height;
+
+ width0 = util_format_get_nblocksx(tex->format, width0); /* level-0 size becomes a block count of tex->format */
+ height0 = util_format_get_nblocksy(tex->format, height0);
+ }
+ }
+
+ return r600_create_surface_custom(pipe, tex, templ,
+ width0, height0,
+ width, height);
+}
+
+static void r600_surface_destroy(struct pipe_context *pipe,
+				 struct pipe_surface *surface)
+{ /* Drops the surface's FMASK/CMASK buffer and texture references, then frees it. */
+	struct r600_surface *rsurf = (struct r600_surface*)surface;
+	r600_resource_reference(&rsurf->cb_buffer_fmask, NULL);
+	r600_resource_reference(&rsurf->cb_buffer_cmask, NULL);
+	pipe_resource_reference(&surface->texture, NULL);
+	FREE(rsurf); /* same allocation that "surface" points into */
+}
+
+static void r600_clear_texture(struct pipe_context *pipe,
+ struct pipe_resource *tex,
+ unsigned level,
+ const struct pipe_box *box,
+ const void *data) /* one texel's clear value, packed in tex->format */
+{ /* Clears "box" of mip "level" through a temporary surface; returns silently if that surface cannot be created. */
+ struct pipe_screen *screen = pipe->screen;
+ struct r600_texture *rtex = (struct r600_texture*)tex;
+ struct pipe_surface tmpl = {{0}};
+ struct pipe_surface *sf;
+ const struct util_format_description *desc =
+ util_format_description(tex->format);
+
+ tmpl.format = tex->format;
+ tmpl.u.tex.first_layer = box->z;
+ tmpl.u.tex.last_layer = box->z + box->depth - 1; /* inclusive last layer */
+ tmpl.u.tex.level = level;
+ sf = pipe->create_surface(pipe, tex, &tmpl);
+ if (!sf)
+ return;
+
+ if (rtex->is_depth) {
+ unsigned clear;
+ float depth;
+ uint8_t stencil = 0;
+
+ /* Depth is always present. */
+ clear = PIPE_CLEAR_DEPTH;
+ desc->unpack_z_float(&depth, 0, data, 0, 1, 1);
+
+ if (rtex->surface.has_stencil) {
+ clear |= PIPE_CLEAR_STENCIL;
+ desc->unpack_s_8uint(&stencil, 0, data, 0, 1, 1);
+ }
+
+ pipe->clear_depth_stencil(pipe, sf, clear, depth, stencil,
+ box->x, box->y,
+ box->width, box->height, false);
+ } else {
+ union pipe_color_union color;
+
+ /* pipe_color_union requires the full vec4 representation. */
+ if (util_format_is_pure_uint(tex->format))
+ desc->unpack_rgba_uint(color.ui, 0, data, 0, 1, 1);
+ else if (util_format_is_pure_sint(tex->format))
+ desc->unpack_rgba_sint(color.i, 0, data, 0, 1, 1);
+ else
+ desc->unpack_rgba_float(color.f, 0, data, 0, 1, 1);
+
+ if (screen->is_format_supported(screen, tex->format,
+ tex->target, 0,
+ PIPE_BIND_RENDER_TARGET)) {
+ pipe->clear_render_target(pipe, sf, &color,
+ box->x, box->y,
+ box->width, box->height, false);
+ } else {
+ /* Software fallback - just for R9G9B9E5_FLOAT */
+ util_clear_render_target(pipe, sf, &color,
+ box->x, box->y,
+ box->width, box->height);
+ }
+ }
+ pipe_surface_reference(&sf, NULL); /* release the temporary surface */
+}
+
+unsigned r600_translate_colorswap(enum pipe_format format, bool do_endian_swap)
+{ /* Returns the V_0280A0_SWAP_* value matching the format's channel swizzle, or ~0U when none applies. */
+ const struct util_format_description *desc = util_format_description(format);
+
+#define HAS_SWIZZLE(chan,swz) (desc->swizzle[chan] == PIPE_SWIZZLE_##swz)
+
+ if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */
+ return V_0280A0_SWAP_STD;
+
+ if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
+ return ~0U;
+
+ switch (desc->nr_channels) {
+ case 1:
+ if (HAS_SWIZZLE(0,X))
+ return V_0280A0_SWAP_STD; /* X___ */
+ else if (HAS_SWIZZLE(3,X))
+ return V_0280A0_SWAP_ALT_REV; /* ___X */
+ break;
+ case 2:
+ if ((HAS_SWIZZLE(0,X) && HAS_SWIZZLE(1,Y)) ||
+ (HAS_SWIZZLE(0,X) && HAS_SWIZZLE(1,NONE)) ||
+ (HAS_SWIZZLE(0,NONE) && HAS_SWIZZLE(1,Y)))
+ return V_0280A0_SWAP_STD; /* XY__ */
+ else if ((HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(1,X)) ||
+ (HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(1,NONE)) ||
+ (HAS_SWIZZLE(0,NONE) && HAS_SWIZZLE(1,X)))
+ /* YX__ */
+ return (do_endian_swap ? V_0280A0_SWAP_STD : V_0280A0_SWAP_STD_REV);
+ else if (HAS_SWIZZLE(0,X) && HAS_SWIZZLE(3,Y))
+ return V_0280A0_SWAP_ALT; /* X__Y */
+ else if (HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(3,X))
+ return V_0280A0_SWAP_ALT_REV; /* Y__X */
+ break;
+ case 3:
+ if (HAS_SWIZZLE(0,X)) /* XYZ; reversed when do_endian_swap is set */
+ return (do_endian_swap ? V_0280A0_SWAP_STD_REV : V_0280A0_SWAP_STD);
+ else if (HAS_SWIZZLE(0,Z))
+ return V_0280A0_SWAP_STD_REV; /* ZYX */
+ break;
+ case 4:
+ /* check the middle channels, the 1st and 4th channel can be NONE */
+ if (HAS_SWIZZLE(1,Y) && HAS_SWIZZLE(2,Z)) {
+ return V_0280A0_SWAP_STD; /* XYZW */
+ } else if (HAS_SWIZZLE(1,Z) && HAS_SWIZZLE(2,Y)) {
+ return V_0280A0_SWAP_STD_REV; /* WZYX */
+ } else if (HAS_SWIZZLE(1,Y) && HAS_SWIZZLE(2,X)) {
+ return V_0280A0_SWAP_ALT; /* ZYXW */
+ } else if (HAS_SWIZZLE(1,Z) && HAS_SWIZZLE(2,W)) {
+ /* YZWX */
+ if (desc->is_array)
+ return V_0280A0_SWAP_ALT_REV;
+ else
+ return (do_endian_swap ? V_0280A0_SWAP_ALT : V_0280A0_SWAP_ALT_REV);
+ }
+ break;
+ }
+ return ~0U; /* no swizzle above matched; NOTE(review): HAS_SWIZZLE is not #undef'd after this function */
+}
+
+/* FAST COLOR CLEAR */
+
+static void evergreen_set_clear_color(struct r600_texture *rtex,
+ enum pipe_format surface_format,
+ const union pipe_color_union *color)
+{ /* Packs "color" for surface_format and stores the first two 32-bit words in rtex->color_clear_value. */
+ union util_color uc;
+
+ memset(&uc, 0, sizeof(uc)); /* bytes the packing does not write stay zero */
+
+ if (rtex->surface.bpe == 16) {
+ /* DCC fast clear only:
+ * CLEAR_WORD0 = R = G = B
+ * CLEAR_WORD1 = A
+ */
+ assert(color->ui[0] == color->ui[1] &&
+ color->ui[0] == color->ui[2]);
+ uc.ui[0] = color->ui[0];
+ uc.ui[1] = color->ui[3];
+ } else if (util_format_is_pure_uint(surface_format)) {
+ util_format_write_4ui(surface_format, color->ui, 0, &uc, 0, 0, 0, 1, 1);
+ } else if (util_format_is_pure_sint(surface_format)) {
+ util_format_write_4i(surface_format, color->i, 0, &uc, 0, 0, 0, 1, 1);
+ } else {
+ util_pack_color(color->f, surface_format, &uc);
+ }
+
+ memcpy(rtex->color_clear_value, &uc, 2 * sizeof(uint32_t)); /* only two words are kept */
+}
+
+void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
+ struct pipe_framebuffer_state *fb,
+ struct r600_atom *fb_state,
+ unsigned *buffers, ubyte *dirty_cbufs, /* in/out: bits are cleared in *buffers and set in *dirty_cbufs per fast-cleared buffer */
+ const union pipe_color_union *color)
+{ /* Fast-clears each qualifying color buffer selected in *buffers by clearing its CMASK and recording the clear color. */
+ int i;
+
+ /* This function is broken in BE, so just disable this path for now */
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ return;
+#endif
+
+ if (rctx->render_cond) /* nothing is fast-cleared while render_cond is set */
+ return;
+
+ for (i = 0; i < fb->nr_cbufs; i++) {
+ struct r600_texture *tex;
+ unsigned clear_bit = PIPE_CLEAR_COLOR0 << i;
+
+ if (!fb->cbufs[i])
+ continue;
+
+ /* if this colorbuffer is not being cleared */
+ if (!(*buffers & clear_bit))
+ continue;
+
+ tex = (struct r600_texture *)fb->cbufs[i]->texture;
+
+ /* the clear is allowed if all layers are bound */
+ if (fb->cbufs[i]->u.tex.first_layer != 0 ||
+ fb->cbufs[i]->u.tex.last_layer != util_max_layer(&tex->resource.b.b, 0)) {
+ continue;
+ }
+
+ /* cannot clear mipmapped textures */
+ if (fb->cbufs[i]->texture->last_level != 0) {
+ continue;
+ }
+
+ /* only supported on tiled surfaces */
+ if (tex->surface.is_linear) {
+ continue;
+ }
+
+ /* shared textures can't use fast clear without an explicit flush,
+ * because there is no way to communicate the clear color among
+ * all clients
+ */
+ if (tex->resource.b.is_shared &&
+ !(tex->resource.external_usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH))
+ continue;
+
+ {
+ /* 128-bit formats are unsupported */
+ if (tex->surface.bpe > 8) {
+ continue;
+ }
+
+ /* ensure CMASK is enabled */
+ r600_texture_alloc_cmask_separate(rctx->screen, tex);
+ if (tex->cmask.size == 0) {
+ continue;
+ }
+
+ /* Do the fast clear. */
+ rctx->clear_buffer(&rctx->b, &tex->cmask_buffer->b.b,
+ tex->cmask.offset, tex->cmask.size, 0,
+ R600_COHERENCY_CB_META);
+
+ bool need_compressed_update = !tex->dirty_level_mask; /* true only when no level was marked dirty before */
+
+ tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
+
+ if (need_compressed_update)
+ p_atomic_inc(&rctx->screen->compressed_colortex_counter);
+ }
+
+ evergreen_set_clear_color(tex, fb->cbufs[i]->format, color);
+
+ if (dirty_cbufs)
+ *dirty_cbufs |= 1 << i;
+ rctx->set_atom_dirty(rctx, fb_state, true);
+ *buffers &= ~clear_bit; /* this buffer is handled; remove it from the caller's clear mask */
+ }
+}
+
+static struct pipe_memory_object *
+r600_memobj_from_handle(struct pipe_screen *screen,
+			struct winsys_handle *whandle,
+			bool dedicated)
+{
+	/* Wrap the buffer named by "whandle" in a new memory object; NULL on failure. */
+	struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
+	struct r600_memory_object *memobj = CALLOC_STRUCT(r600_memory_object);
+	struct pb_buffer *buf = NULL;
+	uint32_t stride, offset;
+
+	if (!memobj)
+		return NULL;
+
+	buf = rscreen->ws->buffer_from_handle(rscreen->ws, whandle,
+					      &stride, &offset);
+	if (!buf) {
+		/* Allocated with CALLOC_STRUCT, so release with FREE. */
+		FREE(memobj);
+		return NULL;
+	}
+
+	memobj->b.dedicated = dedicated;
+	memobj->buf = buf; /* the object keeps the reference returned by the winsys */
+	memobj->stride = stride;
+	memobj->offset = offset;
+	return (struct pipe_memory_object *)memobj;
+}
+
+static void
+r600_memobj_destroy(struct pipe_screen *screen,
+		    struct pipe_memory_object *_memobj)
+{ /* Destroys a memory object: drops its buffer reference and frees the object. */
+	struct r600_memory_object *memobj = (struct r600_memory_object *)_memobj;
+
+	pb_reference(&memobj->buf, NULL);
+	FREE(memobj); /* pairs with CALLOC_STRUCT in r600_memobj_from_handle */
+}
+
+static struct pipe_resource *
+r600_texture_from_memobj(struct pipe_screen *screen,
+ const struct pipe_resource *templ,
+ struct pipe_memory_object *_memobj,
+ uint64_t offset) /* passed on to r600_init_surface; NOTE(review): memobj->offset is not used here */
+{ /* Creates a texture shaped like "templ" on top of the memory object's buffer; returns NULL on failure. */
+ int r;
+ struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
+ struct r600_memory_object *memobj = (struct r600_memory_object *)_memobj;
+ struct r600_texture *rtex;
+ struct radeon_surf surface = {};
+ struct radeon_bo_metadata metadata = {};
+ enum radeon_surf_mode array_mode;
+ bool is_scanout;
+ struct pb_buffer *buf = NULL;
+
+ if (memobj->b.dedicated) {
+ rscreen->ws->buffer_get_metadata(memobj->buf, &metadata);
+ r600_surface_import_metadata(rscreen, &surface, &metadata,
+ &array_mode, &is_scanout);
+ } else {
+ /**
+ * The bo metadata is unset for un-dedicated images. So we fall
+ * back to linear. See answer to question 5 of the
+ * VK_KHX_external_memory spec for some details.
+ *
+ * It is possible that this case isn't going to work if the
+ * surface pitch isn't correctly aligned by default.
+ *
+ * In order to support it correctly we require multi-image
+ * metadata to be synchronized between radv and radeonsi. The
+ * semantics of associating multiple image metadata to a memory
+ * object on the vulkan export side are not concretely defined
+ * either.
+ *
+ * All the use cases we are aware of at the moment for memory
+ * objects use dedicated allocations. So lets keep the initial
+ * implementation simple.
+ *
+ * A possible alternative is to attempt to reconstruct the
+ * tiling information when the TexParameter TEXTURE_TILING_EXT
+ * is set.
+ */
+ array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
+ is_scanout = false;
+
+ }
+
+ r = r600_init_surface(rscreen, &surface, templ,
+ array_mode, memobj->stride,
+ offset, true, is_scanout,
+ false);
+ if (r)
+ return NULL;
+
+ rtex = r600_texture_create_object(screen, templ, memobj->buf, &surface);
+ if (!rtex)
+ return NULL;
+
+ /* r600_texture_create_object doesn't increment refcount of
+ * memobj->buf, so increment it here.
+ */
+ pb_reference(&buf, memobj->buf); /* "buf" is only the destination pointer for taking the extra reference */
+
+ rtex->resource.b.is_shared = true;
+ rtex->resource.external_usage = PIPE_HANDLE_USAGE_READ_WRITE;
+
+ if (rscreen->apply_opaque_metadata)
+ rscreen->apply_opaque_metadata(rscreen, rtex, &metadata); /* metadata stays zero-initialized for non-dedicated objects */
+
+ return &rtex->resource.b.b;
+}
+
+void r600_init_screen_texture_functions(struct r600_common_screen *rscreen)
+{ /* Installs this file's memory-object and texture import/export entry points on the screen. */
+	rscreen->b.memobj_create_from_handle = r600_memobj_from_handle;
+	rscreen->b.memobj_destroy = r600_memobj_destroy;
+	rscreen->b.resource_from_memobj = r600_texture_from_memobj;
+	rscreen->b.resource_from_handle = r600_texture_from_handle;
+	rscreen->b.resource_get_handle = r600_texture_get_handle;
+}
+
+void r600_init_context_texture_functions(struct r600_common_context *rctx)
+{ /* Installs this file's surface and clear_texture entry points on the context. */
+	rctx->b.clear_texture = r600_clear_texture;
+	rctx->b.surface_destroy = r600_surface_destroy;
+	rctx->b.create_surface = r600_create_surface;
+}
diff -Nru mesa-17.2.4/src/gallium/drivers/r600/r600_uvd.c mesa-17.3.3/src/gallium/drivers/r600/r600_uvd.c
--- mesa-17.2.4/src/gallium/drivers/r600/r600_uvd.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r600/r600_uvd.c 2018-01-18 21:30:28.000000000 +0000
@@ -45,9 +45,9 @@
#include "vl/vl_mpeg12_decoder.h"
#include "r600_pipe.h"
-#include "radeon/radeon_video.h"
-#include "radeon/radeon_uvd.h"
-#include "radeon/radeon_vce.h"
+#include "radeon_video.h"
+#include "radeon_uvd.h"
+#include "radeon_vce.h"
#include "r600d.h"
#define R600_UVD_ENABLE_TILING 0
@@ -162,7 +162,7 @@
msg->body.decode.dt_field_mode = buf->base.interlaced;
msg->body.decode.dt_surf_tile_config |= RUVD_NUM_BANKS(eg_num_banks(rscreen->b.info.r600_num_banks));
- ruvd_set_dt_surfaces(msg, &luma->surface, &chroma->surface, RUVD_SURFACE_TYPE_LEGACY);
+ ruvd_set_dt_surfaces(msg, &luma->surface, &chroma->surface);
return luma->resource.buf;
}
diff -Nru mesa-17.2.4/src/gallium/drivers/r600/r600_viewport.c mesa-17.3.3/src/gallium/drivers/r600/r600_viewport.c
--- mesa-17.2.4/src/gallium/drivers/r600/r600_viewport.c 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r600/r600_viewport.c 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,456 @@
+/*
+ * Copyright 2012 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "r600_cs.h"
+#include "util/u_viewport.h"
+#include "tgsi/tgsi_scan.h"
+
+#define R600_R_028C0C_PA_CL_GB_VERT_CLIP_ADJ 0x028C0C /* first guard band register below CAYMAN */
+#define CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ 0x28be8 /* first guard band register on CAYMAN and later */
+#define R_02843C_PA_CL_VPORT_XSCALE 0x02843C /* first of 6 transform dwords per viewport */
+/* Field helpers: S_* packs a value into its bit field, G_* extracts it, C_* is the mask that clears it. */
+#define R_028250_PA_SC_VPORT_SCISSOR_0_TL 0x028250
+#define S_028250_TL_X(x) (((unsigned)(x) & 0x7FFF) << 0)
+#define G_028250_TL_X(x) (((x) >> 0) & 0x7FFF)
+#define C_028250_TL_X 0xFFFF8000
+#define S_028250_TL_Y(x) (((unsigned)(x) & 0x7FFF) << 16)
+#define G_028250_TL_Y(x) (((x) >> 16) & 0x7FFF)
+#define C_028250_TL_Y 0x8000FFFF
+#define S_028250_WINDOW_OFFSET_DISABLE(x) (((unsigned)(x) & 0x1) << 31)
+#define G_028250_WINDOW_OFFSET_DISABLE(x) (((x) >> 31) & 0x1)
+#define C_028250_WINDOW_OFFSET_DISABLE 0x7FFFFFFF
+#define S_028254_BR_X(x) (((unsigned)(x) & 0x7FFF) << 0)
+#define G_028254_BR_X(x) (((x) >> 0) & 0x7FFF)
+#define C_028254_BR_X 0xFFFF8000
+#define S_028254_BR_Y(x) (((unsigned)(x) & 0x7FFF) << 16)
+#define G_028254_BR_Y(x) (((x) >> 16) & 0x7FFF)
+#define C_028254_BR_Y 0x8000FFFF
+#define R_0282D0_PA_SC_VPORT_ZMIN_0 0x0282D0 /* ZMIN/ZMAX pairs, 2 dwords per viewport */
+#define R_0282D4_PA_SC_VPORT_ZMAX_0 0x0282D4
+
+#define GET_MAX_SCISSOR(rctx) ((rctx)->chip_class >= EVERGREEN ? 16384 : 8192) /* largest scissor coordinate for the chip class; argument parenthesized so any pointer expression expands correctly */
+
+/* Store scissor rects for slots [start_slot, start_slot + num_scissors); mark them dirty if scissoring is on. */
+static void r600_set_scissor_states(struct pipe_context *ctx, unsigned start_slot,
+                                    unsigned num_scissors, const struct pipe_scissor_state *state)
+{
+    struct r600_common_context *rctx = (struct r600_common_context *)ctx;
+    struct pipe_scissor_state *dst = &rctx->scissors.states[start_slot];
+    unsigned slot;
+
+    for (slot = 0; slot < num_scissors; slot++)
+        dst[slot] = state[slot];
+
+    /* With scissoring disabled the new values are only stored, not emitted. */
+    if (rctx->scissor_enabled) {
+        rctx->scissors.dirty_mask |= ((1 << num_scissors) - 1) << start_slot;
+        rctx->set_atom_dirty(rctx, &rctx->scissors.atom, true);
+    }
+}
+
+/* Since the guard band disables clipping, the viewport has to be enforced
+ * per-pixel with a scissor; this computes that scissor from the viewport.
+ */
+static void r600_get_scissor_from_viewport(struct r600_common_context *rctx,
+                                           const struct pipe_viewport_state *vp,
+                                           struct r600_signed_scissor *scissor)
+{
+    /* Window-space positions of the clip-space corners (-1, -1) and (1, 1). */
+    float lo_x = -vp->scale[0] + vp->translate[0];
+    float lo_y = -vp->scale[1] + vp->translate[1];
+    float hi_x = vp->scale[0] + vp->translate[0];
+    float hi_y = vp->scale[1] + vp->translate[1];
+
+    /* r600_draw_rectangle sets this viewport: open the scissor completely. */
+    if (lo_x == -1 && lo_y == -1 && hi_x == 1 && hi_y == 1) {
+        scissor->minx = 0;
+        scissor->miny = 0;
+        scissor->maxx = GET_MAX_SCISSOR(rctx);
+        scissor->maxy = GET_MAX_SCISSOR(rctx);
+        return;
+    }
+
+    /* An inverted viewport has its bounds swapped; put them in order. */
+    if (lo_x > hi_x) {
+        const float swap = lo_x;
+        lo_x = hi_x;
+        hi_x = swap;
+    }
+    if (lo_y > hi_y) {
+        const float swap = lo_y;
+        lo_y = hi_y;
+        hi_y = swap;
+    }
+
+    /* Convert to integer; the max bounds are rounded up first. */
+    scissor->minx = lo_x;
+    scissor->miny = lo_y;
+    scissor->maxx = ceilf(hi_x);
+    scissor->maxy = ceilf(hi_y);
+}
+
+/* Clamp every bound of a signed scissor into [0, GET_MAX_SCISSOR(rctx)] and store the result in out. */
+static void r600_clamp_scissor(struct r600_common_context *rctx, struct pipe_scissor_state *out,
+                               struct r600_signed_scissor *scissor)
+{
+    const unsigned limit = GET_MAX_SCISSOR(rctx);
+    out->minx = CLAMP(scissor->minx, 0, limit);
+    out->maxx = CLAMP(scissor->maxx, 0, limit);
+    out->miny = CLAMP(scissor->miny, 0, limit);
+    out->maxy = CLAMP(scissor->maxy, 0, limit);
+}
+
+/* Intersect out with clip (largest min bounds, smallest max bounds), writing the result back to out. */
+static void r600_clip_scissor(struct pipe_scissor_state *out, struct pipe_scissor_state *clip)
+{
+    out->maxx = MIN2(out->maxx, clip->maxx);
+    out->maxy = MIN2(out->maxy, clip->maxy);
+    out->minx = MAX2(out->minx, clip->minx);
+    out->miny = MAX2(out->miny, clip->miny);
+}
+
+/* Grow out to the bounding box of out and in (smallest min bounds, largest max bounds). */
+static void r600_scissor_make_union(struct r600_signed_scissor *out, struct r600_signed_scissor *in)
+{
+    out->maxx = MAX2(out->maxx, in->maxx);
+    out->maxy = MAX2(out->maxy, in->maxy);
+    out->minx = MIN2(out->minx, in->minx);
+    out->miny = MIN2(out->miny, in->miny);
+}
+
+/* Patch scissor values on EVERGREEN and CAYMAN (zero max bounds, CAYMAN 1x1 max corner); no-op elsewhere. */
+void evergreen_apply_scissor_bug_workaround(struct r600_common_context *rctx,
+                                            struct pipe_scissor_state *scissor)
+{
+    const bool is_cayman = rctx->chip_class == CAYMAN;
+    if (!is_cayman && rctx->chip_class != EVERGREEN)
+        return;
+    if (scissor->maxx == 0)
+        scissor->minx = 1;
+    if (scissor->maxy == 0)
+        scissor->miny = 1;
+    if (is_cayman && scissor->maxx == 1 && scissor->maxy == 1) /* CAYMAN only */
+        scissor->maxx = 2;
+}
+
+/* Emit the TL and BR scissor dwords for one viewport: the viewport-derived
+ * scissor (or the full range when the VS disables clipping/viewport),
+ * optionally intersected with an explicit scissor rectangle. */
+static void r600_emit_one_scissor(struct r600_common_context *rctx,
+                                  struct radeon_winsys_cs *cs, struct r600_signed_scissor *vp_scissor,
+                                  struct pipe_scissor_state *scissor)
+{
+    struct pipe_scissor_state rect;
+    unsigned tl, br;
+
+    if (!rctx->vs_disables_clipping_viewport) {
+        r600_clamp_scissor(rctx, &rect, vp_scissor);
+    } else {
+        rect.minx = rect.miny = 0;
+        rect.maxx = rect.maxy = GET_MAX_SCISSOR(rctx);
+    }
+    if (scissor)
+        r600_clip_scissor(&rect, scissor);
+    evergreen_apply_scissor_bug_workaround(rctx, &rect);
+
+    tl = S_028250_TL_X(rect.minx) | S_028250_TL_Y(rect.miny) | S_028250_WINDOW_OFFSET_DISABLE(1);
+    br = S_028254_BR_X(rect.maxx) | S_028254_BR_Y(rect.maxy);
+    radeon_emit(cs, tl);
+    radeon_emit(cs, br);
+}
+
+/* the range is [-MAX, MAX]; the macro argument is parenthesized so any pointer expression expands correctly */
+#define GET_MAX_VIEWPORT_RANGE(rctx) ((rctx)->chip_class >= EVERGREEN ? 32768 : 16384)
+
+static void r600_emit_guardband(struct r600_common_context *rctx,
+ struct r600_signed_scissor *vp_as_scissor)
+{ /* emit the four guard band CLIP_ADJ/DISC_ADJ registers sized for the given viewport bounds */
+ struct radeon_winsys_cs *cs = rctx->gfx.cs;
+ struct pipe_viewport_state vp;
+ float left, top, right, bottom, max_range, guardband_x, guardband_y;
+
+ /* Reconstruct the viewport transformation from the scissor. */
+ vp.translate[0] = (vp_as_scissor->minx + vp_as_scissor->maxx) / 2.0;
+ vp.translate[1] = (vp_as_scissor->miny + vp_as_scissor->maxy) / 2.0;
+ vp.scale[0] = vp_as_scissor->maxx - vp.translate[0];
+ vp.scale[1] = vp_as_scissor->maxy - vp.translate[1];
+
+ /* Treat a 0x0 viewport as 1x1 to prevent division by zero. */
+ if (vp_as_scissor->minx == vp_as_scissor->maxx)
+ vp.scale[0] = 0.5;
+ if (vp_as_scissor->miny == vp_as_scissor->maxy)
+ vp.scale[1] = 0.5;
+
+ /* Find the biggest guard band that is inside the supported viewport
+ * range. The guard band is specified as a horizontal and vertical
+ * distance from (0,0) in clip space.
+ *
+ * This is done by applying the inverse viewport transformation
+ * on the viewport limits to get those limits in clip space.
+ *
+ * Use a limit one pixel smaller to allow for some precision error.
+ */
+ max_range = GET_MAX_VIEWPORT_RANGE(rctx) - 1;
+ left = (-max_range - vp.translate[0]) / vp.scale[0];
+ right = ( max_range - vp.translate[0]) / vp.scale[0];
+ top = (-max_range - vp.translate[1]) / vp.scale[1];
+ bottom = ( max_range - vp.translate[1]) / vp.scale[1];
+
+ assert(left <= -1 && top <= -1 && right >= 1 && bottom >= 1); /* the limits must enclose the [-1, 1] clip-space viewport */
+
+ guardband_x = MIN2(-left, right); /* the smaller distance is used for both sides of an axis */
+ guardband_y = MIN2(-top, bottom);
+
+ /* If any of the GB registers is updated, all of them must be updated. */
+ if (rctx->chip_class >= CAYMAN)
+ radeon_set_context_reg_seq(cs, CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 4);
+ else
+ radeon_set_context_reg_seq(cs, R600_R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 4);
+
+ radeon_emit(cs, fui(guardband_y)); /* R_028BE8_PA_CL_GB_VERT_CLIP_ADJ */
+ radeon_emit(cs, fui(1.0)); /* R_028BEC_PA_CL_GB_VERT_DISC_ADJ */
+ radeon_emit(cs, fui(guardband_x)); /* R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ */
+ radeon_emit(cs, fui(1.0)); /* R_028BF4_PA_CL_GB_HORZ_DISC_ADJ */
+}
+
+/* Emit callback of the scissors atom: writes the dirty viewport scissors
+ * and the guard band that goes with them. */
+static void r600_emit_scissors(struct r600_common_context *rctx, struct r600_atom *atom)
+{
+    struct radeon_winsys_cs *cs = rctx->gfx.cs;
+    struct r600_signed_scissor *vp_scissors = rctx->viewports.as_scissor;
+    struct pipe_scissor_state *states = rctx->scissors.states;
+    const bool use_states = rctx->scissor_enabled;
+    unsigned dirty = rctx->scissors.dirty_mask;
+    struct r600_signed_scissor bounds;
+    int slot;
+
+    /* The simple case: Only 1 viewport is active, so only slot 0 is emitted. */
+    if (!rctx->vs_writes_viewport_index) {
+        if (dirty & 1) {
+            radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 2);
+            r600_emit_one_scissor(rctx, cs, &vp_scissors[0], use_states ? &states[0] : NULL);
+            r600_emit_guardband(rctx, &vp_scissors[0]);
+            rctx->scissors.dirty_mask &= ~1; /* clear one bit; the others stay dirty */
+        }
+        return;
+    }
+
+    /* Shaders can draw to any viewport, so the guard band has to fit the
+     * union of all viewports. */
+    bounds = vp_scissors[0];
+    for (slot = 1; slot < R600_MAX_VIEWPORTS; slot++)
+        r600_scissor_make_union(&bounds, &vp_scissors[slot]);
+
+    /* Each range of dirty slots is written as one register sequence. */
+    while (dirty) {
+        int first, num;
+
+        u_bit_scan_consecutive_range(&dirty, &first, &num);
+        radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL + first * 4 * 2,
+                                   num * 2);
+        for (slot = first; slot < first + num; slot++)
+            r600_emit_one_scissor(rctx, cs, &vp_scissors[slot],
+                                  use_states ? &states[slot] : NULL);
+    }
+
+    r600_emit_guardband(rctx, &bounds);
+    rctx->scissors.dirty_mask = 0;
+}
+
+/* Store viewports [start_slot, start_slot + num_viewports), recompute their
+ * scissor equivalents, and mark viewport, depth-range and scissor state dirty. */
+static void r600_set_viewport_states(struct pipe_context *ctx, unsigned start_slot,
+                                     unsigned num_viewports,
+                                     const struct pipe_viewport_state *state)
+{
+    struct r600_common_context *rctx = (struct r600_common_context *)ctx;
+    const unsigned touched = ((1 << num_viewports) - 1) << start_slot;
+    unsigned n;
+
+    for (n = 0; n < num_viewports; n++) {
+        const unsigned slot = start_slot + n;
+
+        rctx->viewports.states[slot] = state[n];
+        r600_get_scissor_from_viewport(rctx, &state[n], &rctx->viewports.as_scissor[slot]);
+    }
+
+    rctx->viewports.dirty_mask |= touched;
+    rctx->viewports.depth_range_dirty_mask |= touched;
+    rctx->scissors.dirty_mask |= touched;
+
+    rctx->set_atom_dirty(rctx, &rctx->viewports.atom, true);
+    rctx->set_atom_dirty(rctx, &rctx->scissors.atom, true);
+}
+
+/* Emit one viewport as six dwords: a scale/translate pair for each of the three axes. */
+static void r600_emit_one_viewport(struct r600_common_context *rctx,
+                                   struct pipe_viewport_state *state)
+{
+    struct radeon_winsys_cs *cs = rctx->gfx.cs;
+    unsigned axis;
+
+    for (axis = 0; axis < 3; axis++) {
+        radeon_emit(cs, fui(state->scale[axis]));
+        radeon_emit(cs, fui(state->translate[axis]));
+    }
+}
+
+/* Emit the dirty viewport transforms (only slot 0 unless the VS writes the
+ * viewport index). */
+static void r600_emit_viewports(struct r600_common_context *rctx)
+{
+    struct radeon_winsys_cs *cs = rctx->gfx.cs;
+    struct pipe_viewport_state *vps = rctx->viewports.states;
+    unsigned dirty = rctx->viewports.dirty_mask;
+
+    /* The simple case: Only 1 viewport is active. */
+    if (!rctx->vs_writes_viewport_index) {
+        if (dirty & 1) {
+            radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE, 6);
+            r600_emit_one_viewport(rctx, &vps[0]);
+            rctx->viewports.dirty_mask &= ~1; /* clear one bit */
+        }
+        return;
+    }
+
+    while (dirty) {
+        int first, num, slot;
+
+        u_bit_scan_consecutive_range(&dirty, &first, &num);
+        /* Each viewport occupies six consecutive dword registers. */
+        radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE + first * 4 * 6, num * 6);
+        for (slot = first; slot < first + num; slot++)
+            r600_emit_one_viewport(rctx, &vps[slot]);
+    }
+    rctx->viewports.dirty_mask = 0;
+}
+
+/* Emit ZMIN/ZMAX for the viewports whose depth range is dirty (only slot 0
+ * unless the VS writes the viewport index). */
+static void r600_emit_depth_ranges(struct r600_common_context *rctx)
+{
+    struct radeon_winsys_cs *cs = rctx->gfx.cs;
+    struct pipe_viewport_state *vps = rctx->viewports.states;
+    unsigned dirty = rctx->viewports.depth_range_dirty_mask;
+    float z_lo, z_hi;
+
+    /* The simple case: Only 1 viewport is active. */
+    if (!rctx->vs_writes_viewport_index) {
+        if (dirty & 1) {
+            util_viewport_zmin_zmax(&vps[0], rctx->clip_halfz, &z_lo, &z_hi);
+
+            radeon_set_context_reg_seq(cs, R_0282D0_PA_SC_VPORT_ZMIN_0, 2);
+            radeon_emit(cs, fui(z_lo));
+            radeon_emit(cs, fui(z_hi));
+            rctx->viewports.depth_range_dirty_mask &= ~1; /* clear one bit */
+        }
+        return;
+    }
+
+    while (dirty) {
+        int first, num, slot;
+
+        u_bit_scan_consecutive_range(&dirty, &first, &num);
+        /* Two dwords (ZMIN, ZMAX) per viewport. */
+        radeon_set_context_reg_seq(cs, R_0282D0_PA_SC_VPORT_ZMIN_0 + first * 4 * 2, num * 2);
+        for (slot = first; slot < first + num; slot++) {
+            util_viewport_zmin_zmax(&vps[slot], rctx->clip_halfz, &z_lo, &z_hi);
+            radeon_emit(cs, fui(z_lo));
+            radeon_emit(cs, fui(z_hi));
+        }
+    }
+    rctx->viewports.depth_range_dirty_mask = 0;
+}
+
+/* Emit callback of the viewports atom: transforms first, then depth ranges; atom itself is unused. */
+static void r600_emit_viewport_states(struct r600_common_context *rctx, struct r600_atom *atom)
+{
+    r600_emit_viewports(rctx);
+    r600_emit_depth_ranges(rctx);
+}
+
+/* Set viewport dependencies on pipe_rasterizer_state; a changed flag dirties all viewport slots. */
+void r600_viewport_set_rast_deps(struct r600_common_context *rctx, bool scissor_enable, bool clip_halfz)
+{
+    const unsigned all_slots = (1 << R600_MAX_VIEWPORTS) - 1;
+    if (scissor_enable != rctx->scissor_enabled) {
+        rctx->scissor_enabled = scissor_enable;
+        rctx->scissors.dirty_mask = all_slots;
+        rctx->set_atom_dirty(rctx, &rctx->scissors.atom, true);
+    }
+    if (clip_halfz != rctx->clip_halfz) {
+        rctx->clip_halfz = clip_halfz;
+        rctx->viewports.depth_range_dirty_mask = all_slots;
+        rctx->set_atom_dirty(rctx, &rctx->viewports.atom, true);
+    }
+}
+
+/**
+ * Normally, only 1 viewport and 1 scissor are emitted while no shader uses
+ * the VIEWPORT_INDEX output, and emitting the other viewports and scissors
+ * is delayed. When a shader with VIEWPORT_INDEX appears, this should be
+ * called to emit the rest. A NULL info leaves all state untouched.
+ */
+void r600_update_vs_writes_viewport_index(struct r600_common_context *rctx,
+                                          struct tgsi_shader_info *info)
+{
+    bool window_space;
+
+    if (!info)
+        return;
+
+    /* True when the VS disables clipping and viewport transformation. */
+    window_space = info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
+
+    /* A change of that flag affects every scissor, see r600_emit_one_scissor. */
+    if (window_space != rctx->vs_disables_clipping_viewport) {
+        rctx->vs_disables_clipping_viewport = window_space;
+        rctx->scissors.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
+        rctx->set_atom_dirty(rctx, &rctx->scissors.atom, true);
+    }
+
+    /* Viewport index handling. */
+    rctx->vs_writes_viewport_index = info->writes_viewport_index;
+
+    if (rctx->vs_writes_viewport_index) {
+        /* Slots whose emission was delayed are still flagged in the dirty
+         * masks; re-dirty the atoms so they are emitted now. */
+        if (rctx->scissors.dirty_mask)
+            rctx->set_atom_dirty(rctx, &rctx->scissors.atom, true);
+        if (rctx->viewports.dirty_mask || rctx->viewports.depth_range_dirty_mask)
+            rctx->set_atom_dirty(rctx, &rctx->viewports.atom, true);
+    }
+}
+
+void r600_init_viewport_functions(struct r600_common_context *rctx)
+{
+    /* Per-atom emit callback and num_dw (presumably the atom's maximum dword count -- TODO confirm). */
+    rctx->scissors.atom.emit = r600_emit_scissors;
+    rctx->scissors.atom.num_dw = (2 + 16 * 2) + 6;
+    rctx->viewports.atom.emit = r600_emit_viewport_states;
+    rctx->viewports.atom.num_dw = 2 + 16 * 6;
+    /* Context entry points that store the state those atoms emit. */
+    rctx->b.set_scissor_states = r600_set_scissor_states;
+    rctx->b.set_viewport_states = r600_set_viewport_states;
+}
diff -Nru mesa-17.2.4/src/gallium/drivers/r600/r700_asm.c mesa-17.3.3/src/gallium/drivers/r600/r700_asm.c
--- mesa-17.2.4/src/gallium/drivers/r600/r700_asm.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r600/r700_asm.c 2018-01-18 21:30:28.000000000 +0000
@@ -30,7 +30,8 @@
*bytecode++ = S_SQ_CF_WORD1_CF_INST(r600_isa_cf_opcode(ISA_CC_R700, cf->op)) |
S_SQ_CF_WORD1_BARRIER(1) |
S_SQ_CF_WORD1_COUNT(count) |
- S_SQ_CF_WORD1_COUNT_3(count >> 3);
+ S_SQ_CF_WORD1_COUNT_3(count >> 3)|
+ S_SQ_CF_WORD1_END_OF_PROGRAM(cf->end_of_program);
}
int r700_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, unsigned id)
diff -Nru mesa-17.2.4/src/gallium/drivers/r600/radeon_uvd.c mesa-17.3.3/src/gallium/drivers/r600/radeon_uvd.c
--- mesa-17.2.4/src/gallium/drivers/r600/radeon_uvd.c 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r600/radeon_uvd.c 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,1492 @@
+/**************************************************************************
+ *
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/*
+ * Authors:
+ * Christian König
+ *
+ */
+
+#include <sys/types.h>
+#include <assert.h>
+#include <errno.h>
+#include <unistd.h>
+#include <stdio.h>
+
+#include "pipe/p_video_codec.h"
+
+#include "util/u_memory.h"
+#include "util/u_video.h"
+
+#include "vl/vl_defines.h"
+#include "vl/vl_mpeg12_decoder.h"
+
+#include "r600_pipe_common.h"
+#include "radeon_video.h"
+#include "radeon_uvd.h"
+
+#define NUM_BUFFERS 4
+
+#define NUM_MPEG2_REFS 6
+#define NUM_H264_REFS 17
+#define NUM_VC1_REFS 5
+
+#define FB_BUFFER_OFFSET 0x1000
+#define FB_BUFFER_SIZE 2048
+#define FB_BUFFER_SIZE_TONGA (2048 * 64)
+#define IT_SCALING_TABLE_SIZE 992
+#define UVD_SESSION_CONTEXT_SIZE (128 * 1024)
+
+/* UVD decoder representation: per-decoder state shared by the helpers in this file */
+struct ruvd_decoder {
+ struct pipe_video_codec base;
+
+ ruvd_set_dtb set_dtb;
+
+ unsigned stream_handle;
+ unsigned stream_type; /* a RUVD_CODEC_* value, see profile2stream_type() */
+ unsigned frame_number;
+
+ struct pipe_screen *screen;
+ struct radeon_winsys* ws; /* winsys hooks used for mapping buffers and building/flushing the CS */
+ struct radeon_winsys_cs* cs; /* command stream that set_reg() emits into */
+
+ unsigned cur_buffer; /* index into msg_fb_it_buffers, advanced by next_buffer() */
+
+ struct rvid_buffer msg_fb_it_buffers[NUM_BUFFERS]; /* each backs message, feedback and IT data */
+ struct ruvd_msg *msg; /* CPU pointer to the mapped message; NULL while unmapped */
+ uint32_t *fb; /* mapped buffer + FB_BUFFER_OFFSET; NULL while unmapped */
+ unsigned fb_size; /* distance between fb and it inside the mapping */
+ uint8_t *it; /* only set up by map_msg_fb_it_buf() when have_it() is true */
+
+ struct rvid_buffer bs_buffers[NUM_BUFFERS];
+ void* bs_ptr;
+ unsigned bs_size;
+
+ struct rvid_buffer dpb;
+ bool use_legacy; /* send_cmd(): reloc offset + index instead of a 64-bit virtual address */
+ struct rvid_buffer ctx;
+ struct rvid_buffer sessionctx; /* sent ahead of every message when its res is set */
+ struct { /* register offsets used by send_cmd() */
+ unsigned data0;
+ unsigned data1;
+ unsigned cmd;
+ unsigned cntl;
+ } reg;
+};
+
+/* flush IB to the hardware; returns whatever the winsys cs_flush() hook returns */
+static int flush(struct ruvd_decoder *dec, unsigned flags)
+{
+ return dec->ws->cs_flush(dec->cs, flags, NULL);
+}
+
+/* add a new set register command to the IB: a RUVD_PKT0 word for reg followed by the value */
+static void set_reg(struct ruvd_decoder *dec, unsigned reg, uint32_t val)
+{
+ radeon_emit(dec->cs, RUVD_PKT0(reg >> 2, 0)); /* reg is passed to the packet macro shifted right by 2 */
+ radeon_emit(dec->cs, val);
+}
+
+/* send a command to the VCPU through the GPCOM registers: add buf to the CS,
+ * program the two data registers with its location, then write the command */
+static void send_cmd(struct ruvd_decoder *dec, unsigned cmd, struct pb_buffer* buf, uint32_t off,
+                     enum radeon_bo_usage usage, enum radeon_bo_domain domain)
+{
+    const int reloc_idx = dec->ws->cs_add_buffer(dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED,
+                                                 domain, RADEON_PRIO_UVD);
+
+    if (dec->use_legacy) {
+        /* legacy: relocation offset in DATA0, relocation index * 4 in DATA1 */
+        off += dec->ws->buffer_get_reloc_offset(buf);
+        set_reg(dec, RUVD_GPCOM_VCPU_DATA0, off);
+        set_reg(dec, RUVD_GPCOM_VCPU_DATA1, reloc_idx * 4);
+    } else {
+        /* otherwise: 64-bit virtual address split across data0/data1 */
+        uint64_t addr = dec->ws->buffer_get_virtual_address(buf);
+        addr += off;
+        set_reg(dec, dec->reg.data0, addr);
+        set_reg(dec, dec->reg.data1, addr >> 32);
+    }
+
+    set_reg(dec, dec->reg.cmd, cmd << 1);
+}
+
+/* does the codec need an IT buffer? (only H264_PERF and H265 streams do) */
+static bool have_it(struct ruvd_decoder *dec)
+{
+    const unsigned type = dec->stream_type;
+    return type == RUVD_CODEC_H265 || type == RUVD_CODEC_H264_PERF;
+}
+
+/* map the next available message/feedback/itscaling buffer */
+static void map_msg_fb_it_buf(struct ruvd_decoder *dec)
+{
+    /* grab the current message/feedback buffer */
+    struct rvid_buffer *cur = &dec->msg_fb_it_buffers[dec->cur_buffer];
+
+    /* and map it for CPU access */
+    uint8_t *base = dec->ws->buffer_map(cur->res->buf, dec->cs, PIPE_TRANSFER_WRITE);
+    uint8_t *feedback = base + FB_BUFFER_OFFSET;
+
+    /* the message starts the buffer and is zeroed on every map */
+    dec->msg = (struct ruvd_msg *)base;
+    memset(dec->msg, 0, sizeof(*dec->msg));
+
+    /* feedback sits at a fixed offset; IT data, if the codec has any,
+     * follows fb_size bytes later */
+    dec->fb = (uint32_t *)feedback;
+    if (have_it(dec))
+        dec->it = feedback + dec->fb_size;
+}
+
+/* unmap and send a message command to the VCPU
+ * (preceded by the session context buffer when one exists) */
+static void send_msg_buf(struct ruvd_decoder *dec)
+{
+    /* the current message buffer */
+    struct rvid_buffer *cur = &dec->msg_fb_it_buffers[dec->cur_buffer];
+
+    /* ignore the request if message/feedback buffer isn't mapped */
+    if (dec->msg == NULL || dec->fb == NULL)
+        return;
+
+    /* unmap the buffer and drop the now stale CPU pointers into it */
+    dec->ws->buffer_unmap(cur->res->buf);
+    dec->msg = NULL;
+    dec->fb = NULL;
+    dec->it = NULL;
+
+    /* a session context buffer, when there is one, is sent first */
+    if (dec->sessionctx.res) {
+        send_cmd(dec, RUVD_CMD_SESSION_CONTEXT_BUFFER,
+                 dec->sessionctx.res->buf, 0,
+                 RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM);
+    }
+
+    /* and send it to the hardware */
+    send_cmd(dec, RUVD_CMD_MSG_BUFFER, cur->res->buf, 0,
+             RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
+}
+
+/* cycle to the next set of buffers */
+static void next_buffer(struct ruvd_decoder *dec)
+{
+    /* advance the index, wrapping around after NUM_BUFFERS */
+    dec->cur_buffer = (dec->cur_buffer + 1) % NUM_BUFFERS;
+}
+
+/* convert the profile into something UVD understands: the RUVD_CODEC_* value
+ * for the decoder's video format (family is not consulted) */
+static uint32_t profile2stream_type(struct ruvd_decoder *dec, unsigned family)
+{
+    switch (u_reduce_video_profile(dec->base.profile)) {
+    case PIPE_VIDEO_FORMAT_MPEG12:
+        return RUVD_CODEC_MPEG2;
+    case PIPE_VIDEO_FORMAT_MPEG4:
+        return RUVD_CODEC_MPEG4;
+    case PIPE_VIDEO_FORMAT_VC1:
+        return RUVD_CODEC_VC1;
+    case PIPE_VIDEO_FORMAT_MPEG4_AVC:
+        return RUVD_CODEC_H264;
+    case PIPE_VIDEO_FORMAT_HEVC:
+        return RUVD_CODEC_H265;
+
+    case PIPE_VIDEO_FORMAT_JPEG:
+        return RUVD_CODEC_MJPEG;
+
+    default:
+        break;
+    }
+
+    /* no stream type exists for any other format */
+    assert(0);
+    return 0;
+}
+
+/* context buffer size for the HEVC main profile, from the aligned frame size and reference count */
+static unsigned calc_ctx_size_h265_main(struct ruvd_decoder *dec)
+{
+    /* at least 8 references for frames of 4096*2000 pixels or more, else at least 17 */
+    const unsigned min_refs = (dec->base.width * dec->base.height >= 4096*2000) ? 8 : 17;
+    unsigned refs = dec->base.max_references + 1;
+    unsigned w = align(align(dec->base.width, VL_MACROBLOCK_WIDTH), 16);
+    unsigned h = align(align(dec->base.height, VL_MACROBLOCK_HEIGHT), 16);
+    unsigned w_units, h_units;
+
+    refs = MAX2(refs, min_refs);
+    w_units = (w + 255) / 16;
+    h_units = (h + 255) / 16;
+
+    return w_units * h_units * 16 * refs + 52 * 1024;
+}
+
+/* context buffer size for HEVC main10 streams, derived from the CTB layout described by pic's SPS */
+static unsigned calc_ctx_size_h265_main10(struct ruvd_decoder *dec, struct pipe_h265_picture_desc *pic)
+{
+    unsigned log2_ctb_size, width_in_ctb, height_in_ctb, num_16x16_block_per_ctb;
+    unsigned context_buffer_size_per_ctb_row, cm_buffer_size, max_mb_address, db_left_tile_pxl_size;
+    unsigned db_left_tile_ctx_size = 4096 / 16 * (32 + 16 * 4);
+
+    unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH);
+    unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT);
+    unsigned coeff_10bit = (pic->pps->sps->bit_depth_luma_minus8 || pic->pps->sps->bit_depth_chroma_minus8) ? 2 : 1;
+    unsigned max_references = dec->base.max_references + 1;
+
+    if (dec->base.width * dec->base.height >= 4096*2000)
+        max_references = MAX2(max_references, 8);
+    else
+        max_references = MAX2(max_references, 17);
+
+    /* CtbLog2SizeY = MinCbLog2SizeY + diff (both log2); adding the pixel size 1 << MinCbLog2SizeY instead would overflow the shifts below */
+    log2_ctb_size = pic->pps->sps->log2_min_luma_coding_block_size_minus3 + 3 + pic->pps->sps->log2_diff_max_min_luma_coding_block_size;
+
+    width_in_ctb = (width + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size;
+    height_in_ctb = (height + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size;
+
+    num_16x16_block_per_ctb = ((1 << log2_ctb_size) >> 4) * ((1 << log2_ctb_size) >> 4);
+    context_buffer_size_per_ctb_row = align(width_in_ctb * num_16x16_block_per_ctb * 16, 256);
+    max_mb_address = (unsigned) ceil(height * 8 / 2048.0);
+
+    cm_buffer_size = max_references * context_buffer_size_per_ctb_row * height_in_ctb;
+    db_left_tile_pxl_size = coeff_10bit * (max_mb_address * 2 * 2048 + 1024);
+
+    return cm_buffer_size + db_left_tile_ctx_size + db_left_tile_pxl_size;
+}
+
+static unsigned get_db_pitch_alignment(struct ruvd_decoder *dec)
+{
+ return 16; /* constant pitch alignment used by calc_dpb_size(); dec is not consulted */
+}
+
+/* calculate size of reference picture buffer (dpb) for the decoder's codec, dimensions, level and reference count */
+static unsigned calc_dpb_size(struct ruvd_decoder *dec)
+{
+ unsigned width_in_mb, height_in_mb, image_size, dpb_size;
+
+ // always align them to MB size for dpb calculation
+ unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH);
+ unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT);
+
+ // always one more for currently decoded picture
+ unsigned max_references = dec->base.max_references + 1;
+
+ // aligned size of a single frame
+ image_size = align(width, get_db_pitch_alignment(dec)) * height;
+ image_size += image_size / 2; // half again on top (presumably the subsampled chroma -- TODO confirm)
+ image_size = align(image_size, 1024);
+
+ // picture width & height in 16 pixel units
+ width_in_mb = width / VL_MACROBLOCK_WIDTH;
+ height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2);
+
+ switch (u_reduce_video_profile(dec->base.profile)) {
+ case PIPE_VIDEO_FORMAT_MPEG4_AVC: {
+ if (!dec->use_legacy) {
+ unsigned fs_in_mb = width_in_mb * height_in_mb;
+ unsigned alignment = 64, num_dpb_buffer;
+
+ if (dec->stream_type == RUVD_CODEC_H264_PERF)
+ alignment = 256;
+ switch(dec->base.level) { // per-level macroblock budget divided by the frame size in macroblocks
+ case 30:
+ num_dpb_buffer = 8100 / fs_in_mb;
+ break;
+ case 31:
+ num_dpb_buffer = 18000 / fs_in_mb;
+ break;
+ case 32:
+ num_dpb_buffer = 20480 / fs_in_mb;
+ break;
+ case 41:
+ num_dpb_buffer = 32768 / fs_in_mb;
+ break;
+ case 42:
+ num_dpb_buffer = 34816 / fs_in_mb;
+ break;
+ case 50:
+ num_dpb_buffer = 110400 / fs_in_mb;
+ break;
+ case 51:
+ num_dpb_buffer = 184320 / fs_in_mb;
+ break;
+ default: // every level not listed above gets the same budget as level 51
+ num_dpb_buffer = 184320 / fs_in_mb;
+ break;
+ }
+ num_dpb_buffer++;
+ max_references = MAX2(MIN2(NUM_H264_REFS, num_dpb_buffer), max_references);
+ dpb_size = image_size * max_references;
+ if ((dec->stream_type != RUVD_CODEC_H264_PERF)) {
+ dpb_size += max_references * align(width_in_mb * height_in_mb * 192, alignment);
+ dpb_size += align(width_in_mb * height_in_mb * 32, alignment);
+ }
+ } else {
+ // the firmware seems to always assume a minimum of ref frames
+ max_references = MAX2(NUM_H264_REFS, max_references);
+ // reference picture buffer
+ dpb_size = image_size * max_references;
+ if ((dec->stream_type != RUVD_CODEC_H264_PERF)) {
+ // macroblock context buffer
+ dpb_size += width_in_mb * height_in_mb * max_references * 192;
+ // IT surface buffer
+ dpb_size += width_in_mb * height_in_mb * 32;
+ }
+ }
+ break;
+ }
+
+ case PIPE_VIDEO_FORMAT_HEVC:
+ if (dec->base.width * dec->base.height >= 4096*2000)
+ max_references = MAX2(max_references, 8);
+ else
+ max_references = MAX2(max_references, 17);
+
+ width = align (width, 16);
+ height = align (height, 16);
+ if (dec->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10)
+ dpb_size = align((align(width, get_db_pitch_alignment(dec)) * height * 9) / 4, 256) * max_references;
+ else
+ dpb_size = align((align(width, get_db_pitch_alignment(dec)) * height * 3) / 2, 256) * max_references;
+ break;
+
+ case PIPE_VIDEO_FORMAT_VC1:
+ // the firmware seems to always assume a minimum of ref frames
+ max_references = MAX2(NUM_VC1_REFS, max_references);
+
+ // reference picture buffer
+ dpb_size = image_size * max_references;
+
+ // CONTEXT_BUFFER
+ dpb_size += width_in_mb * height_in_mb * 128;
+
+ // IT surface buffer
+ dpb_size += width_in_mb * 64;
+
+ // DB surface buffer
+ dpb_size += width_in_mb * 128;
+
+ // BP
+ dpb_size += align(MAX2(width_in_mb, height_in_mb) * 7 * 16, 64);
+ break;
+
+ case PIPE_VIDEO_FORMAT_MPEG12:
+ // reference picture buffer, must be big enough for all frames
+ dpb_size = image_size * NUM_MPEG2_REFS;
+ break;
+
+ case PIPE_VIDEO_FORMAT_MPEG4:
+ // reference picture buffer
+ dpb_size = image_size * max_references;
+
+ // CM
+ dpb_size += width_in_mb * height_in_mb * 64;
+
+ // IT surface buffer
+ dpb_size += align(width_in_mb * height_in_mb * 32, 64);
+
+ dpb_size = MAX2(dpb_size, 30 * 1024 * 1024); // never smaller than 30 * 1024 * 1024
+ break;
+
+ case PIPE_VIDEO_FORMAT_JPEG:
+ dpb_size = 0; // no reference picture buffer is sized for JPEG
+ break;
+
+ default:
+ // something is missing here
+ assert(0);
+
+ // at least use a sane default value
+ dpb_size = 32 * 1024 * 1024;
+ break;
+ }
+ return dpb_size;
+}
+
+/* free associated data in the video buffer callback */
+static void ruvd_destroy_associated_data(void *data)
+{
+ /* NOOP, since we only use an intptr: data is ignored and nothing is freed */
+}
+
+/* get h264 specific message bits: returns a zero-initialized ruvd_h264 filled from pic's SPS/PPS and per-picture fields */
+static struct ruvd_h264 get_h264_msg(struct ruvd_decoder *dec, struct pipe_h264_picture_desc *pic)
+{
+ struct ruvd_h264 result;
+
+ memset(&result, 0, sizeof(result));
+ switch (pic->base.profile) {
+ case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE:
+ case PIPE_VIDEO_PROFILE_MPEG4_AVC_CONSTRAINED_BASELINE:
+ result.profile = RUVD_H264_PROFILE_BASELINE;
+ break;
+
+ case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN:
+ result.profile = RUVD_H264_PROFILE_MAIN;
+ break;
+
+ case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH:
+ result.profile = RUVD_H264_PROFILE_HIGH;
+ break;
+
+ default:
+ assert(0);
+ break;
+ }
+
+ result.level = dec->base.level;
+
+ result.sps_info_flags = 0;
+ result.sps_info_flags |= pic->pps->sps->direct_8x8_inference_flag << 0;
+ result.sps_info_flags |= pic->pps->sps->mb_adaptive_frame_field_flag << 1;
+ result.sps_info_flags |= pic->pps->sps->frame_mbs_only_flag << 2;
+ result.sps_info_flags |= pic->pps->sps->delta_pic_order_always_zero_flag << 3;
+
+ result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8;
+ result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8;
+ result.log2_max_frame_num_minus4 = pic->pps->sps->log2_max_frame_num_minus4;
+ result.pic_order_cnt_type = pic->pps->sps->pic_order_cnt_type;
+ result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4;
+
+ switch (dec->base.chroma_format) {
+ case PIPE_VIDEO_CHROMA_FORMAT_NONE:
+ /* TODO: assert? (chroma_format keeps the 0 from the memset above) */
+ break;
+ case PIPE_VIDEO_CHROMA_FORMAT_400:
+ result.chroma_format = 0;
+ break;
+ case PIPE_VIDEO_CHROMA_FORMAT_420:
+ result.chroma_format = 1;
+ break;
+ case PIPE_VIDEO_CHROMA_FORMAT_422:
+ result.chroma_format = 2;
+ break;
+ case PIPE_VIDEO_CHROMA_FORMAT_444:
+ result.chroma_format = 3;
+ break;
+ }
+
+ result.pps_info_flags = 0;
+ result.pps_info_flags |= pic->pps->transform_8x8_mode_flag << 0;
+ result.pps_info_flags |= pic->pps->redundant_pic_cnt_present_flag << 1;
+ result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 2;
+ result.pps_info_flags |= pic->pps->deblocking_filter_control_present_flag << 3;
+ result.pps_info_flags |= pic->pps->weighted_bipred_idc << 4; /* presumably 2 bits wide, as the next flag sits at bit 6 -- TODO confirm */
+ result.pps_info_flags |= pic->pps->weighted_pred_flag << 6;
+ result.pps_info_flags |= pic->pps->bottom_field_pic_order_in_frame_present_flag << 7;
+ result.pps_info_flags |= pic->pps->entropy_coding_mode_flag << 8;
+
+ result.num_slice_groups_minus1 = pic->pps->num_slice_groups_minus1;
+ result.slice_group_map_type = pic->pps->slice_group_map_type;
+ result.slice_group_change_rate_minus1 = pic->pps->slice_group_change_rate_minus1;
+ result.pic_init_qp_minus26 = pic->pps->pic_init_qp_minus26;
+ result.chroma_qp_index_offset = pic->pps->chroma_qp_index_offset;
+ result.second_chroma_qp_index_offset = pic->pps->second_chroma_qp_index_offset;
+
+ memcpy(result.scaling_list_4x4, pic->pps->ScalingList4x4, 6*16); /* copies 96 bytes */
+ memcpy(result.scaling_list_8x8, pic->pps->ScalingList8x8, 2*64); /* copies 128 bytes */
+
+ if (dec->stream_type == RUVD_CODEC_H264_PERF) { /* this stream type also gets the lists in the IT buffer */
+ memcpy(dec->it, result.scaling_list_4x4, 6*16);
+ memcpy((dec->it + 96), result.scaling_list_8x8, 2*64); /* 96 == 6*16: right behind the 4x4 lists */
+ }
+
+ result.num_ref_frames = pic->num_ref_frames;
+
+ result.num_ref_idx_l0_active_minus1 = pic->num_ref_idx_l0_active_minus1;
+ result.num_ref_idx_l1_active_minus1 = pic->num_ref_idx_l1_active_minus1;
+
+ result.frame_num = pic->frame_num;
+ memcpy(result.frame_num_list, pic->frame_num_list, 4*16);
+ result.curr_field_order_cnt_list[0] = pic->field_order_cnt[0];
+ result.curr_field_order_cnt_list[1] = pic->field_order_cnt[1];
+ memcpy(result.field_order_cnt_list, pic->field_order_cnt_list, 4*16*2);
+
+ result.decoded_pic_idx = pic->frame_num; /* the picture index is taken from frame_num as well */
+
+ return result;
+}
+
+/* build the h265 part of the decode message from pic; also fills dec->it and tags target with the current POC */
+static struct ruvd_h265 get_h265_msg(struct ruvd_decoder *dec, struct pipe_video_buffer *target,
+				     struct pipe_h265_picture_desc *pic)
+{
+	struct ruvd_h265 result;
+	unsigned i;
+
+	memset(&result, 0, sizeof(result));
+	/* SPS flags, each one shifted to its own bit position in sps_info_flags */
+	result.sps_info_flags = 0;
+	result.sps_info_flags |= pic->pps->sps->scaling_list_enabled_flag << 0;
+	result.sps_info_flags |= pic->pps->sps->amp_enabled_flag << 1;
+	result.sps_info_flags |= pic->pps->sps->sample_adaptive_offset_enabled_flag << 2;
+	result.sps_info_flags |= pic->pps->sps->pcm_enabled_flag << 3;
+	result.sps_info_flags |= pic->pps->sps->pcm_loop_filter_disabled_flag << 4;
+	result.sps_info_flags |= pic->pps->sps->long_term_ref_pics_present_flag << 5;
+	result.sps_info_flags |= pic->pps->sps->sps_temporal_mvp_enabled_flag << 6;
+	result.sps_info_flags |= pic->pps->sps->strong_intra_smoothing_enabled_flag << 7;
+	result.sps_info_flags |= pic->pps->sps->separate_colour_plane_flag << 8;
+	if (pic->UseRefPicList == true)
+		result.sps_info_flags |= 1 << 10;
+
+	result.chroma_format = pic->pps->sps->chroma_format_idc;
+	result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8;
+	result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8;
+	result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4;
+	result.sps_max_dec_pic_buffering_minus1 = pic->pps->sps->sps_max_dec_pic_buffering_minus1;
+	result.log2_min_luma_coding_block_size_minus3 = pic->pps->sps->log2_min_luma_coding_block_size_minus3;
+	result.log2_diff_max_min_luma_coding_block_size = pic->pps->sps->log2_diff_max_min_luma_coding_block_size;
+	result.log2_min_transform_block_size_minus2 = pic->pps->sps->log2_min_transform_block_size_minus2;
+	result.log2_diff_max_min_transform_block_size = pic->pps->sps->log2_diff_max_min_transform_block_size;
+	result.max_transform_hierarchy_depth_inter = pic->pps->sps->max_transform_hierarchy_depth_inter;
+	result.max_transform_hierarchy_depth_intra = pic->pps->sps->max_transform_hierarchy_depth_intra;
+	result.pcm_sample_bit_depth_luma_minus1 = pic->pps->sps->pcm_sample_bit_depth_luma_minus1;
+	result.pcm_sample_bit_depth_chroma_minus1 = pic->pps->sps->pcm_sample_bit_depth_chroma_minus1;
+	result.log2_min_pcm_luma_coding_block_size_minus3 = pic->pps->sps->log2_min_pcm_luma_coding_block_size_minus3;
+	result.log2_diff_max_min_pcm_luma_coding_block_size = pic->pps->sps->log2_diff_max_min_pcm_luma_coding_block_size;
+	result.num_short_term_ref_pic_sets = pic->pps->sps->num_short_term_ref_pic_sets;
+	/* PPS flags, packed the same way into pps_info_flags */
+	result.pps_info_flags = 0;
+	result.pps_info_flags |= pic->pps->dependent_slice_segments_enabled_flag << 0;
+	result.pps_info_flags |= pic->pps->output_flag_present_flag << 1;
+	result.pps_info_flags |= pic->pps->sign_data_hiding_enabled_flag << 2;
+	result.pps_info_flags |= pic->pps->cabac_init_present_flag << 3;
+	result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 4;
+	result.pps_info_flags |= pic->pps->transform_skip_enabled_flag << 5;
+	result.pps_info_flags |= pic->pps->cu_qp_delta_enabled_flag << 6;
+	result.pps_info_flags |= pic->pps->pps_slice_chroma_qp_offsets_present_flag << 7;
+	result.pps_info_flags |= pic->pps->weighted_pred_flag << 8;
+	result.pps_info_flags |= pic->pps->weighted_bipred_flag << 9;
+	result.pps_info_flags |= pic->pps->transquant_bypass_enabled_flag << 10;
+	result.pps_info_flags |= pic->pps->tiles_enabled_flag << 11;
+	result.pps_info_flags |= pic->pps->entropy_coding_sync_enabled_flag << 12;
+	result.pps_info_flags |= pic->pps->uniform_spacing_flag << 13;
+	result.pps_info_flags |= pic->pps->loop_filter_across_tiles_enabled_flag << 14;
+	result.pps_info_flags |= pic->pps->pps_loop_filter_across_slices_enabled_flag << 15;
+	result.pps_info_flags |= pic->pps->deblocking_filter_override_enabled_flag << 16;
+	result.pps_info_flags |= pic->pps->pps_deblocking_filter_disabled_flag << 17;
+	result.pps_info_flags |= pic->pps->lists_modification_present_flag << 18;
+	result.pps_info_flags |= pic->pps->slice_segment_header_extension_present_flag << 19;
+	//result.pps_info_flags |= pic->pps->deblocking_filter_control_present_flag; ???
+
+	result.num_extra_slice_header_bits = pic->pps->num_extra_slice_header_bits;
+	result.num_long_term_ref_pic_sps = pic->pps->sps->num_long_term_ref_pics_sps;
+	result.num_ref_idx_l0_default_active_minus1 = pic->pps->num_ref_idx_l0_default_active_minus1;
+	result.num_ref_idx_l1_default_active_minus1 = pic->pps->num_ref_idx_l1_default_active_minus1;
+	result.pps_cb_qp_offset = pic->pps->pps_cb_qp_offset;
+	result.pps_cr_qp_offset = pic->pps->pps_cr_qp_offset;
+	result.pps_beta_offset_div2 = pic->pps->pps_beta_offset_div2;
+	result.pps_tc_offset_div2 = pic->pps->pps_tc_offset_div2;
+	result.diff_cu_qp_delta_depth = pic->pps->diff_cu_qp_delta_depth;
+	result.num_tile_columns_minus1 = pic->pps->num_tile_columns_minus1;
+	result.num_tile_rows_minus1 = pic->pps->num_tile_rows_minus1;
+	result.log2_parallel_merge_level_minus2 = pic->pps->log2_parallel_merge_level_minus2;
+	result.init_qp_minus26 = pic->pps->init_qp_minus26;
+
+	for (i = 0; i < 19; ++i)
+		result.column_width_minus1[i] = pic->pps->column_width_minus1[i];
+
+	for (i = 0; i < 21; ++i)
+		result.row_height_minus1[i] = pic->pps->row_height_minus1[i];
+
+	result.num_delta_pocs_ref_rps_idx = pic->NumDeltaPocsOfRefRpsIdx;
+	result.curr_idx = pic->CurrPicOrderCntVal;
+	result.curr_poc = pic->CurrPicOrderCntVal;
+
+	vl_video_buffer_set_associated_data(target, &dec->base,
+					    (void *)(uintptr_t)pic->CurrPicOrderCntVal,
+					    &ruvd_destroy_associated_data);
+	/* per reference slot: its POC and the value stored on the reference buffer, 0x7F if the slot is empty */
+	for (i = 0; i < 16; ++i) {
+		struct pipe_video_buffer *ref = pic->ref[i];
+		uintptr_t ref_pic = 0;
+
+		result.poc_list[i] = pic->PicOrderCntVal[i];
+
+		if (ref)
+			ref_pic = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base);
+		else
+			ref_pic = 0x7F;
+		result.ref_pic_list[i] = ref_pic;
+	}
+	/* mark every reference picture set entry unused (0xFF) before filling in the used ones */
+	for (i = 0; i < 8; ++i) {
+		result.ref_pic_set_st_curr_before[i] = 0xFF;
+		result.ref_pic_set_st_curr_after[i] = 0xFF;
+		result.ref_pic_set_lt_curr[i] = 0xFF;
+	}
+	/* only 8 entries per set are initialised above, so never copy more than 8 from pic */
+	for (i = 0; i < 8 && i < pic->NumPocStCurrBefore; ++i)
+		result.ref_pic_set_st_curr_before[i] = pic->RefPicSetStCurrBefore[i];
+
+	for (i = 0; i < 8 && i < pic->NumPocStCurrAfter; ++i)
+		result.ref_pic_set_st_curr_after[i] = pic->RefPicSetStCurrAfter[i];
+
+	for (i = 0; i < 8 && i < pic->NumPocLtCurr; ++i)
+		result.ref_pic_set_lt_curr[i] = pic->RefPicSetLtCurr[i];
+
+	for (i = 0; i < 6; ++i)
+		result.ucScalingListDCCoefSizeID2[i] = pic->pps->sps->ScalingListDCCoeff16x16[i];
+
+	for (i = 0; i < 2; ++i)
+		result.ucScalingListDCCoefSizeID3[i] = pic->pps->sps->ScalingListDCCoeff32x32[i];
+	/* the scaling lists are copied into dec->it at offsets 0, 96, 480 and 864 */
+	memcpy(dec->it, pic->pps->sps->ScalingList4x4, 6 * 16);
+	memcpy(dec->it + 96, pic->pps->sps->ScalingList8x8, 6 * 64);
+	memcpy(dec->it + 480, pic->pps->sps->ScalingList16x16, 6 * 64);
+	memcpy(dec->it + 864, pic->pps->sps->ScalingList32x32, 2 * 64);
+
+	for (i = 0 ; i < 2 ; i++) {
+		for (int j = 0 ; j < 15 ; j++)
+			result.direct_reflist[i][j] = pic->RefPicList[i][j];
+	}
+
+	if (pic->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) {
+		if (target->buffer_format == PIPE_FORMAT_P016) {
+			result.p010_mode = 1;
+			result.msb_mode = 1;
+		} else {
+			result.luma_10to8 = 5;
+			result.chroma_10to8 = 5;
+			result.sclr_luma10to8 = 4;
+			result.sclr_chroma10to8 = 4;
+		}
+	}
+
+	/* TODO
+	result.highestTid;
+	result.isNonRef;
+
+	IDRPicFlag;
+	RAPPicFlag;
+	NumPocTotalCurr;
+	NumShortTermPictureSliceHeaderBits;
+	NumLongTermPictureSliceHeaderBits;
+
+	IsLongTerm[16];
+	*/
+
+	return result;
+}
+
+/* build the vc1 part of the decode message: profile/level plus the packed sps/pps flag words */
+static struct ruvd_vc1 get_vc1_msg(struct pipe_vc1_picture_desc *pic)
+{
+	struct ruvd_vc1 result;
+
+	memset(&result, 0, sizeof(result));
+	/* translate the profile of the picture into the UVD profile id and a fixed level */
+	switch(pic->base.profile) {
+	case PIPE_VIDEO_PROFILE_VC1_SIMPLE:
+		result.profile = RUVD_VC1_PROFILE_SIMPLE;
+		result.level = 1;
+		break;
+
+	case PIPE_VIDEO_PROFILE_VC1_MAIN:
+		result.profile = RUVD_VC1_PROFILE_MAIN;
+		result.level = 2;
+		break;
+
+	case PIPE_VIDEO_PROFILE_VC1_ADVANCED:
+		result.profile = RUVD_VC1_PROFILE_ADVANCED;
+		result.level = 4;
+		break;
+
+	default:
+		assert(0);
+	}
+
+	/* fields common for all profiles */
+	result.sps_info_flags |= pic->postprocflag << 7;
+	result.sps_info_flags |= pic->pulldown << 6;
+	result.sps_info_flags |= pic->interlace << 5;
+	result.sps_info_flags |= pic->tfcntrflag << 4;
+	result.sps_info_flags |= pic->finterpflag << 3;
+	result.sps_info_flags |= pic->psf << 1;
+	/* bit 31 needs an unsigned operand: shifting a promoted int into its sign bit is undefined */
+	result.pps_info_flags |= (uint32_t)pic->range_mapy_flag << 31;
+	result.pps_info_flags |= pic->range_mapy << 28;
+	result.pps_info_flags |= pic->range_mapuv_flag << 27;
+	result.pps_info_flags |= pic->range_mapuv << 24;
+	result.pps_info_flags |= pic->multires << 21;
+	result.pps_info_flags |= pic->maxbframes << 16;
+	result.pps_info_flags |= pic->overlap << 11;
+	result.pps_info_flags |= pic->quantizer << 9;
+	result.pps_info_flags |= pic->panscan_flag << 7;
+	result.pps_info_flags |= pic->refdist_flag << 6;
+	result.pps_info_flags |= pic->vstransform << 0;
+
+	/* some fields only apply to main/advanced profile */
+	if (pic->base.profile != PIPE_VIDEO_PROFILE_VC1_SIMPLE) {
+		result.pps_info_flags |= pic->syncmarker << 20;
+		result.pps_info_flags |= pic->rangered << 19;
+		result.pps_info_flags |= pic->loopfilter << 5;
+		result.pps_info_flags |= pic->fastuvmc << 4;
+		result.pps_info_flags |= pic->extended_mv << 3;
+		result.pps_info_flags |= pic->extended_dmv << 8;
+		result.pps_info_flags |= pic->dquant << 1;
+	}
+
+	result.chroma_format = 1;
+
+#if 0
+//(((unsigned int)(pPicParams->advance.reserved1)) << SPS_INFO_VC1_RESERVED_SHIFT)
+uint32_t 	slice_count
+uint8_t 	picture_type
+uint8_t 	frame_coding_mode
+uint8_t 	deblockEnable
+uint8_t 	pquant
+#endif
+
+	return result;
+}
+
+/* frame number stored on a referenced video buffer, clamped to the window of usable reference frames */
+static uint32_t get_ref_pic_idx(struct ruvd_decoder *dec, struct pipe_video_buffer *ref)
+{
+	uint32_t newest = MAX2(dec->frame_number, 1) - 1;
+	uint32_t oldest = MAX2(dec->frame_number, NUM_MPEG2_REFS) - NUM_MPEG2_REFS;
+	uintptr_t idx;
+
+	/* without a reference buffer fall back to the previous frame */
+	if (ref == NULL)
+		return newest;
+
+	idx = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base);
+
+	/* clamp from above first, then from below */
+	idx = idx < newest ? idx : newest;
+	return idx > oldest ? idx : oldest;
+}
+
+/* fill in the MPEG-2 specific part of the decode message */
+static struct ruvd_mpeg2 get_mpeg2_msg(struct ruvd_decoder *dec,
+				       struct pipe_mpeg12_picture_desc *pic)
+{
+	struct ruvd_mpeg2 msg;
+	const int *scan = !pic->alternate_scan ? vl_zscan_normal : vl_zscan_alternate;
+	unsigned n, dir, comp;
+
+	memset(&msg, 0, sizeof(msg));
+	/* both quantiser matrices are always loaded, reordered through the selected scan table */
+	msg.load_intra_quantiser_matrix = 1;
+	msg.load_nonintra_quantiser_matrix = 1;
+	for (n = 0; n < 64; ++n) {
+		msg.intra_quantiser_matrix[n] = pic->intra_matrix[scan[n]];
+		msg.nonintra_quantiser_matrix[n] = pic->non_intra_matrix[scan[n]];
+	}
+
+	msg.decoded_pic_idx = dec->frame_number;
+	for (n = 0; n < 2; ++n)
+		msg.ref_pic_idx[n] = get_ref_pic_idx(dec, pic->ref[n]);
+
+	msg.profile_and_level_indication = 0;
+	msg.chroma_format = 0x1;
+
+	/* every f_code is stored one higher than in the picture description */
+	for (dir = 0; dir < 2; ++dir)
+		for (comp = 0; comp < 2; ++comp)
+			msg.f_code[dir][comp] = pic->f_code[dir][comp] + 1;
+	msg.picture_coding_type = pic->picture_coding_type;
+	msg.intra_dc_precision = pic->intra_dc_precision;
+	msg.pic_structure = pic->picture_structure;
+	msg.top_field_first = pic->top_field_first;
+	msg.frame_pred_frame_dct = pic->frame_pred_frame_dct;
+	msg.concealment_motion_vectors = pic->concealment_motion_vectors;
+	msg.q_scale_type = pic->q_scale_type;
+	msg.intra_vlc_format = pic->intra_vlc_format;
+	msg.alternate_scan = pic->alternate_scan;
+
+	return msg;
+}
+
+/* fill in the MPEG-4 specific part of the decode message */
+static struct ruvd_mpeg4 get_mpeg4_msg(struct ruvd_decoder *dec,
+				       struct pipe_mpeg4_picture_desc *pic)
+{
+	struct ruvd_mpeg4 msg;
+	unsigned n;
+
+	memset(&msg, 0, sizeof(msg));
+
+	/* current frame number plus the two reference frames */
+	msg.decoded_pic_idx = dec->frame_number;
+	for (n = 0; n < 2; ++n)
+		msg.ref_pic_idx[n] = get_ref_pic_idx(dec, pic->ref[n]);
+
+	/* values that are the same for every picture */
+	msg.variant_type = 0;
+	msg.profile_and_level_indication = 0xF0; // ASP Level0
+	msg.video_object_layer_verid = 0x5; // advanced simple
+	msg.video_object_layer_shape = 0x0; // rectangular
+	msg.video_object_layer_width = dec->base.width;
+	msg.video_object_layer_height = dec->base.height;
+	msg.vop_time_increment_resolution = pic->vop_time_increment_resolution;
+	msg.quant_type = pic->quant_type;
+
+	/* flag bits taken from the picture description first, then the ones that are always set */
+	msg.flags |= pic->short_video_header << 0;
+	msg.flags |= pic->interlaced << 2;
+	msg.flags |= pic->quarter_sample << 5;
+	msg.flags |= pic->resync_marker_disable << 7;
+	msg.flags |= 1 << 3; // load_intra_quant_mat
+	msg.flags |= 1 << 4; // load_nonintra_quant_mat
+	msg.flags |= 1 << 6; // complexity_estimation_disable
+	//msg.flags |= obmc_disable << 1;
+	//msg.flags |= data_partitioned << 8;
+	//msg.flags |= reversible_vlc << 9;
+	msg.flags |= 0 << 10; // newpred_enable
+	msg.flags |= 0 << 11; // reduced_resolution_vop_enable
+	//msg.flags |= scalability << 12;
+	//msg.flags |= is_object_layer_identifier << 13;
+	//msg.flags |= fixed_vop_rate << 14;
+	//msg.flags |= newpred_segment_type << 15;
+
+	for (n = 0; n < 64; ++n) {
+		msg.nonintra_quant_mat[n] = pic->non_intra_matrix[vl_zscan_normal[n]];
+		msg.intra_quant_mat[n] = pic->intra_matrix[vl_zscan_normal[n]];
+	}
+
+	/*
+	int32_t 	trd [2]
+	int32_t 	trb [2]
+	uint8_t 	vop_coding_type
+	uint8_t 	vop_fcode_forward
+	uint8_t 	vop_fcode_backward
+	uint8_t 	rounding_control
+	uint8_t 	alternate_vertical_scan_flag
+	uint8_t 	top_field_first
+	*/
+
+	return msg;
+}
+
+/* write the JPEG headers (SOI, DQT, DHT, optional DRI, SOF, SOS) built from pic at dec->bs_ptr and advance bs_ptr/bs_size past them */
+static void get_mjpeg_slice_header(struct ruvd_decoder *dec, struct pipe_mjpeg_picture_desc *pic)
+{
+	int size = 0, saved_size, len_pos, i;
+	uint8_t *buf = dec->bs_ptr;
+	/* NOTE(review): nothing in this function checks that the headers fit into the mapped buffer */
+	/* SOI */
+	buf[size++] = 0xff;
+	buf[size++] = 0xd8;
+
+	/* DQT */
+	buf[size++] = 0xff;
+	buf[size++] = 0xdb;
+	/* skip the two length bytes for now; they are filled in once the segment is complete */
+	len_pos = size++;
+	size++;
+
+	for (i = 0; i < 4; ++i) {
+		if (pic->quantization_table.load_quantiser_table[i] == 0)
+			continue;
+
+		buf[size++] = i;
+		memcpy((buf + size), &pic->quantization_table.quantiser_table[i], 64);
+		size += 64;
+	}
+	/* 16 bit fields are stored byte by byte, high byte first, independent of host byte order and alignment */
+	buf[len_pos] = ((size - 4) >> 8) & 0xff;
+	buf[len_pos + 1] = (size - 4) & 0xff;
+
+	saved_size = size;
+
+	/* DHT */
+	buf[size++] = 0xff;
+	buf[size++] = 0xc4;
+
+	len_pos = size++;
+	size++;
+
+	for (i = 0; i < 2; ++i) {
+		if (pic->huffman_table.load_huffman_table[i] == 0)
+			continue;
+
+		buf[size++] = 0x00 | i;
+		memcpy((buf + size), &pic->huffman_table.table[i].num_dc_codes, 16);
+		size += 16;
+		memcpy((buf + size), &pic->huffman_table.table[i].dc_values, 12);
+		size += 12;
+	}
+
+	for (i = 0; i < 2; ++i) {
+		if (pic->huffman_table.load_huffman_table[i] == 0)
+			continue;
+
+		buf[size++] = 0x10 | i;
+		memcpy((buf + size), &pic->huffman_table.table[i].num_ac_codes, 16);
+		size += 16;
+		memcpy((buf + size), &pic->huffman_table.table[i].ac_values, 162);
+		size += 162;
+	}
+
+	buf[len_pos] = ((size - saved_size - 2) >> 8) & 0xff;
+	buf[len_pos + 1] = (size - saved_size - 2) & 0xff;
+
+	saved_size = size;
+
+	/* DRI */
+	if (pic->slice_parameter.restart_interval) {
+		buf[size++] = 0xff;
+		buf[size++] = 0xdd;
+		buf[size++] = 0x00;
+		buf[size++] = 0x04;
+		buf[size++] = (pic->slice_parameter.restart_interval >> 8) & 0xff;
+		buf[size++] = pic->slice_parameter.restart_interval & 0xff;
+		saved_size = size;
+	}
+
+	/* SOF */
+	buf[size++] = 0xff;
+	buf[size++] = 0xc0;
+
+	len_pos = size++;
+	size++;
+
+	buf[size++] = 0x08;
+
+	/* picture height, high byte first */
+	buf[size++] = (pic->picture_parameter.picture_height >> 8) & 0xff;
+	buf[size++] = pic->picture_parameter.picture_height & 0xff;
+
+	/* picture width, high byte first */
+	buf[size++] = (pic->picture_parameter.picture_width >> 8) & 0xff;
+	buf[size++] = pic->picture_parameter.picture_width & 0xff;
+
+	buf[size++] = pic->picture_parameter.num_components;
+
+	for (i = 0; i < pic->picture_parameter.num_components; ++i) {
+		buf[size++] = pic->picture_parameter.components[i].component_id;
+		buf[size++] = pic->picture_parameter.components[i].h_sampling_factor << 4 |
+			pic->picture_parameter.components[i].v_sampling_factor;
+		buf[size++] = pic->picture_parameter.components[i].quantiser_table_selector;
+	}
+
+	buf[len_pos] = ((size - saved_size - 2) >> 8) & 0xff;
+	buf[len_pos + 1] = (size - saved_size - 2) & 0xff;
+
+	saved_size = size;
+
+	/* SOS */
+	buf[size++] = 0xff;
+	buf[size++] = 0xda;
+
+	len_pos = size++;
+	size++;
+
+	buf[size++] = pic->slice_parameter.num_components;
+
+	for (i = 0; i < pic->slice_parameter.num_components; ++i) {
+		buf[size++] = pic->slice_parameter.components[i].component_selector;
+		buf[size++] = pic->slice_parameter.components[i].dc_table_selector << 4 |
+			pic->slice_parameter.components[i].ac_table_selector;
+	}
+
+	buf[size++] = 0x00;
+	buf[size++] = 0x3f;
+	buf[size++] = 0x00;
+
+	buf[len_pos] = ((size - saved_size - 2) >> 8) & 0xff;
+	buf[len_pos + 1] = (size - saved_size - 2) & 0xff;
+
+	dec->bs_ptr += size;
+	dec->bs_size += size;
+}
+
+/**
+ * tear down the decoder: send the destroy message, then release all resources
+ */
+static void ruvd_destroy(struct pipe_video_codec *decoder)
+{
+	struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder;
+	unsigned n = 0;
+
+	assert(decoder);
+
+	/* queue a RUVD_MSG_DESTROY message for this stream handle and flush it */
+	map_msg_fb_it_buf(dec);
+	dec->msg->stream_handle = dec->stream_handle;
+	dec->msg->msg_type = RUVD_MSG_DESTROY;
+	dec->msg->size = sizeof(*dec->msg);
+	send_msg_buf(dec);
+	flush(dec, 0);
+
+	dec->ws->cs_destroy(dec->cs);
+
+	while (n < NUM_BUFFERS) {
+		rvid_destroy_buffer(&dec->msg_fb_it_buffers[n]);
+		rvid_destroy_buffer(&dec->bs_buffers[n]);
+		++n;
+	}
+
+	rvid_destroy_buffer(&dec->dpb);
+	rvid_destroy_buffer(&dec->ctx);
+	rvid_destroy_buffer(&dec->sessionctx);
+	FREE(dec);
+}
+
+/**
+ * begin a new frame: number it, tag the target with that number and map the bitstream buffer
+ */
+static void ruvd_begin_frame(struct pipe_video_codec *decoder,
+			     struct pipe_video_buffer *target,
+			     struct pipe_picture_desc *picture)
+{
+	struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder;
+	struct rvid_buffer *bs_buf;
+
+	assert(decoder);
+
+	dec->frame_number += 1;
+	vl_video_buffer_set_associated_data(target, decoder, (void *)(uintptr_t)dec->frame_number,
+					    &ruvd_destroy_associated_data);
+
+	/* the bitstream of this frame is collected in the current buffer, starting at size 0 */
+	bs_buf = &dec->bs_buffers[dec->cur_buffer];
+	dec->bs_size = 0;
+	dec->bs_ptr = dec->ws->buffer_map(bs_buf->res->buf, dec->cs, PIPE_TRANSFER_WRITE);
+}
+
+/**
+ * decode a macroblock; this entry point is a stub that only asserts
+ */
+static void ruvd_decode_macroblock(struct pipe_video_codec *decoder,
+ struct pipe_video_buffer *target,
+ struct pipe_picture_desc *picture,
+ const struct pipe_macroblock *macroblocks,
+ unsigned num_macroblocks)
+{
+ /* not supported (yet): none of the arguments is used, any call trips the assert */
+ assert(0);
+}
+
+/**
+ * append the given bitstream buffers to the current bitstream buffer, growing it when needed
+ */
+static void ruvd_decode_bitstream(struct pipe_video_codec *decoder,
+				  struct pipe_video_buffer *target,
+				  struct pipe_picture_desc *picture,
+				  unsigned num_buffers,
+				  const void * const *buffers,
+				  const unsigned *sizes)
+{
+	struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder;
+	enum pipe_video_format format = u_reduce_video_profile(picture->profile);
+	unsigned i;
+
+	assert(decoder);
+
+	if (!dec->bs_ptr)
+		return;
+
+	if (format == PIPE_VIDEO_FORMAT_JPEG)
+		get_mjpeg_slice_header(dec, (struct pipe_mjpeg_picture_desc*)picture);
+
+	for (i = 0; i < num_buffers; ++i) {
+		struct rvid_buffer *buf = &dec->bs_buffers[dec->cur_buffer];
+		unsigned new_size = dec->bs_size + sizes[i];
+
+		if (format == PIPE_VIDEO_FORMAT_JPEG)
+			new_size += 2; /* save for EOI */
+
+		if (new_size > buf->res->buf->size) {
+			dec->ws->buffer_unmap(buf->res->buf);
+			/* the old mapping is gone: keep bs_ptr NULL so that a failure below makes end_frame bail out */
+			dec->bs_ptr = NULL;
+			if (!rvid_resize_buffer(dec->screen, dec->cs, buf, new_size)) {
+				RVID_ERR("Can't resize bitstream buffer!");
+				return;
+			}
+			dec->bs_ptr = dec->ws->buffer_map(buf->res->buf, dec->cs,
+							  PIPE_TRANSFER_WRITE);
+			if (!dec->bs_ptr)
+				return;
+			dec->bs_ptr += dec->bs_size;
+		}
+
+		memcpy(dec->bs_ptr, buffers[i], sizes[i]);
+		dec->bs_size += sizes[i];
+		dec->bs_ptr += sizes[i];
+	}
+
+	if (format == PIPE_VIDEO_FORMAT_JPEG) {
+		((uint8_t *)dec->bs_ptr)[0] = 0xff;	/* EOI */
+		((uint8_t *)dec->bs_ptr)[1] = 0xd9;
+		dec->bs_size += 2;
+		dec->bs_ptr += 2;
+	}
+}
+
+/**
+ * end decoding of the current frame: build the decode message and submit all buffers
+ */
+static void ruvd_end_frame(struct pipe_video_codec *decoder,
+			   struct pipe_video_buffer *target,
+			   struct pipe_picture_desc *picture)
+{
+	struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder;
+	struct pb_buffer *dt;
+	struct rvid_buffer *msg_fb_it_buf, *bs_buf;
+	unsigned bs_size;
+
+	assert(decoder);
+
+	if (!dec->bs_ptr)
+		return;
+
+	msg_fb_it_buf = &dec->msg_fb_it_buffers[dec->cur_buffer];
+	bs_buf = &dec->bs_buffers[dec->cur_buffer];
+	/* round the bitstream size up to a multiple of 128 and zero the added tail */
+	bs_size = align(dec->bs_size, 128);
+	memset(dec->bs_ptr, 0, bs_size - dec->bs_size);
+	dec->ws->buffer_unmap(bs_buf->res->buf);
+
+	map_msg_fb_it_buf(dec);
+	dec->msg->size = sizeof(*dec->msg);
+	dec->msg->msg_type = RUVD_MSG_DECODE;
+	dec->msg->stream_handle = dec->stream_handle;
+	dec->msg->status_report_feedback_number = dec->frame_number;
+
+	dec->msg->body.decode.stream_type = dec->stream_type;
+	dec->msg->body.decode.decode_flags = 0x1;
+	dec->msg->body.decode.width_in_samples = dec->base.width;
+	dec->msg->body.decode.height_in_samples = dec->base.height;
+	/* for VC-1 simple and main profile the size is passed in units of 16 samples, rounded up */
+	if ((picture->profile == PIPE_VIDEO_PROFILE_VC1_SIMPLE) ||
+	    (picture->profile == PIPE_VIDEO_PROFILE_VC1_MAIN)) {
+		dec->msg->body.decode.width_in_samples = align(dec->msg->body.decode.width_in_samples, 16) / 16;
+		dec->msg->body.decode.height_in_samples = align(dec->msg->body.decode.height_in_samples, 16) / 16;
+	}
+
+	if (dec->dpb.res)
+		dec->msg->body.decode.dpb_size = dec->dpb.res->buf->size;
+	dec->msg->body.decode.bsd_size = bs_size;
+	dec->msg->body.decode.db_pitch = align(dec->base.width, get_db_pitch_alignment(dec));
+
+	dt = dec->set_dtb(dec->msg, (struct vl_video_buffer *)target);
+
+	switch (u_reduce_video_profile(picture->profile)) {
+	case PIPE_VIDEO_FORMAT_MPEG4_AVC:
+		dec->msg->body.decode.codec.h264 = get_h264_msg(dec, (struct pipe_h264_picture_desc*)picture);
+		break;
+
+	case PIPE_VIDEO_FORMAT_HEVC:
+		dec->msg->body.decode.codec.h265 = get_h265_msg(dec, target, (struct pipe_h265_picture_desc*)picture);
+		if (dec->ctx.res == NULL) {
+			unsigned ctx_size;
+			if (dec->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10)
+				ctx_size = calc_ctx_size_h265_main10(dec, (struct pipe_h265_picture_desc*)picture);
+			else
+				ctx_size = calc_ctx_size_h265_main(dec);
+			/* only clear the context buffer when its allocation succeeded */
+			if (!rvid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT))
+				RVID_ERR("Can't allocated context buffer.\n");
+			else
+				rvid_clear_buffer(decoder->context, &dec->ctx);
+		}
+		if (dec->ctx.res)
+			dec->msg->body.decode.dpb_reserved = dec->ctx.res->buf->size;
+		break;
+
+	case PIPE_VIDEO_FORMAT_VC1:
+		dec->msg->body.decode.codec.vc1 = get_vc1_msg((struct pipe_vc1_picture_desc*)picture);
+		break;
+
+	case PIPE_VIDEO_FORMAT_MPEG12:
+		dec->msg->body.decode.codec.mpeg2 = get_mpeg2_msg(dec, (struct pipe_mpeg12_picture_desc*)picture);
+		break;
+
+	case PIPE_VIDEO_FORMAT_MPEG4:
+		dec->msg->body.decode.codec.mpeg4 = get_mpeg4_msg(dec, (struct pipe_mpeg4_picture_desc*)picture);
+		break;
+
+	case PIPE_VIDEO_FORMAT_JPEG:
+		break;
+
+	default:
+		assert(0);
+		return;
+	}
+
+	dec->msg->body.decode.db_surf_tile_config = dec->msg->body.decode.dt_surf_tile_config;
+	dec->msg->body.decode.extension_support = 0x1;
+
+	/* set at least the feedback buffer size */
+	dec->fb[0] = dec->fb_size;
+
+	send_msg_buf(dec);
+
+	if (dec->dpb.res)
+		send_cmd(dec, RUVD_CMD_DPB_BUFFER, dec->dpb.res->buf, 0,
+			 RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM);
+
+	if (dec->ctx.res)
+		send_cmd(dec, RUVD_CMD_CONTEXT_BUFFER, dec->ctx.res->buf, 0,
+			 RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM);
+	send_cmd(dec, RUVD_CMD_BITSTREAM_BUFFER, bs_buf->res->buf,
+		 0, RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
+	send_cmd(dec, RUVD_CMD_DECODING_TARGET_BUFFER, dt, 0,
+		 RADEON_USAGE_WRITE, RADEON_DOMAIN_VRAM);
+	send_cmd(dec, RUVD_CMD_FEEDBACK_BUFFER, msg_fb_it_buf->res->buf,
+		 FB_BUFFER_OFFSET, RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT);
+	if (have_it(dec))
+		send_cmd(dec, RUVD_CMD_ITSCALING_TABLE_BUFFER, msg_fb_it_buf->res->buf,
+			 FB_BUFFER_OFFSET + dec->fb_size, RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
+	set_reg(dec, dec->reg.cntl, 1);
+
+	flush(dec, RADEON_FLUSH_ASYNC);
+	next_buffer(dec);
+}
+
+/**
+ * flush hook of the decoder; the body is empty, ruvd_end_frame() calls flush() itself
+ */
+static void ruvd_flush(struct pipe_video_codec *decoder)
+{
+}
+
+/**
+ * create a UVD decoder
+ */
+struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
+					     const struct pipe_video_codec *templ,
+					     ruvd_set_dtb set_dtb)
+{
+	struct r600_common_context *rctx = (struct r600_common_context*)context;
+	struct radeon_winsys *ws = rctx->ws;
+	unsigned width = templ->width, height = templ->height;
+	unsigned bs_buf_size, dpb_size;
+	struct ruvd_decoder *dec;
+	struct radeon_info info;
+	int n;
+
+	ws->query_info(ws, &info);
+
+	switch(u_reduce_video_profile(templ->profile)) {
+	case PIPE_VIDEO_FORMAT_MPEG12:
+		/* hand over to vl_create_mpeg12_decoder() for other entrypoints and for chips before PALM */
+		if (templ->entrypoint > PIPE_VIDEO_ENTRYPOINT_BITSTREAM || info.family < CHIP_PALM)
+			return vl_create_mpeg12_decoder(context, templ);
+
+		/* fall through */
+	case PIPE_VIDEO_FORMAT_MPEG4:
+	case PIPE_VIDEO_FORMAT_MPEG4_AVC:
+		/* round the frame size up to whole macroblocks */
+		width = align(width, VL_MACROBLOCK_WIDTH);
+		height = align(height, VL_MACROBLOCK_HEIGHT);
+		break;
+
+	default:
+		break;
+	}
+
+	dec = CALLOC_STRUCT(ruvd_decoder);
+	if (!dec)
+		return NULL;
+
+	if (info.drm_major < 3)
+		dec->use_legacy = true;
+
+	/* start from the template, then override context, size and entry points */
+	dec->base = *templ;
+	dec->base.context = context;
+	dec->base.height = height;
+	dec->base.width = width;
+	dec->base.flush = ruvd_flush;
+	dec->base.destroy = ruvd_destroy;
+	dec->base.begin_frame = ruvd_begin_frame;
+	dec->base.end_frame = ruvd_end_frame;
+	dec->base.decode_bitstream = ruvd_decode_bitstream;
+	dec->base.decode_macroblock = ruvd_decode_macroblock;
+
+	dec->stream_type = profile2stream_type(dec, info.family);
+	dec->set_dtb = set_dtb;
+	dec->stream_handle = rvid_alloc_stream_handle();
+	dec->screen = context->screen;
+	dec->ws = ws;
+	dec->cs = ws->cs_create(rctx->ctx, RING_UVD, NULL, NULL);
+	if (!dec->cs) {
+		RVID_ERR("Can't get command submission context.\n");
+		goto error;
+	}
+
+	/* NUM_BUFFERS pairs of message/feedback and bitstream buffers, each cleared after creation */
+	dec->fb_size = FB_BUFFER_SIZE;
+	bs_buf_size = width * height * (512 / (16 * 16));
+	for (n = 0; n < NUM_BUFFERS; ++n) {
+		struct rvid_buffer *msg_buf = &dec->msg_fb_it_buffers[n];
+		struct rvid_buffer *bs_buf = &dec->bs_buffers[n];
+		unsigned msg_fb_it_size = FB_BUFFER_OFFSET + dec->fb_size;
+
+		STATIC_ASSERT(sizeof(struct ruvd_msg) <= FB_BUFFER_OFFSET);
+		/* reserve extra room for the IT scaling table when have_it() reports one */
+		if (have_it(dec))
+			msg_fb_it_size += IT_SCALING_TABLE_SIZE;
+
+		if (!rvid_create_buffer(dec->screen, msg_buf, msg_fb_it_size, PIPE_USAGE_STAGING)) {
+			RVID_ERR("Can't allocated message buffers.\n");
+			goto error;
+		}
+		if (!rvid_create_buffer(dec->screen, bs_buf, bs_buf_size, PIPE_USAGE_STAGING)) {
+			RVID_ERR("Can't allocated bitstream buffers.\n");
+			goto error;
+		}
+
+		rvid_clear_buffer(context, msg_buf);
+		rvid_clear_buffer(context, bs_buf);
+	}
+
+	dpb_size = calc_dpb_size(dec);
+	if (dpb_size) {
+		if (!rvid_create_buffer(dec->screen, &dec->dpb, dpb_size, PIPE_USAGE_DEFAULT)) {
+			RVID_ERR("Can't allocated dpb.\n");
+			goto error;
+		}
+		rvid_clear_buffer(context, &dec->dpb);
+	}
+
+	dec->reg.cntl = RUVD_ENGINE_CNTL;
+	dec->reg.cmd = RUVD_GPCOM_VCPU_CMD;
+	dec->reg.data0 = RUVD_GPCOM_VCPU_DATA0;
+	dec->reg.data1 = RUVD_GPCOM_VCPU_DATA1;
+
+	/* queue the RUVD_MSG_CREATE message describing this stream and flush it */
+	map_msg_fb_it_buf(dec);
+	dec->msg->msg_type = RUVD_MSG_CREATE;
+	dec->msg->size = sizeof(*dec->msg);
+	dec->msg->stream_handle = dec->stream_handle;
+	dec->msg->body.create.stream_type = dec->stream_type;
+	dec->msg->body.create.dpb_size = dpb_size;
+	dec->msg->body.create.width_in_samples = dec->base.width;
+	dec->msg->body.create.height_in_samples = dec->base.height;
+	send_msg_buf(dec);
+	if (flush(dec, 0))
+		goto error;
+
+	next_buffer(dec);
+
+	return &dec->base;
+
+error:
+	if (dec->cs)
+		dec->ws->cs_destroy(dec->cs);
+
+	for (n = 0; n < NUM_BUFFERS; ++n) {
+		rvid_destroy_buffer(&dec->msg_fb_it_buffers[n]);
+		rvid_destroy_buffer(&dec->bs_buffers[n]);
+	}
+
+	rvid_destroy_buffer(&dec->dpb);
+	rvid_destroy_buffer(&dec->ctx);
+	rvid_destroy_buffer(&dec->sessionctx);
+
+	FREE(dec);
+
+	return NULL;
+}
+
+/* offset of a layer: the level 0 offset plus layer times the level 0 slice size */
+static unsigned texture_offset(struct radeon_surf *surface, unsigned layer)
+{
+	return layer * surface->u.legacy.level[0].slice_size +
+		surface->u.legacy.level[0].offset;
+}
+
+/* hw encode the aspect of macro tiles: 2, 4 and 8 map to 1, 2 and 3, anything else to 0 */
+static unsigned macro_tile_aspect(unsigned macro_tile_aspect)
+{
+	if (macro_tile_aspect == 8)
+		return 3;
+	if (macro_tile_aspect == 4)
+		return 2;
+	if (macro_tile_aspect == 2)
+		return 1;
+	/* 1 and every other value */
+	return 0;
+}
+
+/* hw encode the bank width and height: 2, 4 and 8 map to 1, 2 and 3, anything else to 0 */
+static unsigned bank_wh(unsigned bankwh)
+{
+	if (bankwh == 8)
+		return 3;
+	if (bankwh == 4)
+		return 2;
+	if (bankwh == 2)
+		return 1;
+	/* 1 and every other value */
+	return 0;
+}
+
+/**
+ * fill the decoding target fields of the message from the luma and (optional) chroma surfaces
+ */
+void ruvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surf *luma,
+			  struct radeon_surf *chroma)
+{
+	/* the tile config below is taken from luma only; chroma has to agree with it */
+	if (chroma) {
+		assert(luma->u.legacy.bankw == chroma->u.legacy.bankw);
+		assert(luma->u.legacy.bankh == chroma->u.legacy.bankh);
+		assert(luma->u.legacy.mtilea == chroma->u.legacy.mtilea);
+	}
+	msg->body.decode.dt_pitch = luma->u.legacy.level[0].nblk_x * luma->blk_w;
+	switch (luma->u.legacy.level[0].mode) {
+	case RADEON_SURF_MODE_LINEAR_ALIGNED:
+		msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_LINEAR;
+		msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR;
+		break;
+	case RADEON_SURF_MODE_1D:
+		msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_1D_THIN;
+		msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8;
+		break;
+	case RADEON_SURF_MODE_2D:
+		msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_2D_THIN;
+		msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8;
+		break;
+	default:
+		assert(0);
+		break;
+	}
+
+	msg->body.decode.dt_luma_top_offset = texture_offset(luma, 0);
+	if (chroma)
+		msg->body.decode.dt_chroma_top_offset = texture_offset(chroma, 0);
+	if (!msg->body.decode.dt_field_mode) {
+		msg->body.decode.dt_luma_bottom_offset = msg->body.decode.dt_luma_top_offset;
+		msg->body.decode.dt_chroma_bottom_offset = msg->body.decode.dt_chroma_top_offset;
+	} else {
+		msg->body.decode.dt_luma_bottom_offset = texture_offset(luma, 1);
+		if (chroma)
+			msg->body.decode.dt_chroma_bottom_offset = texture_offset(chroma, 1);
+	}
+
+	msg->body.decode.dt_surf_tile_config |= RUVD_BANK_WIDTH(bank_wh(luma->u.legacy.bankw));
+	msg->body.decode.dt_surf_tile_config |= RUVD_BANK_HEIGHT(bank_wh(luma->u.legacy.bankh));
+	msg->body.decode.dt_surf_tile_config |= RUVD_MACRO_TILE_ASPECT_RATIO(macro_tile_aspect(luma->u.legacy.mtilea));
+}
diff -Nru mesa-17.2.4/src/gallium/drivers/r600/radeon_uvd.h mesa-17.3.3/src/gallium/drivers/r600/radeon_uvd.h
--- mesa-17.2.4/src/gallium/drivers/r600/radeon_uvd.h 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r600/radeon_uvd.h 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,442 @@
+/**************************************************************************
+ *
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/*
+ * Authors:
+ * Christian König
+ *
+ */
+
+#ifndef RADEON_UVD_H
+#define RADEON_UVD_H
+
+#include "radeon/radeon_winsys.h"
+#include "vl/vl_video_buffer.h"
+
+/* UVD uses PM4 packet type 0 and 2 */
+#define RUVD_PKT_TYPE_S(x) (((unsigned)(x) & 0x3) << 30) /* _S: place x into the field (bits 31:30) */
+#define RUVD_PKT_TYPE_G(x) (((x) >> 30) & 0x3) /* _G: extract the field from a header */
+#define RUVD_PKT_TYPE_C 0x3FFFFFFF /* _C: AND-mask that clears the field */
+#define RUVD_PKT_COUNT_S(x) (((unsigned)(x) & 0x3FFF) << 16) /* count field: bits 29:16 */
+#define RUVD_PKT_COUNT_G(x) (((x) >> 16) & 0x3FFF)
+#define RUVD_PKT_COUNT_C 0xC000FFFF
+#define RUVD_PKT0_BASE_INDEX_S(x) (((unsigned)(x) & 0xFFFF) << 0) /* base index field: bits 15:0 */
+#define RUVD_PKT0_BASE_INDEX_G(x) (((x) >> 0) & 0xFFFF)
+#define RUVD_PKT0_BASE_INDEX_C 0xFFFF0000
+#define RUVD_PKT0(index, count) (RUVD_PKT_TYPE_S(0) | RUVD_PKT0_BASE_INDEX_S(index) | RUVD_PKT_COUNT_S(count)) /* type 0 header from base index and count */
+#define RUVD_PKT2() (RUVD_PKT_TYPE_S(2)) /* type 2 header, every other bit zero */
+
+/* registers involved with UVD (numeric register offsets; two sets of the same four names) */
+#define RUVD_GPCOM_VCPU_CMD 0xEF0C
+#define RUVD_GPCOM_VCPU_DATA0 0xEF10
+#define RUVD_GPCOM_VCPU_DATA1 0xEF14
+#define RUVD_ENGINE_CNTL 0xEF18
+
+#define RUVD_GPCOM_VCPU_CMD_SOC15 0x2070c /* _SOC15 variants of the four registers above */
+#define RUVD_GPCOM_VCPU_DATA0_SOC15 0x20710
+#define RUVD_GPCOM_VCPU_DATA1_SOC15 0x20714
+#define RUVD_ENGINE_CNTL_SOC15 0x20718
+
+/* UVD commands to VCPU (one value per kind of buffer) */
+#define RUVD_CMD_MSG_BUFFER 0x00000000
+#define RUVD_CMD_DPB_BUFFER 0x00000001
+#define RUVD_CMD_DECODING_TARGET_BUFFER 0x00000002
+#define RUVD_CMD_FEEDBACK_BUFFER 0x00000003
+#define RUVD_CMD_SESSION_CONTEXT_BUFFER 0x00000005
+#define RUVD_CMD_BITSTREAM_BUFFER 0x00000100
+#define RUVD_CMD_ITSCALING_TABLE_BUFFER 0x00000204
+#define RUVD_CMD_CONTEXT_BUFFER 0x00000206
+
+/* UVD message types (presumably the values of ruvd_msg.msg_type - confirm against the users) */
+#define RUVD_MSG_CREATE 0
+#define RUVD_MSG_DECODE 1
+#define RUVD_MSG_DESTROY 2
+
+/* UVD stream types (presumably the values of the stream_type fields of ruvd_msg - confirm) */
+#define RUVD_CODEC_H264 0x00000000
+#define RUVD_CODEC_VC1 0x00000001
+#define RUVD_CODEC_MPEG2 0x00000003
+#define RUVD_CODEC_MPEG4 0x00000004
+#define RUVD_CODEC_H264_PERF 0x00000007
+#define RUVD_CODEC_MJPEG 0x00000008
+#define RUVD_CODEC_H265 0x00000010
+
+/* UVD decode target buffer tiling mode (ruvd_set_dt_surfaces stores these in dt_tiling_mode) */
+#define RUVD_TILE_LINEAR 0x00000000
+#define RUVD_TILE_8X4 0x00000001
+#define RUVD_TILE_8X8 0x00000002
+#define RUVD_TILE_32AS8 0x00000003
+
+/* UVD decode target buffer array mode (ruvd_set_dt_surfaces stores these in dt_array_mode) */
+#define RUVD_ARRAY_MODE_LINEAR 0x00000000
+#define RUVD_ARRAY_MODE_MACRO_LINEAR_MICRO_TILED 0x00000001
+#define RUVD_ARRAY_MODE_1D_THIN 0x00000002
+#define RUVD_ARRAY_MODE_2D_THIN 0x00000004 /* NOTE(review): same value as the next entry - confirm this is intended */
+#define RUVD_ARRAY_MODE_MACRO_TILED_MICRO_LINEAR 0x00000004
+#define RUVD_ARRAY_MODE_MACRO_TILED_MICRO_TILED 0x00000005
+
+/* UVD tile config: fields start at bit 0, 3, 6 and 9; ruvd_set_dt_surfaces ORs the first three into dt_surf_tile_config */
+#define RUVD_BANK_WIDTH(x) ((x) << 0)
+#define RUVD_BANK_HEIGHT(x) ((x) << 3)
+#define RUVD_MACRO_TILE_ASPECT_RATIO(x) ((x) << 6)
+#define RUVD_NUM_BANKS(x) ((x) << 9)
+
+/* H.264 profile definitions (presumably for ruvd_h264.profile - confirm) */
+#define RUVD_H264_PROFILE_BASELINE 0x00000000
+#define RUVD_H264_PROFILE_MAIN 0x00000001
+#define RUVD_H264_PROFILE_HIGH 0x00000002
+#define RUVD_H264_PROFILE_STEREO_HIGH 0x00000003
+#define RUVD_H264_PROFILE_MVC 0x00000004
+
+/* VC-1 profile definitions (presumably for ruvd_vc1.profile - confirm) */
+#define RUVD_VC1_PROFILE_SIMPLE 0x00000000
+#define RUVD_VC1_PROFILE_MAIN 0x00000001
+#define RUVD_VC1_PROFILE_ADVANCED 0x00000002
+
+struct ruvd_mvc_element { /* one MVC view entry; element type of ruvd_h264.mvc.mvcElements[] */
+ uint16_t viewOrderIndex;
+ uint16_t viewId;
+ uint16_t numOfAnchorRefsInL0; /* presumably the number of valid entries in the array that follows - confirm */
+ uint16_t viewIdOfAnchorRefsInL0[15];
+ uint16_t numOfAnchorRefsInL1;
+ uint16_t viewIdOfAnchorRefsInL1[15];
+ uint16_t numOfNonAnchorRefsInL0;
+ uint16_t viewIdOfNonAnchorRefsInL0[15];
+ uint16_t numOfNonAnchorRefsInL1;
+ uint16_t viewIdOfNonAnchorRefsInL1[15];
+}; /* only 16 bit members: 2 + 4 * (1 + 15) of them */
+
+struct ruvd_h264 { /* H.264 member of the codec union in ruvd_msg.body.decode; part of the hardware message, so keep the layout */
+ uint32_t profile; /* presumably one of RUVD_H264_PROFILE_* - confirm */
+ uint32_t level;
+
+ uint32_t sps_info_flags;
+ uint32_t pps_info_flags;
+ uint8_t chroma_format; /* 8 bit members come in groups of four, reserved_* entries fill the gaps */
+ uint8_t bit_depth_luma_minus8;
+ uint8_t bit_depth_chroma_minus8;
+ uint8_t log2_max_frame_num_minus4;
+
+ uint8_t pic_order_cnt_type;
+ uint8_t log2_max_pic_order_cnt_lsb_minus4;
+ uint8_t num_ref_frames;
+ uint8_t reserved_8bit;
+
+ int8_t pic_init_qp_minus26;
+ int8_t pic_init_qs_minus26;
+ int8_t chroma_qp_index_offset;
+ int8_t second_chroma_qp_index_offset;
+
+ uint8_t num_slice_groups_minus1;
+ uint8_t slice_group_map_type;
+ uint8_t num_ref_idx_l0_active_minus1;
+ uint8_t num_ref_idx_l1_active_minus1;
+
+ uint16_t slice_group_change_rate_minus1;
+ uint16_t reserved_16bit_1;
+
+ uint8_t scaling_list_4x4[6][16]; /* six lists of 16 entries */
+ uint8_t scaling_list_8x8[2][64]; /* two lists of 64 entries */
+
+ uint32_t frame_num;
+ uint32_t frame_num_list[16];
+ int32_t curr_field_order_cnt_list[2];
+ int32_t field_order_cnt_list[16][2];
+
+ uint32_t decoded_pic_idx;
+
+ uint32_t curr_pic_ref_frame_num;
+
+ uint8_t ref_frame_list[16];
+
+ uint32_t reserved[122];
+
+ struct { /* MVC data placed after the reserved words */
+ uint32_t numViews;
+ uint32_t viewId0;
+ struct ruvd_mvc_element mvcElements[1]; /* declared with a single element */
+ } mvc;
+};
+
+struct ruvd_h265 { /* H.265 member of the codec union in ruvd_msg.body.decode; part of the hardware message, so keep the layout */
+ uint32_t sps_info_flags;
+ uint32_t pps_info_flags;
+
+ uint8_t chroma_format; /* the 8 bit members below are laid out in groups of four */
+ uint8_t bit_depth_luma_minus8;
+ uint8_t bit_depth_chroma_minus8;
+ uint8_t log2_max_pic_order_cnt_lsb_minus4;
+
+ uint8_t sps_max_dec_pic_buffering_minus1;
+ uint8_t log2_min_luma_coding_block_size_minus3;
+ uint8_t log2_diff_max_min_luma_coding_block_size;
+ uint8_t log2_min_transform_block_size_minus2;
+
+ uint8_t log2_diff_max_min_transform_block_size;
+ uint8_t max_transform_hierarchy_depth_inter;
+ uint8_t max_transform_hierarchy_depth_intra;
+ uint8_t pcm_sample_bit_depth_luma_minus1;
+
+ uint8_t pcm_sample_bit_depth_chroma_minus1;
+ uint8_t log2_min_pcm_luma_coding_block_size_minus3;
+ uint8_t log2_diff_max_min_pcm_luma_coding_block_size;
+ uint8_t num_extra_slice_header_bits;
+
+ uint8_t num_short_term_ref_pic_sets;
+ uint8_t num_long_term_ref_pic_sps;
+ uint8_t num_ref_idx_l0_default_active_minus1;
+ uint8_t num_ref_idx_l1_default_active_minus1;
+
+ int8_t pps_cb_qp_offset;
+ int8_t pps_cr_qp_offset;
+ int8_t pps_beta_offset_div2;
+ int8_t pps_tc_offset_div2;
+
+ uint8_t diff_cu_qp_delta_depth;
+ uint8_t num_tile_columns_minus1;
+ uint8_t num_tile_rows_minus1;
+ uint8_t log2_parallel_merge_level_minus2;
+
+ uint16_t column_width_minus1[19]; /* 19 + 21 entries: 40 uint16_t in total */
+ uint16_t row_height_minus1[21];
+
+ int8_t init_qp_minus26;
+ uint8_t num_delta_pocs_ref_rps_idx;
+ uint8_t curr_idx;
+ uint8_t reserved1;
+ int32_t curr_poc;
+ uint8_t ref_pic_list[16];
+ int32_t poc_list[16];
+ uint8_t ref_pic_set_st_curr_before[8];
+ uint8_t ref_pic_set_st_curr_after[8];
+ uint8_t ref_pic_set_lt_curr[8];
+
+ uint8_t ucScalingListDCCoefSizeID2[6];
+ uint8_t ucScalingListDCCoefSizeID3[2];
+
+ uint8_t highestTid;
+ uint8_t isNonRef;
+
+ uint8_t p010_mode; /* this group of six presumably controls 10 bit output handling - confirm with the users */
+ uint8_t msb_mode;
+ uint8_t luma_10to8;
+ uint8_t chroma_10to8;
+ uint8_t sclr_luma10to8;
+ uint8_t sclr_chroma10to8;
+
+ uint8_t direct_reflist[2][15];
+};
+
+struct ruvd_vc1 { /* VC-1 member of the codec union in ruvd_msg.body.decode: six 32 bit words */
+ uint32_t profile; /* presumably one of RUVD_VC1_PROFILE_* - confirm */
+ uint32_t level;
+ uint32_t sps_info_flags;
+ uint32_t pps_info_flags;
+ uint32_t pic_structure;
+ uint32_t chroma_format;
+};
+
+struct ruvd_mpeg2 { /* MPEG-2 member of the codec union in ruvd_msg.body.decode; part of the hardware message, so keep the layout */
+ uint32_t decoded_pic_idx;
+ uint32_t ref_pic_idx[2];
+
+ uint8_t load_intra_quantiser_matrix;
+ uint8_t load_nonintra_quantiser_matrix;
+ uint8_t reserved_quantiser_alignement[2]; /* two filler bytes after the two load flags */
+ uint8_t intra_quantiser_matrix[64];
+ uint8_t nonintra_quantiser_matrix[64];
+
+ uint8_t profile_and_level_indication;
+ uint8_t chroma_format;
+
+ uint8_t picture_coding_type;
+
+ uint8_t reserved_1;
+
+ uint8_t f_code[2][2];
+ uint8_t intra_dc_precision;
+ uint8_t pic_structure;
+ uint8_t top_field_first;
+ uint8_t frame_pred_frame_dct;
+ uint8_t concealment_motion_vectors;
+ uint8_t q_scale_type;
+ uint8_t intra_vlc_format;
+ uint8_t alternate_scan;
+};
+
+struct ruvd_mpeg4 /* MPEG-4 member of the codec union in ruvd_msg.body.decode; part of the hardware message, so keep the layout */
+{
+ uint32_t decoded_pic_idx;
+ uint32_t ref_pic_idx[2];
+
+ uint32_t variant_type;
+ uint8_t profile_and_level_indication;
+
+ uint8_t video_object_layer_verid;
+ uint8_t video_object_layer_shape;
+
+ uint8_t reserved_1;
+
+ uint16_t video_object_layer_width;
+ uint16_t video_object_layer_height;
+
+ uint16_t vop_time_increment_resolution;
+
+ uint16_t reserved_2;
+
+ uint32_t flags;
+
+ uint8_t quant_type;
+
+ uint8_t reserved_3[3]; /* three filler bytes after the single quant_type byte */
+
+ uint8_t intra_quant_mat[64];
+ uint8_t nonintra_quant_mat[64];
+
+ struct { /* sprite related parameters */
+ uint8_t sprite_enable;
+
+ uint8_t reserved_4[3];
+
+ uint16_t sprite_width;
+ uint16_t sprite_height;
+ int16_t sprite_left_coordinate;
+ int16_t sprite_top_coordinate;
+
+ uint8_t no_of_sprite_warping_points;
+ uint8_t sprite_warping_accuracy;
+ uint8_t sprite_brightness_change;
+ uint8_t low_latency_sprite_enable;
+ } sprite_config;
+
+ struct { /* presumably only meaningful for the DivX 3.11 variant - confirm against variant_type users */
+ uint32_t flags;
+ uint8_t vol_mode;
+ uint8_t reserved_5[3];
+ } divx_311_config;
+};
+
+/* message between driver and hardware: a four word header followed by a body that is either "create" or "decode" */
+struct ruvd_msg {
+
+ uint32_t size;
+ uint32_t msg_type; /* presumably one of RUVD_MSG_* - confirm */
+ uint32_t stream_handle;
+ uint32_t status_report_feedback_number;
+
+ union {
+ struct { /* body variant named create */
+ uint32_t stream_type;
+ uint32_t session_flags;
+ uint32_t asic_id;
+ uint32_t width_in_samples;
+ uint32_t height_in_samples;
+ uint32_t dpb_buffer;
+ uint32_t dpb_size;
+ uint32_t dpb_model;
+ uint32_t version_info;
+ } create;
+
+ struct { /* body variant named decode */
+ uint32_t stream_type;
+ uint32_t decode_flags;
+ uint32_t width_in_samples;
+ uint32_t height_in_samples;
+
+ uint32_t dpb_buffer;
+ uint32_t dpb_size;
+ uint32_t dpb_model;
+ uint32_t dpb_reserved;
+
+ uint32_t db_offset_alignment;
+ uint32_t db_pitch;
+ uint32_t db_tiling_mode;
+ uint32_t db_array_mode;
+ uint32_t db_field_mode;
+ uint32_t db_surf_tile_config;
+ uint32_t db_aligned_height;
+ uint32_t db_reserved;
+
+ uint32_t use_addr_macro;
+
+ uint32_t bsd_buffer;
+ uint32_t bsd_size;
+
+ uint32_t pic_param_buffer;
+ uint32_t pic_param_size;
+ uint32_t mb_cntl_buffer;
+ uint32_t mb_cntl_size;
+
+ uint32_t dt_buffer; /* dt_*: decoding target; ruvd_set_dt_surfaces fills pitch, modes, offsets and tile config */
+ uint32_t dt_pitch;
+ uint32_t dt_tiling_mode; /* RUVD_TILE_* */
+ uint32_t dt_array_mode; /* RUVD_ARRAY_MODE_* */
+ uint32_t dt_field_mode; /* when non-zero, ruvd_set_dt_surfaces computes separate bottom offsets */
+ uint32_t dt_luma_top_offset;
+ uint32_t dt_luma_bottom_offset;
+ uint32_t dt_chroma_top_offset;
+ uint32_t dt_chroma_bottom_offset;
+ uint32_t dt_surf_tile_config; /* RUVD_BANK_WIDTH / RUVD_BANK_HEIGHT / RUVD_MACRO_TILE_ASPECT_RATIO bits */
+ uint32_t dt_uv_surf_tile_config;
+ // re-use dt_wa_chroma_top_offset as dt_ext_info for UV pitch in stoney
+ uint32_t dt_wa_chroma_top_offset;
+ uint32_t dt_wa_chroma_bottom_offset;
+
+ uint32_t reserved[16];
+
+ union { /* per codec picture parameters, all sharing the same storage */
+ struct ruvd_h264 h264;
+ struct ruvd_h265 h265;
+ struct ruvd_vc1 vc1;
+ struct ruvd_mpeg2 mpeg2;
+ struct ruvd_mpeg4 mpeg4;
+
+ uint32_t info[768]; /* makes the union at least 768 words large */
+ } codec;
+
+ uint8_t extension_support;
+ uint8_t reserved_8bit_1;
+ uint8_t reserved_8bit_2;
+ uint8_t reserved_8bit_3;
+ uint32_t extension_reserved[64];
+ } decode;
+ } body;
+};
+
+/* driver dependent callback: gets the message and a video buffer, returns a pb_buffer */
+typedef struct pb_buffer* (*ruvd_set_dtb)
+(struct ruvd_msg* msg, struct vl_video_buffer *vb);
+
+/* create a UVD decoder; set_dtb is the driver callback of type ruvd_set_dtb declared above */
+struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
+ const struct pipe_video_codec *templat,
+ ruvd_set_dtb set_dtb);
+
+/* fill the dt_* decoding target fields of msg from the luma and (optional, may be NULL) chroma surfaces */
+void ruvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surf *luma,
+ struct radeon_surf *chroma);
+#endif
diff -Nru mesa-17.2.4/src/gallium/drivers/r600/radeon_vce.c mesa-17.3.3/src/gallium/drivers/r600/radeon_vce.c
--- mesa-17.2.4/src/gallium/drivers/r600/radeon_vce.c 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r600/radeon_vce.c 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,533 @@
+/**************************************************************************
+ *
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/*
+ * Authors:
+ * Christian König
+ *
+ */
+
+#include <stdio.h>
+
+#include "pipe/p_video_codec.h"
+
+#include "util/u_video.h"
+#include "util/u_memory.h"
+
+#include "vl/vl_video_buffer.h"
+
+#include "r600_pipe_common.h"
+#include "radeon_video.h"
+#include "radeon_vce.h"
+
+#define FW_40_2_2 ((40 << 24) | (2 << 16) | (2 << 8)) /* FW_a_b_c: a in bits 31:24, b in 23:16, c in 15:8 */
+#define FW_50_0_1 ((50 << 24) | (0 << 16) | (1 << 8))
+#define FW_50_1_2 ((50 << 24) | (1 << 16) | (2 << 8))
+#define FW_50_10_2 ((50 << 24) | (10 << 16) | (2 << 8))
+#define FW_50_17_3 ((50 << 24) | (17 << 16) | (3 << 8))
+#define FW_52_0_3 ((52 << 24) | (0 << 16) | (3 << 8))
+#define FW_52_4_3 ((52 << 24) | (4 << 16) | (3 << 8))
+#define FW_52_8_3 ((52 << 24) | (8 << 16) | (3 << 8))
+#define FW_53 (53 << 24) /* top byte only; rvce_is_fw_version_supported masks with 0xff << 24 before comparing */
+
+/**
+ * flush commands to the hardware (cs_flush with RADEON_FLUSH_ASYNC) and
+ * set task_info_idx and bs_idx back to zero
+ */
+static void flush(struct rvce_encoder *enc)
+{
+	enc->ws->cs_flush(enc->cs, RADEON_FLUSH_ASYNC, NULL);
+	enc->task_info_idx = enc->bs_idx = 0;
+}
+
+#if 0 /* debugging aid, compiled out; its call sites in this file are commented out as well */
+static void dump_feedback(struct rvce_encoder *enc, struct rvid_buffer *fb)
+{
+ uint32_t *ptr = enc->ws->buffer_map(fb->res->buf, enc->cs, PIPE_TRANSFER_READ_WRITE);
+ unsigned i = 0; /* walks the mapped buffer one 32 bit word per printed line */
+ fprintf(stderr, "\n");
+ fprintf(stderr, "encStatus:\t\t\t%08x\n", ptr[i++]);
+ fprintf(stderr, "encHasBitstream:\t\t%08x\n", ptr[i++]);
+ fprintf(stderr, "encHasAudioBitstream:\t\t%08x\n", ptr[i++]);
+ fprintf(stderr, "encBitstreamOffset:\t\t%08x\n", ptr[i++]);
+ fprintf(stderr, "encBitstreamSize:\t\t%08x\n", ptr[i++]);
+ fprintf(stderr, "encAudioBitstreamOffset:\t%08x\n", ptr[i++]);
+ fprintf(stderr, "encAudioBitstreamSize:\t\t%08x\n", ptr[i++]);
+ fprintf(stderr, "encExtrabytes:\t\t\t%08x\n", ptr[i++]);
+ fprintf(stderr, "encAudioExtrabytes:\t\t%08x\n", ptr[i++]);
+ fprintf(stderr, "videoTimeStamp:\t\t\t%08x\n", ptr[i++]);
+ fprintf(stderr, "audioTimeStamp:\t\t\t%08x\n", ptr[i++]);
+ fprintf(stderr, "videoOutputType:\t\t%08x\n", ptr[i++]);
+ fprintf(stderr, "attributeFlags:\t\t\t%08x\n", ptr[i++]);
+ fprintf(stderr, "seiPrivatePackageOffset:\t%08x\n", ptr[i++]);
+ fprintf(stderr, "seiPrivatePackageSize:\t\t%08x\n", ptr[i++]);
+ fprintf(stderr, "\n");
+ enc->ws->buffer_unmap(fb->res->buf);
+}
+#endif
+
+/**
+ * reset the CPB handling: the slot list is rebuilt from cpb_array in array
+ * order, every slot zeroed and marked as a skipped picture
+ */
+static void reset_cpb(struct rvce_encoder *enc)
+{
+	unsigned idx = 0;
+
+	LIST_INITHEAD(&enc->cpb_slots);
+	while (idx < enc->cpb_num) {
+		struct rvce_cpb_slot *cur = enc->cpb_array + idx;
+		cur->picture_type = PIPE_H264_ENC_PICTURE_TYPE_SKIP;
+		cur->pic_order_cnt = cur->frame_num = 0;
+		cur->index = idx++;
+		LIST_ADDTAIL(&cur->list, &enc->cpb_slots);
+	}
+}
+
+/**
+ * sort l0 and l1 to the top of the list (l0 ends up first, l1 behind it)
+ */
+static void sort_cpb(struct rvce_encoder *enc)
+{
+	struct rvce_cpb_slot *slot, *ref0 = NULL, *ref1 = NULL;
+
+	LIST_FOR_EACH_ENTRY(slot, &enc->cpb_slots, list) {
+		if (slot->frame_num == enc->pic.ref_idx_l0)
+			ref0 = slot;
+		if (slot->frame_num == enc->pic.ref_idx_l1)
+			ref1 = slot;
+		/* stop once everything this picture type needs has been found */
+		if (enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P) {
+			if (ref0)
+				break;
+		} else if (enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) {
+			if (ref0 && ref1)
+				break;
+		}
+	}
+
+	/* l1 is re-inserted first so that the l0 insert lands in front of it */
+	if (ref1) {
+		LIST_DEL(&ref1->list);
+		LIST_ADD(&ref1->list, &enc->cpb_slots);
+	}
+	if (ref0) {
+		LIST_DEL(&ref0->list);
+		LIST_ADD(&ref0->list, &enc->cpb_slots);
+	}
+}
+
+/**
+ * get number of cpbs based on dpb: the per-level dpb value divided by the
+ * frame size in 16x16 blocks, at most 16. A zero-sized frame yields 0
+ * (which the caller treats as failure) instead of a division by zero.
+ */
+static unsigned get_cpb_num(struct rvce_encoder *enc)
+{
+	unsigned w = align(enc->base.width, 16) / 16;
+	unsigned h = align(enc->base.height, 16) / 16;
+	unsigned dpb;
+
+	if (!w || !h)
+		return 0;
+
+	/* the values match MaxDpbMbs of the corresponding H.264 levels */
+	switch (enc->base.level) {
+	case 10:
+		dpb = 396;
+		break;
+	case 11:
+		dpb = 900;
+		break;
+	case 12: case 13: case 20:
+		dpb = 2376;
+		break;
+	case 21:
+		dpb = 4752;
+		break;
+	case 22: case 30:
+		dpb = 8100;
+		break;
+	case 31:
+		dpb = 18000;
+		break;
+	case 32:
+		dpb = 20480;
+		break;
+	case 40: case 41:
+		dpb = 32768;
+		break;
+	case 42:
+		dpb = 34816;
+		break;
+	case 50:
+		dpb = 110400;
+		break;
+	default: case 51: case 52:
+		dpb = 184320;
+		break;
+	}
+
+	return MIN2(dpb / (w * h), 16);
+}
+
+/** Get the slot for the currently encoded frame (last entry of the CPB list) */
+struct rvce_cpb_slot *current_slot(struct rvce_encoder *enc)
+{
+	struct list_head *tail = enc->cpb_slots.prev;
+
+	return LIST_ENTRY(struct rvce_cpb_slot, tail, list);
+}
+
+/** Get the slot for L0 (first entry of the CPB list) */
+struct rvce_cpb_slot *l0_slot(struct rvce_encoder *enc)
+{
+	struct list_head *first = enc->cpb_slots.next;
+
+	return LIST_ENTRY(struct rvce_cpb_slot, first, list);
+}
+
+/** Get the slot for L1 (second entry of the CPB list) */
+struct rvce_cpb_slot *l1_slot(struct rvce_encoder *enc)
+{
+	struct list_head *second = enc->cpb_slots.next->next;
+
+	return LIST_ENTRY(struct rvce_cpb_slot, second, list);
+}
+
+/**
+ * Calculate the offsets into the CPB: every slot takes fsize, luma comes
+ * first (pitch aligned to 128, rows to 16) and chroma right behind it
+ */
+void rvce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot,
+		       signed *luma_offset, signed *chroma_offset)
+{
+	unsigned pitch = align(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe, 128);
+	unsigned vpitch = align(enc->luma->u.legacy.level[0].nblk_y, 16);
+	unsigned luma_size = pitch * vpitch;
+	unsigned fsize = pitch * (vpitch + vpitch / 2);
+
+	*luma_offset = slot->index * fsize;
+	*chroma_offset = *luma_offset + luma_size;
+}
+
+/**
+ * destroy this video encoder, sending session/feedback/destroy for an open stream first
+ */
+static void rvce_destroy(struct pipe_video_codec *encoder)
+{
+	struct rvce_encoder *enc = (struct rvce_encoder*)encoder;
+	if (enc->stream_handle) {
+		struct rvid_buffer feedback_buf;
+		rvid_create_buffer(enc->screen, &feedback_buf, 512, PIPE_USAGE_STAGING);
+		enc->fb = &feedback_buf;
+		enc->session(enc);
+		enc->feedback(enc);
+		enc->destroy(enc);
+		flush(enc);
+		rvid_destroy_buffer(&feedback_buf);
+	}
+	rvid_destroy_buffer(&enc->cpb);
+	enc->ws->cs_destroy(enc->cs);
+	FREE(enc->cpb_array);
+	FREE(enc);
+}
+
+static void rvce_begin_frame(struct pipe_video_codec *encoder,
+			     struct pipe_video_buffer *source,
+			     struct pipe_picture_desc *picture)
+{
+	/* Take over the picture parameters and source planes, update the CPB list, (re)configure if needed. */
+	struct rvce_encoder *enc = (struct rvce_encoder*)encoder;
+	struct vl_video_buffer *src = (struct vl_video_buffer *)source;
+	struct pipe_h264_enc_picture_desc *pic = (struct pipe_h264_enc_picture_desc *)picture;
+
+	/* compare with the previous picture before enc->pic gets overwritten */
+	bool rc_changed = !(
+		enc->pic.rate_ctrl.rate_ctrl_method == pic->rate_ctrl.rate_ctrl_method &&
+		enc->pic.quant_i_frames == pic->quant_i_frames &&
+		enc->pic.quant_p_frames == pic->quant_p_frames &&
+		enc->pic.quant_b_frames == pic->quant_b_frames);
+
+	enc->pic = *pic;
+	get_pic_param(enc, pic);
+	enc->get_buffer(src->resources[0], &enc->handle, &enc->luma);
+	enc->get_buffer(src->resources[1], NULL, &enc->chroma);
+	if (pic->picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR) {
+		reset_cpb(enc);
+	} else if (pic->picture_type == PIPE_H264_ENC_PICTURE_TYPE_P ||
+		   pic->picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) {
+		sort_cpb(enc);
+	}
+
+	if (!enc->stream_handle) {
+		/* no stream yet: create it; config is sent here, so no extra pass below */
+		struct rvid_buffer feedback_buf;
+		enc->stream_handle = rvid_alloc_stream_handle();
+		rvid_create_buffer(enc->screen, &feedback_buf, 512, PIPE_USAGE_STAGING);
+		enc->fb = &feedback_buf;
+		enc->session(enc);
+		enc->create(enc);
+		enc->config(enc);
+		enc->feedback(enc);
+		flush(enc);
+		//dump_feedback(enc, &feedback_buf);
+		rvid_destroy_buffer(&feedback_buf);
+	} else if (rc_changed) {
+		/* rate control method or quantizers changed: send the config again */
+		enc->session(enc);
+		enc->config(enc);
+		flush(enc);
+	}
+}
+
+/* Bind the output buffer, allocate a feedback buffer (handed back via *fb) and run the encode callbacks. */
+static void rvce_encode_bitstream(struct pipe_video_codec *encoder,
+				  struct pipe_video_buffer *source,
+				  struct pipe_resource *destination, void **fb)
+{
+	struct rvce_encoder *enc = (struct rvce_encoder*)encoder;
+	enc->get_buffer(destination, &enc->bs_handle, NULL);
+	enc->bs_size = destination->width0;
+	*fb = enc->fb = CALLOC_STRUCT(rvid_buffer); /* may be NULL: checked before use below */
+	if (!enc->fb ||
+	    !rvid_create_buffer(enc->screen, enc->fb, 512, PIPE_USAGE_STAGING)) {
+		RVID_ERR("Can't create feedback buffer.\n");
+		return;
+	}
+	if (!radeon_emitted(enc->cs, 0)) /* presumably: nothing in the CS yet - confirm */
+		enc->session(enc);
+	enc->encode(enc);
+	enc->feedback(enc);
+}
+
+static void rvce_end_frame(struct pipe_video_codec *encoder,
+			   struct pipe_video_buffer *source,
+			   struct pipe_picture_desc *picture)
+{
+	struct rvce_encoder *enc = (struct rvce_encoder*)encoder;
+	struct rvce_cpb_slot *cur = current_slot(enc);
+
+	/* with dual_inst set the flush is held back until bs_idx exceeds 1 */
+	if (!(enc->dual_inst && enc->bs_idx <= 1))
+		flush(enc);
+
+	/* record the just encoded frame in its slot; referenced frames move to the list head */
+	cur->picture_type = enc->pic.picture_type;
+	cur->frame_num = enc->pic.frame_num;
+	cur->pic_order_cnt = enc->pic.pic_order_cnt;
+	if (!enc->pic.not_referenced) {
+		LIST_DEL(&cur->list);
+		LIST_ADD(&cur->list, &enc->cpb_slots);
+	}
+}
+
+/* Report the bitstream size found in a feedback buffer (when size is given), then free the buffer. */
+static void rvce_get_feedback(struct pipe_video_codec *encoder,
+			      void *feedback, unsigned *size)
+{
+	struct rvce_encoder *enc = (struct rvce_encoder*)encoder;
+	struct rvid_buffer *fb = feedback;
+
+	if (size) {
+		uint32_t *ptr = enc->ws->buffer_map(fb->res->buf, enc->cs, PIPE_TRANSFER_READ_WRITE);
+
+		/* a failed mapping is reported as size 0 instead of dereferencing NULL */
+		/* NOTE(review): the disabled dump_feedback() labels word 9 videoTimeStamp - confirm the subtraction */
+		*size = (ptr && ptr[1]) ? ptr[4] - ptr[9] : 0;
+		if (ptr)
+			enc->ws->buffer_unmap(fb->res->buf);
+	}
+
+	//dump_feedback(enc, fb);
+	rvid_destroy_buffer(fb);
+	FREE(fb);
+}
+
+/**
+ * flush any outstanding command buffers to the hardware; installed as
+ * base.flush by rvce_create_encoder
+ */
+static void rvce_flush(struct pipe_video_codec *encoder)
+{
+	/* same cast as in the other callbacks of this file */
+	flush((struct rvce_encoder*)encoder);
+}
+
+static void rvce_cs_flush(void *ctx, unsigned flags,
+ struct pipe_fence_handle **fence)
+{
+ /* handed to ws->cs_create() as the flush callback by rvce_create_encoder(); the request is just ignored */
+}
+
+struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
+					     const struct pipe_video_codec *templ,
+					     struct radeon_winsys* ws,
+					     rvce_get_buffer get_buffer)
+{
+	/*
+	 * Set up an encoder for templ: command stream, CPB buffer and slot array.
+	 * NOTE(review): the unconditional "goto error" near the end makes this
+	 * always return NULL; no enc->session/encode/... callback is assigned here.
+	 */
+	struct r600_common_screen *rscreen = (struct r600_common_screen *)context->screen;
+	struct r600_common_context *rctx = (struct r600_common_context*)context;
+	struct rvce_encoder *enc;
+	struct pipe_video_buffer *tmp_buf, templat = {};
+	struct radeon_surf *tmp_surf;
+	unsigned cpb_size;
+
+	if (!rscreen->info.vce_fw_version) {
+		RVID_ERR("Kernel doesn't supports VCE!\n");
+		return NULL;
+	}
+	if (!rvce_is_fw_version_supported(rscreen)) {
+		RVID_ERR("Unsupported VCE fw version loaded!\n");
+		return NULL;
+	}
+
+	enc = CALLOC_STRUCT(rvce_encoder);
+	if (!enc)
+		return NULL;
+
+	if (rscreen->info.drm_major == 3)
+		enc->use_vm = true;
+	if ((rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 42) ||
+	    rscreen->info.drm_major == 3)
+		enc->use_vui = true;
+
+	enc->base = *templ;
+	enc->base.context = context;
+	enc->base.destroy = rvce_destroy;
+	enc->base.begin_frame = rvce_begin_frame;
+	enc->base.encode_bitstream = rvce_encode_bitstream;
+	enc->base.end_frame = rvce_end_frame;
+	enc->base.flush = rvce_flush;
+	enc->base.get_feedback = rvce_get_feedback;
+	enc->get_buffer = get_buffer;
+	enc->screen = context->screen;
+	enc->ws = ws;
+	enc->cs = ws->cs_create(rctx->ctx, RING_VCE, rvce_cs_flush, enc);
+	if (!enc->cs) {
+		RVID_ERR("Can't get command submission context.\n");
+		goto error;
+	}
+
+	/* checked before tmp_buf exists: bailing out here afterwards leaked it */
+	enc->cpb_num = get_cpb_num(enc);
+	if (!enc->cpb_num)
+		goto error;
+
+	templat.buffer_format = PIPE_FORMAT_NV12;
+	templat.chroma_format = PIPE_VIDEO_CHROMA_FORMAT_420;
+	templat.width = enc->base.width;
+	templat.height = enc->base.height;
+	templat.interlaced = false;
+	if (!(tmp_buf = context->create_video_buffer(context, &templat))) {
+		RVID_ERR("Can't create video buffer.\n");
+		goto error;
+	}
+
+	get_buffer(((struct vl_video_buffer *)tmp_buf)->resources[0], NULL, &tmp_surf);
+	cpb_size = align(tmp_surf->u.legacy.level[0].nblk_x * tmp_surf->bpe, 128) *
+		   align(tmp_surf->u.legacy.level[0].nblk_y, 32);
+	cpb_size = cpb_size * 3 / 2;
+	cpb_size = cpb_size * enc->cpb_num;
+	if (enc->dual_pipe)
+		cpb_size += RVCE_MAX_AUX_BUFFER_NUM *
+			    RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE * 2;
+	tmp_buf->destroy(tmp_buf);
+	if (!rvid_create_buffer(enc->screen, &enc->cpb, cpb_size, PIPE_USAGE_DEFAULT)) {
+		RVID_ERR("Can't create CPB buffer.\n");
+		goto error;
+	}
+
+	enc->cpb_array = CALLOC(enc->cpb_num, sizeof(struct rvce_cpb_slot));
+	if (!enc->cpb_array)
+		goto error;
+
+	reset_cpb(enc);
+
+	goto error; /* NOTE(review): looks like a deliberate stub, see the note at the top */
+
+	return &enc->base;
+
+error:
+	if (enc->cs)
+		enc->ws->cs_destroy(enc->cs);
+	rvid_destroy_buffer(&enc->cpb);
+	FREE(enc->cpb_array);
+	FREE(enc);
+	return NULL;
+}
+
+/**
+ * check if kernel has the right fw version loaded: either one of the
+ * versions listed in the table below or any version whose bits 31:24
+ * equal those of FW_53
+ */
+bool rvce_is_fw_version_supported(struct r600_common_screen *rscreen)
+{
+	/* firmware versions accepted by exact match */
+	static const unsigned known_versions[] = {
+		FW_40_2_2, FW_50_0_1, FW_50_1_2, FW_50_10_2,
+		FW_50_17_3, FW_52_0_3, FW_52_4_3, FW_52_8_3,
+	};
+	unsigned i;
+
+	for (i = 0; i < sizeof(known_versions) / sizeof(known_versions[0]); ++i) {
+		if (rscreen->info.vce_fw_version == known_versions[i])
+			return true;
+	}
+
+	/* otherwise only the top byte is compared */
+	return (rscreen->info.vce_fw_version & (0xff << 24)) == FW_53;
+}
+
+/**
+ * Add the buffer as relocation to the current command submission and emit two
+ * dwords for it: the VM address (high dword first) or reloc index * 4 plus offset
+ */
+void rvce_add_buffer(struct rvce_encoder *enc, struct pb_buffer *buf,
+		     enum radeon_bo_usage usage, enum radeon_bo_domain domain,
+		     signed offset)
+{
+	int idx;
+
+	idx = enc->ws->cs_add_buffer(enc->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED,
+				     domain, RADEON_PRIO_VCE);
+	if (!enc->use_vm) {
+		offset += enc->ws->buffer_get_reloc_offset(buf);
+		RVCE_CS(idx * 4);
+		RVCE_CS(offset);
+	} else {
+		uint64_t va = enc->ws->buffer_get_virtual_address(buf);
+		va += offset;
+		RVCE_CS(va >> 32);
+		RVCE_CS(va);
+	}
+}
diff -Nru mesa-17.2.4/src/gallium/drivers/r600/radeon_vce.h mesa-17.3.3/src/gallium/drivers/r600/radeon_vce.h
--- mesa-17.2.4/src/gallium/drivers/r600/radeon_vce.h 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r600/radeon_vce.h 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,462 @@
+/**************************************************************************
+ *
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/*
+ * Authors:
+ * Christian König
+ *
+ */
+
+#ifndef RADEON_VCE_H
+#define RADEON_VCE_H
+
+#include "util/list.h"
+
+#define RVCE_CS(value) (enc->cs->current.buf[enc->cs->current.cdw++] = (value)) /* append one value to the CS of the variable 'enc' in scope */
+#define RVCE_BEGIN(cmd) { /* opens a scope that RVCE_END closes again */ \
+ uint32_t *begin = &enc->cs->current.buf[enc->cs->current.cdw++]; /* reserve one slot, filled in by RVCE_END */ \
+ RVCE_CS(cmd)
+#define RVCE_READ(buf, domain, off) rvce_add_buffer(enc, (buf), RADEON_USAGE_READ, (domain), (off))
+#define RVCE_WRITE(buf, domain, off) rvce_add_buffer(enc, (buf), RADEON_USAGE_WRITE, (domain), (off))
+#define RVCE_READWRITE(buf, domain, off) rvce_add_buffer(enc, (buf), RADEON_USAGE_READWRITE, (domain), (off))
+#define RVCE_END() *begin = (&enc->cs->current.buf[enc->cs->current.cdw] - begin) * 4; } /* reserved slot = (entries from that slot up to the current position) * 4 */
+
+#define RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE (4096 * 16 * 2.5) /* floating-point constant because of the 2.5 factor */
+#define RVCE_MAX_AUX_BUFFER_NUM 4
+
+struct r600_common_screen;
+
+/* driver dependent callback; handle and surface are pointer-to-pointer arguments, presumably outputs for the given resource (confirm against the drivers) */
+typedef void (*rvce_get_buffer)(struct pipe_resource *resource,
+ struct pb_buffer **handle,
+ struct radeon_surf **surface);
+
+/* Coded picture buffer slot; the encoder allocates cpb_num of these in cpb_array */
+struct rvce_cpb_slot {
+ struct list_head list; /* list node; presumably linked into rvce_encoder::cpb_slots (confirm) */
+
+ unsigned index;
+ enum pipe_h264_enc_picture_type picture_type;
+ unsigned frame_num;
+ unsigned pic_order_cnt;
+};
+
+struct rvce_rate_control { /* rate control settings; every field is a 32-bit unsigned value */
+ uint32_t rc_method;
+ uint32_t target_bitrate;
+ uint32_t peak_bitrate;
+ uint32_t frame_rate_num;
+ uint32_t gop_size;
+ uint32_t quant_i_frames;
+ uint32_t quant_p_frames;
+ uint32_t quant_b_frames;
+ uint32_t vbv_buffer_size;
+ uint32_t frame_rate_den;
+ uint32_t vbv_buf_lv;
+ uint32_t max_au_size;
+ uint32_t qp_initial_mode;
+ uint32_t target_bits_picture;
+ uint32_t peak_bits_picture_integer;
+ uint32_t peak_bits_picture_fraction;
+ uint32_t min_qp;
+ uint32_t max_qp;
+ uint32_t skip_frame_enable;
+ uint32_t fill_data_enable;
+ uint32_t enforce_hrd;
+ uint32_t b_pics_delta_qp;
+ uint32_t ref_b_pics_delta_qp;
+ uint32_t rc_reinit_disable;
+ uint32_t enc_lcvbr_init_qp_flag;
+ uint32_t lcvbrsatd_based_nonlinear_bit_budget_flag;
+};
+
+struct rvce_motion_estimation { /* motion estimation settings; every field is a 32-bit unsigned value */
+ uint32_t enc_ime_decimation_search;
+ uint32_t motion_est_half_pixel;
+ uint32_t motion_est_quarter_pixel;
+ uint32_t disable_favor_pmv_point;
+ uint32_t force_zero_point_center;
+ uint32_t lsmvert;
+ uint32_t enc_search_range_x;
+ uint32_t enc_search_range_y;
+ uint32_t enc_search1_range_x;
+ uint32_t enc_search1_range_y;
+ uint32_t disable_16x16_frame1;
+ uint32_t disable_satd;
+ uint32_t enable_amd;
+ uint32_t enc_disable_sub_mode;
+ uint32_t enc_ime_skip_x;
+ uint32_t enc_ime_skip_y;
+ uint32_t enc_en_ime_overw_dis_subm;
+ uint32_t enc_ime_overw_dis_subm_no;
+ uint32_t enc_ime2_search_range_x;
+ uint32_t enc_ime2_search_range_y;
+ uint32_t parallel_mode_speedup_enable;
+ uint32_t fme0_enc_disable_sub_mode;
+ uint32_t fme1_enc_disable_sub_mode;
+ uint32_t ime_sw_speedup_enable;
+};
+
+struct rvce_pic_control { /* picture control settings; unsigned 32-bit except the two loop filter offsets */
+ uint32_t enc_use_constrained_intra_pred;
+ uint32_t enc_cabac_enable;
+ uint32_t enc_cabac_idc;
+ uint32_t enc_loop_filter_disable;
+ int32_t enc_lf_beta_offset; /* signed, unlike the other fields */
+ int32_t enc_lf_alpha_c0_offset; /* signed, unlike the other fields */
+ uint32_t enc_crop_left_offset;
+ uint32_t enc_crop_right_offset;
+ uint32_t enc_crop_top_offset;
+ uint32_t enc_crop_bottom_offset;
+ uint32_t enc_num_mbs_per_slice;
+ uint32_t enc_intra_refresh_num_mbs_per_slot;
+ uint32_t enc_force_intra_refresh;
+ uint32_t enc_force_imb_period;
+ uint32_t enc_pic_order_cnt_type;
+ uint32_t log2_max_pic_order_cnt_lsb_minus4;
+ uint32_t enc_sps_id;
+ uint32_t enc_pps_id;
+ uint32_t enc_constraint_set_flags;
+ uint32_t enc_b_pic_pattern;
+ uint32_t weight_pred_mode_b_picture;
+ uint32_t enc_number_of_reference_frames;
+ uint32_t enc_max_num_ref_frames;
+ uint32_t enc_num_default_active_ref_l0;
+ uint32_t enc_num_default_active_ref_l1;
+ uint32_t enc_slice_mode;
+ uint32_t enc_max_slice_size;
+};
+
+struct rvce_task_info { /* task info values; presumably what the encoder's task_info callback emits (TODO confirm) */
+ uint32_t offset_of_next_task_info;
+ uint32_t task_operation;
+ uint32_t reference_picture_dependency;
+ uint32_t collocate_flag_dependency;
+ uint32_t feedback_index;
+ uint32_t video_bitstream_ring_index;
+};
+
+struct rvce_feedback_buf_pkg { /* feedback ring description: address split into hi/lo 32-bit words, plus size */
+ uint32_t feedback_ring_address_hi;
+ uint32_t feedback_ring_address_lo;
+ uint32_t feedback_ring_size;
+};
+
+struct rvce_rdo { /* settings for the encoder's rdo packet (presumably rate-distortion optimization - confirm); all 32-bit unsigned */
+ uint32_t enc_disable_tbe_pred_i_frame;
+ uint32_t enc_disable_tbe_pred_p_frame;
+ uint32_t use_fme_interpol_y;
+ uint32_t use_fme_interpol_uv;
+ uint32_t use_fme_intrapol_y;
+ uint32_t use_fme_intrapol_uv;
+ uint32_t use_fme_interpol_y_1;
+ uint32_t use_fme_interpol_uv_1;
+ uint32_t use_fme_intrapol_y_1;
+ uint32_t use_fme_intrapol_uv_1;
+ uint32_t enc_16x16_cost_adj;
+ uint32_t enc_skip_cost_adj;
+ uint32_t enc_force_16x16_skip;
+ uint32_t enc_disable_threshold_calc_a;
+ uint32_t enc_luma_coeff_cost;
+ uint32_t enc_luma_mb_coeff_cost;
+ uint32_t enc_chroma_coeff_cost;
+};
+
+struct rvce_vui { /* values for the encoder's vui packet (presumably H.264 video usability information - confirm); all 32-bit unsigned */
+ uint32_t aspect_ratio_info_present_flag;
+ uint32_t aspect_ratio_idc;
+ uint32_t sar_width;
+ uint32_t sar_height;
+ uint32_t overscan_info_present_flag;
+ uint32_t overscan_Approp_flag; /* NOTE(review): the only mixed-case field name in this struct */
+ uint32_t video_signal_type_present_flag;
+ uint32_t video_format;
+ uint32_t video_full_range_flag;
+ uint32_t color_description_present_flag;
+ uint32_t color_prim;
+ uint32_t transfer_char;
+ uint32_t matrix_coef;
+ uint32_t chroma_loc_info_present_flag;
+ uint32_t chroma_loc_top;
+ uint32_t chroma_loc_bottom;
+ uint32_t timing_info_present_flag;
+ uint32_t num_units_in_tick;
+ uint32_t time_scale;
+ uint32_t fixed_frame_rate_flag;
+ uint32_t nal_hrd_parameters_present_flag;
+ uint32_t cpb_cnt_minus1;
+ uint32_t bit_rate_scale;
+ uint32_t cpb_size_scale;
+ uint32_t bit_rate_value_minus;
+ uint32_t cpb_size_value_minus;
+ uint32_t cbr_flag;
+ uint32_t initial_cpb_removal_delay_length_minus1;
+ uint32_t cpb_removal_delay_length_minus1;
+ uint32_t dpb_output_delay_length_minus1;
+ uint32_t time_offset_length;
+ uint32_t low_delay_hrd_flag;
+ uint32_t pic_struct_present_flag;
+ uint32_t bitstream_restriction_present_flag;
+ uint32_t motion_vectors_over_pic_boundaries_flag;
+ uint32_t max_bytes_per_pic_denom;
+ uint32_t max_bits_per_mb_denom;
+ uint32_t log2_max_mv_length_hori;
+ uint32_t log2_max_mv_length_vert;
+ uint32_t num_reorder_frames;
+ uint32_t max_dec_frame_buffering;
+};
+
+struct rvce_enc_operation { /* per-picture encode operation parameters; 70 fields, all 32-bit unsigned */
+ uint32_t insert_headers;
+ uint32_t picture_structure;
+ uint32_t allowed_max_bitstream_size;
+ uint32_t force_refresh_map;
+ uint32_t insert_aud;
+ uint32_t end_of_sequence;
+ uint32_t end_of_stream;
+ uint32_t input_picture_luma_address_hi;
+ uint32_t input_picture_luma_address_lo;
+ uint32_t input_picture_chroma_address_hi;
+ uint32_t input_picture_chroma_address_lo;
+ uint32_t enc_input_frame_y_pitch;
+ uint32_t enc_input_pic_luma_pitch;
+ uint32_t enc_input_pic_chroma_pitch;
+ uint32_t enc_input_pic_addr_array;
+ uint32_t enc_input_pic_addr_array_disable2pipe_disablemboffload;
+ uint32_t enc_input_pic_tile_config;
+ uint32_t enc_pic_type;
+ uint32_t enc_idr_flag;
+ uint32_t enc_idr_pic_id;
+ uint32_t enc_mgs_key_pic;
+ uint32_t enc_reference_flag;
+ uint32_t enc_temporal_layer_index;
+ uint32_t num_ref_idx_active_override_flag;
+ uint32_t num_ref_idx_l0_active_minus1;
+ uint32_t num_ref_idx_l1_active_minus1;
+ uint32_t enc_ref_list_modification_op;
+ uint32_t enc_ref_list_modification_num;
+ uint32_t enc_decoded_picture_marking_op;
+ uint32_t enc_decoded_picture_marking_num;
+ uint32_t enc_decoded_picture_marking_idx;
+ uint32_t enc_decoded_ref_base_picture_marking_op;
+ uint32_t enc_decoded_ref_base_picture_marking_num;
+ uint32_t l0_picture_structure;
+ uint32_t l0_enc_pic_type;
+ uint32_t l0_frame_number;
+ uint32_t l0_picture_order_count;
+ uint32_t l0_luma_offset;
+ uint32_t l0_chroma_offset;
+ uint32_t l1_picture_structure;
+ uint32_t l1_enc_pic_type;
+ uint32_t l1_frame_number;
+ uint32_t l1_picture_order_count;
+ uint32_t l1_luma_offset;
+ uint32_t l1_chroma_offset;
+ uint32_t enc_reconstructed_luma_offset;
+ uint32_t enc_reconstructed_chroma_offset;
+ uint32_t enc_coloc_buffer_offset;
+ uint32_t enc_reconstructed_ref_base_picture_luma_offset;
+ uint32_t enc_reconstructed_ref_base_picture_chroma_offset;
+ uint32_t enc_reference_ref_base_picture_luma_offset;
+ uint32_t enc_reference_ref_base_picture_chroma_offset;
+ uint32_t picture_count;
+ uint32_t frame_number;
+ uint32_t picture_order_count;
+ uint32_t num_i_pic_remain_in_rcgop;
+ uint32_t num_p_pic_remain_in_rcgop;
+ uint32_t num_b_pic_remain_in_rcgop;
+ uint32_t num_ir_pic_remain_in_rcgop;
+ uint32_t enable_intra_refresh;
+ uint32_t aq_variance_en;
+ uint32_t aq_block_size;
+ uint32_t aq_mb_variance_sel;
+ uint32_t aq_frame_variance_sel;
+ uint32_t aq_param_a;
+ uint32_t aq_param_b;
+ uint32_t aq_param_c;
+ uint32_t aq_param_d;
+ uint32_t aq_param_e;
+ uint32_t context_in_sfb;
+};
+
+struct rvce_enc_create { /* encoder creation parameters; every field is a 32-bit unsigned value */
+ uint32_t enc_use_circular_buffer;
+ uint32_t enc_profile;
+ uint32_t enc_level;
+ uint32_t enc_pic_struct_restriction;
+ uint32_t enc_image_width;
+ uint32_t enc_image_height;
+ uint32_t enc_ref_pic_luma_pitch;
+ uint32_t enc_ref_pic_chroma_pitch;
+ uint32_t enc_ref_y_height_in_qw;
+ uint32_t enc_ref_pic_addr_array_enc_pic_struct_restriction_disable_rdo;
+ uint32_t enc_pre_encode_context_buffer_offset;
+ uint32_t enc_pre_encode_input_luma_buffer_offset;
+ uint32_t enc_pre_encode_input_chroma_buffer_offset;
+ uint32_t enc_pre_encode_mode_chromaflag_vbaqmode_scenechangesensitivity;
+};
+
+struct rvce_config_ext { /* config extension values; currently a single field */
+ uint32_t enc_enable_perf_logging;
+};
+
+struct rvce_h264_enc_pic { /* H.264 encode state: one member per parameter struct above, plus picture bookkeeping */
+ struct rvce_rate_control rc;
+ struct rvce_motion_estimation me;
+ struct rvce_pic_control pc;
+ struct rvce_task_info ti;
+ struct rvce_feedback_buf_pkg fb;
+ struct rvce_rdo rdo;
+ struct rvce_vui vui;
+ struct rvce_enc_operation eo;
+ struct rvce_enc_create ec;
+ struct rvce_config_ext ce;
+
+ unsigned quant_i_frames; /* same names as the quant_* fields of rvce_rate_control */
+ unsigned quant_p_frames;
+ unsigned quant_b_frames;
+
+ enum pipe_h264_enc_picture_type picture_type;
+ unsigned frame_num;
+ unsigned frame_num_cnt;
+ unsigned p_remain;
+ unsigned i_remain;
+ unsigned idr_pic_id;
+ unsigned gop_cnt;
+ unsigned gop_size;
+ unsigned pic_order_cnt;
+ unsigned ref_idx_l0;
+ unsigned ref_idx_l1;
+ unsigned addrmode_arraymode_disrdo_distwoinstants;
+
+ bool not_referenced;
+ bool is_idr;
+ bool has_ref_pic_list;
+ bool enable_vui;
+ unsigned int ref_pic_list_0[32]; /* fixed capacity of 32 entries */
+ unsigned int ref_pic_list_1[32]; /* fixed capacity of 32 entries */
+ unsigned int frame_idx[32]; /* fixed capacity of 32 entries */
+};
+
+/* VCE encoder representation */
+struct rvce_encoder {
+ struct pipe_video_codec base; /* first member; rvce_create_encoder returns &enc->base */
+
+ /* version specific packets */
+ void (*session)(struct rvce_encoder *enc);
+ void (*create)(struct rvce_encoder *enc);
+ void (*feedback)(struct rvce_encoder *enc);
+ void (*rate_control)(struct rvce_encoder *enc);
+ void (*config_extension)(struct rvce_encoder *enc);
+ void (*pic_control)(struct rvce_encoder *enc);
+ void (*motion_estimation)(struct rvce_encoder *enc);
+ void (*rdo)(struct rvce_encoder *enc);
+ void (*vui)(struct rvce_encoder *enc);
+ void (*config)(struct rvce_encoder *enc);
+ void (*encode)(struct rvce_encoder *enc);
+ void (*destroy)(struct rvce_encoder *enc);
+ void (*task_info)(struct rvce_encoder *enc, uint32_t op,
+ uint32_t dep, uint32_t fb_idx,
+ uint32_t ring_idx);
+
+ unsigned stream_handle;
+
+ struct pipe_screen *screen;
+ struct radeon_winsys* ws;
+ struct radeon_winsys_cs* cs; /* command stream that RVCE_CS and rvce_add_buffer write to */
+
+ rvce_get_buffer get_buffer;
+
+ struct pb_buffer* handle;
+ struct radeon_surf* luma;
+ struct radeon_surf* chroma;
+
+ struct pb_buffer* bs_handle;
+ unsigned bs_size;
+
+ struct rvce_cpb_slot *cpb_array; /* cpb_num slots, allocated with CALLOC in rvce_create_encoder */
+ struct list_head cpb_slots;
+ unsigned cpb_num;
+
+ struct rvid_buffer *fb;
+ struct rvid_buffer cpb; /* created in rvce_create_encoder, destroyed on its error path */
+ struct pipe_h264_enc_picture_desc pic;
+ struct rvce_h264_enc_pic enc_pic;
+
+ unsigned task_info_idx;
+ unsigned bs_idx;
+
+ bool use_vm; /* rvce_add_buffer emits virtual addresses when set, relocations otherwise */
+ bool use_vui;
+ bool dual_pipe; /* when set, rvce_create_encoder adds aux buffer space to the cpb size */
+ bool dual_inst;
+};
+
+/* CPB handling functions */
+struct rvce_cpb_slot *current_slot(struct rvce_encoder *enc);
+struct rvce_cpb_slot *l0_slot(struct rvce_encoder *enc);
+struct rvce_cpb_slot *l1_slot(struct rvce_encoder *enc);
+void rvce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot,
+ signed *luma_offset, signed *chroma_offset);
+
+struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
+ const struct pipe_video_codec *templat,
+ struct radeon_winsys* ws,
+ rvce_get_buffer get_buffer);
+
+bool rvce_is_fw_version_supported(struct r600_common_screen *rscreen);
+
+void rvce_add_buffer(struct rvce_encoder *enc, struct pb_buffer *buf,
+ enum radeon_bo_usage usage, enum radeon_bo_domain domain,
+ signed offset);
+
+/* init vce fw 40.2.2 specific callbacks */
+void radeon_vce_40_2_2_init(struct rvce_encoder *enc);
+
+/* init vce fw 50 specific callbacks */
+void radeon_vce_50_init(struct rvce_encoder *enc);
+
+/* init vce fw 52 specific callbacks */
+void radeon_vce_52_init(struct rvce_encoder *enc);
+
+/* version specific function pointer for getting parameters */
+void (*get_pic_param)(struct rvce_encoder *enc, /* NOTE(review): no 'extern', so each file including this header gets its own tentative definition - confirm this links with -fno-common */
+ struct pipe_h264_enc_picture_desc *pic);
+
+/* get parameters for vce 40.2.2 */
+void radeon_vce_40_2_2_get_param(struct rvce_encoder *enc,
+ struct pipe_h264_enc_picture_desc *pic);
+
+/* get parameters for vce 50 */
+void radeon_vce_50_get_param(struct rvce_encoder *enc,
+ struct pipe_h264_enc_picture_desc *pic);
+
+/* get parameters for vce 52 */
+void radeon_vce_52_get_param(struct rvce_encoder *enc,
+ struct pipe_h264_enc_picture_desc *pic);
+
+#endif
diff -Nru mesa-17.2.4/src/gallium/drivers/r600/radeon_video.c mesa-17.3.3/src/gallium/drivers/r600/radeon_video.c
--- mesa-17.2.4/src/gallium/drivers/r600/radeon_video.c 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r600/radeon_video.c 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,349 @@
+/**************************************************************************
+ *
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/*
+ * Authors:
+ * Christian König
+ *
+ */
+
+#include
+
+#include "util/u_memory.h"
+#include "util/u_video.h"
+
+#include "vl/vl_defines.h"
+#include "vl/vl_video_buffer.h"
+
+#include "r600_pipe_common.h"
+#include "radeon_video.h"
+#include "radeon_vce.h"
+
+#define UVD_FW_1_66_16 ((1 << 24) | (66 << 16) | (16 << 8))
+
+/* generate a stream handle from the bit-reversed pid and a call counter */
+unsigned rvid_alloc_stream_handle()
+{
+	static unsigned counter = 0;
+	unsigned pid = getpid();
+	unsigned mirrored = 0;
+	unsigned bit;
+
+	/* pid bit i ends up at bit 31 - i: feed the bits in LSB first */
+	for (bit = 0; bit < 32; ++bit, pid >>= 1)
+		mirrored = (mirrored << 1) | (pid & 1);
+
+	return mirrored ^ ++counter;
+}
+
+/* create a buffer in the winsys; returns false when no resource was obtained */
+bool rvid_create_buffer(struct pipe_screen *screen, struct rvid_buffer *buffer,
+			unsigned size, unsigned usage)
+{
+	/* reset the descriptor before filling it in */
+	memset(buffer, 0, sizeof(*buffer));
+	buffer->usage = usage;
+
+	/* PIPE_BIND_SHARED asks for a non-sub-allocated buffer: hardware
+	 * buffer placement restrictions require the kernel to be able to
+	 * move buffers around individually.
+	 */
+	buffer->res = (struct r600_resource *)pipe_buffer_create(
+		screen, PIPE_BIND_SHARED, usage, size);
+
+	return buffer->res ? true : false;
+}
+
+/* destroy a buffer by pointing buffer->res at NULL through r600_resource_reference (presumably dropping the reference it held) */
+void rvid_destroy_buffer(struct rvid_buffer *buffer)
+{
+ r600_resource_reference(&buffer->res, NULL);
+}
+
+/* reallocate a buffer with a new size, carrying the old content over */
+bool rvid_resize_buffer(struct pipe_screen *screen, struct radeon_winsys_cs *cs,
+			struct rvid_buffer *new_buf, unsigned new_size)
+{
+	struct r600_common_screen *rscreen = (struct r600_common_screen *)screen;
+	struct radeon_winsys* ws = rscreen->ws;
+	/* only the part that fits into both buffers gets copied */
+	unsigned copy_size = MIN2(new_buf->res->buf->size, new_size);
+	struct rvid_buffer saved = *new_buf;
+	void *from = NULL, *to = NULL;
+
+	if (!rvid_create_buffer(screen, new_buf, new_size, new_buf->usage))
+		goto error;
+
+	from = ws->buffer_map(saved.res->buf, cs, PIPE_TRANSFER_READ);
+	if (!from)
+		goto error;
+
+	to = ws->buffer_map(new_buf->res->buf, cs, PIPE_TRANSFER_WRITE);
+	if (!to)
+		goto error;
+
+	memcpy(to, from, copy_size);
+	/* zero whatever the copy did not cover in the new buffer */
+	if (new_size > copy_size)
+		memset(to + copy_size, 0, new_size - copy_size);
+	ws->buffer_unmap(new_buf->res->buf);
+	ws->buffer_unmap(saved.res->buf);
+	rvid_destroy_buffer(&saved);
+	return true;
+
+error:
+	/* roll back: drop the new allocation and hand the old buffer back */
+	if (from)
+		ws->buffer_unmap(saved.res->buf);
+	rvid_destroy_buffer(new_buf);
+	*new_buf = saved;
+	return false;
+}
+
+/* zero the whole buffer via rctx->dma_clear_buffer, then flush the context */
+void rvid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffer)
+{
+	struct r600_common_context *rctx = (struct r600_common_context*)context;
+	struct r600_resource *res = buffer->res;
+
+	rctx->dma_clear_buffer(context, &res->b.b, 0, res->buf->size, 0);
+	context->flush(context, NULL, 0);
+}
+
+/**
+ * join surfaces into the same buffer with identical tiling params:
+ * sum up their sizes and replace the backend buffers with a single bo
+ */
+void rvid_join_surfaces(struct r600_common_context *rctx,
+ struct pb_buffer** buffers[VL_NUM_COMPONENTS],
+ struct radeon_surf *surfaces[VL_NUM_COMPONENTS])
+{
+ struct radeon_winsys* ws;
+ unsigned best_tiling, best_wh, off;
+ unsigned size, alignment;
+ struct pb_buffer *pb;
+ unsigned i, j;
+
+ ws = rctx->ws;
+
+ for (i = 0, best_tiling = 0, best_wh = ~0; i < VL_NUM_COMPONENTS; ++i) { /* pass 1: pick the surface whose tiling is copied to all others */
+ unsigned wh;
+
+ if (!surfaces[i])
+ continue;
+
+ /* choose the smallest bank w/h for now */
+ wh = surfaces[i]->u.legacy.bankw * surfaces[i]->u.legacy.bankh;
+ if (wh < best_wh) {
+ best_wh = wh;
+ best_tiling = i;
+ }
+ }
+
+ for (i = 0, off = 0; i < VL_NUM_COMPONENTS; ++i) { /* pass 2: lay the surfaces out back to back */
+ if (!surfaces[i])
+ continue;
+
+ /* adjust the texture layer offsets */
+ off = align(off, surfaces[i]->surf_alignment);
+
+ /* copy the tiling parameters */
+ surfaces[i]->u.legacy.bankw = surfaces[best_tiling]->u.legacy.bankw;
+ surfaces[i]->u.legacy.bankh = surfaces[best_tiling]->u.legacy.bankh;
+ surfaces[i]->u.legacy.mtilea = surfaces[best_tiling]->u.legacy.mtilea;
+ surfaces[i]->u.legacy.tile_split = surfaces[best_tiling]->u.legacy.tile_split;
+
+ for (j = 0; j < ARRAY_SIZE(surfaces[i]->u.legacy.level); ++j)
+ surfaces[i]->u.legacy.level[j].offset += off;
+
+ off += surfaces[i]->surf_size;
+ }
+
+ for (i = 0, size = 0, alignment = 0; i < VL_NUM_COMPONENTS; ++i) { /* pass 3: total size and largest alignment of the existing buffers */
+ if (!buffers[i] || !*buffers[i])
+ continue;
+
+ size = align(size, (*buffers[i])->alignment);
+ size += (*buffers[i])->size;
+ alignment = MAX2(alignment, (*buffers[i])->alignment * 1); /* NOTE(review): the "* 1" has no effect */
+ }
+
+ if (!size) /* no buffers to join */
+ return;
+
+ /* TODO: 2D tiling workaround */
+ alignment *= 2;
+
+ pb = ws->buffer_create(ws, size, alignment, RADEON_DOMAIN_VRAM,
+ RADEON_FLAG_GTT_WC);
+ if (!pb) /* NOTE(review): surface offsets and tiling were already rewritten in pass 2 at this point */
+ return;
+
+ for (i = 0; i < VL_NUM_COMPONENTS; ++i) {
+ if (!buffers[i] || !*buffers[i])
+ continue;
+
+ pb_reference(buffers[i], pb); /* point every existing component buffer at the joined bo */
+ }
+
+ pb_reference(&pb, NULL); /* presumably drops the local reference; the components keep theirs */
+}
+
+int rvid_get_video_param(struct pipe_screen *screen,
+ enum pipe_video_profile profile,
+ enum pipe_video_entrypoint entrypoint,
+ enum pipe_video_cap param)
+{ /* returns the value of capability 'param' for the given profile and entrypoint; 0 for caps not listed */
+ struct r600_common_screen *rscreen = (struct r600_common_screen *)screen;
+ enum pipe_video_format codec = u_reduce_video_profile(profile);
+ struct radeon_info info; /* NOTE(review): filled by query_info below but never read in this function */
+
+ rscreen->ws->query_info(rscreen->ws, &info);
+
+ if (entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE) { /* encode caps are answered here and never fall through */
+ switch (param) {
+ case PIPE_VIDEO_CAP_SUPPORTED:
+ return codec == PIPE_VIDEO_FORMAT_MPEG4_AVC &&
+ rvce_is_fw_version_supported(rscreen);
+ case PIPE_VIDEO_CAP_NPOT_TEXTURES:
+ return 1;
+ case PIPE_VIDEO_CAP_MAX_WIDTH:
+ return 2048;
+ case PIPE_VIDEO_CAP_MAX_HEIGHT:
+ return 1152;
+ case PIPE_VIDEO_CAP_PREFERED_FORMAT:
+ return PIPE_FORMAT_NV12;
+ case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
+ return false;
+ case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
+ return false;
+ case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
+ return true;
+ case PIPE_VIDEO_CAP_STACKED_FRAMES:
+ return 1;
+ default:
+ return 0;
+ }
+ }
+
+ switch (param) { /* all other entrypoints */
+ case PIPE_VIDEO_CAP_SUPPORTED:
+ switch (codec) {
+ case PIPE_VIDEO_FORMAT_MPEG12:
+ return profile != PIPE_VIDEO_PROFILE_MPEG1;
+ case PIPE_VIDEO_FORMAT_MPEG4:
+ /* no support for MPEG4 on older hw */
+ return rscreen->family >= CHIP_PALM;
+ case PIPE_VIDEO_FORMAT_MPEG4_AVC:
+ return true;
+ case PIPE_VIDEO_FORMAT_VC1:
+ return true;
+ case PIPE_VIDEO_FORMAT_HEVC:
+ return false;
+ case PIPE_VIDEO_FORMAT_JPEG:
+ return false;
+ default:
+ return false;
+ }
+ case PIPE_VIDEO_CAP_NPOT_TEXTURES:
+ return 1;
+ case PIPE_VIDEO_CAP_MAX_WIDTH:
+ return 2048;
+ case PIPE_VIDEO_CAP_MAX_HEIGHT:
+ return 1152;
+ case PIPE_VIDEO_CAP_PREFERED_FORMAT:
+ if (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10)
+ return PIPE_FORMAT_P016;
+ else
+ return PIPE_FORMAT_NV12;
+
+ case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
+ case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
+ if (rscreen->family < CHIP_PALM) {
+ /* MPEG2 only with shaders and no support for
+ interlacing on R6xx style UVD */
+ return codec != PIPE_VIDEO_FORMAT_MPEG12 &&
+ rscreen->family > CHIP_RV770;
+ } else {
+ enum pipe_video_format format = u_reduce_video_profile(profile); /* same call and argument as 'codec' above */
+
+ if (format == PIPE_VIDEO_FORMAT_HEVC)
+ return false; //The firmware doesn't support interlaced HEVC.
+ else if (format == PIPE_VIDEO_FORMAT_JPEG)
+ return false;
+ return true;
+ }
+ case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
+ return true;
+ case PIPE_VIDEO_CAP_MAX_LEVEL:
+ switch (profile) {
+ case PIPE_VIDEO_PROFILE_MPEG1:
+ return 0;
+ case PIPE_VIDEO_PROFILE_MPEG2_SIMPLE:
+ case PIPE_VIDEO_PROFILE_MPEG2_MAIN:
+ return 3;
+ case PIPE_VIDEO_PROFILE_MPEG4_SIMPLE:
+ return 3;
+ case PIPE_VIDEO_PROFILE_MPEG4_ADVANCED_SIMPLE:
+ return 5;
+ case PIPE_VIDEO_PROFILE_VC1_SIMPLE:
+ return 1;
+ case PIPE_VIDEO_PROFILE_VC1_MAIN:
+ return 2;
+ case PIPE_VIDEO_PROFILE_VC1_ADVANCED:
+ return 4;
+ case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE:
+ case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN:
+ case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH:
+ return 41;
+ case PIPE_VIDEO_PROFILE_HEVC_MAIN:
+ case PIPE_VIDEO_PROFILE_HEVC_MAIN_10:
+ return 186;
+ default:
+ return 0;
+ }
+ default:
+ return 0;
+ }
+}
+
+boolean rvid_is_format_supported(struct pipe_screen *screen,
+				 enum pipe_format format,
+				 enum pipe_video_profile profile,
+				 enum pipe_video_entrypoint entrypoint)
+{
+	switch (profile) {
+	case PIPE_VIDEO_PROFILE_HEVC_MAIN_10:
+		/* HEVC 10 bit decoding should use P016 instead of NV12 if possible */
+		return format == PIPE_FORMAT_NV12 || format == PIPE_FORMAT_P016;
+	case PIPE_VIDEO_PROFILE_UNKNOWN:
+		return vl_video_buffer_is_format_supported(screen, format, profile, entrypoint);
+	default:
+		/* we can only handle this one with UVD */
+		return format == PIPE_FORMAT_NV12;
+	}
+}
diff -Nru mesa-17.2.4/src/gallium/drivers/r600/radeon_video.h mesa-17.3.3/src/gallium/drivers/r600/radeon_video.h
--- mesa-17.2.4/src/gallium/drivers/r600/radeon_video.h 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r600/radeon_video.h 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,85 @@
+/**************************************************************************
+ *
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/*
+ * Authors:
+ * Christian König
+ *
+ */
+
+#ifndef RADEON_VIDEO_H
+#define RADEON_VIDEO_H
+
+#include "radeon/radeon_winsys.h"
+#include "vl/vl_video_buffer.h"
+
+#define RVID_ERR(fmt, args...) /* print an error to stderr, prefixed with file, line and function; fmt must be a string literal since it is pasted onto the prefix */ \
+ fprintf(stderr, "EE %s:%d %s UVD - "fmt, __FILE__, __LINE__, __func__, ##args)
+
+/* video buffer representation: the usage it was created with and the backing resource */
+struct rvid_buffer
+{
+ unsigned usage; /* kept so rvid_resize_buffer can reallocate with the same usage */
+ struct r600_resource *res; /* NULL when rvid_create_buffer could not obtain a resource */
+};
+
+/* generate a stream handle */
+unsigned rvid_alloc_stream_handle(void);
+
+/* create a buffer in the winsys */
+bool rvid_create_buffer(struct pipe_screen *screen, struct rvid_buffer *buffer,
+ unsigned size, unsigned usage);
+
+/* destroy a buffer */
+void rvid_destroy_buffer(struct rvid_buffer *buffer);
+
+/* reallocate a buffer, preserving its content */
+bool rvid_resize_buffer(struct pipe_screen *screen, struct radeon_winsys_cs *cs,
+ struct rvid_buffer *new_buf, unsigned new_size);
+
+/* clear the buffer with zeros */
+void rvid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffer);
+
+/* join surfaces into the same buffer with identical tiling params:
+ sum up their sizes and replace the backend buffers with a single bo */
+void rvid_join_surfaces(struct r600_common_context *rctx,
+ struct pb_buffer** buffers[VL_NUM_COMPONENTS],
+ struct radeon_surf *surfaces[VL_NUM_COMPONENTS]);
+
+/* returns supported codecs and other parameters */
+int rvid_get_video_param(struct pipe_screen *screen,
+ enum pipe_video_profile profile,
+ enum pipe_video_entrypoint entrypoint,
+ enum pipe_video_cap param);
+
+/* format check: NV12 (or P016 for HEVC Main 10) when a codec profile is given, otherwise the generic vl check */
+boolean rvid_is_format_supported(struct pipe_screen *screen,
+ enum pipe_format format,
+ enum pipe_video_profile profile,
+ enum pipe_video_entrypoint entrypoint);
+
+#endif // RADEON_VIDEO_H
diff -Nru mesa-17.2.4/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp mesa-17.3.3/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
--- mesa-17.2.4/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -933,6 +933,11 @@
cf_node *c = static_cast(*I);
if (c->jump_after_target) {
+ if (c->jump_target->next == NULL) {
+ c->jump_target->insert_after(sh.create_cf(CF_OP_NOP));
+ if (last_cf == c->jump_target)
+ last_cf = static_cast(c->jump_target->next);
+ }
c->jump_target = static_cast(c->jump_target->next);
c->jump_after_target = false;
}
diff -Nru mesa-17.2.4/src/gallium/drivers/r600/sb/sb_expr.cpp mesa-17.3.3/src/gallium/drivers/r600/sb/sb_expr.cpp
--- mesa-17.2.4/src/gallium/drivers/r600/sb/sb_expr.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r600/sb/sb_expr.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -753,7 +753,9 @@
n.bc.src[0].abs == n.bc.src[1].abs) {
switch (n.bc.op) {
case ALU_OP2_MIN: // (MIN x, x) => (MOV x)
+ case ALU_OP2_MIN_DX10:
case ALU_OP2_MAX:
+ case ALU_OP2_MAX_DX10:
convert_to_mov(n, v0, n.bc.src[0].neg, n.bc.src[0].abs);
return fold_alu_op1(n);
case ALU_OP2_ADD: // (ADD x, x) => (MUL x, 2)
diff -Nru mesa-17.2.4/src/gallium/drivers/r600/sb/sb_if_conversion.cpp mesa-17.3.3/src/gallium/drivers/r600/sb/sb_if_conversion.cpp
--- mesa-17.2.4/src/gallium/drivers/r600/sb/sb_if_conversion.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r600/sb/sb_if_conversion.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -136,7 +136,7 @@
);
if (s.region_count || s.fetch_count || s.alu_kill_count ||
- s.if_count != 1 || s.repeat_count)
+ s.if_count != 1 || s.repeat_count || s.uses_ar)
return false;
unsigned real_alu_count = s.alu_count - s.alu_copy_mov_count;
diff -Nru mesa-17.2.4/src/gallium/drivers/r600/sb/sb_ir.cpp mesa-17.3.3/src/gallium/drivers/r600/sb/sb_ir.cpp
--- mesa-17.2.4/src/gallium/drivers/r600/sb/sb_ir.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r600/sb/sb_ir.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -461,6 +461,8 @@
++s.alu_kill_count;
else if (a->is_copy_mov())
++s.alu_copy_mov_count;
+ if (a->uses_ar())
+ s.uses_ar = true;
} else if (n->is_fetch_inst())
++s.fetch_count;
else if (n->is_cf_inst())
diff -Nru mesa-17.2.4/src/gallium/drivers/r600/sb/sb_ir.h mesa-17.3.3/src/gallium/drivers/r600/sb/sb_ir.h
--- mesa-17.2.4/src/gallium/drivers/r600/sb/sb_ir.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r600/sb/sb_ir.h 2018-01-18 21:30:28.000000000 +0000
@@ -726,11 +726,12 @@
unsigned depart_count;
unsigned repeat_count;
unsigned if_count;
+ bool uses_ar;
node_stats() : alu_count(), alu_kill_count(), alu_copy_mov_count(),
cf_count(), fetch_count(), region_count(),
loop_count(), phi_count(), loop_phi_count(), depart_count(),
- repeat_count(), if_count() {}
+ repeat_count(), if_count(), uses_ar(false) {}
void dump();
};
diff -Nru mesa-17.2.4/src/gallium/drivers/r600/sb/sb_sched.cpp mesa-17.3.3/src/gallium/drivers/r600/sb/sb_sched.cpp
--- mesa-17.2.4/src/gallium/drivers/r600/sb/sb_sched.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r600/sb/sb_sched.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -711,22 +711,24 @@
}
int post_scheduler::run() {
- run_on(sh.root);
- return 0;
+ return run_on(sh.root) ? 0 : 1;
}
-void post_scheduler::run_on(container_node* n) {
-
+bool post_scheduler::run_on(container_node* n) {
+ int r = true;
for (node_riterator I = n->rbegin(), E = n->rend(); I != E; ++I) {
if (I->is_container()) {
if (I->subtype == NST_BB) {
bb_node* bb = static_cast<bb_node*>(*I);
- schedule_bb(bb);
+ r = schedule_bb(bb);
} else {
- run_on(static_cast<container_node*>(*I));
+ r = run_on(static_cast<container_node*>(*I));
}
+ if (!r)
+ break;
}
}
+ return r;
}
void post_scheduler::init_uc_val(container_node *c, value *v) {
@@ -758,7 +760,7 @@
return F == ucm.end() ? 0 : F->second;
}
-void post_scheduler::schedule_bb(bb_node* bb) {
+bool post_scheduler::schedule_bb(bb_node* bb) {
PSC_DUMP(
sblog << "scheduling BB " << bb->id << "\n";
if (!pending.empty())
@@ -791,8 +793,10 @@
if (n->is_alu_clause()) {
n->remove();
- process_alu(static_cast<container_node*>(n));
- continue;
+ bool r = process_alu(static_cast<container_node*>(n));
+ if (r)
+ continue;
+ return false;
}
n->remove();
@@ -800,6 +804,7 @@
}
this->cur_bb = NULL;
+ return true;
}
void post_scheduler::init_regmap() {
@@ -933,10 +938,10 @@
cur_bb->push_front(c);
}
-void post_scheduler::process_alu(container_node *c) {
+bool post_scheduler::process_alu(container_node *c) {
if (c->empty())
- return;
+ return true;
ucm.clear();
alu.reset();
@@ -973,7 +978,7 @@
}
}
- schedule_alu(c);
+ return schedule_alu(c);
}
void post_scheduler::update_local_interferences() {
@@ -1135,15 +1140,20 @@
emit_index_registers();
}
-void post_scheduler::schedule_alu(container_node *c) {
+bool post_scheduler::schedule_alu(container_node *c) {
assert(!ready.empty() || !ready_copies.empty());
- while (1) {
-
+ bool improving = true;
+ int last_pending = pending.count();
+ while (improving) {
prev_regmap = regmap;
-
if (!prepare_alu_group()) {
+
+ int new_pending = pending.count();
+ improving = (new_pending < last_pending) || (last_pending == 0);
+ last_pending = new_pending;
+
if (alu.current_idx[0] || alu.current_idx[1]) {
regmap = prev_regmap;
emit_clause();
@@ -1186,6 +1196,7 @@
dump::dump_op_list(&pending);
assert(!"unscheduled pending instructions");
}
+ return improving;
}
void post_scheduler::add_interferences(value *v, sb_bitset &rb, val_set &vs) {
diff -Nru mesa-17.2.4/src/gallium/drivers/r600/sb/sb_sched.h mesa-17.3.3/src/gallium/drivers/r600/sb/sb_sched.h
--- mesa-17.2.4/src/gallium/drivers/r600/sb/sb_sched.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/r600/sb/sb_sched.h 2018-01-18 21:30:28.000000000 +0000
@@ -267,14 +267,14 @@
live(), ucm(), alu(sh), regmap(), cleared_interf() {}
virtual int run();
- void run_on(container_node *n);
- void schedule_bb(bb_node *bb);
+ bool run_on(container_node *n);
+ bool schedule_bb(bb_node *bb);
void load_index_register(value *v, unsigned idx);
void process_fetch(container_node *c);
- void process_alu(container_node *c);
- void schedule_alu(container_node *c);
+ bool process_alu(container_node *c);
+ bool schedule_alu(container_node *c);
bool prepare_alu_group();
void release_op(node *n);
diff -Nru mesa-17.2.4/src/gallium/drivers/radeon/cayman_msaa.c mesa-17.3.3/src/gallium/drivers/radeon/cayman_msaa.c
--- mesa-17.2.4/src/gallium/drivers/radeon/cayman_msaa.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeon/cayman_msaa.c 1970-01-01 00:00:00.000000000 +0000
@@ -1,269 +0,0 @@
-/*
- * Copyright 2014 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * Authors: Marek Olšák
- *
- */
-
-#include "r600_cs.h"
-
-/* 2xMSAA
- * There are two locations (4, 4), (-4, -4). */
-const uint32_t eg_sample_locs_2x[4] = {
- FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
- FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
- FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
- FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
-};
-const unsigned eg_max_dist_2x = 4;
-/* 4xMSAA
- * There are 4 locations: (-2, 6), (6, -2), (-6, 2), (2, 6). */
-const uint32_t eg_sample_locs_4x[4] = {
- FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
- FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
- FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
- FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
-};
-const unsigned eg_max_dist_4x = 6;
-
-/* Cayman 8xMSAA */
-static const uint32_t cm_sample_locs_8x[] = {
- FILL_SREG( 1, -3, -1, 3, 5, 1, -3, -5),
- FILL_SREG( 1, -3, -1, 3, 5, 1, -3, -5),
- FILL_SREG( 1, -3, -1, 3, 5, 1, -3, -5),
- FILL_SREG( 1, -3, -1, 3, 5, 1, -3, -5),
- FILL_SREG(-5, 5, -7, -1, 3, 7, 7, -7),
- FILL_SREG(-5, 5, -7, -1, 3, 7, 7, -7),
- FILL_SREG(-5, 5, -7, -1, 3, 7, 7, -7),
- FILL_SREG(-5, 5, -7, -1, 3, 7, 7, -7),
-};
-static const unsigned cm_max_dist_8x = 8;
-/* Cayman 16xMSAA */
-static const uint32_t cm_sample_locs_16x[] = {
- FILL_SREG( 1, 1, -1, -3, -3, 2, 4, -1),
- FILL_SREG( 1, 1, -1, -3, -3, 2, 4, -1),
- FILL_SREG( 1, 1, -1, -3, -3, 2, 4, -1),
- FILL_SREG( 1, 1, -1, -3, -3, 2, 4, -1),
- FILL_SREG(-5, -2, 2, 5, 5, 3, 3, -5),
- FILL_SREG(-5, -2, 2, 5, 5, 3, 3, -5),
- FILL_SREG(-5, -2, 2, 5, 5, 3, 3, -5),
- FILL_SREG(-5, -2, 2, 5, 5, 3, 3, -5),
- FILL_SREG(-2, 6, 0, -7, -4, -6, -6, 4),
- FILL_SREG(-2, 6, 0, -7, -4, -6, -6, 4),
- FILL_SREG(-2, 6, 0, -7, -4, -6, -6, 4),
- FILL_SREG(-2, 6, 0, -7, -4, -6, -6, 4),
- FILL_SREG(-8, 0, 7, -4, 6, 7, -7, -8),
- FILL_SREG(-8, 0, 7, -4, 6, 7, -7, -8),
- FILL_SREG(-8, 0, 7, -4, 6, 7, -7, -8),
- FILL_SREG(-8, 0, 7, -4, 6, 7, -7, -8),
-};
-static const unsigned cm_max_dist_16x = 8;
-
-void cayman_get_sample_position(struct pipe_context *ctx, unsigned sample_count,
- unsigned sample_index, float *out_value)
-{
- int offset, index;
- struct {
- int idx:4;
- } val;
- switch (sample_count) {
- case 1:
- default:
- out_value[0] = out_value[1] = 0.5;
- break;
- case 2:
- offset = 4 * (sample_index * 2);
- val.idx = (eg_sample_locs_2x[0] >> offset) & 0xf;
- out_value[0] = (float)(val.idx + 8) / 16.0f;
- val.idx = (eg_sample_locs_2x[0] >> (offset + 4)) & 0xf;
- out_value[1] = (float)(val.idx + 8) / 16.0f;
- break;
- case 4:
- offset = 4 * (sample_index * 2);
- val.idx = (eg_sample_locs_4x[0] >> offset) & 0xf;
- out_value[0] = (float)(val.idx + 8) / 16.0f;
- val.idx = (eg_sample_locs_4x[0] >> (offset + 4)) & 0xf;
- out_value[1] = (float)(val.idx + 8) / 16.0f;
- break;
- case 8:
- offset = 4 * (sample_index % 4 * 2);
- index = (sample_index / 4) * 4;
- val.idx = (cm_sample_locs_8x[index] >> offset) & 0xf;
- out_value[0] = (float)(val.idx + 8) / 16.0f;
- val.idx = (cm_sample_locs_8x[index] >> (offset + 4)) & 0xf;
- out_value[1] = (float)(val.idx + 8) / 16.0f;
- break;
- case 16:
- offset = 4 * (sample_index % 4 * 2);
- index = (sample_index / 4) * 4;
- val.idx = (cm_sample_locs_16x[index] >> offset) & 0xf;
- out_value[0] = (float)(val.idx + 8) / 16.0f;
- val.idx = (cm_sample_locs_16x[index] >> (offset + 4)) & 0xf;
- out_value[1] = (float)(val.idx + 8) / 16.0f;
- break;
- }
-}
-
-void cayman_init_msaa(struct pipe_context *ctx)
-{
- struct r600_common_context *rctx = (struct r600_common_context*)ctx;
- int i;
-
- cayman_get_sample_position(ctx, 1, 0, rctx->sample_locations_1x[0]);
-
- for (i = 0; i < 2; i++)
- cayman_get_sample_position(ctx, 2, i, rctx->sample_locations_2x[i]);
- for (i = 0; i < 4; i++)
- cayman_get_sample_position(ctx, 4, i, rctx->sample_locations_4x[i]);
- for (i = 0; i < 8; i++)
- cayman_get_sample_position(ctx, 8, i, rctx->sample_locations_8x[i]);
- for (i = 0; i < 16; i++)
- cayman_get_sample_position(ctx, 16, i, rctx->sample_locations_16x[i]);
-}
-
-void cayman_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples)
-{
- switch (nr_samples) {
- default:
- case 1:
- radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 0);
- radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, 0);
- radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, 0);
- radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, 0);
- break;
- case 2:
- radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_2x[0]);
- radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_2x[1]);
- radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, eg_sample_locs_2x[2]);
- radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, eg_sample_locs_2x[3]);
- break;
- case 4:
- radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_4x[0]);
- radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_4x[1]);
- radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, eg_sample_locs_4x[2]);
- radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, eg_sample_locs_4x[3]);
- break;
- case 8:
- radeon_set_context_reg_seq(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 14);
- radeon_emit(cs, cm_sample_locs_8x[0]);
- radeon_emit(cs, cm_sample_locs_8x[4]);
- radeon_emit(cs, 0);
- radeon_emit(cs, 0);
- radeon_emit(cs, cm_sample_locs_8x[1]);
- radeon_emit(cs, cm_sample_locs_8x[5]);
- radeon_emit(cs, 0);
- radeon_emit(cs, 0);
- radeon_emit(cs, cm_sample_locs_8x[2]);
- radeon_emit(cs, cm_sample_locs_8x[6]);
- radeon_emit(cs, 0);
- radeon_emit(cs, 0);
- radeon_emit(cs, cm_sample_locs_8x[3]);
- radeon_emit(cs, cm_sample_locs_8x[7]);
- break;
- case 16:
- radeon_set_context_reg_seq(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 16);
- radeon_emit(cs, cm_sample_locs_16x[0]);
- radeon_emit(cs, cm_sample_locs_16x[4]);
- radeon_emit(cs, cm_sample_locs_16x[8]);
- radeon_emit(cs, cm_sample_locs_16x[12]);
- radeon_emit(cs, cm_sample_locs_16x[1]);
- radeon_emit(cs, cm_sample_locs_16x[5]);
- radeon_emit(cs, cm_sample_locs_16x[9]);
- radeon_emit(cs, cm_sample_locs_16x[13]);
- radeon_emit(cs, cm_sample_locs_16x[2]);
- radeon_emit(cs, cm_sample_locs_16x[6]);
- radeon_emit(cs, cm_sample_locs_16x[10]);
- radeon_emit(cs, cm_sample_locs_16x[14]);
- radeon_emit(cs, cm_sample_locs_16x[3]);
- radeon_emit(cs, cm_sample_locs_16x[7]);
- radeon_emit(cs, cm_sample_locs_16x[11]);
- radeon_emit(cs, cm_sample_locs_16x[15]);
- break;
- }
-}
-
-void cayman_emit_msaa_config(struct radeon_winsys_cs *cs, int nr_samples,
- int ps_iter_samples, int overrast_samples,
- unsigned sc_mode_cntl_1)
-{
- int setup_samples = nr_samples > 1 ? nr_samples :
- overrast_samples > 1 ? overrast_samples : 0;
- /* Required by OpenGL line rasterization.
- *
- * TODO: We should also enable perpendicular endcaps for AA lines,
- * but that requires implementing line stippling in the pixel
- * shader. SC can only do line stippling with axis-aligned
- * endcaps.
- */
- unsigned sc_line_cntl = S_028BDC_DX10_DIAMOND_TEST_ENA(1);
-
- if (setup_samples > 1) {
- /* indexed by log2(nr_samples) */
- unsigned max_dist[] = {
- 0,
- eg_max_dist_2x,
- eg_max_dist_4x,
- cm_max_dist_8x,
- cm_max_dist_16x
- };
- unsigned log_samples = util_logbase2(setup_samples);
- unsigned log_ps_iter_samples =
- util_logbase2(util_next_power_of_two(ps_iter_samples));
-
- radeon_set_context_reg_seq(cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2);
- radeon_emit(cs, sc_line_cntl |
- S_028BDC_EXPAND_LINE_WIDTH(1)); /* CM_R_028BDC_PA_SC_LINE_CNTL */
- radeon_emit(cs, S_028BE0_MSAA_NUM_SAMPLES(log_samples) |
- S_028BE0_MAX_SAMPLE_DIST(max_dist[log_samples]) |
- S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples)); /* CM_R_028BE0_PA_SC_AA_CONFIG */
-
- if (nr_samples > 1) {
- radeon_set_context_reg(cs, CM_R_028804_DB_EQAA,
- S_028804_MAX_ANCHOR_SAMPLES(log_samples) |
- S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) |
- S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) |
- S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples) |
- S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
- S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
- radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
- EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1) |
- sc_mode_cntl_1);
- } else if (overrast_samples > 1) {
- radeon_set_context_reg(cs, CM_R_028804_DB_EQAA,
- S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
- S_028804_STATIC_ANCHOR_ASSOCIATIONS(1) |
- S_028804_OVERRASTERIZATION_AMOUNT(log_samples));
- radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
- sc_mode_cntl_1);
- }
- } else {
- radeon_set_context_reg_seq(cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2);
- radeon_emit(cs, sc_line_cntl); /* CM_R_028BDC_PA_SC_LINE_CNTL */
- radeon_emit(cs, 0); /* CM_R_028BE0_PA_SC_AA_CONFIG */
-
- radeon_set_context_reg(cs, CM_R_028804_DB_EQAA,
- S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
- S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
- radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
- sc_mode_cntl_1);
- }
-}
diff -Nru mesa-17.2.4/src/gallium/drivers/radeon/Makefile.in mesa-17.3.3/src/gallium/drivers/radeon/Makefile.in
--- mesa-17.2.4/src/gallium/drivers/radeon/Makefile.in 2017-10-30 14:50:00.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeon/Makefile.in 2018-01-18 21:30:39.000000000 +0000
@@ -104,7 +104,8 @@
subdir = src/gallium/drivers/radeon
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_gnu_make.m4 \
+am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_compile_flag.m4 \
+ $(top_srcdir)/m4/ax_check_gnu_make.m4 \
$(top_srcdir)/m4/ax_check_python_mako_module.m4 \
$(top_srcdir)/m4/ax_gcc_builtin.m4 \
$(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
@@ -125,12 +126,11 @@
@HAVE_GALLIUM_LLVM_TRUE@libradeon_la_DEPENDENCIES = \
@HAVE_GALLIUM_LLVM_TRUE@ $(am__DEPENDENCIES_1) \
@HAVE_GALLIUM_LLVM_TRUE@ $(am__DEPENDENCIES_1)
-am__objects_1 = cayman_msaa.lo r600_buffer_common.lo r600_gpu_load.lo \
+am__objects_1 = r600_buffer_common.lo r600_gpu_load.lo \
r600_perfcounter.lo r600_pipe_common.lo r600_query.lo \
- r600_streamout.lo r600_test_dma.lo r600_texture.lo \
- r600_viewport.lo radeon_uvd.lo radeon_vcn_dec.lo \
- radeon_vce_40_2_2.lo radeon_vce_50.lo radeon_vce_52.lo \
- radeon_vce.lo radeon_video.lo
+ r600_test_dma.lo r600_texture.lo radeon_uvd.lo \
+ radeon_vcn_dec.lo radeon_vce_40_2_2.lo radeon_vce_50.lo \
+ radeon_vce_52.lo radeon_vce.lo radeon_video.lo
am_libradeon_la_OBJECTS = $(am__objects_1)
libradeon_la_OBJECTS = $(am_libradeon_la_OBJECTS)
AM_V_lt = $(am__v_lt_@AM_V@)
@@ -342,9 +342,9 @@
NVVIEUX_LIBS = @NVVIEUX_LIBS@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
-OMX_CFLAGS = @OMX_CFLAGS@
-OMX_LIBS = @OMX_LIBS@
-OMX_LIB_INSTALL_DIR = @OMX_LIB_INSTALL_DIR@
+OMX_BELLAGIO_CFLAGS = @OMX_BELLAGIO_CFLAGS@
+OMX_BELLAGIO_LIBS = @OMX_BELLAGIO_LIBS@
+OMX_BELLAGIO_LIB_INSTALL_DIR = @OMX_BELLAGIO_LIB_INSTALL_DIR@
OPENCL_LIBNAME = @OPENCL_LIBNAME@
OPENCL_VERSION = @OPENCL_VERSION@
OSMESA_LIB = @OSMESA_LIB@
@@ -398,6 +398,8 @@
VA_LIB_INSTALL_DIR = @VA_LIB_INSTALL_DIR@
VA_MAJOR = @VA_MAJOR@
VA_MINOR = @VA_MINOR@
+VC5_SIMULATOR_CFLAGS = @VC5_SIMULATOR_CFLAGS@
+VC5_SIMULATOR_LIBS = @VC5_SIMULATOR_LIBS@
VDPAU_CFLAGS = @VDPAU_CFLAGS@
VDPAU_LIBS = @VDPAU_LIBS@
VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@
@@ -409,12 +411,15 @@
VL_CFLAGS = @VL_CFLAGS@
VL_LIBS = @VL_LIBS@
VULKAN_ICD_INSTALL_DIR = @VULKAN_ICD_INSTALL_DIR@
-WAYLAND_CFLAGS = @WAYLAND_CFLAGS@
-WAYLAND_LIBS = @WAYLAND_LIBS@
+WAYLAND_CLIENT_CFLAGS = @WAYLAND_CLIENT_CFLAGS@
+WAYLAND_CLIENT_LIBS = @WAYLAND_CLIENT_LIBS@
WAYLAND_PROTOCOLS_DATADIR = @WAYLAND_PROTOCOLS_DATADIR@
WAYLAND_SCANNER = @WAYLAND_SCANNER@
WAYLAND_SCANNER_CFLAGS = @WAYLAND_SCANNER_CFLAGS@
WAYLAND_SCANNER_LIBS = @WAYLAND_SCANNER_LIBS@
+WAYLAND_SERVER_CFLAGS = @WAYLAND_SERVER_CFLAGS@
+WAYLAND_SERVER_LIBS = @WAYLAND_SERVER_LIBS@
+WNO_OVERRIDE_INIT = @WNO_OVERRIDE_INIT@
X11_INCLUDES = @X11_INCLUDES@
XA_MAJOR = @XA_MAJOR@
XA_MINOR = @XA_MINOR@
@@ -499,7 +504,6 @@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
C_SOURCES := \
- cayman_msaa.c \
r600_buffer_common.c \
r600_cs.h \
r600_gpu_load.c \
@@ -508,10 +512,8 @@
r600_pipe_common.h \
r600_query.c \
r600_query.h \
- r600_streamout.c \
r600_test_dma.c \
r600_texture.c \
- r600_viewport.c \
radeon_uvd.c \
radeon_uvd.h \
radeon_vcn_dec.c \
@@ -567,6 +569,8 @@
-I$(top_srcdir)/src/gallium/auxiliary \
-I$(top_srcdir)/src/gallium/drivers \
-I$(top_srcdir)/src/gallium/winsys \
+ -I$(top_builddir)/src/util/ \
+ -I$(top_builddir)/src/gallium/drivers/ \
$(DEFINES) \
$(PTHREAD_CFLAGS) \
$(LIBDRM_CFLAGS) \
@@ -655,16 +659,13 @@
distclean-compile:
-rm -f *.tab.c
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cayman_msaa.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_buffer_common.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_gpu_load.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_perfcounter.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_pipe_common.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_query.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_streamout.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_test_dma.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_texture.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_viewport.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_uvd.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_vce.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_vce_40_2_2.Plo@am__quote@
diff -Nru mesa-17.2.4/src/gallium/drivers/radeon/Makefile.sources mesa-17.3.3/src/gallium/drivers/radeon/Makefile.sources
--- mesa-17.2.4/src/gallium/drivers/radeon/Makefile.sources 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeon/Makefile.sources 2018-01-18 21:30:28.000000000 +0000
@@ -1,5 +1,4 @@
C_SOURCES := \
- cayman_msaa.c \
r600_buffer_common.c \
r600_cs.h \
r600_gpu_load.c \
@@ -8,10 +7,8 @@
r600_pipe_common.h \
r600_query.c \
r600_query.h \
- r600_streamout.c \
r600_test_dma.c \
r600_texture.c \
- r600_viewport.c \
radeon_uvd.c \
radeon_uvd.h \
radeon_vcn_dec.c \
diff -Nru mesa-17.2.4/src/gallium/drivers/radeon/meson.build mesa-17.3.3/src/gallium/drivers/radeon/meson.build
--- mesa-17.2.4/src/gallium/drivers/radeon/meson.build 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeon/meson.build 2018-02-01 16:17:32.000000000 +0000
@@ -0,0 +1,55 @@
+# Copyright © 2017 Dylan Baker
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+files_libradeon = files(
+ 'r600_buffer_common.c',
+ 'r600_cs.h',
+ 'r600_gpu_load.c',
+ 'r600_perfcounter.c',
+ 'r600_pipe_common.c',
+ 'r600_pipe_common.h',
+ 'r600_query.c',
+ 'r600_query.h',
+ 'r600_test_dma.c',
+ 'r600_texture.c',
+ 'radeon_uvd.c',
+ 'radeon_uvd.h',
+ 'radeon_vcn_dec.c',
+ 'radeon_vcn_dec.h',
+ 'radeon_vce_40_2_2.c',
+ 'radeon_vce_50.c',
+ 'radeon_vce_52.c',
+ 'radeon_vce.c',
+ 'radeon_vce.h',
+ 'radeon_video.c',
+ 'radeon_video.h',
+ 'radeon_winsys.h',
+)
+
+libradeon = static_library(
+ 'radeon',
+ files_libradeon,
+ c_args : ['-Wstrict-overflow=0', c_vis_args],
+ dependencies : [dep_llvm, dep_clock],
+ include_directories : [
+ inc_include, inc_src, inc_gallium, inc_gallium_aux, inc_gallium_drivers,
+ ],
+ build_by_default : false,
+)
diff -Nru mesa-17.2.4/src/gallium/drivers/radeon/r600_buffer_common.c mesa-17.3.3/src/gallium/drivers/radeon/r600_buffer_common.c
--- mesa-17.2.4/src/gallium/drivers/radeon/r600_buffer_common.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeon/r600_buffer_common.c 2018-01-18 21:30:28.000000000 +0000
@@ -30,9 +30,9 @@
#include <inttypes.h>
#include <stdio.h>
-bool r600_rings_is_buffer_referenced(struct r600_common_context *ctx,
- struct pb_buffer *buf,
- enum radeon_bo_usage usage)
+bool si_rings_is_buffer_referenced(struct r600_common_context *ctx,
+ struct pb_buffer *buf,
+ enum radeon_bo_usage usage)
{
if (ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, buf, usage)) {
return true;
@@ -44,9 +44,9 @@
return false;
}
-void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
- struct r600_resource *resource,
- unsigned usage)
+void *si_buffer_map_sync_with_rings(struct r600_common_context *ctx,
+ struct r600_resource *resource,
+ unsigned usage)
{
enum radeon_bo_usage rusage = RADEON_USAGE_READWRITE;
bool busy = false;
@@ -101,9 +101,9 @@
return ctx->ws->buffer_map(resource->buf, NULL, usage);
}
-void r600_init_resource_fields(struct r600_common_screen *rscreen,
- struct r600_resource *res,
- uint64_t size, unsigned alignment)
+void si_init_resource_fields(struct r600_common_screen *rscreen,
+ struct r600_resource *res,
+ uint64_t size, unsigned alignment)
{
struct r600_texture *rtex = (struct r600_texture*)res;
@@ -161,12 +161,18 @@
/* Tiled textures are unmappable. Always put them in VRAM. */
if ((res->b.b.target != PIPE_BUFFER && !rtex->surface.is_linear) ||
- res->flags & R600_RESOURCE_FLAG_UNMAPPABLE) {
+ res->b.b.flags & R600_RESOURCE_FLAG_UNMAPPABLE) {
res->domains = RADEON_DOMAIN_VRAM;
res->flags |= RADEON_FLAG_NO_CPU_ACCESS |
RADEON_FLAG_GTT_WC;
}
+ /* Displayable and shareable surfaces are not suballocated. */
+ if (res->b.b.bind & (PIPE_BIND_SHARED | PIPE_BIND_SCANOUT))
+ res->flags |= RADEON_FLAG_NO_SUBALLOC; /* shareable */
+ else
+ res->flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
+
/* If VRAM is just stolen system memory, allow both VRAM and
* GTT, whichever has free space. If a buffer is evicted from
* VRAM to GTT, it will stay there.
@@ -181,12 +187,9 @@
res->flags &= ~RADEON_FLAG_NO_CPU_ACCESS; /* disallowed with VRAM_GTT */
}
- if (rscreen->debug_flags & DBG_NO_WC)
+ if (rscreen->debug_flags & DBG(NO_WC))
res->flags &= ~RADEON_FLAG_GTT_WC;
- if (res->b.b.bind & PIPE_BIND_SHARED)
- res->flags |= RADEON_FLAG_NO_SUBALLOC;
-
/* Set expected VRAM and GART usage for the buffer. */
res->vram_usage = 0;
res->gart_usage = 0;
@@ -197,8 +200,8 @@
res->gart_usage = size;
}
-bool r600_alloc_resource(struct r600_common_screen *rscreen,
- struct r600_resource *res)
+bool si_alloc_resource(struct r600_common_screen *rscreen,
+ struct r600_resource *res)
{
struct pb_buffer *old_buf, *new_buf;
@@ -228,7 +231,7 @@
res->TC_L2_dirty = false;
/* Print debug information. */
- if (rscreen->debug_flags & DBG_VM && res->b.b.target == PIPE_BUFFER) {
+ if (rscreen->debug_flags & DBG(VM) && res->b.b.target == PIPE_BUFFER) {
fprintf(stderr, "VM start=0x%"PRIX64" end=0x%"PRIX64" | Buffer %"PRIu64" bytes\n",
res->gpu_address, res->gpu_address + res->buf->size,
res->buf->size);
@@ -266,7 +269,7 @@
return false;
/* Check if mapping this buffer would cause waiting for the GPU. */
- if (r600_rings_is_buffer_referenced(rctx, rbuffer->buf, RADEON_USAGE_READWRITE) ||
+ if (si_rings_is_buffer_referenced(rctx, rbuffer->buf, RADEON_USAGE_READWRITE) ||
!rctx->ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE)) {
rctx->invalidate_buffer(&rctx->b, &rbuffer->b.b);
} else {
@@ -277,7 +280,7 @@
}
/* Replace the storage of dst with src. */
-void r600_replace_buffer_storage(struct pipe_context *ctx,
+void si_replace_buffer_storage(struct pipe_context *ctx,
struct pipe_resource *dst,
struct pipe_resource *src)
{
@@ -300,8 +303,8 @@
rctx->rebind_buffer(ctx, dst, old_gpu_address);
}
-void r600_invalidate_resource(struct pipe_context *ctx,
- struct pipe_resource *resource)
+void si_invalidate_resource(struct pipe_context *ctx,
+ struct pipe_resource *resource)
{
struct r600_common_context *rctx = (struct r600_common_context*)ctx;
struct r600_resource *rbuffer = r600_resource(resource);
@@ -383,7 +386,7 @@
/* See if the buffer range being mapped has never been initialized,
* in which case it can be mapped unsynchronized. */
if (!(usage & (PIPE_TRANSFER_UNSYNCHRONIZED |
- TC_TRANSFER_MAP_IGNORE_VALID_RANGE)) &&
+ TC_TRANSFER_MAP_NO_INFER_UNSYNCHRONIZED)) &&
usage & PIPE_TRANSFER_WRITE &&
!rbuffer->b.is_shared &&
!util_ranges_intersect(&rbuffer->valid_buffer_range, box->x, box->x + box->width)) {
@@ -411,7 +414,7 @@
}
if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
- !(rscreen->debug_flags & DBG_NO_DISCARD_RANGE) &&
+ !(rscreen->debug_flags & DBG(NO_DISCARD_RANGE)) &&
((!(usage & (PIPE_TRANSFER_UNSYNCHRONIZED |
PIPE_TRANSFER_PERSISTENT)) &&
r600_can_dma_copy_buffer(rctx, box->x, 0, box->width)) ||
@@ -421,7 +424,7 @@
/* Check if mapping this buffer would cause waiting for the GPU.
*/
if (rbuffer->flags & RADEON_FLAG_SPARSE ||
- r600_rings_is_buffer_referenced(rctx, rbuffer->buf, RADEON_USAGE_READWRITE) ||
+ si_rings_is_buffer_referenced(rctx, rbuffer->buf, RADEON_USAGE_READWRITE) ||
!rctx->ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE)) {
/* Do a wait-free write-only transfer using a temporary buffer. */
unsigned offset;
@@ -464,7 +467,7 @@
box->x % R600_MAP_BUFFER_ALIGNMENT,
0, 0, resource, 0, box);
- data = r600_buffer_map_sync_with_rings(rctx, staging,
+ data = si_buffer_map_sync_with_rings(rctx, staging,
usage & ~PIPE_TRANSFER_UNSYNCHRONIZED);
if (!data) {
r600_resource_reference(&staging, NULL);
@@ -479,7 +482,7 @@
}
}
- data = r600_buffer_map_sync_with_rings(rctx, rbuffer, usage);
+ data = si_buffer_map_sync_with_rings(rctx, rbuffer, usage);
if (!data) {
return NULL;
}
@@ -549,10 +552,10 @@
slab_free(&rctx->pool_transfers, transfer);
}
-void r600_buffer_subdata(struct pipe_context *ctx,
- struct pipe_resource *buffer,
- unsigned usage, unsigned offset,
- unsigned size, const void *data)
+void si_buffer_subdata(struct pipe_context *ctx,
+ struct pipe_resource *buffer,
+ unsigned usage, unsigned offset,
+ unsigned size, const void *data)
{
struct pipe_transfer *transfer = NULL;
struct pipe_box box;
@@ -603,30 +606,30 @@
return rbuffer;
}
-struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
- const struct pipe_resource *templ,
- unsigned alignment)
+struct pipe_resource *si_buffer_create(struct pipe_screen *screen,
+ const struct pipe_resource *templ,
+ unsigned alignment)
{
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
struct r600_resource *rbuffer = r600_alloc_buffer_struct(screen, templ);
- r600_init_resource_fields(rscreen, rbuffer, templ->width0, alignment);
+ si_init_resource_fields(rscreen, rbuffer, templ->width0, alignment);
if (templ->flags & PIPE_RESOURCE_FLAG_SPARSE)
rbuffer->flags |= RADEON_FLAG_SPARSE;
- if (!r600_alloc_resource(rscreen, rbuffer)) {
+ if (!si_alloc_resource(rscreen, rbuffer)) {
FREE(rbuffer);
return NULL;
}
return &rbuffer->b.b;
}
-struct pipe_resource *r600_aligned_buffer_create(struct pipe_screen *screen,
- unsigned flags,
- unsigned usage,
- unsigned size,
- unsigned alignment)
+struct pipe_resource *si_aligned_buffer_create(struct pipe_screen *screen,
+ unsigned flags,
+ unsigned usage,
+ unsigned size,
+ unsigned alignment)
{
struct pipe_resource buffer;
@@ -640,13 +643,13 @@
buffer.height0 = 1;
buffer.depth0 = 1;
buffer.array_size = 1;
- return r600_buffer_create(screen, &buffer, alignment);
+ return si_buffer_create(screen, &buffer, alignment);
}
struct pipe_resource *
-r600_buffer_from_user_memory(struct pipe_screen *screen,
- const struct pipe_resource *templ,
- void *user_memory)
+si_buffer_from_user_memory(struct pipe_screen *screen,
+ const struct pipe_resource *templ,
+ void *user_memory)
{
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
struct radeon_winsys *ws = rscreen->ws;
diff -Nru mesa-17.2.4/src/gallium/drivers/radeon/r600_cs.h mesa-17.3.3/src/gallium/drivers/radeon/r600_cs.h
--- mesa-17.2.4/src/gallium/drivers/radeon/r600_cs.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeon/r600_cs.h 2018-01-18 21:30:28.000000000 +0000
@@ -31,7 +31,7 @@
#define R600_CS_H
#include "r600_pipe_common.h"
-#include "amd/common/r600d_common.h"
+#include "amd/common/sid.h"
/**
* Return true if there is enough memory in VRAM and GTT for the buffers
@@ -113,27 +113,12 @@
return radeon_add_to_buffer_list(rctx, ring, rbo, usage, priority);
}
-static inline void r600_emit_reloc(struct r600_common_context *rctx,
- struct r600_ring *ring, struct r600_resource *rbo,
- enum radeon_bo_usage usage,
- enum radeon_bo_priority priority)
-{
- struct radeon_winsys_cs *cs = ring->cs;
- bool has_vm = ((struct r600_common_screen*)rctx->b.screen)->info.has_virtual_memory;
- unsigned reloc = radeon_add_to_buffer_list(rctx, ring, rbo, usage, priority);
-
- if (!has_vm) {
- radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
- radeon_emit(cs, reloc);
- }
-}
-
static inline void radeon_set_config_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
{
- assert(reg < R600_CONTEXT_REG_OFFSET);
+ assert(reg < SI_CONTEXT_REG_OFFSET);
assert(cs->current.cdw + 2 + num <= cs->current.max_dw);
radeon_emit(cs, PKT3(PKT3_SET_CONFIG_REG, num, 0));
- radeon_emit(cs, (reg - R600_CONFIG_REG_OFFSET) >> 2);
+ radeon_emit(cs, (reg - SI_CONFIG_REG_OFFSET) >> 2);
}
static inline void radeon_set_config_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
@@ -144,10 +129,10 @@
static inline void radeon_set_context_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
{
- assert(reg >= R600_CONTEXT_REG_OFFSET);
+ assert(reg >= SI_CONTEXT_REG_OFFSET);
assert(cs->current.cdw + 2 + num <= cs->current.max_dw);
radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, num, 0));
- radeon_emit(cs, (reg - R600_CONTEXT_REG_OFFSET) >> 2);
+ radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2);
}
static inline void radeon_set_context_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
@@ -160,10 +145,10 @@
unsigned reg, unsigned idx,
unsigned value)
{
- assert(reg >= R600_CONTEXT_REG_OFFSET);
+ assert(reg >= SI_CONTEXT_REG_OFFSET);
assert(cs->current.cdw + 3 <= cs->current.max_dw);
radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, 1, 0));
- radeon_emit(cs, (reg - R600_CONTEXT_REG_OFFSET) >> 2 | (idx << 28));
+ radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2 | (idx << 28));
radeon_emit(cs, value);
}
diff -Nru mesa-17.2.4/src/gallium/drivers/radeon/r600_gpu_load.c mesa-17.3.3/src/gallium/drivers/radeon/r600_gpu_load.c
--- mesa-17.2.4/src/gallium/drivers/radeon/r600_gpu_load.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeon/r600_gpu_load.c 2018-01-18 21:30:28.000000000 +0000
@@ -68,7 +68,6 @@
#define SURFACE_SYNC_BUSY(x) (((x) >> 21) & 0x1)
#define DMA_BUSY(x) (((x) >> 22) & 0x1)
#define SCRATCH_RAM_BUSY(x) (((x) >> 24) & 0x1)
-#define CE_BUSY(x) (((x) >> 26) & 0x1)
#define IDENTITY(x) x
@@ -121,9 +120,8 @@
UPDATE_COUNTER(meq, MEQ_BUSY);
UPDATE_COUNTER(me, ME_BUSY);
UPDATE_COUNTER(surf_sync, SURFACE_SYNC_BUSY);
- UPDATE_COUNTER(dma, DMA_BUSY);
+ UPDATE_COUNTER(cp_dma, DMA_BUSY);
UPDATE_COUNTER(scratch_ram, SCRATCH_RAM_BUSY);
- UPDATE_COUNTER(ce, CE_BUSY);
}
value = gui_busy || sdma_busy;
@@ -164,7 +162,7 @@
return 0;
}
-void r600_gpu_load_kill_thread(struct r600_common_screen *rscreen)
+void si_gpu_load_kill_thread(struct r600_common_screen *rscreen)
{
if (!rscreen->gpu_load_thread)
return;
@@ -262,25 +260,23 @@
return BUSY_INDEX(rscreen, me);
case R600_QUERY_GPU_SURF_SYNC_BUSY:
return BUSY_INDEX(rscreen, surf_sync);
- case R600_QUERY_GPU_DMA_BUSY:
- return BUSY_INDEX(rscreen, dma);
+ case R600_QUERY_GPU_CP_DMA_BUSY:
+ return BUSY_INDEX(rscreen, cp_dma);
case R600_QUERY_GPU_SCRATCH_RAM_BUSY:
return BUSY_INDEX(rscreen, scratch_ram);
- case R600_QUERY_GPU_CE_BUSY:
- return BUSY_INDEX(rscreen, ce);
default:
unreachable("invalid query type");
}
}
-uint64_t r600_begin_counter(struct r600_common_screen *rscreen, unsigned type)
+uint64_t si_begin_counter(struct r600_common_screen *rscreen, unsigned type)
{
unsigned busy_index = busy_index_from_type(rscreen, type);
return r600_read_mmio_counter(rscreen, busy_index);
}
-unsigned r600_end_counter(struct r600_common_screen *rscreen, unsigned type,
- uint64_t begin)
+unsigned si_end_counter(struct r600_common_screen *rscreen, unsigned type,
+ uint64_t begin)
{
unsigned busy_index = busy_index_from_type(rscreen, type);
return r600_end_mmio_counter(rscreen, begin, busy_index);
diff -Nru mesa-17.2.4/src/gallium/drivers/radeon/r600_perfcounter.c mesa-17.3.3/src/gallium/drivers/radeon/r600_perfcounter.c
--- mesa-17.2.4/src/gallium/drivers/radeon/r600_perfcounter.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeon/r600_perfcounter.c 2018-01-18 21:30:28.000000000 +0000
@@ -28,7 +28,7 @@
#include "util/u_memory.h"
#include "r600_query.h"
#include "r600_pipe_common.h"
-#include "amd/common/r600d_common.h"
+#include "amd/common/sid.h"
/* Max counters per HW block */
#define R600_QUERY_MAX_COUNTERS 16
@@ -112,7 +112,7 @@
FREE(query->counters);
- r600_query_hw_destroy(rscreen, rquery);
+ si_query_hw_destroy(rscreen, rquery);
}
static bool r600_pc_query_prepare_buffer(struct r600_common_screen *screen,
@@ -217,9 +217,9 @@
static struct r600_query_ops batch_query_ops = {
.destroy = r600_pc_query_destroy,
- .begin = r600_query_hw_begin,
- .end = r600_query_hw_end,
- .get_result = r600_query_hw_get_result
+ .begin = si_query_hw_begin,
+ .end = si_query_hw_end,
+ .get_result = si_query_hw_get_result
};
static struct r600_query_hw_ops batch_query_hw_ops = {
@@ -297,9 +297,9 @@
return group;
}
-struct pipe_query *r600_create_batch_query(struct pipe_context *ctx,
- unsigned num_queries,
- unsigned *query_types)
+struct pipe_query *si_create_batch_query(struct pipe_context *ctx,
+ unsigned num_queries,
+ unsigned *query_types)
{
struct r600_common_screen *screen =
(struct r600_common_screen *)ctx->screen;
@@ -417,7 +417,7 @@
counter->qwords *= block->num_instances;
}
- if (!r600_query_hw_init(screen, &query->b))
+ if (!si_query_hw_init(screen, &query->b))
goto error;
return (struct pipe_query *)query;
@@ -511,9 +511,9 @@
return true;
}
-int r600_get_perfcounter_info(struct r600_common_screen *screen,
- unsigned index,
- struct pipe_driver_query_info *info)
+int si_get_perfcounter_info(struct r600_common_screen *screen,
+ unsigned index,
+ struct pipe_driver_query_info *info)
{
struct r600_perfcounters *pc = screen->perfcounters;
struct r600_perfcounter_block *block;
@@ -553,9 +553,9 @@
return 1;
}
-int r600_get_perfcounter_group_info(struct r600_common_screen *screen,
- unsigned index,
- struct pipe_driver_query_group_info *info)
+int si_get_perfcounter_group_info(struct r600_common_screen *screen,
+ unsigned index,
+ struct pipe_driver_query_group_info *info)
{
struct r600_perfcounters *pc = screen->perfcounters;
struct r600_perfcounter_block *block;
@@ -580,13 +580,13 @@
return 1;
}
-void r600_perfcounters_destroy(struct r600_common_screen *rscreen)
+void si_perfcounters_destroy(struct r600_common_screen *rscreen)
{
if (rscreen->perfcounters)
rscreen->perfcounters->cleanup(rscreen);
}
-bool r600_perfcounters_init(struct r600_perfcounters *pc,
+bool si_perfcounters_init(struct r600_perfcounters *pc,
unsigned num_blocks)
{
pc->blocks = CALLOC(num_blocks, sizeof(struct r600_perfcounter_block));
@@ -599,11 +599,11 @@
return true;
}
-void r600_perfcounters_add_block(struct r600_common_screen *rscreen,
- struct r600_perfcounters *pc,
- const char *name, unsigned flags,
- unsigned counters, unsigned selectors,
- unsigned instances, void *data)
+void si_perfcounters_add_block(struct r600_common_screen *rscreen,
+ struct r600_perfcounters *pc,
+ const char *name, unsigned flags,
+ unsigned counters, unsigned selectors,
+ unsigned instances, void *data)
{
struct r600_perfcounter_block *block = &pc->blocks[pc->num_blocks];
@@ -636,7 +636,7 @@
pc->num_groups += block->num_groups;
}
-void r600_perfcounters_do_destroy(struct r600_perfcounters *pc)
+void si_perfcounters_do_destroy(struct r600_perfcounters *pc)
{
unsigned i;
diff -Nru mesa-17.2.4/src/gallium/drivers/radeon/r600_pipe_common.c mesa-17.3.3/src/gallium/drivers/radeon/r600_pipe_common.c
--- mesa-17.2.4/src/gallium/drivers/radeon/r600_pipe_common.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeon/r600_pipe_common.c 2018-01-18 21:30:28.000000000 +0000
@@ -36,20 +36,13 @@
#include "vl/vl_decoder.h"
#include "vl/vl_video_buffer.h"
#include "radeon/radeon_video.h"
+#include "amd/common/sid.h"
#include
#include
+#include
-#ifndef HAVE_LLVM
-#define HAVE_LLVM 0
-#endif
-
-#if HAVE_LLVM
#include
-#endif
-#ifndef MESA_LLVM_VERSION_PATCH
-#define MESA_LLVM_VERSION_PATCH 0
-#endif
struct r600_multi_fence {
struct pipe_reference reference;
@@ -66,12 +59,12 @@
/*
* shader binary helpers.
*/
-void radeon_shader_binary_init(struct ac_shader_binary *b)
+void si_radeon_shader_binary_init(struct ac_shader_binary *b)
{
memset(b, 0, sizeof(*b));
}
-void radeon_shader_binary_clean(struct ac_shader_binary *b)
+void si_radeon_shader_binary_clean(struct ac_shader_binary *b)
{
if (!b)
return;
@@ -99,21 +92,51 @@
* \param old_value Previous fence value (for a bug workaround)
* \param new_value Fence value to write for this event.
*/
-void r600_gfx_write_event_eop(struct r600_common_context *ctx,
- unsigned event, unsigned event_flags,
- unsigned data_sel,
- struct r600_resource *buf, uint64_t va,
- uint32_t old_fence, uint32_t new_fence)
+void si_gfx_write_event_eop(struct r600_common_context *ctx,
+ unsigned event, unsigned event_flags,
+ unsigned data_sel,
+ struct r600_resource *buf, uint64_t va,
+ uint32_t new_fence, unsigned query_type)
{
struct radeon_winsys_cs *cs = ctx->gfx.cs;
unsigned op = EVENT_TYPE(event) |
EVENT_INDEX(5) |
event_flags;
+ unsigned sel = EOP_DATA_SEL(data_sel);
+
+ /* Wait for write confirmation before writing data, but don't send
+ * an interrupt. */
+ if (data_sel != EOP_DATA_SEL_DISCARD)
+ sel |= EOP_INT_SEL(EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM);
if (ctx->chip_class >= GFX9) {
+ /* A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion
+ * counters) must immediately precede every timestamp event to
+ * prevent a GPU hang on GFX9.
+ *
+ * Occlusion queries don't need to do it here, because they
+ * always do ZPASS_DONE before the timestamp.
+ */
+ if (ctx->chip_class == GFX9 &&
+ query_type != PIPE_QUERY_OCCLUSION_COUNTER &&
+ query_type != PIPE_QUERY_OCCLUSION_PREDICATE &&
+ query_type != PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE) {
+ struct r600_resource *scratch = ctx->eop_bug_scratch;
+
+ assert(16 * ctx->screen->info.num_render_backends <=
+ scratch->b.b.width0);
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
+ radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
+ radeon_emit(cs, scratch->gpu_address);
+ radeon_emit(cs, scratch->gpu_address >> 32);
+
+ radeon_add_to_buffer_list(ctx, &ctx->gfx, scratch,
+ RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
+ }
+
radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, 6, 0));
radeon_emit(cs, op);
- radeon_emit(cs, EOP_DATA_SEL(data_sel));
+ radeon_emit(cs, sel);
radeon_emit(cs, va); /* address lo */
radeon_emit(cs, va >> 32); /* address hi */
radeon_emit(cs, new_fence); /* immediate data lo */
@@ -122,6 +145,9 @@
} else {
if (ctx->chip_class == CIK ||
ctx->chip_class == VI) {
+ struct r600_resource *scratch = ctx->eop_bug_scratch;
+ uint64_t va = scratch->gpu_address;
+
/* Two EOP events are required to make all engines go idle
* (and optional cache flushes executed) before the timestamp
* is written.
@@ -129,25 +155,29 @@
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
radeon_emit(cs, op);
radeon_emit(cs, va);
- radeon_emit(cs, ((va >> 32) & 0xffff) | EOP_DATA_SEL(data_sel));
- radeon_emit(cs, old_fence); /* immediate data */
+ radeon_emit(cs, ((va >> 32) & 0xffff) | sel);
+ radeon_emit(cs, 0); /* immediate data */
radeon_emit(cs, 0); /* unused */
+
+ radeon_add_to_buffer_list(ctx, &ctx->gfx, scratch,
+ RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
}
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
radeon_emit(cs, op);
radeon_emit(cs, va);
- radeon_emit(cs, ((va >> 32) & 0xffff) | EOP_DATA_SEL(data_sel));
+ radeon_emit(cs, ((va >> 32) & 0xffff) | sel);
radeon_emit(cs, new_fence); /* immediate data */
radeon_emit(cs, 0); /* unused */
}
- if (buf)
- r600_emit_reloc(ctx, &ctx->gfx, buf, RADEON_USAGE_WRITE,
- RADEON_PRIO_QUERY);
+ if (buf) {
+ radeon_add_to_buffer_list(ctx, &ctx->gfx, buf, RADEON_USAGE_WRITE,
+ RADEON_PRIO_QUERY);
+ }
}
-unsigned r600_gfx_write_fence_dwords(struct r600_common_screen *screen)
+unsigned si_gfx_write_fence_dwords(struct r600_common_screen *screen)
{
unsigned dwords = 6;
@@ -161,8 +191,8 @@
return dwords;
}
-void r600_gfx_wait_fence(struct r600_common_context *ctx,
- uint64_t va, uint32_t ref, uint32_t mask)
+void si_gfx_wait_fence(struct r600_common_context *ctx,
+ uint64_t va, uint32_t ref, uint32_t mask)
{
struct radeon_winsys_cs *cs = ctx->gfx.cs;
@@ -175,72 +205,6 @@
radeon_emit(cs, 4); /* poll interval */
}
-void r600_draw_rectangle(struct blitter_context *blitter,
- int x1, int y1, int x2, int y2, float depth,
- enum blitter_attrib_type type,
- const union pipe_color_union *attrib)
-{
- struct r600_common_context *rctx =
- (struct r600_common_context*)util_blitter_get_pipe(blitter);
- struct pipe_viewport_state viewport;
- struct pipe_resource *buf = NULL;
- unsigned offset = 0;
- float *vb;
-
- if (type == UTIL_BLITTER_ATTRIB_TEXCOORD) {
- util_blitter_draw_rectangle(blitter, x1, y1, x2, y2, depth, type, attrib);
- return;
- }
-
- /* Some operations (like color resolve on r6xx) don't work
- * with the conventional primitive types.
- * One that works is PT_RECTLIST, which we use here. */
-
- /* setup viewport */
- viewport.scale[0] = 1.0f;
- viewport.scale[1] = 1.0f;
- viewport.scale[2] = 1.0f;
- viewport.translate[0] = 0.0f;
- viewport.translate[1] = 0.0f;
- viewport.translate[2] = 0.0f;
- rctx->b.set_viewport_states(&rctx->b, 0, 1, &viewport);
-
- /* Upload vertices. The hw rectangle has only 3 vertices,
- * I guess the 4th one is derived from the first 3.
- * The vertex specification should match u_blitter's vertex element state. */
- u_upload_alloc(rctx->b.stream_uploader, 0, sizeof(float) * 24,
- rctx->screen->info.tcc_cache_line_size,
- &offset, &buf, (void**)&vb);
- if (!buf)
- return;
-
- vb[0] = x1;
- vb[1] = y1;
- vb[2] = depth;
- vb[3] = 1;
-
- vb[8] = x1;
- vb[9] = y2;
- vb[10] = depth;
- vb[11] = 1;
-
- vb[16] = x2;
- vb[17] = y1;
- vb[18] = depth;
- vb[19] = 1;
-
- if (attrib) {
- memcpy(vb+4, attrib->f, sizeof(float)*4);
- memcpy(vb+12, attrib->f, sizeof(float)*4);
- memcpy(vb+20, attrib->f, sizeof(float)*4);
- }
-
- /* draw */
- util_draw_vertex_buffer(&rctx->b, NULL, buf, blitter->vb_slot, offset,
- R600_PRIM_RECTANGLE_LIST, 3, 2);
- pipe_resource_reference(&buf, NULL);
-}
-
static void r600_dma_emit_wait_idle(struct r600_common_context *rctx)
{
struct radeon_winsys_cs *cs = rctx->dma.cs;
@@ -248,16 +212,12 @@
/* NOP waits for idle on Evergreen and later. */
if (rctx->chip_class >= CIK)
radeon_emit(cs, 0x00000000); /* NOP */
- else if (rctx->chip_class >= EVERGREEN)
+ else
radeon_emit(cs, 0xf0000000); /* NOP */
- else {
- /* TODO: R600-R700 should use the FENCE packet.
- * CS checker support is required. */
- }
}
-void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
- struct r600_resource *dst, struct r600_resource *src)
+void si_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
+ struct r600_resource *dst, struct r600_resource *src)
{
uint64_t vram = ctx->dma.cs->used_vram;
uint64_t gtt = ctx->dma.cs->used_gart;
@@ -334,29 +294,131 @@
{
}
-void r600_preflush_suspend_features(struct r600_common_context *ctx)
+void si_preflush_suspend_features(struct r600_common_context *ctx)
{
/* suspend queries */
if (!LIST_IS_EMPTY(&ctx->active_queries))
- r600_suspend_queries(ctx);
+ si_suspend_queries(ctx);
+}
+
+void si_postflush_resume_features(struct r600_common_context *ctx)
+{
+ /* resume queries */
+ if (!LIST_IS_EMPTY(&ctx->active_queries))
+ si_resume_queries(ctx);
+}
+
+static void r600_add_fence_dependency(struct r600_common_context *rctx,
+ struct pipe_fence_handle *fence)
+{
+ struct radeon_winsys *ws = rctx->ws;
+
+ if (rctx->dma.cs)
+ ws->cs_add_fence_dependency(rctx->dma.cs, fence);
+ ws->cs_add_fence_dependency(rctx->gfx.cs, fence);
+}
+
+static void r600_fence_server_sync(struct pipe_context *ctx,
+ struct pipe_fence_handle *fence)
+{
+ struct r600_common_context *rctx = (struct r600_common_context *)ctx;
+ struct r600_multi_fence *rfence = (struct r600_multi_fence *)fence;
+
+ /* Only amdgpu needs to handle fence dependencies (for fence imports).
+ * radeon synchronizes all rings by default and will not implement
+ * fence imports.
+ */
+ if (rctx->screen->info.drm_major == 2)
+ return;
+
+ /* Only imported fences need to be handled by fence_server_sync,
+ * because the winsys handles synchronizations automatically for BOs
+ * within the process.
+ *
+ * Simply skip unflushed fences here, and the winsys will drop no-op
+ * dependencies (i.e. dependencies within the same ring).
+ */
+ if (rfence->gfx_unflushed.ctx)
+ return;
+
+ /* All unflushed commands will not start execution before
+ * this fence dependency is signalled.
+ *
+ * Should we flush the context to allow more GPU parallelism?
+ */
+ if (rfence->sdma)
+ r600_add_fence_dependency(rctx, rfence->sdma);
+ if (rfence->gfx)
+ r600_add_fence_dependency(rctx, rfence->gfx);
+}
- ctx->streamout.suspended = false;
- if (ctx->streamout.begin_emitted) {
- r600_emit_streamout_end(ctx);
- ctx->streamout.suspended = true;
+static void r600_create_fence_fd(struct pipe_context *ctx,
+ struct pipe_fence_handle **pfence, int fd)
+{
+ struct r600_common_screen *rscreen = (struct r600_common_screen*)ctx->screen;
+ struct radeon_winsys *ws = rscreen->ws;
+ struct r600_multi_fence *rfence;
+
+ *pfence = NULL;
+
+ if (!rscreen->info.has_sync_file)
+ return;
+
+ rfence = CALLOC_STRUCT(r600_multi_fence);
+ if (!rfence)
+ return;
+
+ pipe_reference_init(&rfence->reference, 1);
+ rfence->gfx = ws->fence_import_sync_file(ws, fd);
+ if (!rfence->gfx) {
+ FREE(rfence);
+ return;
}
+
+ *pfence = (struct pipe_fence_handle*)rfence;
}
-void r600_postflush_resume_features(struct r600_common_context *ctx)
+static int r600_fence_get_fd(struct pipe_screen *screen,
+ struct pipe_fence_handle *fence)
{
- if (ctx->streamout.suspended) {
- ctx->streamout.append_bitmask = ctx->streamout.enabled_mask;
- r600_streamout_buffers_dirty(ctx);
+ struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
+ struct radeon_winsys *ws = rscreen->ws;
+ struct r600_multi_fence *rfence = (struct r600_multi_fence *)fence;
+ int gfx_fd = -1, sdma_fd = -1;
+
+ if (!rscreen->info.has_sync_file)
+ return -1;
+
+ /* Deferred fences aren't supported. */
+ assert(!rfence->gfx_unflushed.ctx);
+ if (rfence->gfx_unflushed.ctx)
+ return -1;
+
+ if (rfence->sdma) {
+ sdma_fd = ws->fence_export_sync_file(ws, rfence->sdma);
+ if (sdma_fd == -1)
+ return -1;
+ }
+ if (rfence->gfx) {
+ gfx_fd = ws->fence_export_sync_file(ws, rfence->gfx);
+ if (gfx_fd == -1) {
+ if (sdma_fd != -1)
+ close(sdma_fd);
+ return -1;
+ }
}
- /* resume queries */
- if (!LIST_IS_EMPTY(&ctx->active_queries))
- r600_resume_queries(ctx);
+ /* If we don't have FDs at this point, it means we don't have fences
+ * either. */
+ if (sdma_fd == -1)
+ return gfx_fd;
+ if (gfx_fd == -1)
+ return sdma_fd;
+
+ /* Get a fence that will be a combination of both fences. */
+ sync_accumulate("radeonsi", &gfx_fd, sdma_fd);
+ close(sdma_fd);
+ return gfx_fd;
}
static void r600_flush_from_st(struct pipe_context *ctx,
@@ -387,9 +449,12 @@
/* Instead of flushing, create a deferred fence. Constraints:
* - The state tracker must allow a deferred flush.
* - The state tracker must request a fence.
+ * - fence_get_fd is not allowed.
* Thread safety in fence_finish must be ensured by the state tracker.
*/
- if (flags & PIPE_FLUSH_DEFERRED && fence) {
+ if (flags & PIPE_FLUSH_DEFERRED &&
+ !(flags & PIPE_FLUSH_FENCE_FD) &&
+ fence) {
gfx_fence = rctx->ws->cs_get_next_fence(rctx->gfx.cs);
deferred_fence = true;
} else {
@@ -401,8 +466,11 @@
if (fence) {
struct r600_multi_fence *multi_fence =
CALLOC_STRUCT(r600_multi_fence);
- if (!multi_fence)
- return;
+ if (!multi_fence) {
+ ws->fence_reference(&sdma_fence, NULL);
+ ws->fence_reference(&gfx_fence, NULL);
+ goto finish;
+ }
multi_fence->reference.count = 1;
/* If both fences are NULL, fence_finish will always return true. */
@@ -417,7 +485,7 @@
screen->fence_reference(screen, fence, NULL);
*fence = (struct pipe_fence_handle*)multi_fence;
}
-
+finish:
if (!(flags & PIPE_FLUSH_DEFERRED)) {
if (rctx->dma.cs)
ws->cs_sync_flush(rctx->dma.cs);
@@ -432,7 +500,7 @@
struct radeon_winsys_cs *cs = rctx->dma.cs;
struct radeon_saved_cs saved;
bool check_vm =
- (rctx->screen->debug_flags & DBG_CHECK_VM) &&
+ (rctx->screen->debug_flags & DBG(CHECK_VM)) &&
rctx->check_vm_faults;
if (!radeon_emitted(cs, 0)) {
@@ -442,7 +510,7 @@
}
if (check_vm)
- radeon_save_cs(rctx->ws, cs, &saved);
+ si_save_cs(rctx->ws, cs, &saved, true);
rctx->ws->cs_flush(cs, flags, &rctx->last_sdma_fence);
if (fence)
@@ -455,7 +523,7 @@
rctx->ws->fence_wait(rctx->ws, rctx->last_sdma_fence, 800*1000*1000);
rctx->check_vm_faults(rctx, &saved, RING_DMA);
- radeon_clear_saved_cs(&saved);
+ si_clear_saved_cs(&saved);
}
}
@@ -463,10 +531,10 @@
* Store a linearized copy of all chunks of \p cs together with the buffer
* list in \p saved.
*/
-void radeon_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs,
- struct radeon_saved_cs *saved)
+void si_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs,
+ struct radeon_saved_cs *saved, bool get_buffer_list)
{
- void *buf;
+ uint32_t *buf;
unsigned i;
/* Save the IB chunks. */
@@ -482,6 +550,9 @@
}
memcpy(buf, cs->current.buf, cs->current.cdw * 4);
+ if (!get_buffer_list)
+ return;
+
/* Save the buffer list. */
saved->bo_count = ws->cs_get_buffer_list(cs, NULL);
saved->bo_list = CALLOC(saved->bo_count,
@@ -499,7 +570,7 @@
memset(saved, 0, sizeof(*saved));
}
-void radeon_clear_saved_cs(struct radeon_saved_cs *saved)
+void si_clear_saved_cs(struct radeon_saved_cs *saved)
{
FREE(saved->ib);
FREE(saved->bo_list);
@@ -543,7 +614,7 @@
sizeof(rctx->device_reset_callback));
}
-bool r600_check_device_reset(struct r600_common_context *rctx)
+bool si_check_device_reset(struct r600_common_context *rctx)
{
enum pipe_reset_status status;
@@ -605,9 +676,9 @@
return ctx->ws->buffer_commit(res->buf, box->x, box->width, commit);
}
-bool r600_common_context_init(struct r600_common_context *rctx,
- struct r600_common_screen *rscreen,
- unsigned context_flags)
+bool si_common_context_init(struct r600_common_context *rctx,
+ struct r600_common_screen *rscreen,
+ unsigned context_flags)
{
slab_create_child(&rctx->pool_transfers, &rscreen->pool_transfers);
slab_create_child(&rctx->pool_transfers_unsync, &rscreen->pool_transfers);
@@ -617,7 +688,7 @@
rctx->family = rscreen->family;
rctx->chip_class = rscreen->chip_class;
- rctx->b.invalidate_resource = r600_invalidate_resource;
+ rctx->b.invalidate_resource = si_invalidate_resource;
rctx->b.resource_commit = r600_resource_commit;
rctx->b.transfer_map = u_transfer_map_vtbl;
rctx->b.transfer_flush_region = u_transfer_flush_region_vtbl;
@@ -626,20 +697,10 @@
rctx->b.memory_barrier = r600_memory_barrier;
rctx->b.flush = r600_flush_from_st;
rctx->b.set_debug_callback = r600_set_debug_callback;
+ rctx->b.create_fence_fd = r600_create_fence_fd;
+ rctx->b.fence_server_sync = r600_fence_server_sync;
rctx->dma_clear_buffer = r600_dma_clear_buffer_fallback;
-
- /* evergreen_compute.c has a special codepath for global buffers.
- * Everything else can use the direct path.
- */
- if ((rscreen->chip_class == EVERGREEN || rscreen->chip_class == CAYMAN) &&
- (context_flags & PIPE_CONTEXT_COMPUTE_ONLY))
- rctx->b.buffer_subdata = u_default_buffer_subdata;
- else
- rctx->b.buffer_subdata = r600_buffer_subdata;
-
- /* Set a reasonable default to avoid a performance regression in r600
- * on stable branches. */
- rctx->current_rast_prim = PIPE_PRIM_TRIANGLES;
+ rctx->b.buffer_subdata = si_buffer_subdata;
if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 43) {
rctx->b.get_device_reset_status = r600_get_reset_status;
@@ -650,11 +711,18 @@
rctx->b.set_device_reset_callback = r600_set_device_reset_callback;
- r600_init_context_texture_functions(rctx);
- r600_init_viewport_functions(rctx);
- r600_streamout_init(rctx);
- r600_query_init(rctx);
- cayman_init_msaa(&rctx->b);
+ si_init_context_texture_functions(rctx);
+ si_init_query_functions(rctx);
+
+ if (rctx->chip_class == CIK ||
+ rctx->chip_class == VI ||
+ rctx->chip_class == GFX9) {
+ rctx->eop_bug_scratch = (struct r600_resource*)
+ pipe_buffer_create(&rscreen->b, 0, PIPE_USAGE_DEFAULT,
+ 16 * rscreen->info.num_render_backends);
+ if (!rctx->eop_bug_scratch)
+ return false;
+ }
rctx->allocator_zeroed_memory =
u_suballocator_create(&rctx->b, rscreen->info.gart_page_size,
@@ -676,7 +744,7 @@
if (!rctx->ctx)
return false;
- if (rscreen->info.num_sdma_rings && !(rscreen->debug_flags & DBG_NO_ASYNC_DMA)) {
+ if (rscreen->info.num_sdma_rings && !(rscreen->debug_flags & DBG(NO_ASYNC_DMA))) {
rctx->dma.cs = rctx->ws->cs_create(rctx->ctx, RING_DMA,
r600_flush_dma_ring,
rctx);
@@ -686,7 +754,7 @@
return true;
}
-void r600_common_context_cleanup(struct r600_common_context *rctx)
+void si_common_context_cleanup(struct r600_common_context *rctx)
{
unsigned i,j;
@@ -725,6 +793,7 @@
}
rctx->ws->fence_reference(&rctx->last_gfx_fence, NULL);
rctx->ws->fence_reference(&rctx->last_sdma_fence, NULL);
+ r600_resource_reference(&rctx->eop_bug_scratch, NULL);
}
/*
@@ -733,52 +802,55 @@
static const struct debug_named_value common_debug_options[] = {
/* logging */
- { "tex", DBG_TEX, "Print texture info" },
- { "compute", DBG_COMPUTE, "Print compute info" },
- { "vm", DBG_VM, "Print virtual addresses when creating resources" },
- { "info", DBG_INFO, "Print driver information" },
+ { "tex", DBG(TEX), "Print texture info" },
+ { "nir", DBG(NIR), "Enable experimental NIR shaders" },
+ { "compute", DBG(COMPUTE), "Print compute info" },
+ { "vm", DBG(VM), "Print virtual addresses when creating resources" },
+ { "info", DBG(INFO), "Print driver information" },
/* shaders */
- { "fs", DBG_FS, "Print fetch shaders" },
- { "vs", DBG_VS, "Print vertex shaders" },
- { "gs", DBG_GS, "Print geometry shaders" },
- { "ps", DBG_PS, "Print pixel shaders" },
- { "cs", DBG_CS, "Print compute shaders" },
- { "tcs", DBG_TCS, "Print tessellation control shaders" },
- { "tes", DBG_TES, "Print tessellation evaluation shaders" },
- { "noir", DBG_NO_IR, "Don't print the LLVM IR"},
- { "notgsi", DBG_NO_TGSI, "Don't print the TGSI"},
- { "noasm", DBG_NO_ASM, "Don't print disassembled shaders"},
- { "preoptir", DBG_PREOPT_IR, "Print the LLVM IR before initial optimizations" },
- { "checkir", DBG_CHECK_IR, "Enable additional sanity checks on shader IR" },
- { "nooptvariant", DBG_NO_OPT_VARIANT, "Disable compiling optimized shader variants." },
-
- { "testdma", DBG_TEST_DMA, "Invoke SDMA tests and exit." },
- { "testvmfaultcp", DBG_TEST_VMFAULT_CP, "Invoke a CP VM fault test and exit." },
- { "testvmfaultsdma", DBG_TEST_VMFAULT_SDMA, "Invoke a SDMA VM fault test and exit." },
- { "testvmfaultshader", DBG_TEST_VMFAULT_SHADER, "Invoke a shader VM fault test and exit." },
+ { "vs", DBG(VS), "Print vertex shaders" },
+ { "gs", DBG(GS), "Print geometry shaders" },
+ { "ps", DBG(PS), "Print pixel shaders" },
+ { "cs", DBG(CS), "Print compute shaders" },
+ { "tcs", DBG(TCS), "Print tessellation control shaders" },
+ { "tes", DBG(TES), "Print tessellation evaluation shaders" },
+ { "noir", DBG(NO_IR), "Don't print the LLVM IR"},
+ { "notgsi", DBG(NO_TGSI), "Don't print the TGSI"},
+ { "noasm", DBG(NO_ASM), "Don't print disassembled shaders"},
+ { "preoptir", DBG(PREOPT_IR), "Print the LLVM IR before initial optimizations" },
+ { "checkir", DBG(CHECK_IR), "Enable additional sanity checks on shader IR" },
+ { "nooptvariant", DBG(NO_OPT_VARIANT), "Disable compiling optimized shader variants." },
+
+ { "testdma", DBG(TEST_DMA), "Invoke SDMA tests and exit." },
+ { "testvmfaultcp", DBG(TEST_VMFAULT_CP), "Invoke a CP VM fault test and exit." },
+ { "testvmfaultsdma", DBG(TEST_VMFAULT_SDMA), "Invoke a SDMA VM fault test and exit." },
+ { "testvmfaultshader", DBG(TEST_VMFAULT_SHADER), "Invoke a shader VM fault test and exit." },
/* features */
- { "nodma", DBG_NO_ASYNC_DMA, "Disable asynchronous DMA" },
- { "nohyperz", DBG_NO_HYPERZ, "Disable Hyper-Z" },
+ { "nodma", DBG(NO_ASYNC_DMA), "Disable asynchronous DMA" },
+ { "nohyperz", DBG(NO_HYPERZ), "Disable Hyper-Z" },
/* GL uses the word INVALIDATE, gallium uses the word DISCARD */
- { "noinvalrange", DBG_NO_DISCARD_RANGE, "Disable handling of INVALIDATE_RANGE map flags" },
- { "no2d", DBG_NO_2D_TILING, "Disable 2D tiling" },
- { "notiling", DBG_NO_TILING, "Disable tiling" },
- { "switch_on_eop", DBG_SWITCH_ON_EOP, "Program WD/IA to switch on end-of-packet." },
- { "forcedma", DBG_FORCE_DMA, "Use asynchronous DMA for all operations when possible." },
- { "precompile", DBG_PRECOMPILE, "Compile one shader variant at shader creation." },
- { "nowc", DBG_NO_WC, "Disable GTT write combining" },
- { "check_vm", DBG_CHECK_VM, "Check VM faults and dump debug info." },
- { "nodcc", DBG_NO_DCC, "Disable DCC." },
- { "nodccclear", DBG_NO_DCC_CLEAR, "Disable DCC fast clear." },
- { "norbplus", DBG_NO_RB_PLUS, "Disable RB+." },
- { "sisched", DBG_SI_SCHED, "Enable LLVM SI Machine Instruction Scheduler." },
- { "mono", DBG_MONOLITHIC_SHADERS, "Use old-style monolithic shaders compiled on demand" },
- { "ce", DBG_CE, "Force enable the constant engine" },
- { "noce", DBG_NO_CE, "Disable the constant engine"},
- { "unsafemath", DBG_UNSAFE_MATH, "Enable unsafe math shader optimizations" },
- { "nodccfb", DBG_NO_DCC_FB, "Disable separate DCC on the main framebuffer" },
+ { "noinvalrange", DBG(NO_DISCARD_RANGE), "Disable handling of INVALIDATE_RANGE map flags" },
+ { "no2d", DBG(NO_2D_TILING), "Disable 2D tiling" },
+ { "notiling", DBG(NO_TILING), "Disable tiling" },
+ { "switch_on_eop", DBG(SWITCH_ON_EOP), "Program WD/IA to switch on end-of-packet." },
+ { "forcedma", DBG(FORCE_DMA), "Use asynchronous DMA for all operations when possible." },
+ { "precompile", DBG(PRECOMPILE), "Compile one shader variant at shader creation." },
+ { "nowc", DBG(NO_WC), "Disable GTT write combining" },
+ { "check_vm", DBG(CHECK_VM), "Check VM faults and dump debug info." },
+ { "nodcc", DBG(NO_DCC), "Disable DCC." },
+ { "nodccclear", DBG(NO_DCC_CLEAR), "Disable DCC fast clear." },
+ { "norbplus", DBG(NO_RB_PLUS), "Disable RB+." },
+ { "sisched", DBG(SI_SCHED), "Enable LLVM SI Machine Instruction Scheduler." },
+ { "mono", DBG(MONOLITHIC_SHADERS), "Use old-style monolithic shaders compiled on demand" },
+ { "unsafemath", DBG(UNSAFE_MATH), "Enable unsafe math shader optimizations" },
+ { "nodccfb", DBG(NO_DCC_FB), "Disable separate DCC on the main framebuffer" },
+ { "nodpbb", DBG(NO_DPBB), "Disable DPBB." },
+ { "nodfsm", DBG(NO_DFSM), "Disable DFSM." },
+ { "dpbb", DBG(DPBB), "Enable DPBB." },
+ { "dfsm", DBG(DFSM), "Enable DFSM." },
+ { "nooutoforder", DBG(NO_OUT_OF_ORDER), "Disable out-of-order rasterization" },
DEBUG_NAMED_VALUE_END /* must be last */
};
@@ -803,31 +875,6 @@
static const char *r600_get_family_name(const struct r600_common_screen *rscreen)
{
switch (rscreen->info.family) {
- case CHIP_R600: return "AMD R600";
- case CHIP_RV610: return "AMD RV610";
- case CHIP_RV630: return "AMD RV630";
- case CHIP_RV670: return "AMD RV670";
- case CHIP_RV620: return "AMD RV620";
- case CHIP_RV635: return "AMD RV635";
- case CHIP_RS780: return "AMD RS780";
- case CHIP_RS880: return "AMD RS880";
- case CHIP_RV770: return "AMD RV770";
- case CHIP_RV730: return "AMD RV730";
- case CHIP_RV710: return "AMD RV710";
- case CHIP_RV740: return "AMD RV740";
- case CHIP_CEDAR: return "AMD CEDAR";
- case CHIP_REDWOOD: return "AMD REDWOOD";
- case CHIP_JUNIPER: return "AMD JUNIPER";
- case CHIP_CYPRESS: return "AMD CYPRESS";
- case CHIP_HEMLOCK: return "AMD HEMLOCK";
- case CHIP_PALM: return "AMD PALM";
- case CHIP_SUMO: return "AMD SUMO";
- case CHIP_SUMO2: return "AMD SUMO2";
- case CHIP_BARTS: return "AMD BARTS";
- case CHIP_TURKS: return "AMD TURKS";
- case CHIP_CAICOS: return "AMD CAICOS";
- case CHIP_CAYMAN: return "AMD CAYMAN";
- case CHIP_ARUBA: return "AMD ARUBA";
case CHIP_TAHITI: return "AMD TAHITI";
case CHIP_PITCAIRN: return "AMD PITCAIRN";
case CHIP_VERDE: return "AMD CAPE VERDE";
@@ -855,8 +902,7 @@
static void r600_disk_cache_create(struct r600_common_screen *rscreen)
{
/* Don't use the cache if shader dumping is enabled. */
- if (rscreen->debug_flags &
- (DBG_FS | DBG_VS | DBG_TCS | DBG_TES | DBG_GS | DBG_PS | DBG_CS))
+ if (rscreen->debug_flags & DBG_ALL_SHADERS)
return;
uint32_t mesa_timestamp;
@@ -864,24 +910,26 @@
&mesa_timestamp)) {
char *timestamp_str;
int res = -1;
- if (rscreen->chip_class < SI) {
- res = asprintf(×tamp_str, "%u",mesa_timestamp);
- }
-#if HAVE_LLVM
- else {
- uint32_t llvm_timestamp;
- if (disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo,
- &llvm_timestamp)) {
- res = asprintf(×tamp_str, "%u_%u",
- mesa_timestamp, llvm_timestamp);
- }
+ uint32_t llvm_timestamp;
+
+ if (disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo,
+ &llvm_timestamp)) {
+ res = asprintf(×tamp_str, "%u_%u",
+ mesa_timestamp, llvm_timestamp);
}
-#endif
+
if (res != -1) {
+ /* These flags affect shader compilation. */
+ uint64_t shader_debug_flags =
+ rscreen->debug_flags &
+ (DBG(FS_CORRECT_DERIVS_AFTER_KILL) |
+ DBG(SI_SCHED) |
+ DBG(UNSAFE_MATH));
+
rscreen->disk_shader_cache =
disk_cache_create(r600_get_family_name(rscreen),
timestamp_str,
- rscreen->debug_flags);
+ shader_debug_flags);
free(timestamp_str);
}
}
@@ -950,49 +998,9 @@
}
}
-const char *r600_get_llvm_processor_name(enum radeon_family family)
+const char *si_get_llvm_processor_name(enum radeon_family family)
{
switch (family) {
- case CHIP_R600:
- case CHIP_RV630:
- case CHIP_RV635:
- case CHIP_RV670:
- return "r600";
- case CHIP_RV610:
- case CHIP_RV620:
- case CHIP_RS780:
- case CHIP_RS880:
- return "rs880";
- case CHIP_RV710:
- return "rv710";
- case CHIP_RV730:
- return "rv730";
- case CHIP_RV740:
- case CHIP_RV770:
- return "rv770";
- case CHIP_PALM:
- case CHIP_CEDAR:
- return "cedar";
- case CHIP_SUMO:
- case CHIP_SUMO2:
- return "sumo";
- case CHIP_REDWOOD:
- return "redwood";
- case CHIP_JUNIPER:
- return "juniper";
- case CHIP_HEMLOCK:
- case CHIP_CYPRESS:
- return "cypress";
- case CHIP_BARTS:
- return "barts";
- case CHIP_TURKS:
- return "turks";
- case CHIP_CAICOS:
- return "caicos";
- case CHIP_CAYMAN:
- case CHIP_ARUBA:
- return "cayman";
-
case CHIP_TAHITI: return "tahiti";
case CHIP_PITCAIRN: return "pitcairn";
case CHIP_VERDE: return "verde";
@@ -1037,10 +1045,7 @@
/* Up to 40 waves per thread-group on GCN < gfx9. Expose a nice
* round number.
*/
- if (screen->chip_class >= SI)
- return 2048;
-
- return 256;
+ return 2048;
}
static int r600_get_compute_param(struct pipe_screen *screen,
@@ -1055,23 +1060,13 @@
case PIPE_COMPUTE_CAP_IR_TARGET: {
const char *gpu;
const char *triple;
- if (rscreen->family <= CHIP_ARUBA) {
- triple = "r600--";
- } else {
- if (HAVE_LLVM < 0x0400) {
- triple = "amdgcn--";
- } else {
- triple = "amdgcn-mesa-mesa3d";
- }
- }
- switch(rscreen->family) {
- /* Clang < 3.6 is missing Hainan in its list of
- * GPUs, so we need to use the name of a similar GPU.
- */
- default:
- gpu = r600_get_llvm_processor_name(rscreen->family);
- break;
- }
+
+ if (HAVE_LLVM < 0x0400)
+ triple = "amdgcn--";
+ else
+ triple = "amdgcn-mesa-mesa3d";
+
+ gpu = si_get_llvm_processor_name(rscreen->family);
if (ret) {
sprintf(ret, "%s-%s", gpu, triple);
}
@@ -1113,9 +1108,7 @@
case PIPE_COMPUTE_CAP_ADDRESS_BITS:
if (ret) {
uint32_t *address_bits = ret;
- address_bits[0] = 32;
- if (rscreen->chip_class >= SI)
- address_bits[0] = 64;
+ address_bits[0] = 64;
}
return 1 * sizeof(uint32_t);
@@ -1189,14 +1182,13 @@
case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
if (ret) {
uint32_t *subgroup_size = ret;
- *subgroup_size = r600_wavefront_size(rscreen->family);
+ *subgroup_size = 64;
}
return sizeof(uint32_t);
case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
if (ret) {
uint64_t *max_variable_threads_per_block = ret;
- if (rscreen->chip_class >= SI &&
- ir_type == PIPE_SHADER_IR_TGSI)
+ if (ir_type == PIPE_SHADER_IR_TGSI)
*max_variable_threads_per_block = SI_MAX_VARIABLE_THREADS_PER_BLOCK;
else
*max_variable_threads_per_block = 0;
@@ -1320,18 +1312,18 @@
info->nr_device_memory_evictions = info->device_memory_evicted / 64;
}
-struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen,
- const struct pipe_resource *templ)
+struct pipe_resource *si_resource_create_common(struct pipe_screen *screen,
+ const struct pipe_resource *templ)
{
if (templ->target == PIPE_BUFFER) {
- return r600_buffer_create(screen, templ, 256);
+ return si_buffer_create(screen, templ, 256);
} else {
- return r600_texture_create(screen, templ);
+ return si_texture_create(screen, templ);
}
}
-bool r600_common_screen_init(struct r600_common_screen *rscreen,
- struct radeon_winsys *ws, unsigned flags)
+bool si_common_screen_init(struct r600_common_screen *rscreen,
+ struct radeon_winsys *ws)
{
char family_name[32] = {}, llvm_string[32] = {}, kernel_version[128] = {};
struct utsname uname_data;
@@ -1341,7 +1333,8 @@
rscreen->ws = ws;
if ((chip_name = r600_get_marketing_name(ws)))
- snprintf(family_name, sizeof(family_name), "%s / ", r600_get_family_name(rscreen));
+ snprintf(family_name, sizeof(family_name), "%s / ",
+ r600_get_family_name(rscreen) + 4);
else
chip_name = r600_get_family_name(rscreen);
@@ -1371,31 +1364,27 @@
rscreen->b.fence_finish = r600_fence_finish;
rscreen->b.fence_reference = r600_fence_reference;
rscreen->b.resource_destroy = u_resource_destroy_vtbl;
- rscreen->b.resource_from_user_memory = r600_buffer_from_user_memory;
+ rscreen->b.resource_from_user_memory = si_buffer_from_user_memory;
rscreen->b.query_memory_info = r600_query_memory_info;
+ rscreen->b.fence_get_fd = r600_fence_get_fd;
if (rscreen->info.has_hw_decode) {
- rscreen->b.get_video_param = rvid_get_video_param;
- rscreen->b.is_video_format_supported = rvid_is_format_supported;
+ rscreen->b.get_video_param = si_vid_get_video_param;
+ rscreen->b.is_video_format_supported = si_vid_is_format_supported;
} else {
rscreen->b.get_video_param = r600_get_video_param;
rscreen->b.is_video_format_supported = vl_video_buffer_is_format_supported;
}
- r600_init_screen_texture_functions(rscreen);
- r600_init_screen_query_functions(rscreen);
+ si_init_screen_texture_functions(rscreen);
+ si_init_screen_query_functions(rscreen);
rscreen->family = rscreen->info.family;
rscreen->chip_class = rscreen->info.chip_class;
- rscreen->debug_flags = debug_get_flags_option("R600_DEBUG", common_debug_options, 0);
+ rscreen->debug_flags |= debug_get_flags_option("R600_DEBUG", common_debug_options, 0);
rscreen->has_rbplus = false;
rscreen->rbplus_allowed = false;
- /* Set the flag in debug_flags, so that the shader cache takes it
- * into account. */
- if (flags & PIPE_SCREEN_ENABLE_CORRECT_TGSI_DERIVATIVES_AFTER_KILL)
- rscreen->debug_flags |= DBG_FS_CORRECT_DERIVS_AFTER_KILL;
-
r600_disk_cache_create(rscreen);
slab_create_parent(&rscreen->pool_transfers, sizeof(struct r600_transfer), 64);
@@ -1407,33 +1396,47 @@
1 << util_logbase2(rscreen->force_aniso));
}
- util_format_s3tc_init();
(void) mtx_init(&rscreen->aux_context_lock, mtx_plain);
(void) mtx_init(&rscreen->gpu_load_mutex, mtx_plain);
- if (rscreen->debug_flags & DBG_INFO) {
+ if (rscreen->debug_flags & DBG(INFO)) {
+ printf("pci (domain:bus:dev.func): %04x:%02x:%02x.%x\n",
+ rscreen->info.pci_domain, rscreen->info.pci_bus,
+ rscreen->info.pci_dev, rscreen->info.pci_func);
printf("pci_id = 0x%x\n", rscreen->info.pci_id);
printf("family = %i (%s)\n", rscreen->info.family,
r600_get_family_name(rscreen));
printf("chip_class = %i\n", rscreen->info.chip_class);
+ printf("pte_fragment_size = %u\n", rscreen->info.pte_fragment_size);
+ printf("gart_page_size = %u\n", rscreen->info.gart_page_size);
printf("gart_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.gart_size, 1024*1024));
printf("vram_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.vram_size, 1024*1024));
printf("vram_vis_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.vram_vis_size, 1024*1024));
printf("max_alloc_size = %i MB\n",
(int)DIV_ROUND_UP(rscreen->info.max_alloc_size, 1024*1024));
+ printf("min_alloc_size = %u\n", rscreen->info.min_alloc_size);
+ printf("has_dedicated_vram = %u\n", rscreen->info.has_dedicated_vram);
printf("has_virtual_memory = %i\n", rscreen->info.has_virtual_memory);
printf("gfx_ib_pad_with_type2 = %i\n", rscreen->info.gfx_ib_pad_with_type2);
+ printf("has_hw_decode = %u\n", rscreen->info.has_hw_decode);
printf("num_sdma_rings = %i\n", rscreen->info.num_sdma_rings);
- printf("has_hw_decode = %i\n", rscreen->info.has_hw_decode);
+ printf("num_compute_rings = %u\n", rscreen->info.num_compute_rings);
+ printf("uvd_fw_version = %u\n", rscreen->info.uvd_fw_version);
+ printf("vce_fw_version = %u\n", rscreen->info.vce_fw_version);
printf("me_fw_version = %i\n", rscreen->info.me_fw_version);
+ printf("me_fw_feature = %i\n", rscreen->info.me_fw_feature);
printf("pfp_fw_version = %i\n", rscreen->info.pfp_fw_version);
+ printf("pfp_fw_feature = %i\n", rscreen->info.pfp_fw_feature);
printf("ce_fw_version = %i\n", rscreen->info.ce_fw_version);
- printf("vce_fw_version = %i\n", rscreen->info.vce_fw_version);
+ printf("ce_fw_feature = %i\n", rscreen->info.ce_fw_feature);
printf("vce_harvest_config = %i\n", rscreen->info.vce_harvest_config);
printf("clock_crystal_freq = %i\n", rscreen->info.clock_crystal_freq);
+ printf("tcc_cache_line_size = %u\n", rscreen->info.tcc_cache_line_size);
printf("drm = %i.%i.%i\n", rscreen->info.drm_major,
rscreen->info.drm_minor, rscreen->info.drm_patchlevel);
printf("has_userptr = %i\n", rscreen->info.has_userptr);
+ printf("has_syncobj = %u\n", rscreen->info.has_syncobj);
+ printf("has_sync_file = %u\n", rscreen->info.has_sync_file);
printf("r600_max_quad_pipes = %i\n", rscreen->info.r600_max_quad_pipes);
printf("max_shader_clock = %i\n", rscreen->info.max_shader_clock);
@@ -1448,14 +1451,15 @@
printf("num_tile_pipes = %i\n", rscreen->info.num_tile_pipes);
printf("pipe_interleave_bytes = %i\n", rscreen->info.pipe_interleave_bytes);
printf("enabled_rb_mask = 0x%x\n", rscreen->info.enabled_rb_mask);
+ printf("max_alignment = %u\n", (unsigned)rscreen->info.max_alignment);
}
return true;
}
-void r600_destroy_common_screen(struct r600_common_screen *rscreen)
+void si_destroy_common_screen(struct r600_common_screen *rscreen)
{
- r600_perfcounters_destroy(rscreen);
- r600_gpu_load_kill_thread(rscreen);
+ si_perfcounters_destroy(rscreen);
+ si_gpu_load_kill_thread(rscreen);
mtx_destroy(&rscreen->gpu_load_mutex);
mtx_destroy(&rscreen->aux_context_lock);
@@ -1468,35 +1472,20 @@
FREE(rscreen);
}
-bool r600_can_dump_shader(struct r600_common_screen *rscreen,
- unsigned processor)
+bool si_can_dump_shader(struct r600_common_screen *rscreen,
+ unsigned processor)
{
- switch (processor) {
- case PIPE_SHADER_VERTEX:
- return (rscreen->debug_flags & DBG_VS) != 0;
- case PIPE_SHADER_TESS_CTRL:
- return (rscreen->debug_flags & DBG_TCS) != 0;
- case PIPE_SHADER_TESS_EVAL:
- return (rscreen->debug_flags & DBG_TES) != 0;
- case PIPE_SHADER_GEOMETRY:
- return (rscreen->debug_flags & DBG_GS) != 0;
- case PIPE_SHADER_FRAGMENT:
- return (rscreen->debug_flags & DBG_PS) != 0;
- case PIPE_SHADER_COMPUTE:
- return (rscreen->debug_flags & DBG_CS) != 0;
- default:
- return false;
- }
+ return rscreen->debug_flags & (1 << processor);
}
-bool r600_extra_shader_checks(struct r600_common_screen *rscreen, unsigned processor)
+bool si_extra_shader_checks(struct r600_common_screen *rscreen, unsigned processor)
{
- return (rscreen->debug_flags & DBG_CHECK_IR) ||
- r600_can_dump_shader(rscreen, processor);
+ return (rscreen->debug_flags & DBG(CHECK_IR)) ||
+ si_can_dump_shader(rscreen, processor);
}
-void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
- uint64_t offset, uint64_t size, unsigned value)
+void si_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
+ uint64_t offset, uint64_t size, unsigned value)
{
struct r600_common_context *rctx = (struct r600_common_context*)rscreen->aux_context;
diff -Nru mesa-17.2.4/src/gallium/drivers/radeon/r600_pipe_common.h mesa-17.3.3/src/gallium/drivers/radeon/r600_pipe_common.h
--- mesa-17.2.4/src/gallium/drivers/radeon/r600_pipe_common.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeon/r600_pipe_common.h 2018-01-18 21:30:28.000000000 +0000
@@ -47,6 +47,8 @@
#include "util/u_transfer.h"
#include "util/u_threaded_context.h"
+struct u_log_context;
+
#define ATI_VENDOR_ID 0x1002
#define R600_RESOURCE_FLAG_TRANSFER (PIPE_RESOURCE_FLAG_DRV_PRIV << 0)
@@ -59,62 +61,79 @@
/* Pipeline & streamout query controls. */
#define R600_CONTEXT_START_PIPELINE_STATS (1u << 1)
#define R600_CONTEXT_STOP_PIPELINE_STATS (1u << 2)
-#define R600_CONTEXT_PRIVATE_FLAG (1u << 3)
+#define R600_CONTEXT_FLUSH_FOR_RENDER_COND (1u << 3)
+#define R600_CONTEXT_PRIVATE_FLAG (1u << 4)
/* special primitive types */
#define R600_PRIM_RECTANGLE_LIST PIPE_PRIM_MAX
+#define R600_NOT_QUERY 0xffffffff
+
/* Debug flags. */
-/* logging and features */
-#define DBG_TEX (1 << 0)
-/* gap - reuse */
-#define DBG_COMPUTE (1 << 2)
-#define DBG_VM (1 << 3)
-#define DBG_CE (1 << 4)
-/* shader logging */
-#define DBG_FS (1 << 5)
-#define DBG_VS (1 << 6)
-#define DBG_GS (1 << 7)
-#define DBG_PS (1 << 8)
-#define DBG_CS (1 << 9)
-#define DBG_TCS (1 << 10)
-#define DBG_TES (1 << 11)
-#define DBG_NO_IR (1 << 12)
-#define DBG_NO_TGSI (1 << 13)
-#define DBG_NO_ASM (1 << 14)
-#define DBG_PREOPT_IR (1 << 15)
-#define DBG_CHECK_IR (1 << 16)
-#define DBG_NO_OPT_VARIANT (1 << 17)
-#define DBG_FS_CORRECT_DERIVS_AFTER_KILL (1 << 18)
-/* gaps */
-#define DBG_TEST_DMA (1 << 20)
-/* Bits 21-31 are reserved for the r600g driver. */
-/* features */
-#define DBG_NO_ASYNC_DMA (1ull << 32)
-#define DBG_NO_HYPERZ (1ull << 33)
-#define DBG_NO_DISCARD_RANGE (1ull << 34)
-#define DBG_NO_2D_TILING (1ull << 35)
-#define DBG_NO_TILING (1ull << 36)
-#define DBG_SWITCH_ON_EOP (1ull << 37)
-#define DBG_FORCE_DMA (1ull << 38)
-#define DBG_PRECOMPILE (1ull << 39)
-#define DBG_INFO (1ull << 40)
-#define DBG_NO_WC (1ull << 41)
-#define DBG_CHECK_VM (1ull << 42)
-#define DBG_NO_DCC (1ull << 43)
-#define DBG_NO_DCC_CLEAR (1ull << 44)
-#define DBG_NO_RB_PLUS (1ull << 45)
-#define DBG_SI_SCHED (1ull << 46)
-#define DBG_MONOLITHIC_SHADERS (1ull << 47)
-#define DBG_NO_CE (1ull << 48)
-#define DBG_UNSAFE_MATH (1ull << 49)
-#define DBG_NO_DCC_FB (1ull << 50)
-#define DBG_TEST_VMFAULT_CP (1ull << 51)
-#define DBG_TEST_VMFAULT_SDMA (1ull << 52)
-#define DBG_TEST_VMFAULT_SHADER (1ull << 53)
+enum {
+ /* Shader logging options: */
+ DBG_VS = PIPE_SHADER_VERTEX,
+ DBG_PS = PIPE_SHADER_FRAGMENT,
+ DBG_GS = PIPE_SHADER_GEOMETRY,
+ DBG_TCS = PIPE_SHADER_TESS_CTRL,
+ DBG_TES = PIPE_SHADER_TESS_EVAL,
+ DBG_CS = PIPE_SHADER_COMPUTE,
+ DBG_NO_IR,
+ DBG_NO_TGSI,
+ DBG_NO_ASM,
+ DBG_PREOPT_IR,
+
+ /* Shader compiler options the shader cache should be aware of: */
+ DBG_FS_CORRECT_DERIVS_AFTER_KILL,
+ DBG_UNSAFE_MATH,
+ DBG_SI_SCHED,
+
+ /* Shader compiler options (with no effect on the shader cache): */
+ DBG_CHECK_IR,
+ DBG_PRECOMPILE,
+ DBG_NIR,
+ DBG_MONOLITHIC_SHADERS,
+ DBG_NO_OPT_VARIANT,
+
+ /* Information logging options: */
+ DBG_INFO,
+ DBG_TEX,
+ DBG_COMPUTE,
+ DBG_VM,
+
+ /* Driver options: */
+ DBG_FORCE_DMA,
+ DBG_NO_ASYNC_DMA,
+ DBG_NO_DISCARD_RANGE,
+ DBG_NO_WC,
+ DBG_CHECK_VM,
+
+ /* 3D engine options: */
+ DBG_SWITCH_ON_EOP,
+ DBG_NO_OUT_OF_ORDER,
+ DBG_NO_DPBB,
+ DBG_NO_DFSM,
+ DBG_DPBB,
+ DBG_DFSM,
+ DBG_NO_HYPERZ,
+ DBG_NO_RB_PLUS,
+ DBG_NO_2D_TILING,
+ DBG_NO_TILING,
+ DBG_NO_DCC,
+ DBG_NO_DCC_CLEAR,
+ DBG_NO_DCC_FB,
+
+ /* Tests: */
+ DBG_TEST_DMA,
+ DBG_TEST_VMFAULT_CP,
+ DBG_TEST_VMFAULT_SDMA,
+ DBG_TEST_VMFAULT_SHADER,
+};
+
+#define DBG_ALL_SHADERS (((1 << (DBG_CS + 1)) - 1))
+#define DBG(name) (1ull << DBG_##name)
#define R600_MAP_BUFFER_ALIGNMENT 64
-#define R600_MAX_VIEWPORTS 16
#define SI_MAX_VARIABLE_THREADS_PER_BLOCK 1024
@@ -135,8 +154,8 @@
struct tgsi_shader_info;
struct r600_qbo_state;
-void radeon_shader_binary_init(struct ac_shader_binary *b);
-void radeon_shader_binary_clean(struct ac_shader_binary *b);
+void si_radeon_shader_binary_init(struct ac_shader_binary *b);
+void si_radeon_shader_binary_clean(struct ac_shader_binary *b);
/* Only 32-bit buffer allocations are supported, gallium doesn't support more
* at the moment.
@@ -202,6 +221,7 @@
unsigned bank_height;
unsigned slice_tile_max;
unsigned tile_mode_index;
+ unsigned tile_swizzle;
};
struct r600_cmask_info {
@@ -371,13 +391,19 @@
struct r600_mmio_counter meq;
struct r600_mmio_counter me;
struct r600_mmio_counter surf_sync;
- struct r600_mmio_counter dma;
+ struct r600_mmio_counter cp_dma;
struct r600_mmio_counter scratch_ram;
- struct r600_mmio_counter ce;
} named;
unsigned array[0];
};
+struct r600_memory_object {
+ struct pipe_memory_object b;
+ struct pb_buffer *buf;
+ uint32_t stride;
+ uint32_t offset;
+};
+
struct r600_common_screen {
struct pipe_screen b;
struct radeon_winsys *ws;
@@ -444,6 +470,11 @@
*/
unsigned cp_to_L2;
+ /* Context flags to set so that all writes from earlier jobs
+ * that end in L2 are seen by CP.
+ */
+ unsigned L2_to_cp;
+
/* Context flags to set so that all writes from earlier
* compute jobs are seen by L2 clients.
*/
@@ -463,69 +494,9 @@
* command stream. */
struct r600_atom {
void (*emit)(struct r600_common_context *ctx, struct r600_atom *state);
- unsigned num_dw;
unsigned short id;
};
-struct r600_so_target {
- struct pipe_stream_output_target b;
-
- /* The buffer where BUFFER_FILLED_SIZE is stored. */
- struct r600_resource *buf_filled_size;
- unsigned buf_filled_size_offset;
- bool buf_filled_size_valid;
-
- unsigned stride_in_dw;
-};
-
-struct r600_streamout {
- struct r600_atom begin_atom;
- bool begin_emitted;
- unsigned num_dw_for_end;
-
- unsigned enabled_mask;
- unsigned num_targets;
- struct r600_so_target *targets[PIPE_MAX_SO_BUFFERS];
-
- unsigned append_bitmask;
- bool suspended;
-
- /* External state which comes from the vertex shader,
- * it must be set explicitly when binding a shader. */
- uint16_t *stride_in_dw;
- unsigned enabled_stream_buffers_mask; /* stream0 buffers0-3 in 4 LSB */
-
- /* The state of VGT_STRMOUT_BUFFER_(CONFIG|EN). */
- unsigned hw_enabled_mask;
-
- /* The state of VGT_STRMOUT_(CONFIG|EN). */
- struct r600_atom enable_atom;
- bool streamout_enabled;
- bool prims_gen_query_enabled;
- int num_prims_gen_queries;
-};
-
-struct r600_signed_scissor {
- int minx;
- int miny;
- int maxx;
- int maxy;
-};
-
-struct r600_scissors {
- struct r600_atom atom;
- unsigned dirty_mask;
- struct pipe_scissor_state states[R600_MAX_VIEWPORTS];
-};
-
-struct r600_viewports {
- struct r600_atom atom;
- unsigned dirty_mask;
- unsigned depth_range_dirty_mask;
- struct pipe_viewport_state states[R600_MAX_VIEWPORTS];
- struct r600_signed_scissor as_scissor[R600_MAX_VIEWPORTS];
-};
-
struct r600_ring {
struct radeon_winsys_cs *cs;
void (*flush)(void *ctx, unsigned flags,
@@ -553,6 +524,7 @@
struct r600_ring dma;
struct pipe_fence_handle *last_gfx_fence;
struct pipe_fence_handle *last_sdma_fence;
+ struct r600_resource *eop_bug_scratch;
unsigned num_gfx_cs_flushes;
unsigned initial_gfx_cs_size;
unsigned gpu_reset_counter;
@@ -569,18 +541,8 @@
uint64_t vram;
uint64_t gtt;
- /* States. */
- struct r600_streamout streamout;
- struct r600_scissors scissors;
- struct r600_viewports viewports;
- bool scissor_enabled;
- bool clip_halfz;
- bool vs_writes_viewport_index;
- bool vs_disables_clipping_viewport;
-
/* Additional context states. */
unsigned flags; /* flush flags */
- enum pipe_prim_type current_rast_prim; /* primitive type after TES, GS */
/* Queries. */
/* Maintain the list of active queries for pausing between IBs. */
@@ -590,6 +552,8 @@
unsigned num_cs_dw_queries_suspend;
/* Misc stats. */
unsigned num_draw_calls;
+ unsigned num_decompress_calls;
+ unsigned num_mrt_draw_calls;
unsigned num_prim_restart_calls;
unsigned num_spill_draw_calls;
unsigned num_compute_calls;
@@ -614,15 +578,6 @@
bool render_cond_invert;
bool render_cond_force_off; /* for u_blitter */
- /* MSAA sample locations.
- * The first index is the sample index.
- * The second index is the coordinate: X, Y. */
- float sample_locations_1x[1][2];
- float sample_locations_2x[2][2];
- float sample_locations_4x[4][2];
- float sample_locations_8x[8][2];
- float sample_locations_16x[16][2];
-
/* Statistics gathering for the DCC enablement heuristic. It can't be
* in r600_texture because r600_texture can be shared by multiple
* contexts. This is for back buffers only. We shouldn't get too many
@@ -644,6 +599,7 @@
struct pipe_debug_callback debug;
struct pipe_device_reset_callback device_reset_callback;
+ struct u_log_context *log;
void *query_result_shader;
@@ -684,7 +640,9 @@
uint64_t old_gpu_address);
/* Enable or disable occlusion queries. */
- void (*set_occlusion_query_state)(struct pipe_context *ctx, bool enable);
+ void (*set_occlusion_query_state)(struct pipe_context *ctx,
+ bool old_enable,
+ bool old_perfect_enable);
void (*save_qbo_state)(struct pipe_context *ctx, struct r600_qbo_state *st);
@@ -701,129 +659,109 @@
};
/* r600_buffer_common.c */
-bool r600_rings_is_buffer_referenced(struct r600_common_context *ctx,
- struct pb_buffer *buf,
- enum radeon_bo_usage usage);
-void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
- struct r600_resource *resource,
- unsigned usage);
-void r600_buffer_subdata(struct pipe_context *ctx,
- struct pipe_resource *buffer,
- unsigned usage, unsigned offset,
- unsigned size, const void *data);
-void r600_init_resource_fields(struct r600_common_screen *rscreen,
- struct r600_resource *res,
- uint64_t size, unsigned alignment);
-bool r600_alloc_resource(struct r600_common_screen *rscreen,
- struct r600_resource *res);
-struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
- const struct pipe_resource *templ,
- unsigned alignment);
-struct pipe_resource * r600_aligned_buffer_create(struct pipe_screen *screen,
- unsigned flags,
- unsigned usage,
- unsigned size,
- unsigned alignment);
+bool si_rings_is_buffer_referenced(struct r600_common_context *ctx,
+ struct pb_buffer *buf,
+ enum radeon_bo_usage usage);
+void *si_buffer_map_sync_with_rings(struct r600_common_context *ctx,
+ struct r600_resource *resource,
+ unsigned usage);
+void si_buffer_subdata(struct pipe_context *ctx,
+ struct pipe_resource *buffer,
+ unsigned usage, unsigned offset,
+ unsigned size, const void *data);
+void si_init_resource_fields(struct r600_common_screen *rscreen,
+ struct r600_resource *res,
+ uint64_t size, unsigned alignment);
+bool si_alloc_resource(struct r600_common_screen *rscreen,
+ struct r600_resource *res);
+struct pipe_resource *si_buffer_create(struct pipe_screen *screen,
+ const struct pipe_resource *templ,
+ unsigned alignment);
+struct pipe_resource *si_aligned_buffer_create(struct pipe_screen *screen,
+ unsigned flags,
+ unsigned usage,
+ unsigned size,
+ unsigned alignment);
struct pipe_resource *
-r600_buffer_from_user_memory(struct pipe_screen *screen,
- const struct pipe_resource *templ,
- void *user_memory);
-void
-r600_invalidate_resource(struct pipe_context *ctx,
- struct pipe_resource *resource);
-void r600_replace_buffer_storage(struct pipe_context *ctx,
- struct pipe_resource *dst,
- struct pipe_resource *src);
+si_buffer_from_user_memory(struct pipe_screen *screen,
+ const struct pipe_resource *templ,
+ void *user_memory);
+void si_invalidate_resource(struct pipe_context *ctx,
+ struct pipe_resource *resource);
+void si_replace_buffer_storage(struct pipe_context *ctx,
+ struct pipe_resource *dst,
+ struct pipe_resource *src);
/* r600_common_pipe.c */
-void r600_gfx_write_event_eop(struct r600_common_context *ctx,
- unsigned event, unsigned event_flags,
- unsigned data_sel,
- struct r600_resource *buf, uint64_t va,
- uint32_t old_fence, uint32_t new_fence);
-unsigned r600_gfx_write_fence_dwords(struct r600_common_screen *screen);
-void r600_gfx_wait_fence(struct r600_common_context *ctx,
- uint64_t va, uint32_t ref, uint32_t mask);
-void r600_draw_rectangle(struct blitter_context *blitter,
- int x1, int y1, int x2, int y2, float depth,
- enum blitter_attrib_type type,
- const union pipe_color_union *attrib);
-bool r600_common_screen_init(struct r600_common_screen *rscreen,
- struct radeon_winsys *ws, unsigned flags);
-void r600_destroy_common_screen(struct r600_common_screen *rscreen);
-void r600_preflush_suspend_features(struct r600_common_context *ctx);
-void r600_postflush_resume_features(struct r600_common_context *ctx);
-bool r600_common_context_init(struct r600_common_context *rctx,
- struct r600_common_screen *rscreen,
- unsigned context_flags);
-void r600_common_context_cleanup(struct r600_common_context *rctx);
-bool r600_can_dump_shader(struct r600_common_screen *rscreen,
- unsigned processor);
-bool r600_extra_shader_checks(struct r600_common_screen *rscreen,
- unsigned processor);
-void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
- uint64_t offset, uint64_t size, unsigned value);
-struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen,
- const struct pipe_resource *templ);
-const char *r600_get_llvm_processor_name(enum radeon_family family);
-void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
- struct r600_resource *dst, struct r600_resource *src);
-void radeon_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs,
- struct radeon_saved_cs *saved);
-void radeon_clear_saved_cs(struct radeon_saved_cs *saved);
-bool r600_check_device_reset(struct r600_common_context *rctx);
+void si_gfx_write_event_eop(struct r600_common_context *ctx,
+ unsigned event, unsigned event_flags,
+ unsigned data_sel,
+ struct r600_resource *buf, uint64_t va,
+ uint32_t new_fence, unsigned query_type);
+unsigned si_gfx_write_fence_dwords(struct r600_common_screen *screen);
+void si_gfx_wait_fence(struct r600_common_context *ctx,
+ uint64_t va, uint32_t ref, uint32_t mask);
+bool si_common_screen_init(struct r600_common_screen *rscreen,
+ struct radeon_winsys *ws);
+void si_destroy_common_screen(struct r600_common_screen *rscreen);
+void si_preflush_suspend_features(struct r600_common_context *ctx);
+void si_postflush_resume_features(struct r600_common_context *ctx);
+bool si_common_context_init(struct r600_common_context *rctx,
+ struct r600_common_screen *rscreen,
+ unsigned context_flags);
+void si_common_context_cleanup(struct r600_common_context *rctx);
+bool si_can_dump_shader(struct r600_common_screen *rscreen,
+ unsigned processor);
+bool si_extra_shader_checks(struct r600_common_screen *rscreen,
+ unsigned processor);
+void si_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
+ uint64_t offset, uint64_t size, unsigned value);
+struct pipe_resource *si_resource_create_common(struct pipe_screen *screen,
+ const struct pipe_resource *templ);
+const char *si_get_llvm_processor_name(enum radeon_family family);
+void si_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
+ struct r600_resource *dst, struct r600_resource *src);
+void si_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs,
+ struct radeon_saved_cs *saved, bool get_buffer_list);
+void si_clear_saved_cs(struct radeon_saved_cs *saved);
+bool si_check_device_reset(struct r600_common_context *rctx);
/* r600_gpu_load.c */
-void r600_gpu_load_kill_thread(struct r600_common_screen *rscreen);
-uint64_t r600_begin_counter(struct r600_common_screen *rscreen, unsigned type);
-unsigned r600_end_counter(struct r600_common_screen *rscreen, unsigned type,
- uint64_t begin);
+void si_gpu_load_kill_thread(struct r600_common_screen *rscreen);
+uint64_t si_begin_counter(struct r600_common_screen *rscreen, unsigned type);
+unsigned si_end_counter(struct r600_common_screen *rscreen, unsigned type,
+ uint64_t begin);
/* r600_perfcounters.c */
-void r600_perfcounters_destroy(struct r600_common_screen *rscreen);
+void si_perfcounters_destroy(struct r600_common_screen *rscreen);
/* r600_query.c */
-void r600_init_screen_query_functions(struct r600_common_screen *rscreen);
-void r600_query_init(struct r600_common_context *rctx);
-void r600_suspend_queries(struct r600_common_context *ctx);
-void r600_resume_queries(struct r600_common_context *ctx);
-void r600_query_fix_enabled_rb_mask(struct r600_common_screen *rscreen);
-
-/* r600_streamout.c */
-void r600_streamout_buffers_dirty(struct r600_common_context *rctx);
-void r600_set_streamout_targets(struct pipe_context *ctx,
- unsigned num_targets,
- struct pipe_stream_output_target **targets,
- const unsigned *offset);
-void r600_emit_streamout_end(struct r600_common_context *rctx);
-void r600_update_prims_generated_query_state(struct r600_common_context *rctx,
- unsigned type, int diff);
-void r600_streamout_init(struct r600_common_context *rctx);
+void si_init_screen_query_functions(struct r600_common_screen *rscreen);
+void si_init_query_functions(struct r600_common_context *rctx);
+void si_suspend_queries(struct r600_common_context *ctx);
+void si_resume_queries(struct r600_common_context *ctx);
/* r600_test_dma.c */
-void r600_test_dma(struct r600_common_screen *rscreen);
+void si_test_dma(struct r600_common_screen *rscreen);
/* r600_texture.c */
-bool r600_prepare_for_dma_blit(struct r600_common_context *rctx,
- struct r600_texture *rdst,
- unsigned dst_level, unsigned dstx,
- unsigned dsty, unsigned dstz,
- struct r600_texture *rsrc,
- unsigned src_level,
- const struct pipe_box *src_box);
-void r600_texture_get_fmask_info(struct r600_common_screen *rscreen,
- struct r600_texture *rtex,
- unsigned nr_samples,
- struct r600_fmask_info *out);
-void r600_texture_get_cmask_info(struct r600_common_screen *rscreen,
- struct r600_texture *rtex,
- struct r600_cmask_info *out);
-bool r600_init_flushed_depth_texture(struct pipe_context *ctx,
- struct pipe_resource *texture,
- struct r600_texture **staging);
-void r600_print_texture_info(struct r600_common_screen *rscreen,
- struct r600_texture *rtex, FILE *f);
-struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
+bool si_prepare_for_dma_blit(struct r600_common_context *rctx,
+ struct r600_texture *rdst,
+ unsigned dst_level, unsigned dstx,
+ unsigned dsty, unsigned dstz,
+ struct r600_texture *rsrc,
+ unsigned src_level,
+ const struct pipe_box *src_box);
+void si_texture_get_fmask_info(struct r600_common_screen *rscreen,
+ struct r600_texture *rtex,
+ unsigned nr_samples,
+ struct r600_fmask_info *out);
+bool si_init_flushed_depth_texture(struct pipe_context *ctx,
+ struct pipe_resource *texture,
+ struct r600_texture **staging);
+void si_print_texture_info(struct r600_common_screen *rscreen,
+ struct r600_texture *rtex, struct u_log_context *log);
+struct pipe_resource *si_texture_create(struct pipe_screen *screen,
const struct pipe_resource *templ);
bool vi_dcc_formats_compatible(enum pipe_format format1,
enum pipe_format format2);
@@ -834,12 +772,12 @@
struct pipe_resource *tex,
unsigned level,
enum pipe_format view_format);
-struct pipe_surface *r600_create_surface_custom(struct pipe_context *pipe,
- struct pipe_resource *texture,
- const struct pipe_surface *templ,
- unsigned width0, unsigned height0,
- unsigned width, unsigned height);
-unsigned r600_translate_colorswap(enum pipe_format format, bool do_endian_swap);
+struct pipe_surface *si_create_surface_custom(struct pipe_context *pipe,
+ struct pipe_resource *texture,
+ const struct pipe_surface *templ,
+ unsigned width0, unsigned height0,
+ unsigned width, unsigned height);
+unsigned si_translate_colorswap(enum pipe_format format, bool do_endian_swap);
void vi_separate_dcc_start_query(struct pipe_context *ctx,
struct r600_texture *tex);
void vi_separate_dcc_stop_query(struct pipe_context *ctx,
@@ -849,37 +787,15 @@
void vi_dcc_clear_level(struct r600_common_context *rctx,
struct r600_texture *rtex,
unsigned level, unsigned clear_value);
-void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
- struct pipe_framebuffer_state *fb,
- struct r600_atom *fb_state,
- unsigned *buffers, ubyte *dirty_cbufs,
- const union pipe_color_union *color);
-bool r600_texture_disable_dcc(struct r600_common_context *rctx,
- struct r600_texture *rtex);
-void r600_init_screen_texture_functions(struct r600_common_screen *rscreen);
-void r600_init_context_texture_functions(struct r600_common_context *rctx);
-
-/* r600_viewport.c */
-void evergreen_apply_scissor_bug_workaround(struct r600_common_context *rctx,
- struct pipe_scissor_state *scissor);
-void r600_viewport_set_rast_deps(struct r600_common_context *rctx,
- bool scissor_enable, bool clip_halfz);
-void r600_update_vs_writes_viewport_index(struct r600_common_context *rctx,
- struct tgsi_shader_info *info);
-void r600_init_viewport_functions(struct r600_common_context *rctx);
-
-/* cayman_msaa.c */
-extern const uint32_t eg_sample_locs_2x[4];
-extern const unsigned eg_max_dist_2x;
-extern const uint32_t eg_sample_locs_4x[4];
-extern const unsigned eg_max_dist_4x;
-void cayman_get_sample_position(struct pipe_context *ctx, unsigned sample_count,
- unsigned sample_index, float *out_value);
-void cayman_init_msaa(struct pipe_context *ctx);
-void cayman_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples);
-void cayman_emit_msaa_config(struct radeon_winsys_cs *cs, int nr_samples,
- int ps_iter_samples, int overrast_samples,
- unsigned sc_mode_cntl_1);
+void si_do_fast_color_clear(struct r600_common_context *rctx,
+ struct pipe_framebuffer_state *fb,
+ struct r600_atom *fb_state,
+ unsigned *buffers, ubyte *dirty_cbufs,
+ const union pipe_color_union *color);
+bool si_texture_disable_dcc(struct r600_common_context *rctx,
+ struct r600_texture *rtex);
+void si_init_screen_texture_functions(struct r600_common_screen *rscreen);
+void si_init_context_texture_functions(struct r600_common_context *rctx);
/* Inline helpers. */
@@ -915,12 +831,6 @@
}
}
-static inline bool r600_get_strmout_en(struct r600_common_context *rctx)
-{
- return rctx->streamout.streamout_enabled ||
- rctx->streamout.prims_gen_query_enabled;
-}
-
#define SQ_TEX_XY_FILTER_POINT 0x00
#define SQ_TEX_XY_FILTER_BILINEAR 0x01
#define SQ_TEX_XY_FILTER_ANISO_POINT 0x02
@@ -949,26 +859,6 @@
return 4;
}
-static inline unsigned r600_wavefront_size(enum radeon_family family)
-{
- switch (family) {
- case CHIP_RV610:
- case CHIP_RS780:
- case CHIP_RV620:
- case CHIP_RS880:
- return 16;
- case CHIP_RV630:
- case CHIP_RV635:
- case CHIP_RV730:
- case CHIP_RV710:
- case CHIP_PALM:
- case CHIP_CEDAR:
- return 32;
- default:
- return 64;
- }
-}
-
static inline enum radeon_bo_priority
r600_get_sampler_view_priority(struct r600_resource *res)
{
@@ -994,21 +884,27 @@
return tex->dcc_offset && level < tex->surface.num_dcc_levels;
}
+static inline bool
+r600_htile_enabled(struct r600_texture *tex, unsigned level)
+{ /* True only when tex->htile_offset is non-zero and level is 0. */
+ return tex->htile_offset && level == 0;
+}
+
+static inline bool
+vi_tc_compat_htile_enabled(struct r600_texture *tex, unsigned level)
+{ /* True only when tex->tc_compatible_htile is set and level is 0. */
+ assert(!tex->tc_compatible_htile || tex->htile_offset); /* tc_compatible_htile must imply a non-zero htile_offset */
+ return tex->tc_compatible_htile && level == 0;
+}
+
#define COMPUTE_DBG(rscreen, fmt, args...) \
do { \
- if ((rscreen->b.debug_flags & DBG_COMPUTE)) fprintf(stderr, fmt, ##args); \
+ if ((rscreen->b.debug_flags & DBG(COMPUTE))) fprintf(stderr, fmt, ##args); \
} while (0);
#define R600_ERR(fmt, args...) \
fprintf(stderr, "EE %s:%d %s - " fmt, __FILE__, __LINE__, __func__, ##args)
-/* For MSAA sample positions. */
-#define FILL_SREG(s0x, s0y, s1x, s1y, s2x, s2y, s3x, s3y) \
- (((s0x) & 0xf) | (((unsigned)(s0y) & 0xf) << 4) | \
- (((unsigned)(s1x) & 0xf) << 8) | (((unsigned)(s1y) & 0xf) << 12) | \
- (((unsigned)(s2x) & 0xf) << 16) | (((unsigned)(s2y) & 0xf) << 20) | \
- (((unsigned)(s3x) & 0xf) << 24) | (((unsigned)(s3y) & 0xf) << 28))
-
static inline int S_FIXED(float value, unsigned frac_bits)
{
return value * (1 << frac_bits);
diff -Nru mesa-17.2.4/src/gallium/drivers/radeon/r600_query.c mesa-17.3.3/src/gallium/drivers/radeon/r600_query.c
--- mesa-17.2.4/src/gallium/drivers/radeon/r600_query.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeon/r600_query.c 2018-01-18 21:30:28.000000000 +0000
@@ -28,6 +28,13 @@
#include "util/u_upload_mgr.h"
#include "os/os_time.h"
#include "tgsi/tgsi_text.h"
+#include "amd/common/sid.h"
+
+/* TODO: remove this: */
+void si_update_prims_generated_query_state(struct r600_common_context *rctx,
+ unsigned type, int diff);
+
+#define R600_MAX_STREAMS 4
struct r600_hw_query_params {
unsigned start_offset;
@@ -72,6 +79,7 @@
case R600_QUERY_NUM_GFX_IBS: return RADEON_NUM_GFX_IBS;
case R600_QUERY_NUM_SDMA_IBS: return RADEON_NUM_SDMA_IBS;
case R600_QUERY_GFX_BO_LIST_SIZE: return RADEON_GFX_BO_LIST_COUNTER;
+ case R600_QUERY_GFX_IB_SIZE: return RADEON_GFX_IB_SIZE_COUNTER;
case R600_QUERY_NUM_BYTES_MOVED: return RADEON_NUM_BYTES_MOVED;
case R600_QUERY_NUM_EVICTIONS: return RADEON_NUM_EVICTIONS;
case R600_QUERY_NUM_VRAM_CPU_PAGE_FAULTS: return RADEON_NUM_VRAM_CPU_PAGE_FAULTS;
@@ -99,6 +107,12 @@
case R600_QUERY_DRAW_CALLS:
query->begin_result = rctx->num_draw_calls;
break;
+ case R600_QUERY_DECOMPRESS_CALLS:
+ query->begin_result = rctx->num_decompress_calls;
+ break;
+ case R600_QUERY_MRT_DRAW_CALLS:
+ query->begin_result = rctx->num_mrt_draw_calls;
+ break;
case R600_QUERY_PRIM_RESTART_CALLS:
query->begin_result = rctx->num_prim_restart_calls;
break;
@@ -165,6 +179,7 @@
query->begin_result = 0;
break;
case R600_QUERY_BUFFER_WAIT_TIME:
+ case R600_QUERY_GFX_IB_SIZE:
case R600_QUERY_NUM_GFX_IBS:
case R600_QUERY_NUM_SDMA_IBS:
case R600_QUERY_NUM_BYTES_MOVED:
@@ -209,10 +224,9 @@
case R600_QUERY_GPU_MEQ_BUSY:
case R600_QUERY_GPU_ME_BUSY:
case R600_QUERY_GPU_SURF_SYNC_BUSY:
- case R600_QUERY_GPU_DMA_BUSY:
+ case R600_QUERY_GPU_CP_DMA_BUSY:
case R600_QUERY_GPU_SCRATCH_RAM_BUSY:
- case R600_QUERY_GPU_CE_BUSY:
- query->begin_result = r600_begin_counter(rctx->screen,
+ query->begin_result = si_begin_counter(rctx->screen,
query->b.type);
break;
case R600_QUERY_NUM_COMPILATIONS:
@@ -253,6 +267,12 @@
case R600_QUERY_DRAW_CALLS:
query->end_result = rctx->num_draw_calls;
break;
+ case R600_QUERY_DECOMPRESS_CALLS:
+ query->end_result = rctx->num_decompress_calls;
+ break;
+ case R600_QUERY_MRT_DRAW_CALLS:
+ query->end_result = rctx->num_mrt_draw_calls;
+ break;
case R600_QUERY_PRIM_RESTART_CALLS:
query->end_result = rctx->num_prim_restart_calls;
break;
@@ -315,6 +335,7 @@
case R600_QUERY_CURRENT_GPU_SCLK:
case R600_QUERY_CURRENT_GPU_MCLK:
case R600_QUERY_BUFFER_WAIT_TIME:
+ case R600_QUERY_GFX_IB_SIZE:
case R600_QUERY_NUM_MAPPED_BUFFERS:
case R600_QUERY_NUM_GFX_IBS:
case R600_QUERY_NUM_SDMA_IBS:
@@ -360,10 +381,9 @@
case R600_QUERY_GPU_MEQ_BUSY:
case R600_QUERY_GPU_ME_BUSY:
case R600_QUERY_GPU_SURF_SYNC_BUSY:
- case R600_QUERY_GPU_DMA_BUSY:
+ case R600_QUERY_GPU_CP_DMA_BUSY:
case R600_QUERY_GPU_SCRATCH_RAM_BUSY:
- case R600_QUERY_GPU_CE_BUSY:
- query->end_result = r600_end_counter(rctx->screen,
+ query->end_result = si_end_counter(rctx->screen,
query->b.type,
query->begin_result);
query->begin_result = 0;
@@ -482,8 +502,8 @@
return (struct pipe_query *)query;
}
-void r600_query_hw_destroy(struct r600_common_screen *rscreen,
- struct r600_query *rquery)
+void si_query_hw_destroy(struct r600_common_screen *rscreen,
+ struct r600_query *rquery)
{
struct r600_query_hw *query = (struct r600_query_hw *)rquery;
struct r600_query_buffer *prev = query->buffer.previous;
@@ -497,6 +517,7 @@
}
r600_resource_reference(&query->buffer.buf, NULL);
+ r600_resource_reference(&query->workaround_buf, NULL);
FREE(rquery);
}
@@ -538,7 +559,8 @@
memset(results, 0, buffer->b.b.width0);
if (query->b.type == PIPE_QUERY_OCCLUSION_COUNTER ||
- query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE) {
+ query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE ||
+ query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE) {
unsigned max_rbs = rscreen->info.num_render_backends;
unsigned enabled_rb_mask = rscreen->info.enabled_rb_mask;
unsigned num_results;
@@ -569,10 +591,10 @@
unsigned offset);
static struct r600_query_ops query_hw_ops = {
- .destroy = r600_query_hw_destroy,
- .begin = r600_query_hw_begin,
- .end = r600_query_hw_end,
- .get_result = r600_query_hw_get_result,
+ .destroy = si_query_hw_destroy,
+ .begin = si_query_hw_begin,
+ .end = si_query_hw_end,
+ .get_result = si_query_hw_get_result,
.get_result_resource = r600_query_hw_get_result_resource,
};
@@ -598,8 +620,8 @@
.add_result = r600_query_hw_add_result,
};
-bool r600_query_hw_init(struct r600_common_screen *rscreen,
- struct r600_query_hw *query)
+bool si_query_hw_init(struct r600_common_screen *rscreen,
+ struct r600_query_hw *query)
{
query->buffer.buf = r600_new_query_buffer(rscreen, query);
if (!query->buffer.buf)
@@ -623,19 +645,20 @@
switch (query_type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
+ case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
query->result_size = 16 * rscreen->info.num_render_backends;
query->result_size += 16; /* for the fence + alignment */
query->num_cs_dw_begin = 6;
- query->num_cs_dw_end = 6 + r600_gfx_write_fence_dwords(rscreen);
+ query->num_cs_dw_end = 6 + si_gfx_write_fence_dwords(rscreen);
break;
case PIPE_QUERY_TIME_ELAPSED:
query->result_size = 24;
query->num_cs_dw_begin = 8;
- query->num_cs_dw_end = 8 + r600_gfx_write_fence_dwords(rscreen);
+ query->num_cs_dw_end = 8 + si_gfx_write_fence_dwords(rscreen);
break;
case PIPE_QUERY_TIMESTAMP:
query->result_size = 16;
- query->num_cs_dw_end = 8 + r600_gfx_write_fence_dwords(rscreen);
+ query->num_cs_dw_end = 8 + si_gfx_write_fence_dwords(rscreen);
query->flags = R600_QUERY_HW_FLAG_NO_START;
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
@@ -648,12 +671,18 @@
query->num_cs_dw_end = 6;
query->stream = index;
break;
+ case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
+ /* NumPrimitivesWritten, PrimitiveStorageNeeded. */
+ query->result_size = 32 * R600_MAX_STREAMS;
+ query->num_cs_dw_begin = 6 * R600_MAX_STREAMS;
+ query->num_cs_dw_end = 6 * R600_MAX_STREAMS;
+ break;
case PIPE_QUERY_PIPELINE_STATISTICS:
- /* 11 values on EG, 8 on R600. */
- query->result_size = (rscreen->chip_class >= EVERGREEN ? 11 : 8) * 16;
+ /* 11 values on GCN. */
+ query->result_size = 11 * 16;
query->result_size += 8; /* for the fence + alignment */
query->num_cs_dw_begin = 6;
- query->num_cs_dw_end = 6 + r600_gfx_write_fence_dwords(rscreen);
+ query->num_cs_dw_end = 6 + si_gfx_write_fence_dwords(rscreen);
break;
default:
assert(0);
@@ -661,7 +690,7 @@
return NULL;
}
- if (!r600_query_hw_init(rscreen, query)) {
+ if (!si_query_hw_init(rscreen, query)) {
FREE(query);
return NULL;
}
@@ -673,7 +702,8 @@
unsigned type, int diff)
{
if (type == PIPE_QUERY_OCCLUSION_COUNTER ||
- type == PIPE_QUERY_OCCLUSION_PREDICATE) {
+ type == PIPE_QUERY_OCCLUSION_PREDICATE ||
+ type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE) {
bool old_enable = rctx->num_occlusion_queries != 0;
bool old_perfect_enable =
rctx->num_perfect_occlusion_queries != 0;
@@ -682,7 +712,7 @@
rctx->num_occlusion_queries += diff;
assert(rctx->num_occlusion_queries >= 0);
- if (type == PIPE_QUERY_OCCLUSION_COUNTER) {
+ if (type != PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE) {
rctx->num_perfect_occlusion_queries += diff;
assert(rctx->num_perfect_occlusion_queries >= 0);
}
@@ -691,22 +721,32 @@
perfect_enable = rctx->num_perfect_occlusion_queries != 0;
if (enable != old_enable || perfect_enable != old_perfect_enable) {
- rctx->set_occlusion_query_state(&rctx->b, enable);
+ rctx->set_occlusion_query_state(&rctx->b, old_enable,
+ old_perfect_enable);
}
}
}
-static unsigned event_type_for_stream(struct r600_query_hw *query)
+static unsigned event_type_for_stream(unsigned stream) /* Map a stream index to its V_028A90_SAMPLE_STREAMOUTSTATS* event value. */
{
- switch (query->stream) {
+ switch (stream) {
default:
- case 0: return EVENT_TYPE_SAMPLE_STREAMOUTSTATS;
- case 1: return EVENT_TYPE_SAMPLE_STREAMOUTSTATS1;
- case 2: return EVENT_TYPE_SAMPLE_STREAMOUTSTATS2;
- case 3: return EVENT_TYPE_SAMPLE_STREAMOUTSTATS3;
+ case 0: return V_028A90_SAMPLE_STREAMOUTSTATS; /* also reached through default: for any stream other than 1, 2 or 3 */
+ case 1: return V_028A90_SAMPLE_STREAMOUTSTATS1;
+ case 2: return V_028A90_SAMPLE_STREAMOUTSTATS2;
+ case 3: return V_028A90_SAMPLE_STREAMOUTSTATS3;
}
}
+static void emit_sample_streamout(struct radeon_winsys_cs *cs, uint64_t va,
+ unsigned stream)
+{ /* Emit one PKT3_EVENT_WRITE packet into cs: header, event dword, then va and va >> 32. */
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
+ radeon_emit(cs, EVENT_TYPE(event_type_for_stream(stream)) | EVENT_INDEX(3)); /* event chosen per stream by event_type_for_stream() */
+ radeon_emit(cs, va); /* NOTE(review): presumably radeon_emit keeps only the low 32 bits of va -- confirm */
+ radeon_emit(cs, va >> 32); /* upper 32 bits of va */
+}
+
static void r600_query_hw_do_emit_start(struct r600_common_context *ctx,
struct r600_query_hw *query,
struct r600_resource *buffer,
@@ -717,8 +757,9 @@
switch (query->b.type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
+ case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
- radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_ZPASS_DONE) | EVENT_INDEX(1));
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
break;
@@ -726,43 +767,36 @@
case PIPE_QUERY_PRIMITIVES_GENERATED:
case PIPE_QUERY_SO_STATISTICS:
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
- radeon_emit(cs, EVENT_TYPE(event_type_for_stream(query)) | EVENT_INDEX(3));
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
+ emit_sample_streamout(cs, va, query->stream);
+ break;
+ case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
+ for (unsigned stream = 0; stream < R600_MAX_STREAMS; ++stream)
+ emit_sample_streamout(cs, va + 32 * stream, stream);
break;
case PIPE_QUERY_TIME_ELAPSED:
- if (ctx->chip_class >= SI) {
- /* Write the timestamp from the CP not waiting for
- * outstanding draws (top-of-pipe).
- */
- radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cs, COPY_DATA_COUNT_SEL |
- COPY_DATA_SRC_SEL(COPY_DATA_TIMESTAMP) |
- COPY_DATA_DST_SEL(COPY_DATA_MEM_ASYNC));
- radeon_emit(cs, 0);
- radeon_emit(cs, 0);
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
- } else {
- /* Write the timestamp after the last draw is done.
- * (bottom-of-pipe)
- */
- r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS,
- 0, 3, NULL, va, 0, 0);
- }
+ /* Write the timestamp from the CP not waiting for
+ * outstanding draws (top-of-pipe).
+ */
+ radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+ radeon_emit(cs, COPY_DATA_COUNT_SEL |
+ COPY_DATA_SRC_SEL(COPY_DATA_TIMESTAMP) |
+ COPY_DATA_DST_SEL(COPY_DATA_MEM_ASYNC));
+ radeon_emit(cs, 0);
+ radeon_emit(cs, 0);
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
break;
case PIPE_QUERY_PIPELINE_STATISTICS:
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
- radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
break;
default:
assert(0);
}
- r600_emit_reloc(ctx, &ctx->gfx, query->buffer.buf, RADEON_USAGE_WRITE,
- RADEON_PRIO_QUERY);
+ radeon_add_to_buffer_list(ctx, &ctx->gfx, query->buffer.buf, RADEON_USAGE_WRITE,
+ RADEON_PRIO_QUERY);
}
static void r600_query_hw_emit_start(struct r600_common_context *ctx,
@@ -774,7 +808,7 @@
return; // previous buffer allocation failure
r600_update_occlusion_query_state(ctx, query->b.type, 1);
- r600_update_prims_generated_query_state(ctx, query->b.type, 1);
+ si_update_prims_generated_query_state(ctx, query->b.type, 1);
ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw_begin + query->num_cs_dw_end,
true);
@@ -809,9 +843,10 @@
switch (query->b.type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
+ case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
va += 8;
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
- radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_ZPASS_DONE) | EVENT_INDEX(1));
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
@@ -821,18 +856,21 @@
case PIPE_QUERY_PRIMITIVES_GENERATED:
case PIPE_QUERY_SO_STATISTICS:
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
- va += query->result_size/2;
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
- radeon_emit(cs, EVENT_TYPE(event_type_for_stream(query)) | EVENT_INDEX(3));
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
+ va += 16;
+ emit_sample_streamout(cs, va, query->stream);
+ break;
+ case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
+ va += 16;
+ for (unsigned stream = 0; stream < R600_MAX_STREAMS; ++stream)
+ emit_sample_streamout(cs, va + 32 * stream, stream);
break;
case PIPE_QUERY_TIME_ELAPSED:
va += 8;
/* fall through */
case PIPE_QUERY_TIMESTAMP:
- r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS,
- 0, 3, NULL, va, 0, 0);
+ si_gfx_write_event_eop(ctx, V_028A90_BOTTOM_OF_PIPE_TS,
+ 0, EOP_DATA_SEL_TIMESTAMP, NULL, va,
+ 0, query->b.type);
fence_va = va + 8;
break;
case PIPE_QUERY_PIPELINE_STATISTICS: {
@@ -840,7 +878,7 @@
va += sample_size;
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
- radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
@@ -850,12 +888,14 @@
default:
assert(0);
}
- r600_emit_reloc(ctx, &ctx->gfx, query->buffer.buf, RADEON_USAGE_WRITE,
- RADEON_PRIO_QUERY);
+ radeon_add_to_buffer_list(ctx, &ctx->gfx, query->buffer.buf, RADEON_USAGE_WRITE,
+ RADEON_PRIO_QUERY);
if (fence_va)
- r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS, 0, 1,
- query->buffer.buf, fence_va, 0, 0x80000000);
+ si_gfx_write_event_eop(ctx, V_028A90_BOTTOM_OF_PIPE_TS, 0,
+ EOP_DATA_SEL_VALUE_32BIT,
+ query->buffer.buf, fence_va, 0x80000000,
+ query->b.type);
}
static void r600_query_hw_emit_stop(struct r600_common_context *ctx,
@@ -882,48 +922,85 @@
ctx->num_cs_dw_queries_suspend -= query->num_cs_dw_end;
r600_update_occlusion_query_state(ctx, query->b.type, -1);
- r600_update_prims_generated_query_state(ctx, query->b.type, -1);
+ si_update_prims_generated_query_state(ctx, query->b.type, -1);
+}
+
+static void emit_set_predicate(struct r600_common_context *ctx,
+ struct r600_resource *buf, uint64_t va,
+ uint32_t op)
+{ /* Emit one PKT3_SET_PREDICATION packet for (va, op) on the gfx CS, then add buf to the gfx buffer list. */
+ struct radeon_winsys_cs *cs = ctx->gfx.cs;
+
+ if (ctx->chip_class >= GFX9) { /* GFX9 and later: op gets its own dword, followed by va and va >> 32 */
+ radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 2, 0));
+ radeon_emit(cs, op);
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ } else { /* earlier chips: shorter packet, op shares the last dword with address bits */
+ radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 1, 0));
+ radeon_emit(cs, va);
+ radeon_emit(cs, op | ((va >> 32) & 0xFF)); /* bits 32..39 of va are OR-ed into the op dword */
+ }
+ radeon_add_to_buffer_list(ctx, &ctx->gfx, buf, RADEON_USAGE_READ, /* buf is registered with read usage only */
+ RADEON_PRIO_QUERY);
+}
static void r600_emit_query_predication(struct r600_common_context *ctx,
struct r600_atom *atom)
{
- struct radeon_winsys_cs *cs = ctx->gfx.cs;
struct r600_query_hw *query = (struct r600_query_hw *)ctx->render_cond;
struct r600_query_buffer *qbuf;
uint32_t op;
- bool flag_wait;
+ bool flag_wait, invert;
if (!query)
return;
+ invert = ctx->render_cond_invert;
flag_wait = ctx->render_cond_mode == PIPE_RENDER_COND_WAIT ||
ctx->render_cond_mode == PIPE_RENDER_COND_BY_REGION_WAIT;
- switch (query->b.type) {
- case PIPE_QUERY_OCCLUSION_COUNTER:
- case PIPE_QUERY_OCCLUSION_PREDICATE:
- op = PRED_OP(PREDICATION_OP_ZPASS);
- break;
- case PIPE_QUERY_PRIMITIVES_EMITTED:
- case PIPE_QUERY_PRIMITIVES_GENERATED:
- case PIPE_QUERY_SO_STATISTICS:
- case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
- op = PRED_OP(PREDICATION_OP_PRIMCOUNT);
- break;
- default:
- assert(0);
- return;
+ if (query->workaround_buf) {
+ op = PRED_OP(PREDICATION_OP_BOOL64);
+ } else {
+ switch (query->b.type) {
+ case PIPE_QUERY_OCCLUSION_COUNTER:
+ case PIPE_QUERY_OCCLUSION_PREDICATE:
+ case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
+ op = PRED_OP(PREDICATION_OP_ZPASS);
+ break;
+ case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
+ op = PRED_OP(PREDICATION_OP_PRIMCOUNT);
+ invert = !invert;
+ break;
+ default:
+ assert(0);
+ return;
+ }
}
/* if true then invert, see GL_ARB_conditional_render_inverted */
- if (ctx->render_cond_invert)
- op |= PREDICATION_DRAW_NOT_VISIBLE; /* Draw if not visable/overflow */
+ if (invert)
+ op |= PREDICATION_DRAW_NOT_VISIBLE; /* Draw if not visible or overflow */
else
- op |= PREDICATION_DRAW_VISIBLE; /* Draw if visable/overflow */
+ op |= PREDICATION_DRAW_VISIBLE; /* Draw if visible or no overflow */
+
+ /* Use the value written by compute shader as a workaround. Note that
+ * the wait flag does not apply in this predication mode.
+ *
+ * The shader outputs the result value to L2. Workarounds only affect VI
+ * and later, where the CP reads data from L2, so we don't need an
+ * additional flush.
+ */
+ if (query->workaround_buf) {
+ uint64_t va = query->workaround_buf->gpu_address + query->workaround_offset;
+ emit_set_predicate(ctx, query->workaround_buf, va, op);
+ return;
+ }
op |= flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW;
-
+
/* emit predicate packets for all data blocks */
for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
unsigned results_base = 0;
@@ -932,22 +1009,19 @@
while (results_base < qbuf->results_end) {
uint64_t va = va_base + results_base;
- if (ctx->chip_class >= GFX9) {
- radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 2, 0));
- radeon_emit(cs, op);
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
+ if (query->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
+ for (unsigned stream = 0; stream < R600_MAX_STREAMS; ++stream) {
+ emit_set_predicate(ctx, qbuf->buf, va + 32 * stream, op);
+
+ /* set CONTINUE bit for all packets except the first */
+ op |= PREDICATION_CONTINUE;
+ }
} else {
- radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 1, 0));
- radeon_emit(cs, va);
- radeon_emit(cs, op | ((va >> 32) & 0xFF));
+ emit_set_predicate(ctx, qbuf->buf, va, op);
+ op |= PREDICATION_CONTINUE;
}
- r600_emit_reloc(ctx, &ctx->gfx, qbuf->buf, RADEON_USAGE_READ,
- RADEON_PRIO_QUERY);
- results_base += query->result_size;
- /* set CONTINUE bit for all packets except the first */
- op |= PREDICATION_CONTINUE;
+ results_base += query->result_size;
}
}
}
@@ -982,8 +1056,8 @@
return rquery->ops->begin(rctx, rquery);
}
-void r600_query_hw_reset_buffers(struct r600_common_context *rctx,
- struct r600_query_hw *query)
+void si_query_hw_reset_buffers(struct r600_common_context *rctx,
+ struct r600_query_hw *query)
{
struct r600_query_buffer *prev = query->buffer.previous;
@@ -999,7 +1073,7 @@
query->buffer.previous = NULL;
/* Obtain a new buffer if the current one can't be mapped without a stall. */
- if (r600_rings_is_buffer_referenced(rctx, query->buffer.buf->buf, RADEON_USAGE_READWRITE) ||
+ if (si_rings_is_buffer_referenced(rctx, query->buffer.buf->buf, RADEON_USAGE_READWRITE) ||
!rctx->ws->buffer_wait(query->buffer.buf->buf, 0, RADEON_USAGE_READWRITE)) {
r600_resource_reference(&query->buffer.buf, NULL);
query->buffer.buf = r600_new_query_buffer(rctx->screen, query);
@@ -1009,8 +1083,8 @@
}
}
-bool r600_query_hw_begin(struct r600_common_context *rctx,
- struct r600_query *rquery)
+bool si_query_hw_begin(struct r600_common_context *rctx,
+ struct r600_query *rquery)
{
struct r600_query_hw *query = (struct r600_query_hw *)rquery;
@@ -1020,7 +1094,9 @@
}
if (!(query->flags & R600_QUERY_HW_FLAG_BEGIN_RESUMES))
- r600_query_hw_reset_buffers(rctx, query);
+ si_query_hw_reset_buffers(rctx, query);
+
+ r600_resource_reference(&query->workaround_buf, NULL);
r600_query_hw_emit_start(rctx, query);
if (!query->buffer.buf)
@@ -1038,13 +1114,13 @@
return rquery->ops->end(rctx, rquery);
}
-bool r600_query_hw_end(struct r600_common_context *rctx,
- struct r600_query *rquery)
+bool si_query_hw_end(struct r600_common_context *rctx,
+ struct r600_query *rquery)
{
struct r600_query_hw *query = (struct r600_query_hw *)rquery;
if (query->flags & R600_QUERY_HW_FLAG_NO_START)
- r600_query_hw_reset_buffers(rctx, query);
+ si_query_hw_reset_buffers(rctx, query);
r600_query_hw_emit_stop(rctx, query);
@@ -1069,6 +1145,7 @@
switch (rquery->b.type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
+ case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
params->start_offset = 0;
params->end_offset = 8;
params->fence_offset = max_rbs * 16;
@@ -1100,6 +1177,19 @@
params->end_offset = 24 - index * 8;
params->fence_offset = params->end_offset + 4;
break;
+ case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
+ params->pair_count = R600_MAX_STREAMS;
+ params->pair_stride = 32;
+ case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ params->start_offset = 0;
+ params->end_offset = 16;
+
+ /* We can re-use the high dword of the last 64-bit value as a
+ * fence: it is initialized as 0, and the high bit is set by
+ * the write of the streamout stats event.
+ */
+ params->fence_offset = rquery->result_size - 4;
+ break;
case PIPE_QUERY_PIPELINE_STATISTICS:
{
/* Offsets apply to EG+ */
@@ -1148,7 +1238,8 @@
}
break;
}
- case PIPE_QUERY_OCCLUSION_PREDICATE: {
+ case PIPE_QUERY_OCCLUSION_PREDICATE:
+ case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: {
for (unsigned i = 0; i < max_rbs; ++i) {
unsigned results_base = i * 16;
result->b = result->b ||
@@ -1186,48 +1277,37 @@
r600_query_read_result(buffer, 2, 6, true) !=
r600_query_read_result(buffer, 0, 4, true);
break;
- case PIPE_QUERY_PIPELINE_STATISTICS:
- if (rscreen->chip_class >= EVERGREEN) {
- result->pipeline_statistics.ps_invocations +=
- r600_query_read_result(buffer, 0, 22, false);
- result->pipeline_statistics.c_primitives +=
- r600_query_read_result(buffer, 2, 24, false);
- result->pipeline_statistics.c_invocations +=
- r600_query_read_result(buffer, 4, 26, false);
- result->pipeline_statistics.vs_invocations +=
- r600_query_read_result(buffer, 6, 28, false);
- result->pipeline_statistics.gs_invocations +=
- r600_query_read_result(buffer, 8, 30, false);
- result->pipeline_statistics.gs_primitives +=
- r600_query_read_result(buffer, 10, 32, false);
- result->pipeline_statistics.ia_primitives +=
- r600_query_read_result(buffer, 12, 34, false);
- result->pipeline_statistics.ia_vertices +=
- r600_query_read_result(buffer, 14, 36, false);
- result->pipeline_statistics.hs_invocations +=
- r600_query_read_result(buffer, 16, 38, false);
- result->pipeline_statistics.ds_invocations +=
- r600_query_read_result(buffer, 18, 40, false);
- result->pipeline_statistics.cs_invocations +=
- r600_query_read_result(buffer, 20, 42, false);
- } else {
- result->pipeline_statistics.ps_invocations +=
- r600_query_read_result(buffer, 0, 16, false);
- result->pipeline_statistics.c_primitives +=
- r600_query_read_result(buffer, 2, 18, false);
- result->pipeline_statistics.c_invocations +=
- r600_query_read_result(buffer, 4, 20, false);
- result->pipeline_statistics.vs_invocations +=
- r600_query_read_result(buffer, 6, 22, false);
- result->pipeline_statistics.gs_invocations +=
- r600_query_read_result(buffer, 8, 24, false);
- result->pipeline_statistics.gs_primitives +=
- r600_query_read_result(buffer, 10, 26, false);
- result->pipeline_statistics.ia_primitives +=
- r600_query_read_result(buffer, 12, 28, false);
- result->pipeline_statistics.ia_vertices +=
- r600_query_read_result(buffer, 14, 30, false);
+ case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
+ for (unsigned stream = 0; stream < R600_MAX_STREAMS; ++stream) {
+ result->b = result->b ||
+ r600_query_read_result(buffer, 2, 6, true) !=
+ r600_query_read_result(buffer, 0, 4, true);
+ buffer = (char *)buffer + 32;
}
+ break;
+ case PIPE_QUERY_PIPELINE_STATISTICS:
+ result->pipeline_statistics.ps_invocations +=
+ r600_query_read_result(buffer, 0, 22, false);
+ result->pipeline_statistics.c_primitives +=
+ r600_query_read_result(buffer, 2, 24, false);
+ result->pipeline_statistics.c_invocations +=
+ r600_query_read_result(buffer, 4, 26, false);
+ result->pipeline_statistics.vs_invocations +=
+ r600_query_read_result(buffer, 6, 28, false);
+ result->pipeline_statistics.gs_invocations +=
+ r600_query_read_result(buffer, 8, 30, false);
+ result->pipeline_statistics.gs_primitives +=
+ r600_query_read_result(buffer, 10, 32, false);
+ result->pipeline_statistics.ia_primitives +=
+ r600_query_read_result(buffer, 12, 34, false);
+ result->pipeline_statistics.ia_vertices +=
+ r600_query_read_result(buffer, 14, 36, false);
+ result->pipeline_statistics.hs_invocations +=
+ r600_query_read_result(buffer, 16, 38, false);
+ result->pipeline_statistics.ds_invocations +=
+ r600_query_read_result(buffer, 18, 40, false);
+ result->pipeline_statistics.cs_invocations +=
+ r600_query_read_result(buffer, 20, 42, false);
#if 0 /* for testing */
printf("Pipeline stats: IA verts=%llu, IA prims=%llu, VS=%llu, HS=%llu, "
"DS=%llu, GS=%llu, GS prims=%llu, Clipper=%llu, "
@@ -1281,9 +1361,9 @@
util_query_clear_result(result, query->b.type);
}
-bool r600_query_hw_get_result(struct r600_common_context *rctx,
- struct r600_query *rquery,
- bool wait, union pipe_query_result *result)
+bool si_query_hw_get_result(struct r600_common_context *rctx,
+ struct r600_query *rquery,
+ bool wait, union pipe_query_result *result)
{
struct r600_common_screen *rscreen = rctx->screen;
struct r600_query_hw *query = (struct r600_query_hw *)rquery;
@@ -1300,7 +1380,7 @@
if (rquery->b.flushed)
map = rctx->ws->buffer_map(qbuf->buf->buf, NULL, usage);
else
- map = r600_buffer_map_sync_with_rings(rctx, qbuf->buf, usage);
+ map = si_buffer_map_sync_with_rings(rctx, qbuf->buf, usage);
if (!map)
return false;
@@ -1343,6 +1423,7 @@
* 32: apply timestamp conversion
* 64: store full 64 bits result
* 128: store signed 32 bits result
+ * 256: SO_OVERFLOW mode: take the difference of two successive half-pairs
* 1.x = fence_offset
* 1.y = pair_stride
* 1.z = pair_count
@@ -1367,18 +1448,18 @@
"DCL BUFFER[0]\n"
"DCL BUFFER[1]\n"
"DCL BUFFER[2]\n"
- "DCL CONST[0..1]\n"
+ "DCL CONST[0][0..1]\n"
"DCL TEMP[0..5]\n"
"IMM[0] UINT32 {0, 31, 2147483647, 4294967295}\n"
"IMM[1] UINT32 {1, 2, 4, 8}\n"
"IMM[2] UINT32 {16, 32, 64, 128}\n"
"IMM[3] UINT32 {1000000, 0, %u, 0}\n" /* for timestamp conversion */
- "IMM[4] UINT32 {0, 0, 0, 0}\n"
+ "IMM[4] UINT32 {256, 0, 0, 0}\n"
- "AND TEMP[5], CONST[0].wwww, IMM[2].xxxx\n"
+ "AND TEMP[5], CONST[0][0].wwww, IMM[2].xxxx\n"
"UIF TEMP[5]\n"
/* Check result availability. */
- "LOAD TEMP[1].x, BUFFER[0], CONST[1].xxxx\n"
+ "LOAD TEMP[1].x, BUFFER[0], CONST[0][1].xxxx\n"
"ISHR TEMP[0].z, TEMP[1].xxxx, IMM[0].yyyy\n"
"MOV TEMP[1], TEMP[0].zzzz\n"
"NOT TEMP[0].z, TEMP[0].zzzz\n"
@@ -1390,7 +1471,7 @@
"ELSE\n"
/* Load previously accumulated result if requested. */
"MOV TEMP[0], IMM[0].xxxx\n"
- "AND TEMP[4], CONST[0].wwww, IMM[1].xxxx\n"
+ "AND TEMP[4], CONST[0][0].wwww, IMM[1].xxxx\n"
"UIF TEMP[4]\n"
"LOAD TEMP[0].xyz, BUFFER[1], IMM[0].xxxx\n"
"ENDIF\n"
@@ -1403,13 +1484,13 @@
"ENDIF\n"
/* Break if result_index >= result_count. */
- "USGE TEMP[5], TEMP[1].xxxx, CONST[0].zzzz\n"
+ "USGE TEMP[5], TEMP[1].xxxx, CONST[0][0].zzzz\n"
"UIF TEMP[5]\n"
"BRK\n"
"ENDIF\n"
/* Load fence and check result availability */
- "UMAD TEMP[5].x, TEMP[1].xxxx, CONST[0].yyyy, CONST[1].xxxx\n"
+ "UMAD TEMP[5].x, TEMP[1].xxxx, CONST[0][0].yyyy, CONST[0][1].xxxx\n"
"LOAD TEMP[5].x, BUFFER[0], TEMP[5].xxxx\n"
"ISHR TEMP[0].z, TEMP[5].xxxx, IMM[0].yyyy\n"
"NOT TEMP[0].z, TEMP[0].zzzz\n"
@@ -1420,19 +1501,33 @@
"MOV TEMP[1].y, IMM[0].xxxx\n"
"BGNLOOP\n"
/* Load start and end. */
- "UMUL TEMP[5].x, TEMP[1].xxxx, CONST[0].yyyy\n"
- "UMAD TEMP[5].x, TEMP[1].yyyy, CONST[1].yyyy, TEMP[5].xxxx\n"
+ "UMUL TEMP[5].x, TEMP[1].xxxx, CONST[0][0].yyyy\n"
+ "UMAD TEMP[5].x, TEMP[1].yyyy, CONST[0][1].yyyy, TEMP[5].xxxx\n"
"LOAD TEMP[2].xy, BUFFER[0], TEMP[5].xxxx\n"
- "UADD TEMP[5].x, TEMP[5].xxxx, CONST[0].xxxx\n"
- "LOAD TEMP[3].xy, BUFFER[0], TEMP[5].xxxx\n"
+ "UADD TEMP[5].y, TEMP[5].xxxx, CONST[0][0].xxxx\n"
+ "LOAD TEMP[3].xy, BUFFER[0], TEMP[5].yyyy\n"
+
+ "U64ADD TEMP[4].xy, TEMP[3], -TEMP[2]\n"
- "U64ADD TEMP[3].xy, TEMP[3], -TEMP[2]\n"
- "U64ADD TEMP[0].xy, TEMP[0], TEMP[3]\n"
+ "AND TEMP[5].z, CONST[0][0].wwww, IMM[4].xxxx\n"
+ "UIF TEMP[5].zzzz\n"
+ /* Load second start/end half-pair and
+ * take the difference
+ */
+ "UADD TEMP[5].xy, TEMP[5], IMM[1].wwww\n"
+ "LOAD TEMP[2].xy, BUFFER[0], TEMP[5].xxxx\n"
+ "LOAD TEMP[3].xy, BUFFER[0], TEMP[5].yyyy\n"
+
+ "U64ADD TEMP[3].xy, TEMP[3], -TEMP[2]\n"
+ "U64ADD TEMP[4].xy, TEMP[4], -TEMP[3]\n"
+ "ENDIF\n"
+
+ "U64ADD TEMP[0].xy, TEMP[0], TEMP[4]\n"
/* Increment pair index */
"UADD TEMP[1].y, TEMP[1].yyyy, IMM[1].xxxx\n"
- "USGE TEMP[5], TEMP[1].yyyy, CONST[1].zzzz\n"
+ "USGE TEMP[5], TEMP[1].yyyy, CONST[0][1].zzzz\n"
"UIF TEMP[5]\n"
"BRK\n"
"ENDIF\n"
@@ -1443,19 +1538,19 @@
"ENDLOOP\n"
"ENDIF\n"
- "AND TEMP[4], CONST[0].wwww, IMM[1].yyyy\n"
+ "AND TEMP[4], CONST[0][0].wwww, IMM[1].yyyy\n"
"UIF TEMP[4]\n"
/* Store accumulated data for chaining. */
"STORE BUFFER[2].xyz, IMM[0].xxxx, TEMP[0]\n"
"ELSE\n"
- "AND TEMP[4], CONST[0].wwww, IMM[1].zzzz\n"
+ "AND TEMP[4], CONST[0][0].wwww, IMM[1].zzzz\n"
"UIF TEMP[4]\n"
/* Store result availability. */
"NOT TEMP[0].z, TEMP[0]\n"
"AND TEMP[0].z, TEMP[0].zzzz, IMM[1].xxxx\n"
"STORE BUFFER[2].x, IMM[0].xxxx, TEMP[0].zzzz\n"
- "AND TEMP[4], CONST[0].wwww, IMM[2].zzzz\n"
+ "AND TEMP[4], CONST[0][0].wwww, IMM[2].zzzz\n"
"UIF TEMP[4]\n"
"STORE BUFFER[2].y, IMM[0].xxxx, IMM[0].xxxx\n"
"ENDIF\n"
@@ -1464,21 +1559,21 @@
"NOT TEMP[4], TEMP[0].zzzz\n"
"UIF TEMP[4]\n"
/* Apply timestamp conversion */
- "AND TEMP[4], CONST[0].wwww, IMM[2].yyyy\n"
+ "AND TEMP[4], CONST[0][0].wwww, IMM[2].yyyy\n"
"UIF TEMP[4]\n"
"U64MUL TEMP[0].xy, TEMP[0], IMM[3].xyxy\n"
"U64DIV TEMP[0].xy, TEMP[0], IMM[3].zwzw\n"
"ENDIF\n"
/* Convert to boolean */
- "AND TEMP[4], CONST[0].wwww, IMM[1].wwww\n"
+ "AND TEMP[4], CONST[0][0].wwww, IMM[1].wwww\n"
"UIF TEMP[4]\n"
"U64SNE TEMP[0].x, TEMP[0].xyxy, IMM[4].zwzw\n"
"AND TEMP[0].x, TEMP[0].xxxx, IMM[1].xxxx\n"
"MOV TEMP[0].y, IMM[0].xxxx\n"
"ENDIF\n"
- "AND TEMP[4], CONST[0].wwww, IMM[2].zzzz\n"
+ "AND TEMP[4], CONST[0][0].wwww, IMM[2].zzzz\n"
"UIF TEMP[4]\n"
"STORE BUFFER[2].xy, IMM[0].xxxx, TEMP[0].xyxy\n"
"ELSE\n"
@@ -1487,7 +1582,7 @@
"MOV TEMP[0].x, IMM[0].wwww\n"
"ENDIF\n"
- "AND TEMP[4], CONST[0].wwww, IMM[2].wwww\n"
+ "AND TEMP[4], CONST[0][0].wwww, IMM[2].wwww\n"
"UIF TEMP[4]\n"
"UMIN TEMP[0].x, TEMP[0].xxxx, IMM[0].zzzz\n"
"ENDIF\n"
@@ -1606,8 +1701,11 @@
if (index < 0)
consts.config |= 4;
if (query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE ||
- query->b.type == PIPE_QUERY_SO_OVERFLOW_PREDICATE)
+ query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE)
consts.config |= 8;
+ else if (query->b.type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
+ query->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
+ consts.config |= 8 | 256;
else if (query->b.type == PIPE_QUERY_TIMESTAMP ||
query->b.type == PIPE_QUERY_TIME_ELAPSED)
consts.config |= 32;
@@ -1669,7 +1767,7 @@
va = qbuf->buf->gpu_address + qbuf->results_end - query->result_size;
va += params.fence_offset;
- r600_gfx_wait_fence(rctx, va, 0x80000000, 0x80000000);
+ si_gfx_wait_fence(rctx, va, 0x80000000, 0x80000000);
}
rctx->b.launch_grid(&rctx->b, &grid);
@@ -1687,24 +1785,60 @@
{
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
struct r600_query_hw *rquery = (struct r600_query_hw *)query;
- struct r600_query_buffer *qbuf;
struct r600_atom *atom = &rctx->render_cond_atom;
+ if (query) {
+ bool needs_workaround = false;
+
+ /* There was a firmware regression in VI which causes successive
+ * SET_PREDICATION packets to give the wrong answer for
+ * non-inverted stream overflow predication.
+ */
+ if (((rctx->chip_class == VI && rctx->screen->info.pfp_fw_feature < 49) ||
+ (rctx->chip_class == GFX9 && rctx->screen->info.pfp_fw_feature < 38)) &&
+ !condition &&
+ (rquery->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE ||
+ (rquery->b.type == PIPE_QUERY_SO_OVERFLOW_PREDICATE &&
+ (rquery->buffer.previous ||
+ rquery->buffer.results_end > rquery->result_size)))) {
+ needs_workaround = true;
+ }
+
+ if (needs_workaround && !rquery->workaround_buf) {
+ bool old_force_off = rctx->render_cond_force_off;
+ rctx->render_cond_force_off = true;
+
+ u_suballocator_alloc(
+ rctx->allocator_zeroed_memory, 8, 8,
+ &rquery->workaround_offset,
+ (struct pipe_resource **)&rquery->workaround_buf);
+
+ /* Reset to NULL to avoid a redundant SET_PREDICATION
+ * from launching the compute grid.
+ */
+ rctx->render_cond = NULL;
+
+ ctx->get_query_result_resource(
+ ctx, query, true, PIPE_QUERY_TYPE_U64, 0,
+ &rquery->workaround_buf->b.b, rquery->workaround_offset);
+
+ /* Setting this in the render cond atom is too late,
+ * so set it here. */
+ rctx->flags |= rctx->screen->barrier_flags.L2_to_cp |
+ R600_CONTEXT_FLUSH_FOR_RENDER_COND;
+
+ rctx->render_cond_force_off = old_force_off;
+ }
+ }
+
rctx->render_cond = query;
rctx->render_cond_invert = condition;
rctx->render_cond_mode = mode;
- /* Compute the size of SET_PREDICATION packets. */
- atom->num_dw = 0;
- if (query) {
- for (qbuf = &rquery->buffer; qbuf; qbuf = qbuf->previous)
- atom->num_dw += (qbuf->results_end / rquery->result_size) * 5;
- }
-
rctx->set_atom_dirty(rctx, atom, query != NULL);
}
-void r600_suspend_queries(struct r600_common_context *ctx)
+void si_suspend_queries(struct r600_common_context *ctx)
{
struct r600_query_hw *query;
@@ -1731,15 +1865,13 @@
*/
num_dw += query->num_cs_dw_end;
}
- /* primitives generated query */
- num_dw += ctx->streamout.enable_atom.num_dw;
/* guess for ZPASS enable or PERFECT_ZPASS_COUNT enable updates */
num_dw += 13;
return num_dw;
}
-void r600_resume_queries(struct r600_common_context *ctx)
+void si_resume_queries(struct r600_common_context *ctx)
{
struct r600_query_hw *query;
unsigned num_cs_dw = r600_queries_num_cs_dw_for_resuming(ctx, &ctx->active_queries);
@@ -1754,84 +1886,6 @@
}
}
-/* Fix radeon_info::enabled_rb_mask for R600, R700, EVERGREEN, NI. */
-void r600_query_fix_enabled_rb_mask(struct r600_common_screen *rscreen)
-{
- struct r600_common_context *ctx =
- (struct r600_common_context*)rscreen->aux_context;
- struct radeon_winsys_cs *cs = ctx->gfx.cs;
- struct r600_resource *buffer;
- uint32_t *results;
- unsigned i, mask = 0;
- unsigned max_rbs = ctx->screen->info.num_render_backends;
-
- assert(rscreen->chip_class <= CAYMAN);
-
- /* if backend_map query is supported by the kernel */
- if (rscreen->info.r600_gb_backend_map_valid) {
- unsigned num_tile_pipes = rscreen->info.num_tile_pipes;
- unsigned backend_map = rscreen->info.r600_gb_backend_map;
- unsigned item_width, item_mask;
-
- if (ctx->chip_class >= EVERGREEN) {
- item_width = 4;
- item_mask = 0x7;
- } else {
- item_width = 2;
- item_mask = 0x3;
- }
-
- while (num_tile_pipes--) {
- i = backend_map & item_mask;
- mask |= (1<<i);
- backend_map >>= item_width;
- }
- if (mask != 0) {
- rscreen->info.enabled_rb_mask = mask;
- return;
- }
- }
-
- /* otherwise backup path for older kernels */
-
- /* create buffer for event data */
- buffer = (struct r600_resource*)
- pipe_buffer_create(ctx->b.screen, 0,
- PIPE_USAGE_STAGING, max_rbs * 16);
- if (!buffer)
- return;
-
- /* initialize buffer with zeroes */
- results = r600_buffer_map_sync_with_rings(ctx, buffer, PIPE_TRANSFER_WRITE);
- if (results) {
- memset(results, 0, max_rbs * 4 * 4);
-
- /* emit EVENT_WRITE for ZPASS_DONE */
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
- radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
- radeon_emit(cs, buffer->gpu_address);
- radeon_emit(cs, buffer->gpu_address >> 32);
-
- r600_emit_reloc(ctx, &ctx->gfx, buffer,
- RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
-
- /* analyze results */
- results = r600_buffer_map_sync_with_rings(ctx, buffer, PIPE_TRANSFER_READ);
- if (results) {
- for(i = 0; i < max_rbs; i++) {
- /* at least highest bit will be set if backend is used */
- if (results[i*4 + 1])
- mask |= (1<<i);
- }
- }
- }
-
- r600_resource_reference(&buffer, NULL);
-
- if (mask)
- rscreen->info.enabled_rb_mask = mask;
-}
-
#define XFULL(name_, query_type_, type_, result_type_, group_id_) \
{ \
.name = name_, \
@@ -1852,6 +1906,8 @@
X("num-shaders-created", NUM_SHADERS_CREATED, UINT64, CUMULATIVE),
X("num-shader-cache-hits", NUM_SHADER_CACHE_HITS, UINT64, CUMULATIVE),
X("draw-calls", DRAW_CALLS, UINT64, AVERAGE),
+ X("decompress-calls", DECOMPRESS_CALLS, UINT64, AVERAGE),
+ X("MRT-draw-calls", MRT_DRAW_CALLS, UINT64, AVERAGE),
X("prim-restart-calls", PRIM_RESTART_CALLS, UINT64, AVERAGE),
X("spill-draw-calls", SPILL_DRAW_CALLS, UINT64, AVERAGE),
X("compute-calls", COMPUTE_CALLS, UINT64, AVERAGE),
@@ -1880,6 +1936,7 @@
X("num-GFX-IBs", NUM_GFX_IBS, UINT64, AVERAGE),
X("num-SDMA-IBs", NUM_SDMA_IBS, UINT64, AVERAGE),
X("GFX-BO-list-size", GFX_BO_LIST_SIZE, UINT64, AVERAGE),
+ X("GFX-IB-size", GFX_IB_SIZE, UINT64, AVERAGE),
X("num-bytes-moved", NUM_BYTES_MOVED, BYTES, CUMULATIVE),
X("num-evictions", NUM_EVICTIONS, UINT64, CUMULATIVE),
X("VRAM-CPU-page-faults", NUM_VRAM_CPU_PAGE_FAULTS, UINT64, CUMULATIVE),
@@ -1924,9 +1981,8 @@
X("GPU-meq-busy", GPU_MEQ_BUSY, UINT64, AVERAGE),
X("GPU-me-busy", GPU_ME_BUSY, UINT64, AVERAGE),
X("GPU-surf-sync-busy", GPU_SURF_SYNC_BUSY, UINT64, AVERAGE),
- X("GPU-dma-busy", GPU_DMA_BUSY, UINT64, AVERAGE),
+ X("GPU-cp-dma-busy", GPU_CP_DMA_BUSY, UINT64, AVERAGE),
X("GPU-scratch-ram-busy", GPU_SCRATCH_RAM_BUSY, UINT64, AVERAGE),
- X("GPU-ce-busy", GPU_CE_BUSY, UINT64, AVERAGE),
};
#undef X
@@ -1956,13 +2012,13 @@
if (!info) {
unsigned num_perfcounters =
- r600_get_perfcounter_info(rscreen, 0, NULL);
+ si_get_perfcounter_info(rscreen, 0, NULL);
return num_queries + num_perfcounters;
}
if (index >= num_queries)
- return r600_get_perfcounter_info(rscreen, index - num_queries, info);
+ return si_get_perfcounter_info(rscreen, index - num_queries, info);
*info = r600_driver_query_list[index];
@@ -2009,7 +2065,7 @@
return num_pc_groups + R600_NUM_SW_QUERY_GROUPS;
if (index < num_pc_groups)
- return r600_get_perfcounter_group_info(rscreen, index, info);
+ return si_get_perfcounter_group_info(rscreen, index, info);
index -= num_pc_groups;
if (index >= R600_NUM_SW_QUERY_GROUPS)
@@ -2021,10 +2077,10 @@
return 1;
}
-void r600_query_init(struct r600_common_context *rctx)
+void si_init_query_functions(struct r600_common_context *rctx)
{
rctx->b.create_query = r600_create_query;
- rctx->b.create_batch_query = r600_create_batch_query;
+ rctx->b.create_batch_query = si_create_batch_query;
rctx->b.destroy_query = r600_destroy_query;
rctx->b.begin_query = r600_begin_query;
rctx->b.end_query = r600_end_query;
@@ -2038,7 +2094,7 @@
LIST_INITHEAD(&rctx->active_queries);
}
-void r600_init_screen_query_functions(struct r600_common_screen *rscreen)
+void si_init_screen_query_functions(struct r600_common_screen *rscreen)
{
rscreen->b.get_driver_query_info = r600_get_driver_query_info;
rscreen->b.get_driver_query_group_info = r600_get_driver_query_group_info;
diff -Nru mesa-17.2.4/src/gallium/drivers/radeon/r600_query.h mesa-17.3.3/src/gallium/drivers/radeon/r600_query.h
--- mesa-17.2.4/src/gallium/drivers/radeon/r600_query.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeon/r600_query.h 2018-01-18 21:30:28.000000000 +0000
@@ -42,6 +42,8 @@
enum {
R600_QUERY_DRAW_CALLS = PIPE_QUERY_DRIVER_SPECIFIC,
+ R600_QUERY_DECOMPRESS_CALLS,
+ R600_QUERY_MRT_DRAW_CALLS,
R600_QUERY_PRIM_RESTART_CALLS,
R600_QUERY_SPILL_DRAW_CALLS,
R600_QUERY_COMPUTE_CALLS,
@@ -70,6 +72,7 @@
R600_QUERY_NUM_GFX_IBS,
R600_QUERY_NUM_SDMA_IBS,
R600_QUERY_GFX_BO_LIST_SIZE,
+ R600_QUERY_GFX_IB_SIZE,
R600_QUERY_NUM_BYTES_MOVED,
R600_QUERY_NUM_EVICTIONS,
R600_QUERY_NUM_VRAM_CPU_PAGE_FAULTS,
@@ -98,9 +101,8 @@
R600_QUERY_GPU_MEQ_BUSY,
R600_QUERY_GPU_ME_BUSY,
R600_QUERY_GPU_SURF_SYNC_BUSY,
- R600_QUERY_GPU_DMA_BUSY,
+ R600_QUERY_GPU_CP_DMA_BUSY,
R600_QUERY_GPU_SCRATCH_RAM_BUSY,
- R600_QUERY_GPU_CE_BUSY,
R600_QUERY_NUM_COMPILATIONS,
R600_QUERY_NUM_SHADERS_CREATED,
R600_QUERY_BACK_BUFFER_PS_DRAW_RATIO,
@@ -193,20 +195,24 @@
struct list_head list;
/* For transform feedback: which stream the query is for */
unsigned stream;
+
+ /* Workaround via compute shader */
+ struct r600_resource *workaround_buf;
+ unsigned workaround_offset;
};
-bool r600_query_hw_init(struct r600_common_screen *rscreen,
- struct r600_query_hw *query);
-void r600_query_hw_destroy(struct r600_common_screen *rscreen,
- struct r600_query *rquery);
-bool r600_query_hw_begin(struct r600_common_context *rctx,
+bool si_query_hw_init(struct r600_common_screen *rscreen,
+ struct r600_query_hw *query);
+void si_query_hw_destroy(struct r600_common_screen *rscreen,
struct r600_query *rquery);
-bool r600_query_hw_end(struct r600_common_context *rctx,
+bool si_query_hw_begin(struct r600_common_context *rctx,
struct r600_query *rquery);
-bool r600_query_hw_get_result(struct r600_common_context *rctx,
- struct r600_query *rquery,
- bool wait,
- union pipe_query_result *result);
+bool si_query_hw_end(struct r600_common_context *rctx,
+ struct r600_query *rquery);
+bool si_query_hw_get_result(struct r600_common_context *rctx,
+ struct r600_query *rquery,
+ bool wait,
+ union pipe_query_result *result);
/* Performance counters */
enum {
@@ -292,26 +298,26 @@
bool separate_instance;
};
-struct pipe_query *r600_create_batch_query(struct pipe_context *ctx,
- unsigned num_queries,
- unsigned *query_types);
-
-int r600_get_perfcounter_info(struct r600_common_screen *,
- unsigned index,
- struct pipe_driver_query_info *info);
-int r600_get_perfcounter_group_info(struct r600_common_screen *,
- unsigned index,
- struct pipe_driver_query_group_info *info);
-
-bool r600_perfcounters_init(struct r600_perfcounters *, unsigned num_blocks);
-void r600_perfcounters_add_block(struct r600_common_screen *,
- struct r600_perfcounters *,
- const char *name, unsigned flags,
- unsigned counters, unsigned selectors,
- unsigned instances, void *data);
-void r600_perfcounters_do_destroy(struct r600_perfcounters *);
-void r600_query_hw_reset_buffers(struct r600_common_context *rctx,
- struct r600_query_hw *query);
+struct pipe_query *si_create_batch_query(struct pipe_context *ctx,
+ unsigned num_queries,
+ unsigned *query_types);
+
+int si_get_perfcounter_info(struct r600_common_screen *,
+ unsigned index,
+ struct pipe_driver_query_info *info);
+int si_get_perfcounter_group_info(struct r600_common_screen *,
+ unsigned index,
+ struct pipe_driver_query_group_info *info);
+
+bool si_perfcounters_init(struct r600_perfcounters *, unsigned num_blocks);
+void si_perfcounters_add_block(struct r600_common_screen *,
+ struct r600_perfcounters *,
+ const char *name, unsigned flags,
+ unsigned counters, unsigned selectors,
+ unsigned instances, void *data);
+void si_perfcounters_do_destroy(struct r600_perfcounters *);
+void si_query_hw_reset_buffers(struct r600_common_context *rctx,
+ struct r600_query_hw *query);
struct r600_qbo_state {
void *saved_compute;
diff -Nru mesa-17.2.4/src/gallium/drivers/radeon/r600_streamout.c mesa-17.3.3/src/gallium/drivers/radeon/r600_streamout.c
--- mesa-17.2.4/src/gallium/drivers/radeon/r600_streamout.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeon/r600_streamout.c 1970-01-01 00:00:00.000000000 +0000
@@ -1,381 +0,0 @@
-/*
- * Copyright 2013 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * Authors: Marek Olšák
- *
- */
-
-#include "r600_pipe_common.h"
-#include "r600_cs.h"
-
-#include "util/u_memory.h"
-
-static void r600_set_streamout_enable(struct r600_common_context *rctx, bool enable);
-
-static struct pipe_stream_output_target *
-r600_create_so_target(struct pipe_context *ctx,
- struct pipe_resource *buffer,
- unsigned buffer_offset,
- unsigned buffer_size)
-{
- struct r600_common_context *rctx = (struct r600_common_context *)ctx;
- struct r600_so_target *t;
- struct r600_resource *rbuffer = (struct r600_resource*)buffer;
-
- t = CALLOC_STRUCT(r600_so_target);
- if (!t) {
- return NULL;
- }
-
- u_suballocator_alloc(rctx->allocator_zeroed_memory, 4, 4,
- &t->buf_filled_size_offset,
- (struct pipe_resource**)&t->buf_filled_size);
- if (!t->buf_filled_size) {
- FREE(t);
- return NULL;
- }
-
- t->b.reference.count = 1;
- t->b.context = ctx;
- pipe_resource_reference(&t->b.buffer, buffer);
- t->b.buffer_offset = buffer_offset;
- t->b.buffer_size = buffer_size;
-
- util_range_add(&rbuffer->valid_buffer_range, buffer_offset,
- buffer_offset + buffer_size);
- return &t->b;
-}
-
-static void r600_so_target_destroy(struct pipe_context *ctx,
- struct pipe_stream_output_target *target)
-{
- struct r600_so_target *t = (struct r600_so_target*)target;
- pipe_resource_reference(&t->b.buffer, NULL);
- r600_resource_reference(&t->buf_filled_size, NULL);
- FREE(t);
-}
-
-void r600_streamout_buffers_dirty(struct r600_common_context *rctx)
-{
- struct r600_atom *begin = &rctx->streamout.begin_atom;
- unsigned num_bufs = util_bitcount(rctx->streamout.enabled_mask);
- unsigned num_bufs_appended = util_bitcount(rctx->streamout.enabled_mask &
- rctx->streamout.append_bitmask);
-
- if (!num_bufs)
- return;
-
- rctx->streamout.num_dw_for_end =
- 12 + /* flush_vgt_streamout */
- num_bufs * 11; /* STRMOUT_BUFFER_UPDATE, BUFFER_SIZE */
-
- begin->num_dw = 12; /* flush_vgt_streamout */
-
- if (rctx->chip_class >= SI) {
- begin->num_dw += num_bufs * 4; /* SET_CONTEXT_REG */
- } else {
- begin->num_dw += num_bufs * 7; /* SET_CONTEXT_REG */
-
- if (rctx->family >= CHIP_RS780 && rctx->family <= CHIP_RV740)
- begin->num_dw += num_bufs * 5; /* STRMOUT_BASE_UPDATE */
- }
-
- begin->num_dw +=
- num_bufs_appended * 8 + /* STRMOUT_BUFFER_UPDATE */
- (num_bufs - num_bufs_appended) * 6 + /* STRMOUT_BUFFER_UPDATE */
- (rctx->family > CHIP_R600 && rctx->family < CHIP_RS780 ? 2 : 0); /* SURFACE_BASE_UPDATE */
-
- rctx->set_atom_dirty(rctx, begin, true);
-
- r600_set_streamout_enable(rctx, true);
-}
-
-void r600_set_streamout_targets(struct pipe_context *ctx,
- unsigned num_targets,
- struct pipe_stream_output_target **targets,
- const unsigned *offsets)
-{
- struct r600_common_context *rctx = (struct r600_common_context *)ctx;
- unsigned i;
- unsigned enabled_mask = 0, append_bitmask = 0;
-
- /* Stop streamout. */
- if (rctx->streamout.num_targets && rctx->streamout.begin_emitted) {
- r600_emit_streamout_end(rctx);
- }
-
- /* Set the new targets. */
- for (i = 0; i < num_targets; i++) {
- pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->streamout.targets[i], targets[i]);
- if (!targets[i])
- continue;
-
- r600_context_add_resource_size(ctx, targets[i]->buffer);
- enabled_mask |= 1 << i;
- if (offsets[i] == ((unsigned)-1))
- append_bitmask |= 1 << i;
- }
- for (; i < rctx->streamout.num_targets; i++) {
- pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->streamout.targets[i], NULL);
- }
-
- rctx->streamout.enabled_mask = enabled_mask;
-
- rctx->streamout.num_targets = num_targets;
- rctx->streamout.append_bitmask = append_bitmask;
-
- if (num_targets) {
- r600_streamout_buffers_dirty(rctx);
- } else {
- rctx->set_atom_dirty(rctx, &rctx->streamout.begin_atom, false);
- r600_set_streamout_enable(rctx, false);
- }
-}
-
-static void r600_flush_vgt_streamout(struct r600_common_context *rctx)
-{
- struct radeon_winsys_cs *cs = rctx->gfx.cs;
- unsigned reg_strmout_cntl;
-
- /* The register is at different places on different ASICs. */
- if (rctx->chip_class >= CIK) {
- reg_strmout_cntl = R_0300FC_CP_STRMOUT_CNTL;
- } else if (rctx->chip_class >= EVERGREEN) {
- reg_strmout_cntl = R_0084FC_CP_STRMOUT_CNTL;
- } else {
- reg_strmout_cntl = R_008490_CP_STRMOUT_CNTL;
- }
-
- if (rctx->chip_class >= CIK) {
- radeon_set_uconfig_reg(cs, reg_strmout_cntl, 0);
- } else {
- radeon_set_config_reg(cs, reg_strmout_cntl, 0);
- }
-
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH) | EVENT_INDEX(0));
-
- radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
- radeon_emit(cs, WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */
- radeon_emit(cs, reg_strmout_cntl >> 2); /* register */
- radeon_emit(cs, 0);
- radeon_emit(cs, S_008490_OFFSET_UPDATE_DONE(1)); /* reference value */
- radeon_emit(cs, S_008490_OFFSET_UPDATE_DONE(1)); /* mask */
- radeon_emit(cs, 4); /* poll interval */
-}
-
-static void r600_emit_streamout_begin(struct r600_common_context *rctx, struct r600_atom *atom)
-{
- struct radeon_winsys_cs *cs = rctx->gfx.cs;
- struct r600_so_target **t = rctx->streamout.targets;
- uint16_t *stride_in_dw = rctx->streamout.stride_in_dw;
- unsigned i, update_flags = 0;
-
- r600_flush_vgt_streamout(rctx);
-
- for (i = 0; i < rctx->streamout.num_targets; i++) {
- if (!t[i])
- continue;
-
- t[i]->stride_in_dw = stride_in_dw[i];
-
- if (rctx->chip_class >= SI) {
- /* SI binds streamout buffers as shader resources.
- * VGT only counts primitives and tells the shader
- * through SGPRs what to do. */
- radeon_set_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 2);
- radeon_emit(cs, (t[i]->b.buffer_offset +
- t[i]->b.buffer_size) >> 2); /* BUFFER_SIZE (in DW) */
- radeon_emit(cs, stride_in_dw[i]); /* VTX_STRIDE (in DW) */
- } else {
- uint64_t va = r600_resource(t[i]->b.buffer)->gpu_address;
-
- update_flags |= SURFACE_BASE_UPDATE_STRMOUT(i);
-
- radeon_set_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 3);
- radeon_emit(cs, (t[i]->b.buffer_offset +
- t[i]->b.buffer_size) >> 2); /* BUFFER_SIZE (in DW) */
- radeon_emit(cs, stride_in_dw[i]); /* VTX_STRIDE (in DW) */
- radeon_emit(cs, va >> 8); /* BUFFER_BASE */
-
- r600_emit_reloc(rctx, &rctx->gfx, r600_resource(t[i]->b.buffer),
- RADEON_USAGE_WRITE, RADEON_PRIO_SHADER_RW_BUFFER);
-
- /* R7xx requires this packet after updating BUFFER_BASE.
- * Without this, R7xx locks up. */
- if (rctx->family >= CHIP_RS780 && rctx->family <= CHIP_RV740) {
- radeon_emit(cs, PKT3(PKT3_STRMOUT_BASE_UPDATE, 1, 0));
- radeon_emit(cs, i);
- radeon_emit(cs, va >> 8);
-
- r600_emit_reloc(rctx, &rctx->gfx, r600_resource(t[i]->b.buffer),
- RADEON_USAGE_WRITE, RADEON_PRIO_SHADER_RW_BUFFER);
- }
- }
-
- if (rctx->streamout.append_bitmask & (1 << i) && t[i]->buf_filled_size_valid) {
- uint64_t va = t[i]->buf_filled_size->gpu_address +
- t[i]->buf_filled_size_offset;
-
- /* Append. */
- radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
- radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) |
- STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_MEM)); /* control */
- radeon_emit(cs, 0); /* unused */
- radeon_emit(cs, 0); /* unused */
- radeon_emit(cs, va); /* src address lo */
- radeon_emit(cs, va >> 32); /* src address hi */
-
- r600_emit_reloc(rctx, &rctx->gfx, t[i]->buf_filled_size,
- RADEON_USAGE_READ, RADEON_PRIO_SO_FILLED_SIZE);
- } else {
- /* Start from the beginning. */
- radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
- radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) |
- STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_PACKET)); /* control */
- radeon_emit(cs, 0); /* unused */
- radeon_emit(cs, 0); /* unused */
- radeon_emit(cs, t[i]->b.buffer_offset >> 2); /* buffer offset in DW */
- radeon_emit(cs, 0); /* unused */
- }
- }
-
- if (rctx->family > CHIP_R600 && rctx->family < CHIP_RV770) {
- radeon_emit(cs, PKT3(PKT3_SURFACE_BASE_UPDATE, 0, 0));
- radeon_emit(cs, update_flags);
- }
- rctx->streamout.begin_emitted = true;
-}
-
-void r600_emit_streamout_end(struct r600_common_context *rctx)
-{
- struct radeon_winsys_cs *cs = rctx->gfx.cs;
- struct r600_so_target **t = rctx->streamout.targets;
- unsigned i;
- uint64_t va;
-
- r600_flush_vgt_streamout(rctx);
-
- for (i = 0; i < rctx->streamout.num_targets; i++) {
- if (!t[i])
- continue;
-
- va = t[i]->buf_filled_size->gpu_address + t[i]->buf_filled_size_offset;
- radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
- radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) |
- STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE) |
- STRMOUT_STORE_BUFFER_FILLED_SIZE); /* control */
- radeon_emit(cs, va); /* dst address lo */
- radeon_emit(cs, va >> 32); /* dst address hi */
- radeon_emit(cs, 0); /* unused */
- radeon_emit(cs, 0); /* unused */
-
- r600_emit_reloc(rctx, &rctx->gfx, t[i]->buf_filled_size,
- RADEON_USAGE_WRITE, RADEON_PRIO_SO_FILLED_SIZE);
-
- /* Zero the buffer size. The counters (primitives generated,
- * primitives emitted) may be enabled even if there is not
- * buffer bound. This ensures that the primitives-emitted query
- * won't increment. */
- radeon_set_context_reg(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 0);
-
- t[i]->buf_filled_size_valid = true;
- }
-
- rctx->streamout.begin_emitted = false;
- rctx->flags |= R600_CONTEXT_STREAMOUT_FLUSH;
-}
-
-/* STREAMOUT CONFIG DERIVED STATE
- *
- * Streamout must be enabled for the PRIMITIVES_GENERATED query to work.
- * The buffer mask is an independent state, so no writes occur if there
- * are no buffers bound.
- */
-
-static void r600_emit_streamout_enable(struct r600_common_context *rctx,
- struct r600_atom *atom)
-{
- unsigned strmout_config_reg = R_028AB0_VGT_STRMOUT_EN;
- unsigned strmout_config_val = S_028B94_STREAMOUT_0_EN(r600_get_strmout_en(rctx));
- unsigned strmout_buffer_reg = R_028B20_VGT_STRMOUT_BUFFER_EN;
- unsigned strmout_buffer_val = rctx->streamout.hw_enabled_mask &
- rctx->streamout.enabled_stream_buffers_mask;
-
- if (rctx->chip_class >= EVERGREEN) {
- strmout_buffer_reg = R_028B98_VGT_STRMOUT_BUFFER_CONFIG;
-
- strmout_config_reg = R_028B94_VGT_STRMOUT_CONFIG;
- strmout_config_val |=
- S_028B94_RAST_STREAM(0) |
- S_028B94_STREAMOUT_1_EN(r600_get_strmout_en(rctx)) |
- S_028B94_STREAMOUT_2_EN(r600_get_strmout_en(rctx)) |
- S_028B94_STREAMOUT_3_EN(r600_get_strmout_en(rctx));
- }
- radeon_set_context_reg(rctx->gfx.cs, strmout_buffer_reg, strmout_buffer_val);
- radeon_set_context_reg(rctx->gfx.cs, strmout_config_reg, strmout_config_val);
-}
-
-static void r600_set_streamout_enable(struct r600_common_context *rctx, bool enable)
-{
- bool old_strmout_en = r600_get_strmout_en(rctx);
- unsigned old_hw_enabled_mask = rctx->streamout.hw_enabled_mask;
-
- rctx->streamout.streamout_enabled = enable;
-
- rctx->streamout.hw_enabled_mask = rctx->streamout.enabled_mask |
- (rctx->streamout.enabled_mask << 4) |
- (rctx->streamout.enabled_mask << 8) |
- (rctx->streamout.enabled_mask << 12);
-
- if ((old_strmout_en != r600_get_strmout_en(rctx)) ||
- (old_hw_enabled_mask != rctx->streamout.hw_enabled_mask)) {
- rctx->set_atom_dirty(rctx, &rctx->streamout.enable_atom, true);
- }
-}
-
-void r600_update_prims_generated_query_state(struct r600_common_context *rctx,
- unsigned type, int diff)
-{
- if (type == PIPE_QUERY_PRIMITIVES_GENERATED) {
- bool old_strmout_en = r600_get_strmout_en(rctx);
-
- rctx->streamout.num_prims_gen_queries += diff;
- assert(rctx->streamout.num_prims_gen_queries >= 0);
-
- rctx->streamout.prims_gen_query_enabled =
- rctx->streamout.num_prims_gen_queries != 0;
-
- if (old_strmout_en != r600_get_strmout_en(rctx)) {
- rctx->set_atom_dirty(rctx, &rctx->streamout.enable_atom, true);
- }
- }
-}
-
-void r600_streamout_init(struct r600_common_context *rctx)
-{
- rctx->b.create_stream_output_target = r600_create_so_target;
- rctx->b.stream_output_target_destroy = r600_so_target_destroy;
- rctx->streamout.begin_atom.emit = r600_emit_streamout_begin;
- rctx->streamout.enable_atom.emit = r600_emit_streamout_enable;
- rctx->streamout.enable_atom.num_dw = 6;
-}
diff -Nru mesa-17.2.4/src/gallium/drivers/radeon/r600_test_dma.c mesa-17.3.3/src/gallium/drivers/radeon/r600_test_dma.c
--- mesa-17.2.4/src/gallium/drivers/radeon/r600_test_dma.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeon/r600_test_dma.c 2018-01-18 21:30:28.000000000 +0000
@@ -171,7 +171,7 @@
}
}
-void r600_test_dma(struct r600_common_screen *rscreen)
+void si_test_dma(struct r600_common_screen *rscreen)
{
struct pipe_screen *screen = &rscreen->b;
struct pipe_context *ctx = screen->context_create(screen, NULL, 0);
diff -Nru mesa-17.2.4/src/gallium/drivers/radeon/r600_texture.c mesa-17.3.3/src/gallium/drivers/radeon/r600_texture.c
--- mesa-17.2.4/src/gallium/drivers/radeon/r600_texture.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeon/r600_texture.c 2018-01-18 21:30:28.000000000 +0000
@@ -28,12 +28,15 @@
#include "r600_cs.h"
#include "r600_query.h"
#include "util/u_format.h"
+#include "util/u_log.h"
#include "util/u_memory.h"
#include "util/u_pack_color.h"
#include "util/u_surface.h"
#include "os/os_time.h"
#include <errno.h>
#include <inttypes.h>
+#include "state_tracker/drm_driver.h"
+#include "amd/common/sid.h"
static void r600_texture_discard_cmask(struct r600_common_screen *rscreen,
struct r600_texture *rtex);
@@ -42,13 +45,13 @@
const struct pipe_resource *templ);
-bool r600_prepare_for_dma_blit(struct r600_common_context *rctx,
- struct r600_texture *rdst,
- unsigned dst_level, unsigned dstx,
- unsigned dsty, unsigned dstz,
- struct r600_texture *rsrc,
- unsigned src_level,
- const struct pipe_box *src_box)
+bool si_prepare_for_dma_blit(struct r600_common_context *rctx,
+ struct r600_texture *rdst,
+ unsigned dst_level, unsigned dstx,
+ unsigned dsty, unsigned dstz,
+ struct r600_texture *rsrc,
+ unsigned src_level,
+ const struct pipe_box *src_box)
{
if (!rctx->dma.cs)
return false;
@@ -235,7 +238,7 @@
is_depth = util_format_has_depth(desc);
is_stencil = util_format_has_stencil(desc);
- if (rscreen->chip_class >= EVERGREEN && !is_flushed_depth &&
+ if (!is_flushed_depth &&
ptex->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
bpe = 4; /* stencil is allocated separately on evergreen */
} else {
@@ -280,8 +283,10 @@
flags |= RADEON_SURF_SCANOUT;
}
+ if (ptex->bind & PIPE_BIND_SHARED)
+ flags |= RADEON_SURF_SHAREABLE;
if (is_imported)
- flags |= RADEON_SURF_IMPORTED;
+ flags |= RADEON_SURF_IMPORTED | RADEON_SURF_SHAREABLE;
if (!(ptex->flags & R600_RESOURCE_FLAG_FORCE_TILING))
flags |= RADEON_SURF_OPTIMIZE_FOR_SPACE;
@@ -340,6 +345,41 @@
}
}
+static void r600_surface_import_metadata(struct r600_common_screen *rscreen,
+ struct radeon_surf *surf,
+ struct radeon_bo_metadata *metadata,
+ enum radeon_surf_mode *array_mode,
+ bool *is_scanout)
+{
+ if (rscreen->chip_class >= GFX9) {
+ if (metadata->u.gfx9.swizzle_mode > 0)
+ *array_mode = RADEON_SURF_MODE_2D;
+ else
+ *array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
+
+ *is_scanout = metadata->u.gfx9.swizzle_mode == 0 ||
+ metadata->u.gfx9.swizzle_mode % 4 == 2;
+
+ surf->u.gfx9.surf.swizzle_mode = metadata->u.gfx9.swizzle_mode;
+ } else {
+ surf->u.legacy.pipe_config = metadata->u.legacy.pipe_config;
+ surf->u.legacy.bankw = metadata->u.legacy.bankw;
+ surf->u.legacy.bankh = metadata->u.legacy.bankh;
+ surf->u.legacy.tile_split = metadata->u.legacy.tile_split;
+ surf->u.legacy.mtilea = metadata->u.legacy.mtilea;
+ surf->u.legacy.num_banks = metadata->u.legacy.num_banks;
+
+ if (metadata->u.legacy.macrotile == RADEON_LAYOUT_TILED)
+ *array_mode = RADEON_SURF_MODE_2D;
+ else if (metadata->u.legacy.microtile == RADEON_LAYOUT_TILED)
+ *array_mode = RADEON_SURF_MODE_1D;
+ else
+ *array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
+
+ *is_scanout = metadata->u.legacy.scanout;
+ }
+}
+
static void r600_eliminate_fast_color_clear(struct r600_common_context *rctx,
struct r600_texture *rtex)
{
@@ -369,10 +409,7 @@
rtex->cmask.base_address_reg = rtex->resource.gpu_address >> 8;
rtex->dirty_level_mask = 0;
- if (rscreen->chip_class >= SI)
- rtex->cb_color_info &= ~SI_S_028C70_FAST_CLEAR(1);
- else
- rtex->cb_color_info &= ~EG_S_028C70_FAST_CLEAR(1);
+ rtex->cb_color_info &= ~S_028C70_FAST_CLEAR(1);
if (rtex->cmask_buffer != &rtex->resource)
r600_resource_reference(&rtex->cmask_buffer, NULL);
@@ -427,8 +464,8 @@
* \param rctx the current context if you have one, or rscreen->aux_context
* if you don't.
*/
-bool r600_texture_disable_dcc(struct r600_common_context *rctx,
- struct r600_texture *rtex)
+bool si_texture_disable_dcc(struct r600_common_context *rctx,
+ struct r600_texture *rtex)
{
struct r600_common_screen *rscreen = rctx->screen;
@@ -460,10 +497,6 @@
templ.bind |= new_bind_flag;
- /* r600g doesn't react to dirty_tex_descriptor_counter */
- if (rctx->chip_class < SI)
- return;
-
if (rtex->resource.b.is_shared)
return;
@@ -554,6 +587,7 @@
struct radeon_bo_metadata metadata;
bool update_metadata = false;
unsigned stride, offset, slice_size;
+ bool flush = false;
ctx = threaded_context_unwrap_sync(ctx);
rctx = (struct r600_common_context*)(ctx ? ctx : rscreen->aux_context);
@@ -566,13 +600,18 @@
return false;
/* Move a suballocated texture into a non-suballocated allocation. */
- if (rscreen->ws->buffer_is_suballocated(res->buf)) {
+ if (rscreen->ws->buffer_is_suballocated(res->buf) ||
+ rtex->surface.tile_swizzle ||
+ (rtex->resource.flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
+ whandle->type != DRM_API_HANDLE_TYPE_KMS)) {
assert(!res->b.is_shared);
r600_reallocate_texture_inplace(rctx, rtex,
PIPE_BIND_SHARED, false);
- rctx->b.flush(&rctx->b, NULL, 0);
+ flush = true;
assert(res->b.b.bind & PIPE_BIND_SHARED);
assert(res->flags & RADEON_FLAG_NO_SUBALLOC);
+ assert(!(res->flags & RADEON_FLAG_NO_INTERPROCESS_SHARING));
+ assert(rtex->surface.tile_swizzle == 0);
}
/* Since shader image stores don't support DCC on VI,
@@ -580,14 +619,19 @@
* access.
*/
if (usage & PIPE_HANDLE_USAGE_WRITE && rtex->dcc_offset) {
- if (r600_texture_disable_dcc(rctx, rtex))
+ if (si_texture_disable_dcc(rctx, rtex)) {
update_metadata = true;
+ /* si_texture_disable_dcc flushes the context */
+ flush = false;
+ }
}
if (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) &&
(rtex->cmask.size || rtex->dcc_offset)) {
/* Eliminate fast clear (both CMASK and DCC) */
r600_eliminate_fast_color_clear(rctx, rtex);
+ /* eliminate_fast_color_clear flushes the context */
+ flush = false;
/* Disable CMASK if flush_resource isn't going
* to be called.
@@ -618,8 +662,11 @@
slice_size = rtex->surface.u.legacy.level[0].slice_size;
}
} else {
+ /* Buffer exports are for the OpenCL interop. */
/* Move a suballocated buffer into a non-suballocated allocation. */
- if (rscreen->ws->buffer_is_suballocated(res->buf)) {
+ if (rscreen->ws->buffer_is_suballocated(res->buf) ||
+ /* A DMABUF export always fails if the BO is local. */
+ rtex->resource.flags & RADEON_FLAG_NO_INTERPROCESS_SHARING) {
assert(!res->b.is_shared);
/* Allocate a new buffer with PIPE_BIND_SHARED. */
@@ -636,8 +683,9 @@
u_box_1d(0, newb->width0, &box);
rctx->b.resource_copy_region(&rctx->b, newb, 0, 0, 0, 0,
&res->b.b, 0, &box);
+ flush = true;
/* Move the new buffer storage to the old pipe_resource. */
- r600_replace_buffer_storage(&rctx->b, &res->b.b, newb);
+ si_replace_buffer_storage(&rctx->b, &res->b.b, newb);
pipe_resource_reference(&newb, NULL);
assert(res->b.b.bind & PIPE_BIND_SHARED);
@@ -650,6 +698,9 @@
slice_size = 0;
}
+ if (flush)
+ rctx->b.flush(&rctx->b, NULL, 0);
+
if (res->b.is_shared) {
/* USAGE_EXPLICIT_FLUSH must be cleared if at least one user
* doesn't set it.
@@ -686,10 +737,10 @@
static const struct u_resource_vtbl r600_texture_vtbl;
/* The number of samples can be specified independently of the texture. */
-void r600_texture_get_fmask_info(struct r600_common_screen *rscreen,
- struct r600_texture *rtex,
- unsigned nr_samples,
- struct r600_fmask_info *out)
+void si_texture_get_fmask_info(struct r600_common_screen *rscreen,
+ struct r600_texture *rtex,
+ unsigned nr_samples,
+ struct r600_fmask_info *out)
{
/* FMASK is allocated like an ordinary texture. */
struct pipe_resource templ = rtex->resource.b.b;
@@ -707,17 +758,6 @@
templ.nr_samples = 1;
flags = rtex->surface.flags | RADEON_SURF_FMASK;
- if (rscreen->chip_class <= CAYMAN) {
- /* Use the same parameters and tile mode. */
- fmask.u.legacy.bankw = rtex->surface.u.legacy.bankw;
- fmask.u.legacy.bankh = rtex->surface.u.legacy.bankh;
- fmask.u.legacy.mtilea = rtex->surface.u.legacy.mtilea;
- fmask.u.legacy.tile_split = rtex->surface.u.legacy.tile_split;
-
- if (nr_samples <= 4)
- fmask.u.legacy.bankh = 4;
- }
-
switch (nr_samples) {
case 2:
case 4:
@@ -731,13 +771,6 @@
return;
}
- /* Overallocate FMASK on R600-R700 to fix colorbuffer corruption.
- * This can be fixed by writing a separate FMASK allocator specifically
- * for R600-R700 asics. */
- if (rscreen->chip_class <= R700) {
- bpe *= 2;
- }
-
if (rscreen->ws->surface_init(rscreen->ws, &templ, flags, bpe,
RADEON_SURF_MODE_2D, &fmask)) {
R600_ERR("Got error in surface_init while allocating FMASK.\n");
@@ -753,6 +786,7 @@
out->tile_mode_index = fmask.u.legacy.tiling_index[0];
out->pitch_in_pixels = fmask.u.legacy.level[0].nblk_x;
out->bank_height = fmask.u.legacy.bankh;
+ out->tile_swizzle = fmask.tile_swizzle;
out->alignment = MAX2(256, fmask.surf_alignment);
out->size = fmask.surf_size;
}
@@ -760,47 +794,13 @@
static void r600_texture_allocate_fmask(struct r600_common_screen *rscreen,
struct r600_texture *rtex)
{
- r600_texture_get_fmask_info(rscreen, rtex,
+ si_texture_get_fmask_info(rscreen, rtex,
rtex->resource.b.b.nr_samples, &rtex->fmask);
rtex->fmask.offset = align64(rtex->size, rtex->fmask.alignment);
rtex->size = rtex->fmask.offset + rtex->fmask.size;
}
-void r600_texture_get_cmask_info(struct r600_common_screen *rscreen,
- struct r600_texture *rtex,
- struct r600_cmask_info *out)
-{
- unsigned cmask_tile_width = 8;
- unsigned cmask_tile_height = 8;
- unsigned cmask_tile_elements = cmask_tile_width * cmask_tile_height;
- unsigned element_bits = 4;
- unsigned cmask_cache_bits = 1024;
- unsigned num_pipes = rscreen->info.num_tile_pipes;
- unsigned pipe_interleave_bytes = rscreen->info.pipe_interleave_bytes;
-
- unsigned elements_per_macro_tile = (cmask_cache_bits / element_bits) * num_pipes;
- unsigned pixels_per_macro_tile = elements_per_macro_tile * cmask_tile_elements;
- unsigned sqrt_pixels_per_macro_tile = sqrt(pixels_per_macro_tile);
- unsigned macro_tile_width = util_next_power_of_two(sqrt_pixels_per_macro_tile);
- unsigned macro_tile_height = pixels_per_macro_tile / macro_tile_width;
-
- unsigned pitch_elements = align(rtex->resource.b.b.width0, macro_tile_width);
- unsigned height = align(rtex->resource.b.b.height0, macro_tile_height);
-
- unsigned base_align = num_pipes * pipe_interleave_bytes;
- unsigned slice_bytes =
- ((pitch_elements * height * element_bits + 7) / 8) / cmask_tile_elements;
-
- assert(macro_tile_width % 128 == 0);
- assert(macro_tile_height % 128 == 0);
-
- out->slice_tile_max = ((pitch_elements * height) / (128*128)) - 1;
- out->alignment = MAX2(256, base_align);
- out->size = (util_max_layer(&rtex->resource.b.b, 0) + 1) *
- align(slice_bytes, base_align);
-}
-
static void si_texture_get_cmask_info(struct r600_common_screen *rscreen,
struct r600_texture *rtex,
struct r600_cmask_info *out)
@@ -858,19 +858,12 @@
static void r600_texture_allocate_cmask(struct r600_common_screen *rscreen,
struct r600_texture *rtex)
{
- if (rscreen->chip_class >= SI) {
- si_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
- } else {
- r600_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
- }
+ si_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
rtex->cmask.offset = align64(rtex->size, rtex->cmask.alignment);
rtex->size = rtex->cmask.offset + rtex->cmask.size;
- if (rscreen->chip_class >= SI)
- rtex->cb_color_info |= SI_S_028C70_FAST_CLEAR(1);
- else
- rtex->cb_color_info |= EG_S_028C70_FAST_CLEAR(1);
+ rtex->cb_color_info |= S_028C70_FAST_CLEAR(1);
}
static void r600_texture_alloc_cmask_separate(struct r600_common_screen *rscreen,
@@ -881,14 +874,10 @@
assert(rtex->cmask.size == 0);
- if (rscreen->chip_class >= SI) {
- si_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
- } else {
- r600_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
- }
+ si_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
rtex->cmask_buffer = (struct r600_resource *)
- r600_aligned_buffer_create(&rscreen->b,
+ si_aligned_buffer_create(&rscreen->b,
R600_RESOURCE_FLAG_UNMAPPABLE,
PIPE_USAGE_DEFAULT,
rtex->cmask.size,
@@ -901,10 +890,7 @@
/* update colorbuffer state bits */
rtex->cmask.base_address_reg = rtex->cmask_buffer->gpu_address >> 8;
- if (rscreen->chip_class >= SI)
- rtex->cb_color_info |= SI_S_028C70_FAST_CLEAR(1);
- else
- rtex->cb_color_info |= EG_S_028C70_FAST_CLEAR(1);
+ rtex->cb_color_info |= S_028C70_FAST_CLEAR(1);
p_atomic_inc(&rscreen->compressed_colortex_counter);
}
@@ -920,16 +906,6 @@
rtex->surface.htile_size = 0;
- if (rscreen->chip_class <= EVERGREEN &&
- rscreen->info.drm_major == 2 && rscreen->info.drm_minor < 26)
- return;
-
- /* HW bug on R6xx. */
- if (rscreen->chip_class == R600 &&
- (rtex->resource.b.b.width0 > 7680 ||
- rtex->resource.b.b.height0 > 7680))
- return;
-
/* HTILE is broken with 1D tiling on old kernels and CIK. */
if (rscreen->chip_class >= CIK &&
rtex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_1D &&
@@ -1000,13 +976,13 @@
rtex->size = rtex->htile_offset + rtex->surface.htile_size;
}
-void r600_print_texture_info(struct r600_common_screen *rscreen,
- struct r600_texture *rtex, FILE *f)
+void si_print_texture_info(struct r600_common_screen *rscreen,
+ struct r600_texture *rtex, struct u_log_context *log)
{
int i;
/* Common parameters. */
- fprintf(f, " Info: npix_x=%u, npix_y=%u, npix_z=%u, blk_w=%u, "
+ u_log_printf(log, " Info: npix_x=%u, npix_y=%u, npix_z=%u, blk_w=%u, "
"blk_h=%u, array_size=%u, last_level=%u, "
"bpe=%u, nsamples=%u, flags=0x%x, %s\n",
rtex->resource.b.b.width0, rtex->resource.b.b.height0,
@@ -1017,7 +993,7 @@
rtex->surface.flags, util_format_short_name(rtex->resource.b.b.format));
if (rscreen->chip_class >= GFX9) {
- fprintf(f, " Surf: size=%"PRIu64", slice_size=%"PRIu64", "
+ u_log_printf(log, " Surf: size=%"PRIu64", slice_size=%"PRIu64", "
"alignment=%u, swmode=%u, epitch=%u, pitch=%u\n",
rtex->surface.surf_size,
rtex->surface.u.gfx9.surf_slice_size,
@@ -1027,7 +1003,7 @@
rtex->surface.u.gfx9.surf_pitch);
if (rtex->fmask.size) {
- fprintf(f, " FMASK: offset=%"PRIu64", size=%"PRIu64", "
+ u_log_printf(log, " FMASK: offset=%"PRIu64", size=%"PRIu64", "
"alignment=%u, swmode=%u, epitch=%u\n",
rtex->fmask.offset,
rtex->surface.u.gfx9.fmask_size,
@@ -1037,7 +1013,7 @@
}
if (rtex->cmask.size) {
- fprintf(f, " CMask: offset=%"PRIu64", size=%"PRIu64", "
+ u_log_printf(log, " CMask: offset=%"PRIu64", size=%"PRIu64", "
"alignment=%u, rb_aligned=%u, pipe_aligned=%u\n",
rtex->cmask.offset,
rtex->surface.u.gfx9.cmask_size,
@@ -1047,7 +1023,7 @@
}
if (rtex->htile_offset) {
- fprintf(f, " HTile: offset=%"PRIu64", size=%"PRIu64", alignment=%u, "
+ u_log_printf(log, " HTile: offset=%"PRIu64", size=%"PRIu64", alignment=%u, "
"rb_aligned=%u, pipe_aligned=%u\n",
rtex->htile_offset,
rtex->surface.htile_size,
@@ -1057,7 +1033,7 @@
}
if (rtex->dcc_offset) {
- fprintf(f, " DCC: offset=%"PRIu64", size=%"PRIu64", "
+ u_log_printf(log, " DCC: offset=%"PRIu64", size=%"PRIu64", "
"alignment=%u, pitch_max=%u, num_dcc_levels=%u\n",
rtex->dcc_offset, rtex->surface.dcc_size,
rtex->surface.dcc_alignment,
@@ -1066,7 +1042,7 @@
}
if (rtex->surface.u.gfx9.stencil_offset) {
- fprintf(f, " Stencil: offset=%"PRIu64", swmode=%u, epitch=%u\n",
+ u_log_printf(log, " Stencil: offset=%"PRIu64", swmode=%u, epitch=%u\n",
rtex->surface.u.gfx9.stencil_offset,
rtex->surface.u.gfx9.stencil.swizzle_mode,
rtex->surface.u.gfx9.stencil.epitch);
@@ -1074,7 +1050,7 @@
return;
}
- fprintf(f, " Layout: size=%"PRIu64", alignment=%u, bankw=%u, "
+ u_log_printf(log, " Layout: size=%"PRIu64", alignment=%u, bankw=%u, "
"bankh=%u, nbanks=%u, mtilea=%u, tilesplit=%u, pipeconfig=%u, scanout=%u\n",
rtex->surface.surf_size, rtex->surface.surf_alignment, rtex->surface.u.legacy.bankw,
rtex->surface.u.legacy.bankh, rtex->surface.u.legacy.num_banks, rtex->surface.u.legacy.mtilea,
@@ -1082,31 +1058,31 @@
(rtex->surface.flags & RADEON_SURF_SCANOUT) != 0);
if (rtex->fmask.size)
- fprintf(f, " FMask: offset=%"PRIu64", size=%"PRIu64", alignment=%u, pitch_in_pixels=%u, "
+ u_log_printf(log, " FMask: offset=%"PRIu64", size=%"PRIu64", alignment=%u, pitch_in_pixels=%u, "
"bankh=%u, slice_tile_max=%u, tile_mode_index=%u\n",
rtex->fmask.offset, rtex->fmask.size, rtex->fmask.alignment,
rtex->fmask.pitch_in_pixels, rtex->fmask.bank_height,
rtex->fmask.slice_tile_max, rtex->fmask.tile_mode_index);
if (rtex->cmask.size)
- fprintf(f, " CMask: offset=%"PRIu64", size=%"PRIu64", alignment=%u, "
+ u_log_printf(log, " CMask: offset=%"PRIu64", size=%"PRIu64", alignment=%u, "
"slice_tile_max=%u\n",
rtex->cmask.offset, rtex->cmask.size, rtex->cmask.alignment,
rtex->cmask.slice_tile_max);
if (rtex->htile_offset)
- fprintf(f, " HTile: offset=%"PRIu64", size=%"PRIu64", "
+ u_log_printf(log, " HTile: offset=%"PRIu64", size=%"PRIu64", "
"alignment=%u, TC_compatible = %u\n",
rtex->htile_offset, rtex->surface.htile_size,
rtex->surface.htile_alignment,
rtex->tc_compatible_htile);
if (rtex->dcc_offset) {
- fprintf(f, " DCC: offset=%"PRIu64", size=%"PRIu64", alignment=%u\n",
+ u_log_printf(log, " DCC: offset=%"PRIu64", size=%"PRIu64", alignment=%u\n",
rtex->dcc_offset, rtex->surface.dcc_size,
rtex->surface.dcc_alignment);
for (i = 0; i <= rtex->resource.b.b.last_level; i++)
- fprintf(f, " DCCLevel[%i]: enabled=%u, offset=%"PRIu64", "
+ u_log_printf(log, " DCCLevel[%i]: enabled=%u, offset=%"PRIu64", "
"fast_clear_size=%"PRIu64"\n",
i, i < rtex->surface.num_dcc_levels,
rtex->surface.u.legacy.level[i].dcc_offset,
@@ -1114,7 +1090,7 @@
}
for (i = 0; i <= rtex->resource.b.b.last_level; i++)
- fprintf(f, " Level[%i]: offset=%"PRIu64", slice_size=%"PRIu64", "
+ u_log_printf(log, " Level[%i]: offset=%"PRIu64", slice_size=%"PRIu64", "
"npix_x=%u, npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, "
"mode=%u, tiling_index = %u\n",
i, rtex->surface.u.legacy.level[i].offset,
@@ -1127,11 +1103,11 @@
rtex->surface.u.legacy.level[i].mode,
rtex->surface.u.legacy.tiling_index[i]);
- if (rtex->surface.flags & RADEON_SURF_SBUFFER) {
- fprintf(f, " StencilLayout: tilesplit=%u\n",
+ if (rtex->surface.has_stencil) {
+ u_log_printf(log, " StencilLayout: tilesplit=%u\n",
rtex->surface.u.legacy.stencil_tile_split);
for (i = 0; i <= rtex->resource.b.b.last_level; i++) {
- fprintf(f, " StencilLevel[%i]: offset=%"PRIu64", "
+ u_log_printf(log, " StencilLevel[%i]: offset=%"PRIu64", "
"slice_size=%"PRIu64", npix_x=%u, "
"npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, "
"mode=%u, tiling_index = %u\n",
@@ -1210,28 +1186,19 @@
rtex->ps_draw_ratio = 0;
if (rtex->is_depth) {
- if (base->flags & (R600_RESOURCE_FLAG_TRANSFER |
- R600_RESOURCE_FLAG_FLUSHED_DEPTH) ||
- rscreen->chip_class >= EVERGREEN) {
- if (rscreen->chip_class >= GFX9) {
- rtex->can_sample_z = true;
- rtex->can_sample_s = true;
- } else {
- rtex->can_sample_z = !rtex->surface.u.legacy.depth_adjusted;
- rtex->can_sample_s = !rtex->surface.u.legacy.stencil_adjusted;
- }
+ if (rscreen->chip_class >= GFX9) {
+ rtex->can_sample_z = true;
+ rtex->can_sample_s = true;
} else {
- if (rtex->resource.b.b.nr_samples <= 1 &&
- (rtex->resource.b.b.format == PIPE_FORMAT_Z16_UNORM ||
- rtex->resource.b.b.format == PIPE_FORMAT_Z32_FLOAT))
- rtex->can_sample_z = true;
+ rtex->can_sample_z = !rtex->surface.u.legacy.depth_adjusted;
+ rtex->can_sample_s = !rtex->surface.u.legacy.stencil_adjusted;
}
if (!(base->flags & (R600_RESOURCE_FLAG_TRANSFER |
R600_RESOURCE_FLAG_FLUSHED_DEPTH))) {
rtex->db_compatible = true;
- if (!(rscreen->debug_flags & DBG_NO_HYPERZ))
+ if (!(rscreen->debug_flags & DBG(NO_HYPERZ)))
r600_texture_allocate_htile(rscreen, rtex);
}
} else {
@@ -1252,7 +1219,7 @@
* apply_opaque_metadata later.
*/
if (rtex->surface.dcc_size &&
- (buf || !(rscreen->debug_flags & DBG_NO_DCC)) &&
+ (buf || !(rscreen->debug_flags & DBG(NO_DCC))) &&
!(rtex->surface.flags & RADEON_SURF_SCANOUT)) {
/* Reserve space for the DCC buffer. */
rtex->dcc_offset = align64(rtex->size, rtex->surface.dcc_alignment);
@@ -1262,14 +1229,10 @@
/* Now create the backing buffer. */
if (!buf) {
- r600_init_resource_fields(rscreen, resource, rtex->size,
+ si_init_resource_fields(rscreen, resource, rtex->size,
rtex->surface.surf_alignment);
- /* Displayable surfaces are not suballocated. */
- if (resource->b.b.bind & PIPE_BIND_SCANOUT)
- resource->flags |= RADEON_FLAG_NO_SUBALLOC;
-
- if (!r600_alloc_resource(rscreen, resource)) {
+ if (!si_alloc_resource(rscreen, resource)) {
FREE(rtex);
return NULL;
}
@@ -1287,7 +1250,7 @@
if (rtex->cmask.size) {
/* Initialize the cmask to 0xCC (= compressed state). */
- r600_screen_clear_buffer(rscreen, &rtex->cmask_buffer->b.b,
+ si_screen_clear_buffer(rscreen, &rtex->cmask_buffer->b.b,
rtex->cmask.offset, rtex->cmask.size,
0xCCCCCCCC);
}
@@ -1297,7 +1260,7 @@
if (rscreen->chip_class >= GFX9 || rtex->tc_compatible_htile)
clear_value = 0x0000030F;
- r600_screen_clear_buffer(rscreen, &rtex->resource.b.b,
+ si_screen_clear_buffer(rscreen, &rtex->resource.b.b,
rtex->htile_offset,
rtex->surface.htile_size,
clear_value);
@@ -1305,7 +1268,7 @@
/* Initialize DCC only if the texture is not being imported. */
if (!buf && rtex->dcc_offset) {
- r600_screen_clear_buffer(rscreen, &rtex->resource.b.b,
+ si_screen_clear_buffer(rscreen, &rtex->resource.b.b,
rtex->dcc_offset,
rtex->surface.dcc_size,
0xFFFFFFFF);
@@ -1315,7 +1278,7 @@
rtex->cmask.base_address_reg =
(rtex->resource.gpu_address + rtex->cmask.offset) >> 8;
- if (rscreen->debug_flags & DBG_VM) {
+ if (rscreen->debug_flags & DBG(VM)) {
fprintf(stderr, "VM start=0x%"PRIX64" end=0x%"PRIX64" | Texture %ix%ix%i, %i levels, %i samples, %s\n",
rtex->resource.gpu_address,
rtex->resource.gpu_address + rtex->resource.buf->size,
@@ -1323,10 +1286,14 @@
base->nr_samples ? base->nr_samples : 1, util_format_short_name(base->format));
}
- if (rscreen->debug_flags & DBG_TEX) {
+ if (rscreen->debug_flags & DBG(TEX)) {
puts("Texture:");
- r600_print_texture_info(rscreen, rtex, stdout);
+ struct u_log_context log;
+ u_log_context_init(&log);
+ si_print_texture_info(rscreen, rtex, &log);
+ u_log_new_page_print(&log, stdout);
fflush(stdout);
+ u_log_context_destroy(&log);
}
return rtex;
@@ -1357,20 +1324,13 @@
(templ->flags & PIPE_RESOURCE_FLAG_TEXTURING_MORE_LIKELY))
return RADEON_SURF_MODE_2D;
- /* r600g: force tiling on TEXTURE_2D and TEXTURE_3D compute resources. */
- if (rscreen->chip_class >= R600 && rscreen->chip_class <= CAYMAN &&
- (templ->bind & PIPE_BIND_COMPUTE_RESOURCE) &&
- (templ->target == PIPE_TEXTURE_2D ||
- templ->target == PIPE_TEXTURE_3D))
- force_tiling = true;
-
/* Handle common candidates for the linear mode.
* Compressed textures and DB surfaces must always be tiled.
*/
if (!force_tiling &&
!is_depth_stencil &&
!util_format_is_compressed(templ->format)) {
- if (rscreen->debug_flags & DBG_NO_TILING)
+ if (rscreen->debug_flags & DBG(NO_TILING))
return RADEON_SURF_MODE_LINEAR_ALIGNED;
/* Tiling doesn't work with the 422 (SUBSAMPLED) formats on R600+. */
@@ -1379,8 +1339,7 @@
/* Cursors are linear on SI.
* (XXX double-check, maybe also use RADEON_SURF_SCANOUT) */
- if (rscreen->chip_class >= SI &&
- (templ->bind & PIPE_BIND_CURSOR))
+ if (templ->bind & PIPE_BIND_CURSOR)
return RADEON_SURF_MODE_LINEAR_ALIGNED;
if (templ->bind & PIPE_BIND_LINEAR)
@@ -1402,15 +1361,15 @@
/* Make small textures 1D tiled. */
if (templ->width0 <= 16 || templ->height0 <= 16 ||
- (rscreen->debug_flags & DBG_NO_2D_TILING))
+ (rscreen->debug_flags & DBG(NO_2D_TILING)))
return RADEON_SURF_MODE_1D;
/* The allocator will switch to 1D if needed. */
return RADEON_SURF_MODE_2D;
}
-struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
- const struct pipe_resource *templ)
+struct pipe_resource *si_texture_create(struct pipe_screen *screen,
+ const struct pipe_resource *templ)
{
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
struct radeon_surf surface = {0};
@@ -1418,7 +1377,7 @@
bool tc_compatible_htile =
rscreen->chip_class >= VI &&
(templ->flags & PIPE_RESOURCE_FLAG_TEXTURING_MORE_LIKELY) &&
- !(rscreen->debug_flags & DBG_NO_HYPERZ) &&
+ !(rscreen->debug_flags & DBG(NO_HYPERZ)) &&
!is_flushed_depth &&
templ->nr_samples <= 1 && /* TC-compat HTILE is less efficient with MSAA */
util_format_is_depth_or_stencil(templ->format);
@@ -1445,8 +1404,8 @@
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
struct pb_buffer *buf = NULL;
unsigned stride = 0, offset = 0;
- unsigned array_mode;
- struct radeon_surf surface;
+ enum radeon_surf_mode array_mode;
+ struct radeon_surf surface = {};
int r;
struct radeon_bo_metadata metadata = {};
struct r600_texture *rtex;
@@ -1462,34 +1421,8 @@
return NULL;
rscreen->ws->buffer_get_metadata(buf, &metadata);
-
- if (rscreen->chip_class >= GFX9) {
- if (metadata.u.gfx9.swizzle_mode > 0)
- array_mode = RADEON_SURF_MODE_2D;
- else
- array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
-
- is_scanout = metadata.u.gfx9.swizzle_mode == 0 ||
- metadata.u.gfx9.swizzle_mode % 4 == 2;
-
- surface.u.gfx9.surf.swizzle_mode = metadata.u.gfx9.swizzle_mode;
- } else {
- surface.u.legacy.pipe_config = metadata.u.legacy.pipe_config;
- surface.u.legacy.bankw = metadata.u.legacy.bankw;
- surface.u.legacy.bankh = metadata.u.legacy.bankh;
- surface.u.legacy.tile_split = metadata.u.legacy.tile_split;
- surface.u.legacy.mtilea = metadata.u.legacy.mtilea;
- surface.u.legacy.num_banks = metadata.u.legacy.num_banks;
-
- if (metadata.u.legacy.macrotile == RADEON_LAYOUT_TILED)
- array_mode = RADEON_SURF_MODE_2D;
- else if (metadata.u.legacy.microtile == RADEON_LAYOUT_TILED)
- array_mode = RADEON_SURF_MODE_1D;
- else
- array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
-
- is_scanout = metadata.u.legacy.scanout;
- }
+ r600_surface_import_metadata(rscreen, &surface, &metadata,
+ &array_mode, &is_scanout);
r = r600_init_surface(rscreen, &surface, templ, array_mode, stride,
offset, true, is_scanout, false, false);
@@ -1507,12 +1440,13 @@
if (rscreen->apply_opaque_metadata)
rscreen->apply_opaque_metadata(rscreen, rtex, &metadata);
+ assert(rtex->surface.tile_swizzle == 0);
return &rtex->resource.b.b;
}
-bool r600_init_flushed_depth_texture(struct pipe_context *ctx,
- struct pipe_resource *texture,
- struct r600_texture **staging)
+bool si_init_flushed_depth_texture(struct pipe_context *ctx,
+ struct pipe_resource *texture,
+ struct r600_texture **staging)
{
struct r600_texture *rtex = (struct r600_texture*)texture;
struct pipe_resource resource;
@@ -1612,9 +1546,7 @@
unsigned transfer_usage,
const struct pipe_box *box)
{
- /* r600g doesn't react to dirty_tex_descriptor_counter */
- return rscreen->chip_class >= SI &&
- !rtex->resource.b.is_shared &&
+ return !rtex->resource.b.is_shared &&
!(transfer_usage & PIPE_TRANSFER_READ) &&
rtex->resource.b.b.last_level == 0 &&
util_texrange_covers_whole_level(&rtex->resource.b.b, 0,
@@ -1633,7 +1565,7 @@
assert(rtex->surface.is_linear);
/* Reallocate the buffer in the same pipe_resource. */
- r600_alloc_resource(rscreen, &rtex->resource);
+ si_alloc_resource(rscreen, &rtex->resource);
/* Initialize the CMASK base address (needed even without CMASK). */
rtex->cmask.base_address_reg =
@@ -1697,7 +1629,7 @@
rtex->resource.domains & RADEON_DOMAIN_VRAM ||
rtex->resource.flags & RADEON_FLAG_GTT_WC;
/* Write & linear only: */
- else if (r600_rings_is_buffer_referenced(rctx, rtex->resource.buf,
+ else if (si_rings_is_buffer_referenced(rctx, rtex->resource.buf,
RADEON_USAGE_READWRITE) ||
!rctx->ws->buffer_wait(rtex->resource.buf, 0,
RADEON_USAGE_READWRITE)) {
@@ -1736,7 +1668,7 @@
r600_init_temp_resource_from_box(&resource, texture, box, level, 0);
- if (!r600_init_flushed_depth_texture(ctx, &resource, &staging_depth)) {
+ if (!si_init_flushed_depth_texture(ctx, &resource, &staging_depth)) {
R600_ERR("failed to create temporary texture to hold untiled copy\n");
FREE(trans);
return NULL;
@@ -1763,7 +1695,7 @@
} else {
/* XXX: only readback the rectangle which is being mapped? */
/* XXX: when discard is true, no need to read back from depth texture */
- if (!r600_init_flushed_depth_texture(ctx, texture, &staging_depth)) {
+ if (!si_init_flushed_depth_texture(ctx, texture, &staging_depth)) {
R600_ERR("failed to create temporary texture to hold untiled copy\n");
FREE(trans);
return NULL;
@@ -1819,7 +1751,7 @@
buf = &rtex->resource;
}
- if (!(map = r600_buffer_map_sync_with_rings(rctx, buf, usage))) {
+ if (!(map = si_buffer_map_sync_with_rings(rctx, buf, usage))) {
r600_resource_reference(&trans->staging, NULL);
FREE(trans);
return NULL;
@@ -1987,17 +1919,16 @@
{
struct r600_texture *rtex = (struct r600_texture *)tex;
- if (vi_dcc_enabled(rtex, level) &&
- !vi_dcc_formats_compatible(tex->format, view_format))
- if (!r600_texture_disable_dcc(rctx, (struct r600_texture*)tex))
+ if (vi_dcc_formats_are_incompatible(tex, level, view_format))
+ if (!si_texture_disable_dcc(rctx, (struct r600_texture*)tex))
rctx->decompress_dcc(&rctx->b, rtex);
}
-struct pipe_surface *r600_create_surface_custom(struct pipe_context *pipe,
- struct pipe_resource *texture,
- const struct pipe_surface *templ,
- unsigned width0, unsigned height0,
- unsigned width, unsigned height)
+struct pipe_surface *si_create_surface_custom(struct pipe_context *pipe,
+ struct pipe_resource *texture,
+ const struct pipe_surface *templ,
+ unsigned width0, unsigned height0,
+ unsigned width, unsigned height)
{
struct r600_surface *surface = CALLOC_STRUCT(r600_surface);
@@ -2058,7 +1989,7 @@
}
}
- return r600_create_surface_custom(pipe, tex, templ,
+ return si_create_surface_custom(pipe, tex, templ,
width0, height0,
width, height);
}
@@ -2103,7 +2034,7 @@
clear = PIPE_CLEAR_DEPTH;
desc->unpack_z_float(&depth, 0, data, 0, 1, 1);
- if (rtex->surface.flags & RADEON_SURF_SBUFFER) {
+ if (rtex->surface.has_stencil) {
clear |= PIPE_CLEAR_STENCIL;
desc->unpack_s_8uint(&stencil, 0, data, 0, 1, 1);
}
@@ -2138,14 +2069,14 @@
pipe_surface_reference(&sf, NULL);
}
-unsigned r600_translate_colorswap(enum pipe_format format, bool do_endian_swap)
+unsigned si_translate_colorswap(enum pipe_format format, bool do_endian_swap)
{
const struct util_format_description *desc = util_format_description(format);
#define HAS_SWIZZLE(chan,swz) (desc->swizzle[chan] == PIPE_SWIZZLE_##swz)
if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */
- return V_0280A0_SWAP_STD;
+ return V_028C70_SWAP_STD;
if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
return ~0U;
@@ -2153,45 +2084,45 @@
switch (desc->nr_channels) {
case 1:
if (HAS_SWIZZLE(0,X))
- return V_0280A0_SWAP_STD; /* X___ */
+ return V_028C70_SWAP_STD; /* X___ */
else if (HAS_SWIZZLE(3,X))
- return V_0280A0_SWAP_ALT_REV; /* ___X */
+ return V_028C70_SWAP_ALT_REV; /* ___X */
break;
case 2:
if ((HAS_SWIZZLE(0,X) && HAS_SWIZZLE(1,Y)) ||
(HAS_SWIZZLE(0,X) && HAS_SWIZZLE(1,NONE)) ||
(HAS_SWIZZLE(0,NONE) && HAS_SWIZZLE(1,Y)))
- return V_0280A0_SWAP_STD; /* XY__ */
+ return V_028C70_SWAP_STD; /* XY__ */
else if ((HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(1,X)) ||
(HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(1,NONE)) ||
(HAS_SWIZZLE(0,NONE) && HAS_SWIZZLE(1,X)))
/* YX__ */
- return (do_endian_swap ? V_0280A0_SWAP_STD : V_0280A0_SWAP_STD_REV);
+ return (do_endian_swap ? V_028C70_SWAP_STD : V_028C70_SWAP_STD_REV);
else if (HAS_SWIZZLE(0,X) && HAS_SWIZZLE(3,Y))
- return V_0280A0_SWAP_ALT; /* X__Y */
+ return V_028C70_SWAP_ALT; /* X__Y */
else if (HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(3,X))
- return V_0280A0_SWAP_ALT_REV; /* Y__X */
+ return V_028C70_SWAP_ALT_REV; /* Y__X */
break;
case 3:
if (HAS_SWIZZLE(0,X))
- return (do_endian_swap ? V_0280A0_SWAP_STD_REV : V_0280A0_SWAP_STD);
+ return (do_endian_swap ? V_028C70_SWAP_STD_REV : V_028C70_SWAP_STD);
else if (HAS_SWIZZLE(0,Z))
- return V_0280A0_SWAP_STD_REV; /* ZYX */
+ return V_028C70_SWAP_STD_REV; /* ZYX */
break;
case 4:
/* check the middle channels, the 1st and 4th channel can be NONE */
if (HAS_SWIZZLE(1,Y) && HAS_SWIZZLE(2,Z)) {
- return V_0280A0_SWAP_STD; /* XYZW */
+ return V_028C70_SWAP_STD; /* XYZW */
} else if (HAS_SWIZZLE(1,Z) && HAS_SWIZZLE(2,Y)) {
- return V_0280A0_SWAP_STD_REV; /* WZYX */
+ return V_028C70_SWAP_STD_REV; /* WZYX */
} else if (HAS_SWIZZLE(1,Y) && HAS_SWIZZLE(2,X)) {
- return V_0280A0_SWAP_ALT; /* ZYXW */
+ return V_028C70_SWAP_ALT; /* ZYXW */
} else if (HAS_SWIZZLE(1,Z) && HAS_SWIZZLE(2,W)) {
/* YZWX */
if (desc->is_array)
- return V_0280A0_SWAP_ALT_REV;
+ return V_028C70_SWAP_ALT_REV;
else
- return (do_endian_swap ? V_0280A0_SWAP_ALT : V_0280A0_SWAP_ALT_REV);
+ return (do_endian_swap ? V_028C70_SWAP_ALT : V_028C70_SWAP_ALT_REV);
}
break;
}
@@ -2359,7 +2290,7 @@
tex->last_dcc_separate_buffer = NULL;
} else {
tex->dcc_separate_buffer = (struct r600_resource*)
- r600_aligned_buffer_create(rctx->b.screen,
+ si_aligned_buffer_create(rctx->b.screen,
R600_RESOURCE_FLAG_UNMAPPABLE,
PIPE_USAGE_DEFAULT,
tex->surface.dcc_size,
@@ -2395,7 +2326,7 @@
/* Read the results. */
ctx->get_query_result(ctx, rctx->dcc_stats[i].ps_stats[2],
true, &result);
- r600_query_hw_reset_buffers(rctx,
+ si_query_hw_reset_buffers(rctx,
(struct r600_query_hw*)
rctx->dcc_stats[i].ps_stats[2]);
@@ -2506,7 +2437,7 @@
util_format_is_alpha(surface_format)) {
extra_channel = -1;
} else if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
- if(r600_translate_colorswap(surface_format, false) <= 1)
+ if(si_translate_colorswap(surface_format, false) <= 1)
extra_channel = desc->nr_channels - 1;
else
extra_channel = 0;
@@ -2588,8 +2519,11 @@
assert(rtex->resource.b.b.nr_samples <= 1);
clear_size = rtex->surface.dcc_size;
} else {
+ unsigned num_layers = util_max_layer(&rtex->resource.b.b, level) + 1;
+
dcc_offset += rtex->surface.u.legacy.level[level].dcc_offset;
- clear_size = rtex->surface.u.legacy.level[level].dcc_fast_clear_size;
+ clear_size = rtex->surface.u.legacy.level[level].dcc_fast_clear_size *
+ num_layers;
}
rctx->clear_buffer(&rctx->b, dcc_buffer, dcc_offset, clear_size,
@@ -2704,7 +2638,7 @@
p_atomic_inc(&rscreen->dirty_tex_counter);
}
-void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
+void si_do_fast_color_clear(struct r600_common_context *rctx,
struct pipe_framebuffer_state *fb,
struct r600_atom *fb_state,
unsigned *buffers, ubyte *dirty_cbufs,
@@ -2769,7 +2703,7 @@
* displayable surfaces.
*/
if (rctx->chip_class >= VI &&
- !(rctx->screen->debug_flags & DBG_NO_DCC_FB)) {
+ !(rctx->screen->debug_flags & DBG(NO_DCC_FB))) {
vi_separate_dcc_try_enable(rctx, tex);
/* RB+ isn't supported with a CMASK clear only on Stoney,
@@ -2787,7 +2721,7 @@
uint32_t reset_value;
bool clear_words_needed;
- if (rctx->screen->debug_flags & DBG_NO_DCC_CLEAR)
+ if (rctx->screen->debug_flags & DBG(NO_DCC_CLEAR))
continue;
if (!vi_get_fast_clear_parameters(fb->cbufs[i]->format,
@@ -2837,8 +2771,7 @@
}
/* We can change the micro tile mode before a full clear. */
- if (rctx->screen->chip_class >= SI)
- si_set_optimal_micro_tile_mode(rctx->screen, tex);
+ si_set_optimal_micro_tile_mode(rctx->screen, tex);
evergreen_set_clear_color(tex, fb->cbufs[i]->format, color);
@@ -2849,13 +2782,149 @@
}
}
-void r600_init_screen_texture_functions(struct r600_common_screen *rscreen)
+static struct pipe_memory_object *
+r600_memobj_from_handle(struct pipe_screen *screen,
+ struct winsys_handle *whandle,
+ bool dedicated)
+{
+ struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
+ struct r600_memory_object *memobj = CALLOC_STRUCT(r600_memory_object);
+ struct pb_buffer *buf = NULL;
+ uint32_t stride, offset;
+
+ if (!memobj)
+ return NULL;
+
+ buf = rscreen->ws->buffer_from_handle(rscreen->ws, whandle,
+ &stride, &offset);
+ if (!buf) {
+ free(memobj);
+ return NULL;
+ }
+
+ memobj->b.dedicated = dedicated;
+ memobj->buf = buf;
+ memobj->stride = stride;
+ memobj->offset = offset;
+
+ return (struct pipe_memory_object *)memobj;
+
+}
+
+static void
+r600_memobj_destroy(struct pipe_screen *screen,
+ struct pipe_memory_object *_memobj)
+{
+ struct r600_memory_object *memobj = (struct r600_memory_object *)_memobj;
+
+ pb_reference(&memobj->buf, NULL);
+ free(memobj);
+}
+
+static struct pipe_resource *
+r600_texture_from_memobj(struct pipe_screen *screen,
+ const struct pipe_resource *templ,
+ struct pipe_memory_object *_memobj,
+ uint64_t offset)
+{
+ int r;
+ struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
+ struct r600_memory_object *memobj = (struct r600_memory_object *)_memobj;
+ struct r600_texture *rtex;
+ struct radeon_surf surface = {};
+ struct radeon_bo_metadata metadata = {};
+ enum radeon_surf_mode array_mode;
+ bool is_scanout;
+ struct pb_buffer *buf = NULL;
+
+ if (memobj->b.dedicated) {
+ rscreen->ws->buffer_get_metadata(memobj->buf, &metadata);
+ r600_surface_import_metadata(rscreen, &surface, &metadata,
+ &array_mode, &is_scanout);
+ } else {
+ /**
+ * The bo metadata is unset for un-dedicated images. So we fall
+ * back to linear. See answer to question 5 of the
+ * VK_KHX_external_memory spec for some details.
+ *
+ * It is possible that this case isn't going to work if the
+ * surface pitch isn't correctly aligned by default.
+ *
+ * In order to support it correctly we require multi-image
+ * metadata to be synchronized between radv and radeonsi. The
+ * semantics of associating multiple image metadata to a memory
+ * object on the vulkan export side are not concretely defined
+ * either.
+ *
+ * All the use cases we are aware of at the moment for memory
+ * objects use dedicated allocations. So let's keep the initial
+ * implementation simple.
+ *
+ * A possible alternative is to attempt to reconstruct the
+ * tiling information when the TexParameter TEXTURE_TILING_EXT
+ * is set.
+ */
+ array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
+ is_scanout = false;
+
+ }
+
+ r = r600_init_surface(rscreen, &surface, templ,
+ array_mode, memobj->stride,
+ offset, true, is_scanout,
+ false, false);
+ if (r)
+ return NULL;
+
+ rtex = r600_texture_create_object(screen, templ, memobj->buf, &surface);
+ if (!rtex)
+ return NULL;
+
+ /* r600_texture_create_object doesn't increment refcount of
+ * memobj->buf, so increment it here.
+ */
+ pb_reference(&buf, memobj->buf);
+
+ rtex->resource.b.is_shared = true;
+ rtex->resource.external_usage = PIPE_HANDLE_USAGE_READ_WRITE;
+
+ if (rscreen->apply_opaque_metadata)
+ rscreen->apply_opaque_metadata(rscreen, rtex, &metadata);
+
+ return &rtex->resource.b.b;
+}
+
+static bool si_check_resource_capability(struct pipe_screen *screen,
+ struct pipe_resource *resource,
+ unsigned bind)
+{
+ struct r600_texture *tex = (struct r600_texture*)resource;
+
+ /* Buffers only support the linear flag. */
+ if (resource->target == PIPE_BUFFER)
+ return (bind & ~PIPE_BIND_LINEAR) == 0;
+
+ if (bind & PIPE_BIND_LINEAR && !tex->surface.is_linear)
+ return false;
+
+ if (bind & PIPE_BIND_SCANOUT && !tex->surface.is_displayable)
+ return false;
+
+ /* TODO: PIPE_BIND_CURSOR - do we care? */
+ return true;
+}
+
+void si_init_screen_texture_functions(struct r600_common_screen *rscreen)
{
rscreen->b.resource_from_handle = r600_texture_from_handle;
rscreen->b.resource_get_handle = r600_texture_get_handle;
+ rscreen->b.resource_from_memobj = r600_texture_from_memobj;
+ rscreen->b.memobj_create_from_handle = r600_memobj_from_handle;
+ rscreen->b.memobj_destroy = r600_memobj_destroy;
+ rscreen->b.check_resource_capability = si_check_resource_capability;
}
-void r600_init_context_texture_functions(struct r600_common_context *rctx)
+void si_init_context_texture_functions(struct r600_common_context *rctx)
{
rctx->b.create_surface = r600_create_surface;
rctx->b.surface_destroy = r600_surface_destroy;
diff -Nru mesa-17.2.4/src/gallium/drivers/radeon/r600_viewport.c mesa-17.3.3/src/gallium/drivers/radeon/r600_viewport.c
--- mesa-17.2.4/src/gallium/drivers/radeon/r600_viewport.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeon/r600_viewport.c 1970-01-01 00:00:00.000000000 +0000
@@ -1,450 +0,0 @@
-/*
- * Copyright 2012 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "r600_cs.h"
-#include "util/u_viewport.h"
-#include "tgsi/tgsi_scan.h"
-
-#define GET_MAX_SCISSOR(rctx) (rctx->chip_class >= EVERGREEN ? 16384 : 8192)
-
-static void r600_set_scissor_states(struct pipe_context *ctx,
- unsigned start_slot,
- unsigned num_scissors,
- const struct pipe_scissor_state *state)
-{
- struct r600_common_context *rctx = (struct r600_common_context *)ctx;
- int i;
-
- for (i = 0; i < num_scissors; i++)
- rctx->scissors.states[start_slot + i] = state[i];
-
- if (!rctx->scissor_enabled)
- return;
-
- rctx->scissors.dirty_mask |= ((1 << num_scissors) - 1) << start_slot;
- rctx->set_atom_dirty(rctx, &rctx->scissors.atom, true);
-}
-
-/* Since the guard band disables clipping, we have to clip per-pixel
- * using a scissor.
- */
-static void r600_get_scissor_from_viewport(struct r600_common_context *rctx,
- const struct pipe_viewport_state *vp,
- struct r600_signed_scissor *scissor)
-{
- float tmp, minx, miny, maxx, maxy;
-
- /* Convert (-1, -1) and (1, 1) from clip space into window space. */
- minx = -vp->scale[0] + vp->translate[0];
- miny = -vp->scale[1] + vp->translate[1];
- maxx = vp->scale[0] + vp->translate[0];
- maxy = vp->scale[1] + vp->translate[1];
-
- /* r600_draw_rectangle sets this. Disable the scissor. */
- if (minx == -1 && miny == -1 && maxx == 1 && maxy == 1) {
- scissor->minx = scissor->miny = 0;
- scissor->maxx = scissor->maxy = GET_MAX_SCISSOR(rctx);
- return;
- }
-
- /* Handle inverted viewports. */
- if (minx > maxx) {
- tmp = minx;
- minx = maxx;
- maxx = tmp;
- }
- if (miny > maxy) {
- tmp = miny;
- miny = maxy;
- maxy = tmp;
- }
-
- /* Convert to integer and round up the max bounds. */
- scissor->minx = minx;
- scissor->miny = miny;
- scissor->maxx = ceilf(maxx);
- scissor->maxy = ceilf(maxy);
-}
-
-static void r600_clamp_scissor(struct r600_common_context *rctx,
- struct pipe_scissor_state *out,
- struct r600_signed_scissor *scissor)
-{
- unsigned max_scissor = GET_MAX_SCISSOR(rctx);
- out->minx = CLAMP(scissor->minx, 0, max_scissor);
- out->miny = CLAMP(scissor->miny, 0, max_scissor);
- out->maxx = CLAMP(scissor->maxx, 0, max_scissor);
- out->maxy = CLAMP(scissor->maxy, 0, max_scissor);
-}
-
-static void r600_clip_scissor(struct pipe_scissor_state *out,
- struct pipe_scissor_state *clip)
-{
- out->minx = MAX2(out->minx, clip->minx);
- out->miny = MAX2(out->miny, clip->miny);
- out->maxx = MIN2(out->maxx, clip->maxx);
- out->maxy = MIN2(out->maxy, clip->maxy);
-}
-
-static void r600_scissor_make_union(struct r600_signed_scissor *out,
- struct r600_signed_scissor *in)
-{
- out->minx = MIN2(out->minx, in->minx);
- out->miny = MIN2(out->miny, in->miny);
- out->maxx = MAX2(out->maxx, in->maxx);
- out->maxy = MAX2(out->maxy, in->maxy);
-}
-
-void evergreen_apply_scissor_bug_workaround(struct r600_common_context *rctx,
- struct pipe_scissor_state *scissor)
-{
- if (rctx->chip_class == EVERGREEN || rctx->chip_class == CAYMAN) {
- if (scissor->maxx == 0)
- scissor->minx = 1;
- if (scissor->maxy == 0)
- scissor->miny = 1;
-
- if (rctx->chip_class == CAYMAN &&
- scissor->maxx == 1 && scissor->maxy == 1)
- scissor->maxx = 2;
- }
-}
-
-static void r600_emit_one_scissor(struct r600_common_context *rctx,
- struct radeon_winsys_cs *cs,
- struct r600_signed_scissor *vp_scissor,
- struct pipe_scissor_state *scissor)
-{
- struct pipe_scissor_state final;
-
- if (rctx->vs_disables_clipping_viewport) {
- final.minx = final.miny = 0;
- final.maxx = final.maxy = GET_MAX_SCISSOR(rctx);
- } else {
- r600_clamp_scissor(rctx, &final, vp_scissor);
- }
-
- if (scissor)
- r600_clip_scissor(&final, scissor);
-
- evergreen_apply_scissor_bug_workaround(rctx, &final);
-
- radeon_emit(cs, S_028250_TL_X(final.minx) |
- S_028250_TL_Y(final.miny) |
- S_028250_WINDOW_OFFSET_DISABLE(1));
- radeon_emit(cs, S_028254_BR_X(final.maxx) |
- S_028254_BR_Y(final.maxy));
-}
-
-/* the range is [-MAX, MAX] */
-#define GET_MAX_VIEWPORT_RANGE(rctx) (rctx->chip_class >= EVERGREEN ? 32768 : 16384)
-
-static void r600_emit_guardband(struct r600_common_context *rctx,
- struct r600_signed_scissor *vp_as_scissor)
-{
- struct radeon_winsys_cs *cs = rctx->gfx.cs;
- struct pipe_viewport_state vp;
- float left, top, right, bottom, max_range, guardband_x, guardband_y;
- float discard_x, discard_y;
-
- /* Reconstruct the viewport transformation from the scissor. */
- vp.translate[0] = (vp_as_scissor->minx + vp_as_scissor->maxx) / 2.0;
- vp.translate[1] = (vp_as_scissor->miny + vp_as_scissor->maxy) / 2.0;
- vp.scale[0] = vp_as_scissor->maxx - vp.translate[0];
- vp.scale[1] = vp_as_scissor->maxy - vp.translate[1];
-
- /* Treat a 0x0 viewport as 1x1 to prevent division by zero. */
- if (vp_as_scissor->minx == vp_as_scissor->maxx)
- vp.scale[0] = 0.5;
- if (vp_as_scissor->miny == vp_as_scissor->maxy)
- vp.scale[1] = 0.5;
-
- /* Find the biggest guard band that is inside the supported viewport
- * range. The guard band is specified as a horizontal and vertical
- * distance from (0,0) in clip space.
- *
- * This is done by applying the inverse viewport transformation
- * on the viewport limits to get those limits in clip space.
- *
- * Use a limit one pixel smaller to allow for some precision error.
- */
- max_range = GET_MAX_VIEWPORT_RANGE(rctx) - 1;
- left = (-max_range - vp.translate[0]) / vp.scale[0];
- right = ( max_range - vp.translate[0]) / vp.scale[0];
- top = (-max_range - vp.translate[1]) / vp.scale[1];
- bottom = ( max_range - vp.translate[1]) / vp.scale[1];
-
- assert(left <= -1 && top <= -1 && right >= 1 && bottom >= 1);
-
- guardband_x = MIN2(-left, right);
- guardband_y = MIN2(-top, bottom);
-
- discard_x = 1.0;
- discard_y = 1.0;
-
- if (rctx->current_rast_prim < PIPE_PRIM_TRIANGLES) {
- /* When rendering wide points or lines, we need to be more
- * conservative about when to discard them entirely. Since
- * point size can be determined by the VS output, we basically
- * disable discard completely completely here.
- *
- * TODO: This can hurt performance when rendering lines and
- * points with fixed size, and could be improved.
- */
- discard_x = guardband_x;
- discard_y = guardband_y;
- }
-
- /* If any of the GB registers is updated, all of them must be updated. */
- if (rctx->chip_class >= CAYMAN)
- radeon_set_context_reg_seq(cs, CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 4);
- else
- radeon_set_context_reg_seq(cs, R600_R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 4);
-
- radeon_emit(cs, fui(guardband_y)); /* R_028BE8_PA_CL_GB_VERT_CLIP_ADJ */
- radeon_emit(cs, fui(discard_y)); /* R_028BEC_PA_CL_GB_VERT_DISC_ADJ */
- radeon_emit(cs, fui(guardband_x)); /* R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ */
- radeon_emit(cs, fui(discard_x)); /* R_028BF4_PA_CL_GB_HORZ_DISC_ADJ */
-}
-
-static void r600_emit_scissors(struct r600_common_context *rctx, struct r600_atom *atom)
-{
- struct radeon_winsys_cs *cs = rctx->gfx.cs;
- struct pipe_scissor_state *states = rctx->scissors.states;
- unsigned mask = rctx->scissors.dirty_mask;
- bool scissor_enabled = rctx->scissor_enabled;
- struct r600_signed_scissor max_vp_scissor;
- int i;
-
- /* The simple case: Only 1 viewport is active. */
- if (!rctx->vs_writes_viewport_index) {
- struct r600_signed_scissor *vp = &rctx->viewports.as_scissor[0];
-
- if (!(mask & 1))
- return;
-
- radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 2);
- r600_emit_one_scissor(rctx, cs, vp, scissor_enabled ? &states[0] : NULL);
- r600_emit_guardband(rctx, vp);
- rctx->scissors.dirty_mask &= ~1; /* clear one bit */
- return;
- }
-
- /* Shaders can draw to any viewport. Make a union of all viewports. */
- max_vp_scissor = rctx->viewports.as_scissor[0];
- for (i = 1; i < R600_MAX_VIEWPORTS; i++)
- r600_scissor_make_union(&max_vp_scissor,
- &rctx->viewports.as_scissor[i]);
-
- while (mask) {
- int start, count, i;
-
- u_bit_scan_consecutive_range(&mask, &start, &count);
-
- radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL +
- start * 4 * 2, count * 2);
- for (i = start; i < start+count; i++) {
- r600_emit_one_scissor(rctx, cs, &rctx->viewports.as_scissor[i],
- scissor_enabled ? &states[i] : NULL);
- }
- }
- r600_emit_guardband(rctx, &max_vp_scissor);
- rctx->scissors.dirty_mask = 0;
-}
-
-static void r600_set_viewport_states(struct pipe_context *ctx,
- unsigned start_slot,
- unsigned num_viewports,
- const struct pipe_viewport_state *state)
-{
- struct r600_common_context *rctx = (struct r600_common_context *)ctx;
- unsigned mask;
- int i;
-
- for (i = 0; i < num_viewports; i++) {
- unsigned index = start_slot + i;
-
- rctx->viewports.states[index] = state[i];
- r600_get_scissor_from_viewport(rctx, &state[i],
- &rctx->viewports.as_scissor[index]);
- }
-
- mask = ((1 << num_viewports) - 1) << start_slot;
- rctx->viewports.dirty_mask |= mask;
- rctx->viewports.depth_range_dirty_mask |= mask;
- rctx->scissors.dirty_mask |= mask;
- rctx->set_atom_dirty(rctx, &rctx->viewports.atom, true);
- rctx->set_atom_dirty(rctx, &rctx->scissors.atom, true);
-}
-
-static void r600_emit_one_viewport(struct r600_common_context *rctx,
- struct pipe_viewport_state *state)
-{
- struct radeon_winsys_cs *cs = rctx->gfx.cs;
-
- radeon_emit(cs, fui(state->scale[0]));
- radeon_emit(cs, fui(state->translate[0]));
- radeon_emit(cs, fui(state->scale[1]));
- radeon_emit(cs, fui(state->translate[1]));
- radeon_emit(cs, fui(state->scale[2]));
- radeon_emit(cs, fui(state->translate[2]));
-}
-
-static void r600_emit_viewports(struct r600_common_context *rctx)
-{
- struct radeon_winsys_cs *cs = rctx->gfx.cs;
- struct pipe_viewport_state *states = rctx->viewports.states;
- unsigned mask = rctx->viewports.dirty_mask;
-
- /* The simple case: Only 1 viewport is active. */
- if (!rctx->vs_writes_viewport_index) {
- if (!(mask & 1))
- return;
-
- radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE, 6);
- r600_emit_one_viewport(rctx, &states[0]);
- rctx->viewports.dirty_mask &= ~1; /* clear one bit */
- return;
- }
-
- while (mask) {
- int start, count, i;
-
- u_bit_scan_consecutive_range(&mask, &start, &count);
-
- radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE +
- start * 4 * 6, count * 6);
- for (i = start; i < start+count; i++)
- r600_emit_one_viewport(rctx, &states[i]);
- }
- rctx->viewports.dirty_mask = 0;
-}
-
-static void r600_emit_depth_ranges(struct r600_common_context *rctx)
-{
- struct radeon_winsys_cs *cs = rctx->gfx.cs;
- struct pipe_viewport_state *states = rctx->viewports.states;
- unsigned mask = rctx->viewports.depth_range_dirty_mask;
- float zmin, zmax;
-
- /* The simple case: Only 1 viewport is active. */
- if (!rctx->vs_writes_viewport_index) {
- if (!(mask & 1))
- return;
-
- util_viewport_zmin_zmax(&states[0], rctx->clip_halfz, &zmin, &zmax);
-
- radeon_set_context_reg_seq(cs, R_0282D0_PA_SC_VPORT_ZMIN_0, 2);
- radeon_emit(cs, fui(zmin));
- radeon_emit(cs, fui(zmax));
- rctx->viewports.depth_range_dirty_mask &= ~1; /* clear one bit */
- return;
- }
-
- while (mask) {
- int start, count, i;
-
- u_bit_scan_consecutive_range(&mask, &start, &count);
-
- radeon_set_context_reg_seq(cs, R_0282D0_PA_SC_VPORT_ZMIN_0 +
- start * 4 * 2, count * 2);
- for (i = start; i < start+count; i++) {
- util_viewport_zmin_zmax(&states[i], rctx->clip_halfz, &zmin, &zmax);
- radeon_emit(cs, fui(zmin));
- radeon_emit(cs, fui(zmax));
- }
- }
- rctx->viewports.depth_range_dirty_mask = 0;
-}
-
-static void r600_emit_viewport_states(struct r600_common_context *rctx,
- struct r600_atom *atom)
-{
- r600_emit_viewports(rctx);
- r600_emit_depth_ranges(rctx);
-}
-
-/* Set viewport dependencies on pipe_rasterizer_state. */
-void r600_viewport_set_rast_deps(struct r600_common_context *rctx,
- bool scissor_enable, bool clip_halfz)
-{
- if (rctx->scissor_enabled != scissor_enable) {
- rctx->scissor_enabled = scissor_enable;
- rctx->scissors.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
- rctx->set_atom_dirty(rctx, &rctx->scissors.atom, true);
- }
- if (rctx->clip_halfz != clip_halfz) {
- rctx->clip_halfz = clip_halfz;
- rctx->viewports.depth_range_dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
- rctx->set_atom_dirty(rctx, &rctx->viewports.atom, true);
- }
-}
-
-/**
- * Normally, we only emit 1 viewport and 1 scissor if no shader is using
- * the VIEWPORT_INDEX output, and emitting the other viewports and scissors
- * is delayed. When a shader with VIEWPORT_INDEX appears, this should be
- * called to emit the rest.
- */
-void r600_update_vs_writes_viewport_index(struct r600_common_context *rctx,
- struct tgsi_shader_info *info)
-{
- bool vs_window_space;
-
- if (!info)
- return;
-
- /* When the VS disables clipping and viewport transformation. */
- vs_window_space =
- info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
-
- if (rctx->vs_disables_clipping_viewport != vs_window_space) {
- rctx->vs_disables_clipping_viewport = vs_window_space;
- rctx->scissors.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
- rctx->set_atom_dirty(rctx, &rctx->scissors.atom, true);
- }
-
- /* Viewport index handling. */
- rctx->vs_writes_viewport_index = info->writes_viewport_index;
- if (!rctx->vs_writes_viewport_index)
- return;
-
- if (rctx->scissors.dirty_mask)
- rctx->set_atom_dirty(rctx, &rctx->scissors.atom, true);
-
- if (rctx->viewports.dirty_mask ||
- rctx->viewports.depth_range_dirty_mask)
- rctx->set_atom_dirty(rctx, &rctx->viewports.atom, true);
-}
-
-void r600_init_viewport_functions(struct r600_common_context *rctx)
-{
- rctx->scissors.atom.emit = r600_emit_scissors;
- rctx->viewports.atom.emit = r600_emit_viewport_states;
-
- rctx->scissors.atom.num_dw = (2 + 16 * 2) + 6;
- rctx->viewports.atom.num_dw = 2 + 16 * 6;
-
- rctx->b.set_scissor_states = r600_set_scissor_states;
- rctx->b.set_viewport_states = r600_set_viewport_states;
-}
diff -Nru mesa-17.2.4/src/gallium/drivers/radeon/radeon_uvd.c mesa-17.3.3/src/gallium/drivers/radeon/radeon_uvd.c
--- mesa-17.2.4/src/gallium/drivers/radeon/radeon_uvd.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeon/radeon_uvd.c 2018-01-18 21:30:28.000000000 +0000
@@ -220,6 +220,9 @@
case PIPE_VIDEO_FORMAT_HEVC:
return RUVD_CODEC_H265;
+ case PIPE_VIDEO_FORMAT_JPEG:
+ return RUVD_CODEC_MJPEG;
+
default:
assert(0);
return 0;
@@ -469,6 +472,10 @@
dpb_size = MAX2(dpb_size, 30 * 1024 * 1024);
break;
+ case PIPE_VIDEO_FORMAT_JPEG:
+ dpb_size = 0;
+ break;
+
default:
// something is missing here
assert(0);
@@ -940,6 +947,139 @@
return result;
}
+/**
+ * Write a JPEG header (SOI, DQT, DHT, optional DRI, SOF, SOS) built from the
+ * tables and parameters in 'pic' at dec->bs_ptr, then advance dec->bs_ptr and
+ * dec->bs_size by the number of bytes written.
+ *
+ * 16-bit fields are stored byte by byte, high byte first, so the output does
+ * not depend on host byte order or on the alignment of the write position.
+ *
+ * NOTE(review): no space check is done here; presumably the caller ensures
+ * the mapped bitstream buffer can hold the header - confirm.
+ */
+static void get_mjpeg_slice_header(struct ruvd_decoder *dec, struct pipe_mjpeg_picture_desc *pic)
+{
+	int size = 0, saved_size, len_pos, i;
+	uint8_t *buf = dec->bs_ptr;
+
+	/* SOI */
+	buf[size++] = 0xff;
+	buf[size++] = 0xd8;
+
+	/* DQT */
+	buf[size++] = 0xff;
+	buf[size++] = 0xdb;
+	len_pos = size; /* two length bytes, filled in after the tables */
+	size += 2;
+
+	for (i = 0; i < 4; ++i) {
+		if (pic->quantization_table.load_quantiser_table[i] == 0)
+			continue;
+
+		buf[size++] = i;
+		memcpy(buf + size, &pic->quantization_table.quantiser_table[i], 64);
+		size += 64;
+	}
+
+	/* everything written so far except SOI and the DQT marker */
+	buf[len_pos] = (uint8_t)((size - 4) >> 8);
+	buf[len_pos + 1] = (uint8_t)(size - 4);
+	saved_size = size;
+
+	/* DHT */
+	buf[size++] = 0xff;
+	buf[size++] = 0xc4;
+	len_pos = size;
+	size += 2;
+
+	for (i = 0; i < 2; ++i) {
+		if (pic->huffman_table.load_huffman_table[i] == 0)
+			continue;
+
+		buf[size++] = 0x00 | i;
+		memcpy(buf + size, &pic->huffman_table.table[i].num_dc_codes, 16);
+		size += 16;
+		memcpy(buf + size, &pic->huffman_table.table[i].dc_values, 12);
+		size += 12;
+	}
+
+	for (i = 0; i < 2; ++i) {
+		if (pic->huffman_table.load_huffman_table[i] == 0)
+			continue;
+
+		buf[size++] = 0x10 | i;
+		memcpy(buf + size, &pic->huffman_table.table[i].num_ac_codes, 16);
+		size += 16;
+		memcpy(buf + size, &pic->huffman_table.table[i].ac_values, 162);
+		size += 162;
+	}
+
+	/* segment length: all bytes after the two marker bytes */
+	buf[len_pos] = (uint8_t)((size - saved_size - 2) >> 8);
+	buf[len_pos + 1] = (uint8_t)(size - saved_size - 2);
+	saved_size = size;
+
+	/* DRI */
+	if (pic->slice_parameter.restart_interval) {
+		buf[size++] = 0xff;
+		buf[size++] = 0xdd;
+		buf[size++] = 0x00;
+		buf[size++] = 0x04;
+		buf[size++] = (uint8_t)(pic->slice_parameter.restart_interval >> 8);
+		buf[size++] = (uint8_t)pic->slice_parameter.restart_interval;
+		saved_size = size;
+	}
+
+	/* SOF */
+	buf[size++] = 0xff;
+	buf[size++] = 0xc0;
+	len_pos = size;
+	size += 2;
+
+	buf[size++] = 0x08;
+	buf[size++] = (uint8_t)(pic->picture_parameter.picture_height >> 8);
+	buf[size++] = (uint8_t)pic->picture_parameter.picture_height;
+	buf[size++] = (uint8_t)(pic->picture_parameter.picture_width >> 8);
+	buf[size++] = (uint8_t)pic->picture_parameter.picture_width;
+	buf[size++] = pic->picture_parameter.num_components;
+
+	for (i = 0; i < pic->picture_parameter.num_components; ++i) {
+		buf[size++] = pic->picture_parameter.components[i].component_id;
+		buf[size++] = pic->picture_parameter.components[i].h_sampling_factor << 4 |
+			pic->picture_parameter.components[i].v_sampling_factor;
+		buf[size++] = pic->picture_parameter.components[i].quantiser_table_selector;
+	}
+
+	buf[len_pos] = (uint8_t)((size - saved_size - 2) >> 8);
+	buf[len_pos + 1] = (uint8_t)(size - saved_size - 2);
+	saved_size = size;
+
+	/* SOS */
+	buf[size++] = 0xff;
+	buf[size++] = 0xda;
+	len_pos = size;
+	size += 2;
+
+	buf[size++] = pic->slice_parameter.num_components;
+
+	for (i = 0; i < pic->slice_parameter.num_components; ++i) {
+		buf[size++] = pic->slice_parameter.components[i].component_selector;
+		buf[size++] = pic->slice_parameter.components[i].dc_table_selector << 4 |
+			pic->slice_parameter.components[i].ac_table_selector;
+	}
+
+	buf[size++] = 0x00;
+	buf[size++] = 0x3f;
+	buf[size++] = 0x00;
+
+	buf[len_pos] = (uint8_t)((size - saved_size - 2) >> 8);
+	buf[len_pos + 1] = (uint8_t)(size - saved_size - 2);
+
+	dec->bs_ptr += size;
+	dec->bs_size += size;
+}
+
/**
* destroy this video decoder
*/
@@ -961,13 +1101,13 @@
dec->ws->cs_destroy(dec->cs);
for (i = 0; i < NUM_BUFFERS; ++i) {
- rvid_destroy_buffer(&dec->msg_fb_it_buffers[i]);
- rvid_destroy_buffer(&dec->bs_buffers[i]);
+ si_vid_destroy_buffer(&dec->msg_fb_it_buffers[i]);
+ si_vid_destroy_buffer(&dec->bs_buffers[i]);
}
- rvid_destroy_buffer(&dec->dpb);
- rvid_destroy_buffer(&dec->ctx);
- rvid_destroy_buffer(&dec->sessionctx);
+ si_vid_destroy_buffer(&dec->dpb);
+ si_vid_destroy_buffer(&dec->ctx);
+ si_vid_destroy_buffer(&dec->sessionctx);
FREE(dec);
}
@@ -1018,6 +1158,7 @@
const unsigned *sizes)
{
struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder;
+ enum pipe_video_format format = u_reduce_video_profile(picture->profile);
unsigned i;
assert(decoder);
@@ -1025,13 +1166,19 @@
if (!dec->bs_ptr)
return;
+ if (format == PIPE_VIDEO_FORMAT_JPEG)
+ get_mjpeg_slice_header(dec, (struct pipe_mjpeg_picture_desc*)picture);
+
for (i = 0; i < num_buffers; ++i) {
struct rvid_buffer *buf = &dec->bs_buffers[dec->cur_buffer];
unsigned new_size = dec->bs_size + sizes[i];
+ if (format == PIPE_VIDEO_FORMAT_JPEG)
+ new_size += 2; /* save for EOI */
+
if (new_size > buf->res->buf->size) {
dec->ws->buffer_unmap(buf->res->buf);
- if (!rvid_resize_buffer(dec->screen, dec->cs, buf, new_size)) {
+ if (!si_vid_resize_buffer(dec->screen, dec->cs, buf, new_size)) {
RVID_ERR("Can't resize bitstream buffer!");
return;
}
@@ -1048,6 +1195,13 @@
dec->bs_size += sizes[i];
dec->bs_ptr += sizes[i];
}
+
+ if (format == PIPE_VIDEO_FORMAT_JPEG) {
+ ((uint8_t *)dec->bs_ptr)[0] = 0xff; /* EOI */
+ ((uint8_t *)dec->bs_ptr)[1] = 0xd9;
+ dec->bs_size += 2;
+ dec->bs_ptr += 2;
+ }
}
/**
@@ -1091,7 +1245,8 @@
dec->msg->body.decode.height_in_samples = align(dec->msg->body.decode.height_in_samples, 16) / 16;
}
- dec->msg->body.decode.dpb_size = dec->dpb.res->buf->size;
+ if (dec->dpb.res)
+ dec->msg->body.decode.dpb_size = dec->dpb.res->buf->size;
dec->msg->body.decode.bsd_size = bs_size;
dec->msg->body.decode.db_pitch = align(dec->base.width, get_db_pitch_alignment(dec));
@@ -1116,10 +1271,10 @@
ctx_size = calc_ctx_size_h265_main10(dec, (struct pipe_h265_picture_desc*)picture);
else
ctx_size = calc_ctx_size_h265_main(dec);
- if (!rvid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) {
+ if (!si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) {
RVID_ERR("Can't allocated context buffer.\n");
}
- rvid_clear_buffer(decoder->context, &dec->ctx);
+ si_vid_clear_buffer(decoder->context, &dec->ctx);
}
if (dec->ctx.res)
@@ -1138,6 +1293,9 @@
dec->msg->body.decode.codec.mpeg4 = get_mpeg4_msg(dec, (struct pipe_mpeg4_picture_desc*)picture);
break;
+ case PIPE_VIDEO_FORMAT_JPEG:
+ break;
+
default:
assert(0);
return;
@@ -1151,8 +1309,10 @@
send_msg_buf(dec);
- send_cmd(dec, RUVD_CMD_DPB_BUFFER, dec->dpb.res->buf, 0,
- RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM);
+ if (dec->dpb.res)
+ send_cmd(dec, RUVD_CMD_DPB_BUFFER, dec->dpb.res->buf, 0,
+ RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM);
+
if (dec->ctx.res)
send_cmd(dec, RUVD_CMD_CONTEXT_BUFFER, dec->ctx.res->buf, 0,
RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM);
@@ -1181,9 +1341,9 @@
/**
* create and UVD decoder
*/
-struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
- const struct pipe_video_codec *templ,
- ruvd_set_dtb set_dtb)
+struct pipe_video_codec *si_common_uvd_create_decoder(struct pipe_context *context,
+ const struct pipe_video_codec *templ,
+ ruvd_set_dtb set_dtb)
{
struct radeon_winsys* ws = ((struct r600_common_context *)context)->ws;
struct r600_common_context *rctx = (struct r600_common_context*)context;
@@ -1198,7 +1358,7 @@
switch(u_reduce_video_profile(templ->profile)) {
case PIPE_VIDEO_FORMAT_MPEG12:
- if (templ->entrypoint > PIPE_VIDEO_ENTRYPOINT_BITSTREAM || info.family < CHIP_PALM)
+ if (templ->entrypoint > PIPE_VIDEO_ENTRYPOINT_BITSTREAM)
return vl_create_mpeg12_decoder(context, templ);
/* fall through */
@@ -1238,7 +1398,7 @@
dec->stream_type = profile2stream_type(dec, info.family);
dec->set_dtb = set_dtb;
- dec->stream_handle = rvid_alloc_stream_handle();
+ dec->stream_handle = si_vid_alloc_stream_handle();
dec->screen = context->screen;
dec->ws = ws;
dec->cs = ws->cs_create(rctx->ctx, RING_UVD, NULL, NULL);
@@ -1255,48 +1415,48 @@
STATIC_ASSERT(sizeof(struct ruvd_msg) <= FB_BUFFER_OFFSET);
if (have_it(dec))
msg_fb_it_size += IT_SCALING_TABLE_SIZE;
- if (!rvid_create_buffer(dec->screen, &dec->msg_fb_it_buffers[i],
+ if (!si_vid_create_buffer(dec->screen, &dec->msg_fb_it_buffers[i],
msg_fb_it_size, PIPE_USAGE_STAGING)) {
RVID_ERR("Can't allocated message buffers.\n");
goto error;
}
- if (!rvid_create_buffer(dec->screen, &dec->bs_buffers[i],
+ if (!si_vid_create_buffer(dec->screen, &dec->bs_buffers[i],
bs_buf_size, PIPE_USAGE_STAGING)) {
RVID_ERR("Can't allocated bitstream buffers.\n");
goto error;
}
- rvid_clear_buffer(context, &dec->msg_fb_it_buffers[i]);
- rvid_clear_buffer(context, &dec->bs_buffers[i]);
+ si_vid_clear_buffer(context, &dec->msg_fb_it_buffers[i]);
+ si_vid_clear_buffer(context, &dec->bs_buffers[i]);
}
dpb_size = calc_dpb_size(dec);
-
- if (!rvid_create_buffer(dec->screen, &dec->dpb, dpb_size, PIPE_USAGE_DEFAULT)) {
- RVID_ERR("Can't allocated dpb.\n");
- goto error;
+ if (dpb_size) {
+ if (!si_vid_create_buffer(dec->screen, &dec->dpb, dpb_size, PIPE_USAGE_DEFAULT)) {
+ RVID_ERR("Can't allocated dpb.\n");
+ goto error;
+ }
+ si_vid_clear_buffer(context, &dec->dpb);
}
- rvid_clear_buffer(context, &dec->dpb);
-
if (dec->stream_type == RUVD_CODEC_H264_PERF && info.family >= CHIP_POLARIS10) {
unsigned ctx_size = calc_ctx_size_h264_perf(dec);
- if (!rvid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) {
+ if (!si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) {
RVID_ERR("Can't allocated context buffer.\n");
goto error;
}
- rvid_clear_buffer(context, &dec->ctx);
+ si_vid_clear_buffer(context, &dec->ctx);
}
if (info.family >= CHIP_POLARIS10 && info.drm_minor >= 3) {
- if (!rvid_create_buffer(dec->screen, &dec->sessionctx,
+ if (!si_vid_create_buffer(dec->screen, &dec->sessionctx,
UVD_SESSION_CONTEXT_SIZE,
PIPE_USAGE_DEFAULT)) {
RVID_ERR("Can't allocated session ctx.\n");
goto error;
}
- rvid_clear_buffer(context, &dec->sessionctx);
+ si_vid_clear_buffer(context, &dec->sessionctx);
}
if (info.family >= CHIP_VEGA10) {
@@ -1332,13 +1492,13 @@
if (dec->cs) dec->ws->cs_destroy(dec->cs);
for (i = 0; i < NUM_BUFFERS; ++i) {
- rvid_destroy_buffer(&dec->msg_fb_it_buffers[i]);
- rvid_destroy_buffer(&dec->bs_buffers[i]);
+ si_vid_destroy_buffer(&dec->msg_fb_it_buffers[i]);
+ si_vid_destroy_buffer(&dec->bs_buffers[i]);
}
- rvid_destroy_buffer(&dec->dpb);
- rvid_destroy_buffer(&dec->ctx);
- rvid_destroy_buffer(&dec->sessionctx);
+ si_vid_destroy_buffer(&dec->dpb);
+ si_vid_destroy_buffer(&dec->ctx);
+ si_vid_destroy_buffer(&dec->sessionctx);
FREE(dec);
@@ -1391,13 +1551,13 @@
/**
* fill decoding target field from the luma and chroma surfaces
*/
-void ruvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surf *luma,
- struct radeon_surf *chroma, enum ruvd_surface_type type)
+void si_uvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surf *luma,
+ struct radeon_surf *chroma, enum ruvd_surface_type type)
{
switch (type) {
default:
case RUVD_SURFACE_TYPE_LEGACY:
- msg->body.decode.dt_pitch = luma->u.legacy.level[0].nblk_x;
+ msg->body.decode.dt_pitch = luma->u.legacy.level[0].nblk_x * luma->blk_w;
switch (luma->u.legacy.level[0].mode) {
case RADEON_SURF_MODE_LINEAR_ALIGNED:
msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR;
@@ -1417,25 +1577,29 @@
}
msg->body.decode.dt_luma_top_offset = texture_offset(luma, 0, type);
- msg->body.decode.dt_chroma_top_offset = texture_offset(chroma, 0, type);
+ if (chroma)
+ msg->body.decode.dt_chroma_top_offset = texture_offset(chroma, 0, type);
if (msg->body.decode.dt_field_mode) {
msg->body.decode.dt_luma_bottom_offset = texture_offset(luma, 1, type);
- msg->body.decode.dt_chroma_bottom_offset = texture_offset(chroma, 1, type);
+ if (chroma)
+ msg->body.decode.dt_chroma_bottom_offset = texture_offset(chroma, 1, type);
} else {
msg->body.decode.dt_luma_bottom_offset = msg->body.decode.dt_luma_top_offset;
msg->body.decode.dt_chroma_bottom_offset = msg->body.decode.dt_chroma_top_offset;
}
- assert(luma->u.legacy.bankw == chroma->u.legacy.bankw);
- assert(luma->u.legacy.bankh == chroma->u.legacy.bankh);
- assert(luma->u.legacy.mtilea == chroma->u.legacy.mtilea);
+ if (chroma) {
+ assert(luma->u.legacy.bankw == chroma->u.legacy.bankw);
+ assert(luma->u.legacy.bankh == chroma->u.legacy.bankh);
+ assert(luma->u.legacy.mtilea == chroma->u.legacy.mtilea);
+ }
msg->body.decode.dt_surf_tile_config |= RUVD_BANK_WIDTH(bank_wh(luma->u.legacy.bankw));
msg->body.decode.dt_surf_tile_config |= RUVD_BANK_HEIGHT(bank_wh(luma->u.legacy.bankh));
msg->body.decode.dt_surf_tile_config |= RUVD_MACRO_TILE_ASPECT_RATIO(macro_tile_aspect(luma->u.legacy.mtilea));
break;
case RUVD_SURFACE_TYPE_GFX9:
- msg->body.decode.dt_pitch = luma->u.gfx9.surf_pitch * luma->bpe;
+ msg->body.decode.dt_pitch = luma->u.gfx9.surf_pitch * luma->blk_w;
/* SWIZZLE LINEAR MODE */
msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR;
msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_LINEAR;
diff -Nru mesa-17.2.4/src/gallium/drivers/radeon/radeon_uvd.h mesa-17.3.3/src/gallium/drivers/radeon/radeon_uvd.h
--- mesa-17.2.4/src/gallium/drivers/radeon/radeon_uvd.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeon/radeon_uvd.h 2018-01-18 21:30:28.000000000 +0000
@@ -82,6 +82,7 @@
#define RUVD_CODEC_MPEG2 0x00000003
#define RUVD_CODEC_MPEG4 0x00000004
#define RUVD_CODEC_H264_PERF 0x00000007
+#define RUVD_CODEC_MJPEG 0x00000008
#define RUVD_CODEC_H265 0x00000010
/* UVD decode target buffer tiling mode */
@@ -436,11 +437,11 @@
(struct ruvd_msg* msg, struct vl_video_buffer *vb);
/* create an UVD decode */
-struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
- const struct pipe_video_codec *templat,
- ruvd_set_dtb set_dtb);
+struct pipe_video_codec *si_common_uvd_create_decoder(struct pipe_context *context,
+ const struct pipe_video_codec *templat,
+ ruvd_set_dtb set_dtb);
/* fill decoding target field from the luma and chroma surfaces */
-void ruvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surf *luma,
- struct radeon_surf *chroma, enum ruvd_surface_type type);
+void si_uvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surf *luma,
+ struct radeon_surf *chroma, enum ruvd_surface_type type);
#endif
diff -Nru mesa-17.2.4/src/gallium/drivers/radeon/radeon_vce_40_2_2.c mesa-17.3.3/src/gallium/drivers/radeon/radeon_vce_40_2_2.c
--- mesa-17.2.4/src/gallium/drivers/radeon/radeon_vce_40_2_2.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeon/radeon_vce_40_2_2.c 2018-01-18 21:30:28.000000000 +0000
@@ -363,8 +363,8 @@
RVCE_CS(0x00000000); // pictureStructure
if(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P ||
enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) {
- struct rvce_cpb_slot *l0 = l0_slot(enc);
- rvce_frame_offset(enc, l0, &luma_offset, &chroma_offset);
+ struct rvce_cpb_slot *l0 = si_l0_slot(enc);
+ si_vce_frame_offset(enc, l0, &luma_offset, &chroma_offset);
RVCE_CS(l0->picture_type); // encPicType
RVCE_CS(l0->frame_num); // frameNumber
RVCE_CS(l0->pic_order_cnt); // pictureOrderCount
@@ -389,8 +389,8 @@
// encReferencePictureL1[0]
RVCE_CS(0x00000000); // pictureStructure
if(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) {
- struct rvce_cpb_slot *l1 = l1_slot(enc);
- rvce_frame_offset(enc, l1, &luma_offset, &chroma_offset);
+ struct rvce_cpb_slot *l1 = si_l1_slot(enc);
+ si_vce_frame_offset(enc, l1, &luma_offset, &chroma_offset);
RVCE_CS(l1->picture_type); // encPicType
RVCE_CS(l1->frame_num); // frameNumber
RVCE_CS(l1->pic_order_cnt); // pictureOrderCount
@@ -404,7 +404,7 @@
RVCE_CS(0xffffffff); // chromaOffset
}
- rvce_frame_offset(enc, current_slot(enc), &luma_offset, &chroma_offset);
+ si_vce_frame_offset(enc, si_current_slot(enc), &luma_offset, &chroma_offset);
RVCE_CS(luma_offset); // encReconstructedLumaOffset
RVCE_CS(chroma_offset); // encReconstructedChromaOffset
RVCE_CS(0x00000000); // encColocBufferOffset
@@ -431,11 +431,11 @@
RVCE_END();
}
-void radeon_vce_40_2_2_get_param(struct rvce_encoder *enc, struct pipe_h264_enc_picture_desc *pic)
+void si_vce_40_2_2_get_param(struct rvce_encoder *enc, struct pipe_h264_enc_picture_desc *pic)
{
}
-void radeon_vce_40_2_2_init(struct rvce_encoder *enc)
+void si_vce_40_2_2_init(struct rvce_encoder *enc)
{
enc->session = session;
enc->task_info = task_info;
diff -Nru mesa-17.2.4/src/gallium/drivers/radeon/radeon_vce_50.c mesa-17.3.3/src/gallium/drivers/radeon/radeon_vce_50.c
--- mesa-17.2.4/src/gallium/drivers/radeon/radeon_vce_50.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeon/radeon_vce_50.c 2018-01-18 21:30:28.000000000 +0000
@@ -173,8 +173,8 @@
RVCE_CS(0x00000000); // pictureStructure
if(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P ||
enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) {
- struct rvce_cpb_slot *l0 = l0_slot(enc);
- rvce_frame_offset(enc, l0, &luma_offset, &chroma_offset);
+ struct rvce_cpb_slot *l0 = si_l0_slot(enc);
+ si_vce_frame_offset(enc, l0, &luma_offset, &chroma_offset);
RVCE_CS(l0->picture_type); // encPicType
RVCE_CS(l0->frame_num); // frameNumber
RVCE_CS(l0->pic_order_cnt); // pictureOrderCount
@@ -199,8 +199,8 @@
// encReferencePictureL1[0]
RVCE_CS(0x00000000); // pictureStructure
if(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) {
- struct rvce_cpb_slot *l1 = l1_slot(enc);
- rvce_frame_offset(enc, l1, &luma_offset, &chroma_offset);
+ struct rvce_cpb_slot *l1 = si_l1_slot(enc);
+ si_vce_frame_offset(enc, l1, &luma_offset, &chroma_offset);
RVCE_CS(l1->picture_type); // encPicType
RVCE_CS(l1->frame_num); // frameNumber
RVCE_CS(l1->pic_order_cnt); // pictureOrderCount
@@ -214,7 +214,7 @@
RVCE_CS(0xffffffff); // chromaOffset
}
- rvce_frame_offset(enc, current_slot(enc), &luma_offset, &chroma_offset);
+ si_vce_frame_offset(enc, si_current_slot(enc), &luma_offset, &chroma_offset);
RVCE_CS(luma_offset); // encReconstructedLumaOffset
RVCE_CS(chroma_offset); // encReconstructedChromaOffset
RVCE_CS(0x00000000); // encColocBufferOffset
@@ -233,13 +233,13 @@
RVCE_END();
}
-void radeon_vce_50_get_param(struct rvce_encoder *enc, struct pipe_h264_enc_picture_desc *pic)
+void si_vce_50_get_param(struct rvce_encoder *enc, struct pipe_h264_enc_picture_desc *pic)
{
}
-void radeon_vce_50_init(struct rvce_encoder *enc)
+void si_vce_50_init(struct rvce_encoder *enc)
{
- radeon_vce_40_2_2_init(enc);
+ si_vce_40_2_2_init(enc);
/* only the two below are different */
enc->rate_control = rate_control;
diff -Nru mesa-17.2.4/src/gallium/drivers/radeon/radeon_vce_52.c mesa-17.3.3/src/gallium/drivers/radeon/radeon_vce_52.c
--- mesa-17.2.4/src/gallium/drivers/radeon/radeon_vce_52.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeon/radeon_vce_52.c 2018-01-18 21:30:28.000000000 +0000
@@ -138,7 +138,7 @@
enc->enc_pic.vui.max_dec_frame_buffering = 0x00000003;
}
-void radeon_vce_52_get_param(struct rvce_encoder *enc, struct pipe_h264_enc_picture_desc *pic)
+void si_vce_52_get_param(struct rvce_encoder *enc, struct pipe_h264_enc_picture_desc *pic)
{
get_rate_control_param(enc, pic);
get_motion_estimation_param(enc, pic);
@@ -319,8 +319,8 @@
RVCE_CS(0x00000000); // pictureStructure
if(enc->enc_pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P ||
enc->enc_pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) {
- struct rvce_cpb_slot *l0 = l0_slot(enc);
- rvce_frame_offset(enc, l0, &luma_offset, &chroma_offset);
+ struct rvce_cpb_slot *l0 = si_l0_slot(enc);
+ si_vce_frame_offset(enc, l0, &luma_offset, &chroma_offset);
RVCE_CS(l0->picture_type);
RVCE_CS(l0->frame_num);
RVCE_CS(l0->pic_order_cnt);
@@ -356,8 +356,8 @@
// encReferencePictureL1[0]
RVCE_CS(0x00000000); // pictureStructure
if(enc->enc_pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) {
- struct rvce_cpb_slot *l1 = l1_slot(enc);
- rvce_frame_offset(enc, l1, &luma_offset, &chroma_offset);
+ struct rvce_cpb_slot *l1 = si_l1_slot(enc);
+ si_vce_frame_offset(enc, l1, &luma_offset, &chroma_offset);
RVCE_CS(l1->picture_type);
RVCE_CS(l1->frame_num);
RVCE_CS(l1->pic_order_cnt);
@@ -376,7 +376,7 @@
RVCE_CS(enc->enc_pic.eo.l1_chroma_offset);
}
- rvce_frame_offset(enc, current_slot(enc), &luma_offset, &chroma_offset);
+ si_vce_frame_offset(enc, si_current_slot(enc), &luma_offset, &chroma_offset);
RVCE_CS(luma_offset);
RVCE_CS(chroma_offset);
RVCE_CS(enc->enc_pic.eo.enc_coloc_buffer_offset);
@@ -646,7 +646,7 @@
RVCE_END();
}
-void radeon_vce_52_init(struct rvce_encoder *enc)
+void si_vce_52_init(struct rvce_encoder *enc)
{
enc->session = session;
enc->task_info = task_info;
diff -Nru mesa-17.2.4/src/gallium/drivers/radeon/radeon_vce.c mesa-17.3.3/src/gallium/drivers/radeon/radeon_vce.c
--- mesa-17.2.4/src/gallium/drivers/radeon/radeon_vce.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeon/radeon_vce.c 2018-01-18 21:30:28.000000000 +0000
@@ -52,7 +52,7 @@
#define FW_52_0_3 ((52 << 24) | (0 << 16) | (3 << 8))
#define FW_52_4_3 ((52 << 24) | (4 << 16) | (3 << 8))
#define FW_52_8_3 ((52 << 24) | (8 << 16) | (3 << 8))
-#define FW_53_19_4 ((53 << 24) | (19 << 16) | (4 << 8))
+#define FW_53 (53 << 24)
/**
* flush commands to the hardware
@@ -198,7 +198,7 @@
/**
* Get the slot for the currently encoded frame
*/
-struct rvce_cpb_slot *current_slot(struct rvce_encoder *enc)
+struct rvce_cpb_slot *si_current_slot(struct rvce_encoder *enc)
{
return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.prev, list);
}
@@ -206,7 +206,7 @@
/**
* Get the slot for L0
*/
-struct rvce_cpb_slot *l0_slot(struct rvce_encoder *enc)
+struct rvce_cpb_slot *si_l0_slot(struct rvce_encoder *enc)
{
return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.next, list);
}
@@ -214,7 +214,7 @@
/**
* Get the slot for L1
*/
-struct rvce_cpb_slot *l1_slot(struct rvce_encoder *enc)
+struct rvce_cpb_slot *si_l1_slot(struct rvce_encoder *enc)
{
return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.next->next, list);
}
@@ -222,8 +222,8 @@
/**
* Calculate the offsets into the CPB
*/
-void rvce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot,
- signed *luma_offset, signed *chroma_offset)
+void si_vce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot,
+ signed *luma_offset, signed *chroma_offset)
{
struct r600_common_screen *rscreen = (struct r600_common_screen *)enc->screen;
unsigned pitch, vpitch, fsize;
@@ -249,15 +249,15 @@
struct rvce_encoder *enc = (struct rvce_encoder*)encoder;
if (enc->stream_handle) {
struct rvid_buffer fb;
- rvid_create_buffer(enc->screen, &fb, 512, PIPE_USAGE_STAGING);
+ si_vid_create_buffer(enc->screen, &fb, 512, PIPE_USAGE_STAGING);
enc->fb = &fb;
enc->session(enc);
- enc->feedback(enc);
enc->destroy(enc);
+ enc->feedback(enc);
flush(enc);
- rvid_destroy_buffer(&fb);
+ si_vid_destroy_buffer(&fb);
}
- rvid_destroy_buffer(&enc->cpb);
+ si_vid_destroy_buffer(&enc->cpb);
enc->ws->cs_destroy(enc->cs);
FREE(enc->cpb_array);
FREE(enc);
@@ -278,7 +278,7 @@
enc->pic.quant_b_frames != pic->quant_b_frames;
enc->pic = *pic;
- get_pic_param(enc, pic);
+ si_get_pic_param(enc, pic);
enc->get_buffer(vid_buf->resources[0], &enc->handle, &enc->luma);
enc->get_buffer(vid_buf->resources[1], NULL, &enc->chroma);
@@ -291,8 +291,8 @@
if (!enc->stream_handle) {
struct rvid_buffer fb;
- enc->stream_handle = rvid_alloc_stream_handle();
- rvid_create_buffer(enc->screen, &fb, 512, PIPE_USAGE_STAGING);
+ enc->stream_handle = si_vid_alloc_stream_handle();
+ si_vid_create_buffer(enc->screen, &fb, 512, PIPE_USAGE_STAGING);
enc->fb = &fb;
enc->session(enc);
enc->create(enc);
@@ -300,7 +300,7 @@
enc->feedback(enc);
flush(enc);
//dump_feedback(enc, &fb);
- rvid_destroy_buffer(&fb);
+ si_vid_destroy_buffer(&fb);
need_rate_control = false;
}
@@ -321,7 +321,7 @@
enc->bs_size = destination->width0;
*fb = enc->fb = CALLOC_STRUCT(rvid_buffer);
- if (!rvid_create_buffer(enc->screen, enc->fb, 512, PIPE_USAGE_STAGING)) {
+ if (!si_vid_create_buffer(enc->screen, enc->fb, 512, PIPE_USAGE_STAGING)) {
RVID_ERR("Can't create feedback buffer.\n");
return;
}
@@ -370,7 +370,7 @@
enc->ws->buffer_unmap(fb->res->buf);
}
//dump_feedback(enc, fb);
- rvid_destroy_buffer(fb);
+ si_vid_destroy_buffer(fb);
FREE(fb);
}
@@ -390,10 +390,10 @@
// just ignored
}
-struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
- const struct pipe_video_codec *templ,
- struct radeon_winsys* ws,
- rvce_get_buffer get_buffer)
+struct pipe_video_codec *si_vce_create_encoder(struct pipe_context *context,
+ const struct pipe_video_codec *templ,
+ struct radeon_winsys* ws,
+ rvce_get_buffer get_buffer)
{
struct r600_common_screen *rscreen = (struct r600_common_screen *)context->screen;
struct r600_common_context *rctx = (struct r600_common_context*)context;
@@ -406,7 +406,7 @@
RVID_ERR("Kernel doesn't supports VCE!\n");
return NULL;
- } else if (!rvce_is_fw_version_supported(rscreen)) {
+ } else if (!si_vce_is_fw_version_supported(rscreen)) {
RVID_ERR("Unsupported VCE fw version loaded!\n");
return NULL;
}
@@ -479,7 +479,7 @@
cpb_size += RVCE_MAX_AUX_BUFFER_NUM *
RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE * 2;
tmp_buf->destroy(tmp_buf);
- if (!rvid_create_buffer(enc->screen, &enc->cpb, cpb_size, PIPE_USAGE_DEFAULT)) {
+ if (!si_vid_create_buffer(enc->screen, &enc->cpb, cpb_size, PIPE_USAGE_DEFAULT)) {
RVID_ERR("Can't create CPB buffer.\n");
goto error;
}
@@ -492,31 +492,31 @@
switch (rscreen->info.vce_fw_version) {
case FW_40_2_2:
- radeon_vce_40_2_2_init(enc);
- get_pic_param = radeon_vce_40_2_2_get_param;
+ si_vce_40_2_2_init(enc);
+ si_get_pic_param = si_vce_40_2_2_get_param;
break;
case FW_50_0_1:
case FW_50_1_2:
case FW_50_10_2:
case FW_50_17_3:
- radeon_vce_50_init(enc);
- get_pic_param = radeon_vce_50_get_param;
+ si_vce_50_init(enc);
+ si_get_pic_param = si_vce_50_get_param;
break;
case FW_52_0_3:
case FW_52_4_3:
case FW_52_8_3:
- radeon_vce_52_init(enc);
- get_pic_param = radeon_vce_52_get_param;
- break;
- case FW_53_19_4:
- radeon_vce_52_init(enc);
- get_pic_param = radeon_vce_52_get_param;
+ si_vce_52_init(enc);
+ si_get_pic_param = si_vce_52_get_param;
break;
default:
- goto error;
+ if ((rscreen->info.vce_fw_version & (0xff << 24)) == FW_53) {
+ si_vce_52_init(enc);
+ si_get_pic_param = si_vce_52_get_param;
+ } else
+ goto error;
}
return &enc->base;
@@ -525,7 +525,7 @@
if (enc->cs)
enc->ws->cs_destroy(enc->cs);
- rvid_destroy_buffer(&enc->cpb);
+ si_vid_destroy_buffer(&enc->cpb);
FREE(enc->cpb_array);
FREE(enc);
@@ -535,7 +535,7 @@
/**
* check if kernel has the right fw version loaded
*/
-bool rvce_is_fw_version_supported(struct r600_common_screen *rscreen)
+bool si_vce_is_fw_version_supported(struct r600_common_screen *rscreen)
{
switch (rscreen->info.vce_fw_version) {
case FW_40_2_2:
@@ -546,19 +546,21 @@
case FW_52_0_3:
case FW_52_4_3:
case FW_52_8_3:
- case FW_53_19_4:
return true;
default:
- return false;
+ if ((rscreen->info.vce_fw_version & (0xff << 24)) == FW_53)
+ return true;
+ else
+ return false;
}
}
/**
* Add the buffer as relocation to the current command submission
*/
-void rvce_add_buffer(struct rvce_encoder *enc, struct pb_buffer *buf,
- enum radeon_bo_usage usage, enum radeon_bo_domain domain,
- signed offset)
+void si_vce_add_buffer(struct rvce_encoder *enc, struct pb_buffer *buf,
+ enum radeon_bo_usage usage, enum radeon_bo_domain domain,
+ signed offset)
{
int reloc_idx;
diff -Nru mesa-17.2.4/src/gallium/drivers/radeon/radeon_vce.h mesa-17.3.3/src/gallium/drivers/radeon/radeon_vce.h
--- mesa-17.2.4/src/gallium/drivers/radeon/radeon_vce.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeon/radeon_vce.h 2018-01-18 21:30:28.000000000 +0000
@@ -40,9 +40,9 @@
#define RVCE_BEGIN(cmd) { \
uint32_t *begin = &enc->cs->current.buf[enc->cs->current.cdw++]; \
RVCE_CS(cmd)
-#define RVCE_READ(buf, domain, off) rvce_add_buffer(enc, (buf), RADEON_USAGE_READ, (domain), (off))
-#define RVCE_WRITE(buf, domain, off) rvce_add_buffer(enc, (buf), RADEON_USAGE_WRITE, (domain), (off))
-#define RVCE_READWRITE(buf, domain, off) rvce_add_buffer(enc, (buf), RADEON_USAGE_READWRITE, (domain), (off))
+#define RVCE_READ(buf, domain, off) si_vce_add_buffer(enc, (buf), RADEON_USAGE_READ, (domain), (off))
+#define RVCE_WRITE(buf, domain, off) si_vce_add_buffer(enc, (buf), RADEON_USAGE_WRITE, (domain), (off))
+#define RVCE_READWRITE(buf, domain, off) si_vce_add_buffer(enc, (buf), RADEON_USAGE_READWRITE, (domain), (off))
#define RVCE_END() *begin = (&enc->cs->current.buf[enc->cs->current.cdw] - begin) * 4; }
#define RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE (4096 * 16 * 2.5)
@@ -417,46 +417,46 @@
};
/* CPB handling functions */
-struct rvce_cpb_slot *current_slot(struct rvce_encoder *enc);
-struct rvce_cpb_slot *l0_slot(struct rvce_encoder *enc);
-struct rvce_cpb_slot *l1_slot(struct rvce_encoder *enc);
-void rvce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot,
- signed *luma_offset, signed *chroma_offset);
-
-struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
- const struct pipe_video_codec *templat,
- struct radeon_winsys* ws,
- rvce_get_buffer get_buffer);
-
-bool rvce_is_fw_version_supported(struct r600_common_screen *rscreen);
-
-void rvce_add_buffer(struct rvce_encoder *enc, struct pb_buffer *buf,
- enum radeon_bo_usage usage, enum radeon_bo_domain domain,
- signed offset);
+struct rvce_cpb_slot *si_current_slot(struct rvce_encoder *enc);
+struct rvce_cpb_slot *si_l0_slot(struct rvce_encoder *enc);
+struct rvce_cpb_slot *si_l1_slot(struct rvce_encoder *enc);
+void si_vce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot,
+ signed *luma_offset, signed *chroma_offset);
+
+struct pipe_video_codec *si_vce_create_encoder(struct pipe_context *context,
+ const struct pipe_video_codec *templat,
+ struct radeon_winsys* ws,
+ rvce_get_buffer get_buffer);
+
+bool si_vce_is_fw_version_supported(struct r600_common_screen *rscreen);
+
+void si_vce_add_buffer(struct rvce_encoder *enc, struct pb_buffer *buf,
+ enum radeon_bo_usage usage, enum radeon_bo_domain domain,
+ signed offset);
/* init vce fw 40.2.2 specific callbacks */
-void radeon_vce_40_2_2_init(struct rvce_encoder *enc);
+void si_vce_40_2_2_init(struct rvce_encoder *enc);
/* init vce fw 50 specific callbacks */
-void radeon_vce_50_init(struct rvce_encoder *enc);
+void si_vce_50_init(struct rvce_encoder *enc);
/* init vce fw 52 specific callbacks */
-void radeon_vce_52_init(struct rvce_encoder *enc);
+void si_vce_52_init(struct rvce_encoder *enc);
/* version specific function for getting parameters */
-void (*get_pic_param)(struct rvce_encoder *enc,
+void (*si_get_pic_param)(struct rvce_encoder *enc,
struct pipe_h264_enc_picture_desc *pic);
/* get parameters for vce 40.2.2 */
-void radeon_vce_40_2_2_get_param(struct rvce_encoder *enc,
- struct pipe_h264_enc_picture_desc *pic);
+void si_vce_40_2_2_get_param(struct rvce_encoder *enc,
+ struct pipe_h264_enc_picture_desc *pic);
/* get parameters for vce 50 */
-void radeon_vce_50_get_param(struct rvce_encoder *enc,
- struct pipe_h264_enc_picture_desc *pic);
+void si_vce_50_get_param(struct rvce_encoder *enc,
+ struct pipe_h264_enc_picture_desc *pic);
/* get parameters for vce 52 */
-void radeon_vce_52_get_param(struct rvce_encoder *enc,
- struct pipe_h264_enc_picture_desc *pic);
+void si_vce_52_get_param(struct rvce_encoder *enc,
+ struct pipe_h264_enc_picture_desc *pic);
#endif
diff -Nru mesa-17.2.4/src/gallium/drivers/radeon/radeon_vcn_dec.c mesa-17.3.3/src/gallium/drivers/radeon/radeon_vcn_dec.c
--- mesa-17.2.4/src/gallium/drivers/radeon/radeon_vcn_dec.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeon/radeon_vcn_dec.c 2018-01-18 21:30:28.000000000 +0000
@@ -324,13 +324,17 @@
result.direct_reflist[i][j] = pic->RefPicList[i][j];
}
- if ((pic->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) &&
- (target->buffer_format == PIPE_FORMAT_NV12)) {
- result.p010_mode = 0;
- result.luma_10to8 = 5;
- result.chroma_10to8 = 5;
- result.hevc_reserved[0] = 4; /* sclr_luma10to8 */
- result.hevc_reserved[1] = 4; /* sclr_chroma10to8 */
+ if (pic->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) {
+ if (target->buffer_format == PIPE_FORMAT_P016) {
+ result.p010_mode = 1;
+ result.msb_mode = 1;
+ } else {
+ result.p010_mode = 0;
+ result.luma_10to8 = 5;
+ result.chroma_10to8 = 5;
+ result.hevc_reserved[0] = 4; /* sclr_luma10to8 */
+ result.hevc_reserved[1] = 4; /* sclr_chroma10to8 */
+ }
}
return result;
@@ -631,7 +635,7 @@
decode->db_pitch = align(dec->base.width, 32);
decode->db_surf_tile_config = 0;
- decode->dt_pitch = luma->surface.u.gfx9.surf_pitch * luma->surface.bpe;;
+ decode->dt_pitch = luma->surface.u.gfx9.surf_pitch * luma->surface.blk_w;
decode->dt_uv_pitch = decode->dt_pitch / 2;
decode->dt_tiling_mode = 0;
@@ -674,9 +678,9 @@
(struct pipe_h265_picture_desc*)picture);
else
ctx_size = calc_ctx_size_h265_main(dec);
- if (!rvid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT))
+ if (!si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT))
RVID_ERR("Can't allocated context buffer.\n");
- rvid_clear_buffer(dec->base.context, &dec->ctx);
+ si_vid_clear_buffer(dec->base.context, &dec->ctx);
}
break;
}
@@ -1022,13 +1026,13 @@
dec->ws->cs_destroy(dec->cs);
for (i = 0; i < NUM_BUFFERS; ++i) {
- rvid_destroy_buffer(&dec->msg_fb_it_buffers[i]);
- rvid_destroy_buffer(&dec->bs_buffers[i]);
+ si_vid_destroy_buffer(&dec->msg_fb_it_buffers[i]);
+ si_vid_destroy_buffer(&dec->bs_buffers[i]);
}
- rvid_destroy_buffer(&dec->dpb);
- rvid_destroy_buffer(&dec->ctx);
- rvid_destroy_buffer(&dec->sessionctx);
+ si_vid_destroy_buffer(&dec->dpb);
+ si_vid_destroy_buffer(&dec->ctx);
+ si_vid_destroy_buffer(&dec->sessionctx);
FREE(dec);
}
@@ -1092,7 +1096,7 @@
if (new_size > buf->res->buf->size) {
dec->ws->buffer_unmap(buf->res->buf);
- if (!rvid_resize_buffer(dec->screen, dec->cs, buf, new_size)) {
+ if (!si_vid_resize_buffer(dec->screen, dec->cs, buf, new_size)) {
RVID_ERR("Can't resize bitstream buffer!");
return;
}
@@ -1223,7 +1227,7 @@
dec->base.flush = radeon_dec_flush;
dec->stream_type = stream_type;
- dec->stream_handle = rvid_alloc_stream_handle();
+ dec->stream_handle = si_vid_alloc_stream_handle();
dec->screen = context->screen;
dec->ws = ws;
dec->cs = ws->cs_create(rctx->ctx, RING_VCN_DEC, NULL, NULL);
@@ -1237,47 +1241,48 @@
unsigned msg_fb_it_size = FB_BUFFER_OFFSET + FB_BUFFER_SIZE;
if (have_it(dec))
msg_fb_it_size += IT_SCALING_TABLE_SIZE;
- if (!rvid_create_buffer(dec->screen, &dec->msg_fb_it_buffers[i],
- msg_fb_it_size, PIPE_USAGE_STAGING)) {
+ /* use vram to improve performance, workaround an unknown bug */
+ if (!si_vid_create_buffer(dec->screen, &dec->msg_fb_it_buffers[i],
+ msg_fb_it_size, PIPE_USAGE_DEFAULT)) {
RVID_ERR("Can't allocated message buffers.\n");
goto error;
}
- if (!rvid_create_buffer(dec->screen, &dec->bs_buffers[i],
- bs_buf_size, PIPE_USAGE_STAGING)) {
+ if (!si_vid_create_buffer(dec->screen, &dec->bs_buffers[i],
+ bs_buf_size, PIPE_USAGE_STAGING)) {
RVID_ERR("Can't allocated bitstream buffers.\n");
goto error;
}
- rvid_clear_buffer(context, &dec->msg_fb_it_buffers[i]);
- rvid_clear_buffer(context, &dec->bs_buffers[i]);
+ si_vid_clear_buffer(context, &dec->msg_fb_it_buffers[i]);
+ si_vid_clear_buffer(context, &dec->bs_buffers[i]);
}
dpb_size = calc_dpb_size(dec);
- if (!rvid_create_buffer(dec->screen, &dec->dpb, dpb_size, PIPE_USAGE_DEFAULT)) {
+ if (!si_vid_create_buffer(dec->screen, &dec->dpb, dpb_size, PIPE_USAGE_DEFAULT)) {
RVID_ERR("Can't allocated dpb.\n");
goto error;
}
- rvid_clear_buffer(context, &dec->dpb);
+ si_vid_clear_buffer(context, &dec->dpb);
if (dec->stream_type == RDECODE_CODEC_H264_PERF) {
unsigned ctx_size = calc_ctx_size_h264_perf(dec);
- if (!rvid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) {
+ if (!si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) {
RVID_ERR("Can't allocated context buffer.\n");
goto error;
}
- rvid_clear_buffer(context, &dec->ctx);
+ si_vid_clear_buffer(context, &dec->ctx);
}
- if (!rvid_create_buffer(dec->screen, &dec->sessionctx,
- RDECODE_SESSION_CONTEXT_SIZE,
- PIPE_USAGE_DEFAULT)) {
+ if (!si_vid_create_buffer(dec->screen, &dec->sessionctx,
+ RDECODE_SESSION_CONTEXT_SIZE,
+ PIPE_USAGE_DEFAULT)) {
RVID_ERR("Can't allocated session ctx.\n");
goto error;
}
- rvid_clear_buffer(context, &dec->sessionctx);
+ si_vid_clear_buffer(context, &dec->sessionctx);
map_msg_fb_it_buf(dec);
rvcn_dec_message_create(dec);
@@ -1294,13 +1299,13 @@
if (dec->cs) dec->ws->cs_destroy(dec->cs);
for (i = 0; i < NUM_BUFFERS; ++i) {
- rvid_destroy_buffer(&dec->msg_fb_it_buffers[i]);
- rvid_destroy_buffer(&dec->bs_buffers[i]);
+ si_vid_destroy_buffer(&dec->msg_fb_it_buffers[i]);
+ si_vid_destroy_buffer(&dec->bs_buffers[i]);
}
- rvid_destroy_buffer(&dec->dpb);
- rvid_destroy_buffer(&dec->ctx);
- rvid_destroy_buffer(&dec->sessionctx);
+ si_vid_destroy_buffer(&dec->dpb);
+ si_vid_destroy_buffer(&dec->ctx);
+ si_vid_destroy_buffer(&dec->sessionctx);
FREE(dec);
diff -Nru mesa-17.2.4/src/gallium/drivers/radeon/radeon_video.c mesa-17.3.3/src/gallium/drivers/radeon/radeon_video.c
--- mesa-17.2.4/src/gallium/drivers/radeon/radeon_video.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeon/radeon_video.c 2018-01-18 21:30:28.000000000 +0000
@@ -46,7 +46,7 @@
#define UVD_FW_1_66_16 ((1 << 24) | (66 << 16) | (16 << 8))
/* generate an stream handle */
-unsigned rvid_alloc_stream_handle()
+unsigned si_vid_alloc_stream_handle()
{
static unsigned counter = 0;
unsigned stream_handle = 0;
@@ -61,8 +61,8 @@
}
/* create a buffer in the winsys */
-bool rvid_create_buffer(struct pipe_screen *screen, struct rvid_buffer *buffer,
- unsigned size, unsigned usage)
+bool si_vid_create_buffer(struct pipe_screen *screen, struct rvid_buffer *buffer,
+ unsigned size, unsigned usage)
{
memset(buffer, 0, sizeof(*buffer));
buffer->usage = usage;
@@ -79,14 +79,14 @@
}
/* destroy a buffer */
-void rvid_destroy_buffer(struct rvid_buffer *buffer)
+void si_vid_destroy_buffer(struct rvid_buffer *buffer)
{
r600_resource_reference(&buffer->res, NULL);
}
/* reallocate a buffer, preserving its content */
-bool rvid_resize_buffer(struct pipe_screen *screen, struct radeon_winsys_cs *cs,
- struct rvid_buffer *new_buf, unsigned new_size)
+bool si_vid_resize_buffer(struct pipe_screen *screen, struct radeon_winsys_cs *cs,
+ struct rvid_buffer *new_buf, unsigned new_size)
{
struct r600_common_screen *rscreen = (struct r600_common_screen *)screen;
struct radeon_winsys* ws = rscreen->ws;
@@ -94,7 +94,7 @@
struct rvid_buffer old_buf = *new_buf;
void *src = NULL, *dst = NULL;
- if (!rvid_create_buffer(screen, new_buf, new_size, new_buf->usage))
+ if (!si_vid_create_buffer(screen, new_buf, new_size, new_buf->usage))
goto error;
src = ws->buffer_map(old_buf.res->buf, cs, PIPE_TRANSFER_READ);
@@ -113,19 +113,19 @@
}
ws->buffer_unmap(new_buf->res->buf);
ws->buffer_unmap(old_buf.res->buf);
- rvid_destroy_buffer(&old_buf);
+ si_vid_destroy_buffer(&old_buf);
return true;
error:
if (src)
ws->buffer_unmap(old_buf.res->buf);
- rvid_destroy_buffer(new_buf);
+ si_vid_destroy_buffer(new_buf);
*new_buf = old_buf;
return false;
}
/* clear the buffer with zeros */
-void rvid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffer)
+void si_vid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffer)
{
struct r600_common_context *rctx = (struct r600_common_context*)context;
@@ -138,9 +138,9 @@
* join surfaces into the same buffer with identical tiling params
* sumup their sizes and replace the backend buffers with a single bo
*/
-void rvid_join_surfaces(struct r600_common_context *rctx,
- struct pb_buffer** buffers[VL_NUM_COMPONENTS],
- struct radeon_surf *surfaces[VL_NUM_COMPONENTS])
+void si_vid_join_surfaces(struct r600_common_context *rctx,
+ struct pb_buffer** buffers[VL_NUM_COMPONENTS],
+ struct radeon_surf *surfaces[VL_NUM_COMPONENTS])
{
struct radeon_winsys* ws;
unsigned best_tiling, best_wh, off;
@@ -182,8 +182,11 @@
for (j = 0; j < ARRAY_SIZE(surfaces[i]->u.legacy.level); ++j)
surfaces[i]->u.legacy.level[j].offset += off;
- } else
+ } else {
surfaces[i]->u.gfx9.surf_offset += off;
+ for (j = 0; j < ARRAY_SIZE(surfaces[i]->u.gfx9.offset); ++j)
+ surfaces[i]->u.gfx9.offset[j] += off;
+ }
off += surfaces[i]->surf_size;
}
@@ -218,10 +221,10 @@
pb_reference(&pb, NULL);
}
-int rvid_get_video_param(struct pipe_screen *screen,
- enum pipe_video_profile profile,
- enum pipe_video_entrypoint entrypoint,
- enum pipe_video_cap param)
+int si_vid_get_video_param(struct pipe_screen *screen,
+ enum pipe_video_profile profile,
+ enum pipe_video_entrypoint entrypoint,
+ enum pipe_video_cap param)
{
struct r600_common_screen *rscreen = (struct r600_common_screen *)screen;
enum pipe_video_format codec = u_reduce_video_profile(profile);
@@ -233,7 +236,7 @@
switch (param) {
case PIPE_VIDEO_CAP_SUPPORTED:
return codec == PIPE_VIDEO_FORMAT_MPEG4_AVC &&
- rvce_is_fw_version_supported(rscreen);
+ si_vce_is_fw_version_supported(rscreen);
case PIPE_VIDEO_CAP_NPOT_TEXTURES:
return 1;
case PIPE_VIDEO_CAP_MAX_WIDTH:
@@ -261,8 +264,7 @@
case PIPE_VIDEO_FORMAT_MPEG12:
return profile != PIPE_VIDEO_PROFILE_MPEG1;
case PIPE_VIDEO_FORMAT_MPEG4:
- /* no support for MPEG4 on older hw */
- return rscreen->family >= CHIP_PALM;
+ return 1;
case PIPE_VIDEO_FORMAT_MPEG4_AVC:
if ((rscreen->family == CHIP_POLARIS10 ||
rscreen->family == CHIP_POLARIS11) &&
@@ -280,6 +282,15 @@
profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10);
else if (rscreen->family >= CHIP_CARRIZO)
return profile == PIPE_VIDEO_PROFILE_HEVC_MAIN;
+ return false;
+ case PIPE_VIDEO_FORMAT_JPEG:
+ if (rscreen->family < CHIP_CARRIZO || rscreen->family >= CHIP_VEGA10)
+ return false;
+ if (!(rscreen->info.drm_major == 3 && rscreen->info.drm_minor >= 19)) {
+ RVID_ERR("No MJPEG support for the kernel version\n");
+ return false;
+ }
+ return true;
default:
return false;
}
@@ -296,17 +307,15 @@
return PIPE_FORMAT_NV12;
case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
- case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
- if (rscreen->family < CHIP_PALM) {
- /* MPEG2 only with shaders and no support for
- interlacing on R6xx style UVD */
- return codec != PIPE_VIDEO_FORMAT_MPEG12 &&
- rscreen->family > CHIP_RV770;
- } else {
- if (u_reduce_video_profile(profile) == PIPE_VIDEO_FORMAT_HEVC)
- return false; //The firmware doesn't support interlaced HEVC.
- return true;
- }
+ case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED: {
+ enum pipe_video_format format = u_reduce_video_profile(profile);
+
+ if (format == PIPE_VIDEO_FORMAT_HEVC)
+ return false; //The firmware doesn't support interlaced HEVC.
+ else if (format == PIPE_VIDEO_FORMAT_JPEG)
+ return false;
+ return true;
+ }
case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
return true;
case PIPE_VIDEO_CAP_MAX_LEVEL:
@@ -341,10 +350,10 @@
}
}
-boolean rvid_is_format_supported(struct pipe_screen *screen,
- enum pipe_format format,
- enum pipe_video_profile profile,
- enum pipe_video_entrypoint entrypoint)
+boolean si_vid_is_format_supported(struct pipe_screen *screen,
+ enum pipe_format format,
+ enum pipe_video_profile profile,
+ enum pipe_video_entrypoint entrypoint)
{
/* HEVC 10 bit decoding should use P016 instead of NV12 if possible */
if (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10)
diff -Nru mesa-17.2.4/src/gallium/drivers/radeon/radeon_video.h mesa-17.3.3/src/gallium/drivers/radeon/radeon_video.h
--- mesa-17.2.4/src/gallium/drivers/radeon/radeon_video.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeon/radeon_video.h 2018-01-18 21:30:28.000000000 +0000
@@ -48,38 +48,38 @@
};
/* generate an stream handle */
-unsigned rvid_alloc_stream_handle(void);
+unsigned si_vid_alloc_stream_handle(void);
/* create a buffer in the winsys */
-bool rvid_create_buffer(struct pipe_screen *screen, struct rvid_buffer *buffer,
- unsigned size, unsigned usage);
+bool si_vid_create_buffer(struct pipe_screen *screen, struct rvid_buffer *buffer,
+ unsigned size, unsigned usage);
/* destroy a buffer */
-void rvid_destroy_buffer(struct rvid_buffer *buffer);
+void si_vid_destroy_buffer(struct rvid_buffer *buffer);
/* reallocate a buffer, preserving its content */
-bool rvid_resize_buffer(struct pipe_screen *screen, struct radeon_winsys_cs *cs,
- struct rvid_buffer *new_buf, unsigned new_size);
+bool si_vid_resize_buffer(struct pipe_screen *screen, struct radeon_winsys_cs *cs,
+ struct rvid_buffer *new_buf, unsigned new_size);
/* clear the buffer with zeros */
-void rvid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffer);
+void si_vid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffer);
/* join surfaces into the same buffer with identical tiling params
sumup their sizes and replace the backend buffers with a single bo */
-void rvid_join_surfaces(struct r600_common_context *rctx,
- struct pb_buffer** buffers[VL_NUM_COMPONENTS],
- struct radeon_surf *surfaces[VL_NUM_COMPONENTS]);
+void si_vid_join_surfaces(struct r600_common_context *rctx,
+ struct pb_buffer** buffers[VL_NUM_COMPONENTS],
+ struct radeon_surf *surfaces[VL_NUM_COMPONENTS]);
/* returns supported codecs and other parameters */
-int rvid_get_video_param(struct pipe_screen *screen,
- enum pipe_video_profile profile,
- enum pipe_video_entrypoint entrypoint,
- enum pipe_video_cap param);
+int si_vid_get_video_param(struct pipe_screen *screen,
+ enum pipe_video_profile profile,
+ enum pipe_video_entrypoint entrypoint,
+ enum pipe_video_cap param);
/* the hardware only supports NV12 */
-boolean rvid_is_format_supported(struct pipe_screen *screen,
- enum pipe_format format,
- enum pipe_video_profile profile,
- enum pipe_video_entrypoint entrypoint);
+boolean si_vid_is_format_supported(struct pipe_screen *screen,
+ enum pipe_format format,
+ enum pipe_video_profile profile,
+ enum pipe_video_entrypoint entrypoint);
#endif // RADEON_VIDEO_H
diff -Nru mesa-17.2.4/src/gallium/drivers/radeon/radeon_winsys.h mesa-17.3.3/src/gallium/drivers/radeon/radeon_winsys.h
--- mesa-17.2.4/src/gallium/drivers/radeon/radeon_winsys.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeon/radeon_winsys.h 2018-01-18 21:30:28.000000000 +0000
@@ -54,6 +54,7 @@
RADEON_FLAG_NO_CPU_ACCESS = (1 << 1),
RADEON_FLAG_NO_SUBALLOC = (1 << 2),
RADEON_FLAG_SPARSE = (1 << 3),
+ RADEON_FLAG_NO_INTERPROCESS_SHARING = (1 << 4),
};
enum radeon_bo_usage { /* bitfield */
@@ -91,6 +92,7 @@
RADEON_NUM_GFX_IBS,
RADEON_NUM_SDMA_IBS,
RADEON_GFX_BO_LIST_COUNTER, /* number of BOs submitted in gfx IBs */
+ RADEON_GFX_IB_SIZE_COUNTER,
RADEON_NUM_BYTES_MOVED,
RADEON_NUM_EVICTIONS,
RADEON_NUM_VRAM_CPU_PAGE_FAULTS,
@@ -173,8 +175,7 @@
unsigned max_prev; /* Space in array pointed to by prev. */
unsigned prev_dw; /* Total number of dwords in previous chunks. */
- /* Memory usage of the buffer list. These are always 0 for CE and preamble
- * IBs. */
+ /* Memory usage of the buffer list. These are always 0 for preamble IBs. */
uint64_t used_vram;
uint64_t used_gart;
};
@@ -457,36 +458,6 @@
void *flush_ctx);
/**
- * Add a constant engine IB to a graphics CS. This makes the graphics CS
- * from "cs_create" a group of two IBs that share a buffer list and are
- * flushed together.
- *
- * The returned constant CS is only a stream for writing packets to the new
- * IB. Calling other winsys functions with it is not allowed, not even
- * "cs_destroy".
- *
- * In order to add buffers and check memory usage, use the graphics CS.
- * In order to flush it, use the graphics CS, which will flush both IBs.
- * Destroying the graphics CS will destroy both of them.
- *
- * \param cs The graphics CS from "cs_create" that will hold the buffer
- * list and will be used for flushing.
- */
- struct radeon_winsys_cs *(*cs_add_const_ib)(struct radeon_winsys_cs *cs);
-
- /**
- * Add a constant engine preamble IB to a graphics CS. This add an extra IB
- * in similar manner to cs_add_const_ib. This should always be called after
- * cs_add_const_ib.
- *
- * The returned IB is a constant engine IB that only gets flushed if the
- * context changed.
- *
- * \param cs The graphics CS from "cs_create" that will hold the buffer
- * list and will be used for flushing.
- */
- struct radeon_winsys_cs *(*cs_add_const_preamble_ib)(struct radeon_winsys_cs *cs);
- /**
* Destroy a command stream.
*
* \param cs A command stream to destroy.
@@ -604,6 +575,13 @@
void (*cs_sync_flush)(struct radeon_winsys_cs *cs);
/**
+ * Add a fence dependency to the CS, so that the CS will wait for
+ * the fence before execution.
+ */
+ void (*cs_add_fence_dependency)(struct radeon_winsys_cs *cs,
+ struct pipe_fence_handle *fence);
+
+ /**
* Wait for the fence and return true if the fence has been signalled.
* The timeout of 0 will only return the status.
* The timeout of PIPE_TIMEOUT_INFINITE will always wait until the fence
@@ -620,6 +598,18 @@
struct pipe_fence_handle *src);
/**
+ * Create a new fence object corresponding to the given sync_file.
+ */
+ struct pipe_fence_handle *(*fence_import_sync_file)(struct radeon_winsys *ws,
+ int fd);
+
+ /**
+ * Return a sync_file FD corresponding to the given fence object.
+ */
+ int (*fence_export_sync_file)(struct radeon_winsys *ws,
+ struct pipe_fence_handle *fence);
+
+ /**
* Initialize surface
*
* \param ws The winsys this function is called from.
@@ -692,14 +682,19 @@
{
switch (heap) {
case RADEON_HEAP_VRAM_NO_CPU_ACCESS:
- return RADEON_FLAG_GTT_WC | RADEON_FLAG_NO_CPU_ACCESS;
+ return RADEON_FLAG_GTT_WC |
+ RADEON_FLAG_NO_CPU_ACCESS |
+ RADEON_FLAG_NO_INTERPROCESS_SHARING;
+
case RADEON_HEAP_VRAM:
case RADEON_HEAP_VRAM_GTT:
case RADEON_HEAP_GTT_WC:
- return RADEON_FLAG_GTT_WC;
+ return RADEON_FLAG_GTT_WC |
+ RADEON_FLAG_NO_INTERPROCESS_SHARING;
+
case RADEON_HEAP_GTT:
default:
- return 0;
+ return RADEON_FLAG_NO_INTERPROCESS_SHARING;
}
}
@@ -731,8 +726,14 @@
/* NO_CPU_ACCESS implies VRAM only. */
assert(!(flags & RADEON_FLAG_NO_CPU_ACCESS) || domain == RADEON_DOMAIN_VRAM);
+ /* Resources with interprocess sharing don't use any winsys allocators. */
+ if (!(flags & RADEON_FLAG_NO_INTERPROCESS_SHARING))
+ return -1;
+
/* Unsupported flags: NO_SUBALLOC, SPARSE. */
- if (flags & ~(RADEON_FLAG_GTT_WC | RADEON_FLAG_NO_CPU_ACCESS))
+ if (flags & ~(RADEON_FLAG_GTT_WC |
+ RADEON_FLAG_NO_CPU_ACCESS |
+ RADEON_FLAG_NO_INTERPROCESS_SHARING))
return -1;
switch (domain) {
diff -Nru mesa-17.2.4/src/gallium/drivers/radeonsi/Automake.inc mesa-17.3.3/src/gallium/drivers/radeonsi/Automake.inc
--- mesa-17.2.4/src/gallium/drivers/radeonsi/Automake.inc 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeonsi/Automake.inc 2018-01-18 21:30:28.000000000 +0000
@@ -8,6 +8,9 @@
$(LIBDRM_LIBS) \
$(AMDGPU_LIBS)
+TARGET_COMPILER_LIB_DEPS = \
+ $(top_builddir)/src/compiler/nir/libnir.la
+
TARGET_RADEON_WINSYS = \
$(top_builddir)/src/gallium/winsys/radeon/drm/libradeonwinsys.la \
$(top_builddir)/src/gallium/winsys/amdgpu/drm/libamdgpuwinsys.la
diff -Nru mesa-17.2.4/src/gallium/drivers/radeonsi/cik_sdma.c mesa-17.3.3/src/gallium/drivers/radeonsi/cik_sdma.c
--- mesa-17.2.4/src/gallium/drivers/radeonsi/cik_sdma.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeonsi/cik_sdma.c 2018-01-18 21:30:28.000000000 +0000
@@ -50,7 +50,7 @@
src_offset += rsrc->gpu_address;
ncopy = DIV_ROUND_UP(size, CIK_SDMA_COPY_MAX_SIZE);
- r600_need_dma_space(&ctx->b, ncopy * 7, rdst, rsrc);
+ si_need_dma_space(&ctx->b, ncopy * 7, rdst, rsrc);
for (i = 0; i < ncopy; i++) {
csize = MIN2(size, CIK_SDMA_COPY_MAX_SIZE);
@@ -95,7 +95,7 @@
/* the same maximum size as for copying */
ncopy = DIV_ROUND_UP(size, CIK_SDMA_COPY_MAX_SIZE);
- r600_need_dma_space(&sctx->b, ncopy * 5, rdst, NULL);
+ si_need_dma_space(&sctx->b, ncopy * 5, rdst, NULL);
for (i = 0; i < ncopy; i++) {
csize = MIN2(size, CIK_SDMA_COPY_MAX_SIZE);
@@ -162,6 +162,10 @@
unsigned src_tile_mode = info->si_tile_mode_array[src_tile_index];
unsigned dst_micro_mode = G_009910_MICRO_TILE_MODE_NEW(dst_tile_mode);
unsigned src_micro_mode = G_009910_MICRO_TILE_MODE_NEW(src_tile_mode);
+ unsigned dst_tile_swizzle = dst_mode == RADEON_SURF_MODE_2D ?
+ rdst->surface.tile_swizzle : 0;
+ unsigned src_tile_swizzle = src_mode == RADEON_SURF_MODE_2D ?
+ rsrc->surface.tile_swizzle : 0;
unsigned dst_pitch = rdst->surface.u.legacy.level[dst_level].nblk_x;
unsigned src_pitch = rsrc->surface.u.legacy.level[src_level].nblk_x;
uint64_t dst_slice_pitch = rdst->surface.u.legacy.level[dst_level].slice_size / bpp;
@@ -190,7 +194,7 @@
src_slice_pitch * bpp * (srcz + src_box->depth) <=
rsrc->resource.buf->size);
- if (!r600_prepare_for_dma_blit(&sctx->b, rdst, dst_level, dstx, dsty,
+ if (!si_prepare_for_dma_blit(&sctx->b, rdst, dst_level, dstx, dsty,
dstz, rsrc, src_level, src_box))
return false;
@@ -205,6 +209,9 @@
dstz >= (1 << 11))
return false;
+ dst_address |= dst_tile_swizzle << 8;
+ src_address |= src_tile_swizzle << 8;
+
/* Linear -> linear sub-window copy. */
if (dst_mode == RADEON_SURF_MODE_LINEAR_ALIGNED &&
src_mode == RADEON_SURF_MODE_LINEAR_ALIGNED &&
@@ -228,7 +235,7 @@
srcy + copy_height != (1 << 14)))) {
struct radeon_winsys_cs *cs = sctx->b.dma.cs;
- r600_need_dma_space(&sctx->b, 13, &rdst->resource, &rsrc->resource);
+ si_need_dma_space(&sctx->b, 13, &rdst->resource, &rsrc->resource);
radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY,
CIK_SDMA_COPY_SUB_OPCODE_LINEAR_SUB_WINDOW, 0) |
@@ -391,7 +398,7 @@
struct radeon_winsys_cs *cs = sctx->b.dma.cs;
uint32_t direction = linear == rdst ? 1u << 31 : 0;
- r600_need_dma_space(&sctx->b, 14, &rdst->resource, &rsrc->resource);
+ si_need_dma_space(&sctx->b, 14, &rdst->resource, &rsrc->resource);
radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY,
CIK_SDMA_COPY_SUB_OPCODE_TILED_SUB_WINDOW, 0) |
@@ -485,7 +492,7 @@
dstx + copy_width != (1 << 14)))) {
struct radeon_winsys_cs *cs = sctx->b.dma.cs;
- r600_need_dma_space(&sctx->b, 15, &rdst->resource, &rsrc->resource);
+ si_need_dma_space(&sctx->b, 15, &rdst->resource, &rsrc->resource);
radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY,
CIK_SDMA_COPY_SUB_OPCODE_T2T_SUB_WINDOW, 0));
diff -Nru mesa-17.2.4/src/gallium/drivers/radeonsi/driinfo_radeonsi.h mesa-17.3.3/src/gallium/drivers/radeonsi/driinfo_radeonsi.h
--- mesa-17.2.4/src/gallium/drivers/radeonsi/driinfo_radeonsi.h 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeonsi/driinfo_radeonsi.h 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,10 @@
+// DriConf options specific to radeonsi
+DRI_CONF_SECTION_PERFORMANCE
+ DRI_CONF_RADEONSI_ENABLE_SISCHED("false")
+ DRI_CONF_RADEONSI_ASSUME_NO_Z_FIGHTS("false")
+ DRI_CONF_RADEONSI_COMMUTATIVE_BLEND_ADD("false")
+DRI_CONF_SECTION_END
+
+DRI_CONF_SECTION_DEBUG
+ DRI_CONF_RADEONSI_CLEAR_DB_CACHE_BEFORE_CLEAR("false")
+DRI_CONF_SECTION_END
diff -Nru mesa-17.2.4/src/gallium/drivers/radeonsi/Makefile.am mesa-17.3.3/src/gallium/drivers/radeonsi/Makefile.am
--- mesa-17.2.4/src/gallium/drivers/radeonsi/Makefile.am 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeonsi/Makefile.am 2018-01-18 21:30:28.000000000 +0000
@@ -26,6 +26,7 @@
AM_CFLAGS = \
$(GALLIUM_DRIVER_CFLAGS) \
-I$(top_builddir)/src/amd/common \
+ -I$(top_builddir)/src/compiler/nir \
-I$(top_srcdir)/src/amd/common \
$(RADEON_CFLAGS) \
$(LLVM_CFLAGS)
@@ -33,3 +34,16 @@
noinst_LTLIBRARIES = libradeonsi.la
libradeonsi_la_SOURCES = $(C_SOURCES)
+
+GEN_DRIINFO_INPUTS = \
+ $(top_srcdir)/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h \
+ $(srcdir)/driinfo_radeonsi.h
+
+PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)
+MERGE_DRIINFO = $(top_srcdir)/src/util/merge_driinfo.py
+
+si_driinfo.h: $(MERGE_DRIINFO) $(GEN_DRIINFO_INPUTS)
+ $(PYTHON_GEN) $(MERGE_DRIINFO) $(GEN_DRIINFO_INPUTS) > $@ || ($(RM) $@; false)
+
+BUILT_SOURCES = $(GENERATED_SOURCES)
+CLEANFILES = $(GENERATED_SOURCES)
diff -Nru mesa-17.2.4/src/gallium/drivers/radeonsi/Makefile.in mesa-17.3.3/src/gallium/drivers/radeonsi/Makefile.in
--- mesa-17.2.4/src/gallium/drivers/radeonsi/Makefile.in 2017-10-30 14:50:00.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeonsi/Makefile.in 2018-01-18 21:30:39.000000000 +0000
@@ -123,7 +123,8 @@
subdir = src/gallium/drivers/radeonsi
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_gnu_make.m4 \
+am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_compile_flag.m4 \
+ $(top_srcdir)/m4/ax_check_gnu_make.m4 \
$(top_srcdir)/m4/ax_check_python_mako_module.m4 \
$(top_srcdir)/m4/ax_gcc_builtin.m4 \
$(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
@@ -141,13 +142,16 @@
CONFIG_CLEAN_VPATH_FILES =
LTLIBRARIES = $(noinst_LTLIBRARIES)
libradeonsi_la_LIBADD =
-am__objects_1 = cik_sdma.lo si_blit.lo si_compute.lo si_cp_dma.lo \
- si_debug.lo si_descriptors.lo si_dma.lo si_hw_context.lo \
- si_pipe.lo si_pm4.lo si_perfcounter.lo si_shader.lo \
- si_shader_tgsi_alu.lo si_shader_tgsi_mem.lo \
- si_shader_tgsi_setup.lo si_state.lo si_state_draw.lo \
- si_state_shaders.lo si_uvd.lo
-am_libradeonsi_la_OBJECTS = $(am__objects_1)
+am__objects_1 =
+am__objects_2 = $(am__objects_1) cik_sdma.lo si_blit.lo si_compute.lo \
+ si_cp_dma.lo si_debug.lo si_descriptors.lo si_dma.lo \
+ si_hw_context.lo si_pipe.lo si_pm4.lo si_perfcounter.lo \
+ si_shader.lo si_shader_nir.lo si_shader_tgsi_alu.lo \
+ si_shader_tgsi_mem.lo si_shader_tgsi_setup.lo si_state.lo \
+ si_state_binning.lo si_state_draw.lo si_state_msaa.lo \
+ si_state_shaders.lo si_state_streamout.lo si_state_viewport.lo \
+ si_uvd.lo
+am_libradeonsi_la_OBJECTS = $(am__objects_2)
libradeonsi_la_OBJECTS = $(am_libradeonsi_la_OBJECTS)
AM_V_lt = $(am__v_lt_@AM_V@)
am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
@@ -355,9 +359,9 @@
NVVIEUX_LIBS = @NVVIEUX_LIBS@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
-OMX_CFLAGS = @OMX_CFLAGS@
-OMX_LIBS = @OMX_LIBS@
-OMX_LIB_INSTALL_DIR = @OMX_LIB_INSTALL_DIR@
+OMX_BELLAGIO_CFLAGS = @OMX_BELLAGIO_CFLAGS@
+OMX_BELLAGIO_LIBS = @OMX_BELLAGIO_LIBS@
+OMX_BELLAGIO_LIB_INSTALL_DIR = @OMX_BELLAGIO_LIB_INSTALL_DIR@
OPENCL_LIBNAME = @OPENCL_LIBNAME@
OPENCL_VERSION = @OPENCL_VERSION@
OSMESA_LIB = @OSMESA_LIB@
@@ -411,6 +415,8 @@
VA_LIB_INSTALL_DIR = @VA_LIB_INSTALL_DIR@
VA_MAJOR = @VA_MAJOR@
VA_MINOR = @VA_MINOR@
+VC5_SIMULATOR_CFLAGS = @VC5_SIMULATOR_CFLAGS@
+VC5_SIMULATOR_LIBS = @VC5_SIMULATOR_LIBS@
VDPAU_CFLAGS = @VDPAU_CFLAGS@
VDPAU_LIBS = @VDPAU_LIBS@
VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@
@@ -422,12 +428,15 @@
VL_CFLAGS = @VL_CFLAGS@
VL_LIBS = @VL_LIBS@
VULKAN_ICD_INSTALL_DIR = @VULKAN_ICD_INSTALL_DIR@
-WAYLAND_CFLAGS = @WAYLAND_CFLAGS@
-WAYLAND_LIBS = @WAYLAND_LIBS@
+WAYLAND_CLIENT_CFLAGS = @WAYLAND_CLIENT_CFLAGS@
+WAYLAND_CLIENT_LIBS = @WAYLAND_CLIENT_LIBS@
WAYLAND_PROTOCOLS_DATADIR = @WAYLAND_PROTOCOLS_DATADIR@
WAYLAND_SCANNER = @WAYLAND_SCANNER@
WAYLAND_SCANNER_CFLAGS = @WAYLAND_SCANNER_CFLAGS@
WAYLAND_SCANNER_LIBS = @WAYLAND_SCANNER_LIBS@
+WAYLAND_SERVER_CFLAGS = @WAYLAND_SERVER_CFLAGS@
+WAYLAND_SERVER_LIBS = @WAYLAND_SERVER_LIBS@
+WNO_OVERRIDE_INIT = @WNO_OVERRIDE_INIT@
X11_INCLUDES = @X11_INCLUDES@
XA_MAJOR = @XA_MAJOR@
XA_MINOR = @XA_MINOR@
@@ -511,8 +520,13 @@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
+GENERATED_SOURCES := \
+ si_driinfo.h
+
C_SOURCES := \
+ $(GENERATED_SOURCES) \
cik_sdma.c \
+ driinfo_radeonsi.h \
si_blit.c \
si_compute.c \
si_compute.h \
@@ -530,12 +544,17 @@
si_shader.c \
si_shader.h \
si_shader_internal.h \
+ si_shader_nir.c \
si_shader_tgsi_alu.c \
si_shader_tgsi_mem.c \
si_shader_tgsi_setup.c \
si_state.c \
+ si_state_binning.c \
si_state_draw.c \
+ si_state_msaa.c \
si_state_shaders.c \
+ si_state_streamout.c \
+ si_state_viewport.c \
si_state.h \
si_uvd.c
@@ -581,6 +600,8 @@
-I$(top_srcdir)/src/gallium/auxiliary \
-I$(top_srcdir)/src/gallium/drivers \
-I$(top_srcdir)/src/gallium/winsys \
+ -I$(top_builddir)/src/util/ \
+ -I$(top_builddir)/src/gallium/drivers/ \
$(DEFINES) \
$(PTHREAD_CFLAGS) \
$(LIBDRM_CFLAGS) \
@@ -603,13 +624,23 @@
AM_CFLAGS = \
$(GALLIUM_DRIVER_CFLAGS) \
-I$(top_builddir)/src/amd/common \
+ -I$(top_builddir)/src/compiler/nir \
-I$(top_srcdir)/src/amd/common \
$(RADEON_CFLAGS) \
$(LLVM_CFLAGS)
noinst_LTLIBRARIES = libradeonsi.la
libradeonsi_la_SOURCES = $(C_SOURCES)
-all: all-am
+GEN_DRIINFO_INPUTS = \
+ $(top_srcdir)/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h \
+ $(srcdir)/driinfo_radeonsi.h
+
+PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)
+MERGE_DRIINFO = $(top_srcdir)/src/util/merge_driinfo.py
+BUILT_SOURCES = $(GENERATED_SOURCES)
+CLEANFILES = $(GENERATED_SOURCES)
+all: $(BUILT_SOURCES)
+ $(MAKE) $(AM_MAKEFLAGS) all-am
.SUFFIXES:
.SUFFIXES: .c .lo .o .obj
@@ -676,12 +707,17 @@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/si_pipe.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/si_pm4.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/si_shader.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/si_shader_nir.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/si_shader_tgsi_alu.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/si_shader_tgsi_mem.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/si_shader_tgsi_setup.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/si_state.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/si_state_binning.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/si_state_draw.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/si_state_msaa.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/si_state_shaders.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/si_state_streamout.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/si_state_viewport.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/si_uvd.Plo@am__quote@
.c.o:
@@ -797,10 +833,12 @@
fi; \
done
check-am: all-am
-check: check-am
+check: $(BUILT_SOURCES)
+ $(MAKE) $(AM_MAKEFLAGS) check-am
all-am: Makefile $(LTLIBRARIES)
installdirs:
-install: install-am
+install: $(BUILT_SOURCES)
+ $(MAKE) $(AM_MAKEFLAGS) install-am
install-exec: install-exec-am
install-data: install-data-am
uninstall: uninstall-am
@@ -822,6 +860,7 @@
mostlyclean-generic:
clean-generic:
+ -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES)
distclean-generic:
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
@@ -830,6 +869,7 @@
maintainer-clean-generic:
@echo "This command is intended for maintainers to use"
@echo "it deletes files that may require special tools to rebuild."
+ -test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES)
clean: clean-am
clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \
@@ -901,7 +941,7 @@
uninstall-am:
-.MAKE: install-am install-strip
+.MAKE: all check install install-am install-strip
.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \
clean-libtool clean-noinstLTLIBRARIES cscopelist-am ctags \
@@ -920,6 +960,9 @@
.PRECIOUS: Makefile
+si_driinfo.h: $(MERGE_DRIINFO) $(GEN_DRIINFO_INPUTS)
+ $(PYTHON_GEN) $(MERGE_DRIINFO) $(GEN_DRIINFO_INPUTS) > $@ || ($(RM) $@; false)
+
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:
diff -Nru mesa-17.2.4/src/gallium/drivers/radeonsi/Makefile.sources mesa-17.3.3/src/gallium/drivers/radeonsi/Makefile.sources
--- mesa-17.2.4/src/gallium/drivers/radeonsi/Makefile.sources 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeonsi/Makefile.sources 2018-01-18 21:30:28.000000000 +0000
@@ -1,5 +1,10 @@
+GENERATED_SOURCES := \
+ si_driinfo.h
+
C_SOURCES := \
+ $(GENERATED_SOURCES) \
cik_sdma.c \
+ driinfo_radeonsi.h \
si_blit.c \
si_compute.c \
si_compute.h \
@@ -17,11 +22,16 @@
si_shader.c \
si_shader.h \
si_shader_internal.h \
+ si_shader_nir.c \
si_shader_tgsi_alu.c \
si_shader_tgsi_mem.c \
si_shader_tgsi_setup.c \
si_state.c \
+ si_state_binning.c \
si_state_draw.c \
+ si_state_msaa.c \
si_state_shaders.c \
+ si_state_streamout.c \
+ si_state_viewport.c \
si_state.h \
si_uvd.c
diff -Nru mesa-17.2.4/src/gallium/drivers/radeonsi/meson.build mesa-17.3.3/src/gallium/drivers/radeonsi/meson.build
--- mesa-17.2.4/src/gallium/drivers/radeonsi/meson.build 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeonsi/meson.build 2018-02-01 16:17:32.000000000 +0000
@@ -0,0 +1,78 @@
+# Copyright © 2017 Dylan Baker
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+files_libradeonsi = files(
+ 'cik_sdma.c',
+ 'driinfo_radeonsi.h',
+ 'si_blit.c',
+ 'si_compute.c',
+ 'si_compute.h',
+ 'si_cp_dma.c',
+ 'si_debug.c',
+ 'si_descriptors.c',
+ 'si_dma.c',
+ 'si_hw_context.c',
+ 'si_pipe.c',
+ 'si_pipe.h',
+ 'si_pm4.c',
+ 'si_pm4.h',
+ 'si_perfcounter.c',
+ 'si_public.h',
+ 'si_shader.c',
+ 'si_shader.h',
+ 'si_shader_internal.h',
+ 'si_shader_nir.c',
+ 'si_shader_tgsi_alu.c',
+ 'si_shader_tgsi_mem.c',
+ 'si_shader_tgsi_setup.c',
+ 'si_state.c',
+ 'si_state_binning.c',
+ 'si_state_draw.c',
+ 'si_state_msaa.c',
+ 'si_state_shaders.c',
+ 'si_state_streamout.c',
+ 'si_state_viewport.c',
+ 'si_state.h',
+ 'si_uvd.c',
+)
+
+si_driinfo_h = custom_target(
+ 'si_driinfo.h',
+ input : files(
+ '../../../util/merge_driinfo.py',
+ '../../auxiliary/pipe-loader/driinfo_gallium.h', 'driinfo_radeonsi.h'
+ ),
+ output : 'si_driinfo.h',
+ command : [prog_python2, '@INPUT@'],
+ capture : true,
+)
+
+libradeonsi = static_library(
+ 'radeonsi',
+ [files_libradeonsi, si_driinfo_h, nir_opcodes_h, sid_tables_h],
+ include_directories : [
+ inc_src, inc_include, inc_gallium, inc_gallium_aux, inc_amd_common,
+ inc_gallium_drivers,
+ ],
+ c_args : [c_vis_args],
+ cpp_args : [cpp_vis_args],
+ dependencies : dep_llvm,
+ build_by_default : false,
+)
diff -Nru mesa-17.2.4/src/gallium/drivers/radeonsi/si_blit.c mesa-17.3.3/src/gallium/drivers/radeonsi/si_blit.c
--- mesa-17.2.4/src/gallium/drivers/radeonsi/si_blit.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeonsi/si_blit.c 2018-01-18 21:30:28.000000000 +0000
@@ -24,6 +24,7 @@
#include "si_pipe.h"
#include "si_compute.h"
#include "util/u_format.h"
+#include "util/u_log.h"
#include "util/u_surface.h"
enum si_blitter_op /* bitmask */
@@ -53,14 +54,12 @@
{
struct si_context *sctx = (struct si_context *)ctx;
- util_blitter_save_vertex_buffer_slot(sctx->blitter, sctx->vertex_buffer);
- util_blitter_save_vertex_elements(sctx->blitter, sctx->vertex_elements);
util_blitter_save_vertex_shader(sctx->blitter, sctx->vs_shader.cso);
util_blitter_save_tessctrl_shader(sctx->blitter, sctx->tcs_shader.cso);
util_blitter_save_tesseval_shader(sctx->blitter, sctx->tes_shader.cso);
util_blitter_save_geometry_shader(sctx->blitter, sctx->gs_shader.cso);
- util_blitter_save_so_targets(sctx->blitter, sctx->b.streamout.num_targets,
- (struct pipe_stream_output_target**)sctx->b.streamout.targets);
+ util_blitter_save_so_targets(sctx->blitter, sctx->streamout.num_targets,
+ (struct pipe_stream_output_target**)sctx->streamout.targets);
util_blitter_save_rasterizer(sctx->blitter, sctx->queued.named.rasterizer);
if (op & SI_SAVE_FRAGMENT_STATE) {
@@ -69,8 +68,7 @@
util_blitter_save_stencil_ref(sctx->blitter, &sctx->stencil_ref.state);
util_blitter_save_fragment_shader(sctx->blitter, sctx->ps_shader.cso);
util_blitter_save_sample_mask(sctx->blitter, sctx->sample_mask.sample_mask);
- util_blitter_save_viewport(sctx->blitter, &sctx->b.viewports.states[0]);
- util_blitter_save_scissor(sctx->blitter, &sctx->b.scissors.states[0]);
+ util_blitter_save_scissor(sctx->blitter, &sctx->scissors.states[0]);
}
if (op & SI_SAVE_FRAMEBUFFER)
@@ -79,10 +77,10 @@
if (op & SI_SAVE_TEXTURES) {
util_blitter_save_fragment_sampler_states(
sctx->blitter, 2,
- (void**)sctx->samplers[PIPE_SHADER_FRAGMENT].views.sampler_states);
+ (void**)sctx->samplers[PIPE_SHADER_FRAGMENT].sampler_states);
util_blitter_save_fragment_sampler_views(sctx->blitter, 2,
- sctx->samplers[PIPE_SHADER_FRAGMENT].views.views);
+ sctx->samplers[PIPE_SHADER_FRAGMENT].views);
}
if (op & SI_DISABLE_RENDER_COND)
@@ -94,6 +92,12 @@
struct si_context *sctx = (struct si_context *)ctx;
sctx->b.render_cond_force_off = false;
+
+ /* Restore shader pointers because the VS blit shader changed all
+ * non-global VS user SGPRs. */
+ sctx->shader_pointers_dirty |= SI_DESCS_SHADER_MASK(VERTEX);
+ sctx->vertex_buffer_pointer_dirty = true;
+ si_mark_atom_dirty(sctx, &sctx->shader_pointers.atom);
}
static unsigned u_max_sample(struct pipe_resource *r)
@@ -338,12 +342,18 @@
}
}
+ if (unlikely(sctx->b.log))
+ u_log_printf(sctx->b.log,
+ "\n------------------------------------------------\n"
+ "Decompress Depth (levels %u - %u, levels Z: 0x%x S: 0x%x)\n\n",
+ first_level, last_level, levels_z, levels_s);
+
/* We may have to allocate the flushed texture here when called from
* si_decompress_subresource.
*/
if (copy_planes &&
(tex->flushed_depth_texture ||
- r600_init_flushed_depth_texture(&sctx->b.b, &tex->resource.b.b, NULL))) {
+ si_init_flushed_depth_texture(&sctx->b.b, &tex->resource.b.b, NULL))) {
struct r600_texture *dst = tex->flushed_depth_texture;
unsigned fully_copied_levels;
unsigned levels = 0;
@@ -374,44 +384,47 @@
}
if (inplace_planes) {
- if (!tex->tc_compatible_htile) {
+ bool has_htile = r600_htile_enabled(tex, first_level);
+ bool tc_compat_htile = vi_tc_compat_htile_enabled(tex, first_level);
+
+ /* Don't decompress if there is no HTILE or when HTILE is
+ * TC-compatible. */
+ if (has_htile && !tc_compat_htile) {
si_blit_decompress_zs_in_place(
sctx, tex,
levels_z, levels_s,
first_layer, last_layer);
+ } else {
+ /* This is only a cache flush.
+ *
+ * Only clear the mask that we are flushing, because
+ * si_make_DB_shader_coherent() treats different levels
+ * and depth and stencil differently.
+ */
+ if (inplace_planes & PIPE_MASK_Z)
+ tex->dirty_level_mask &= ~levels_z;
+ if (inplace_planes & PIPE_MASK_S)
+ tex->stencil_dirty_level_mask &= ~levels_s;
}
/* Only in-place decompression needs to flush DB caches, or
* when we don't decompress but TC-compatible planes are dirty.
*/
- sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_DB |
- SI_CONTEXT_INV_GLOBAL_L2 |
- SI_CONTEXT_INV_VMEM_L1;
-
- /* If we flush DB caches for TC-compatible depth, the dirty
- * state becomes 0 for the whole mipmap tree and all planes.
- * (there is nothing else to flush)
- */
- if (tex->tc_compatible_htile) {
- if (r600_can_sample_zs(tex, false))
- tex->dirty_level_mask = 0;
- if (r600_can_sample_zs(tex, true))
- tex->stencil_dirty_level_mask = 0;
- }
+ si_make_DB_shader_coherent(sctx, tex->resource.b.b.nr_samples,
+ inplace_planes & PIPE_MASK_S,
+ tc_compat_htile);
}
/* set_framebuffer_state takes care of coherency for single-sample.
* The DB->CB copy uses CB for the final writes.
*/
- if (copy_planes && tex->resource.b.b.nr_samples > 1) {
- sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
- SI_CONTEXT_INV_GLOBAL_L2 |
- SI_CONTEXT_FLUSH_AND_INV_CB;
- }
+ if (copy_planes && tex->resource.b.b.nr_samples > 1)
+ si_make_CB_shader_coherent(sctx, tex->resource.b.b.nr_samples,
+ false);
}
static void
si_decompress_sampler_depth_textures(struct si_context *sctx,
- struct si_textures_info *textures)
+ struct si_samplers *textures)
{
unsigned i;
unsigned mask = textures->needs_depth_decompress_mask;
@@ -423,7 +436,7 @@
i = u_bit_scan(&mask);
- view = textures->views.views[i];
+ view = textures->views[i];
assert(view);
sview = (struct si_sampler_view*)view;
@@ -454,9 +467,17 @@
if (!level_mask)
return;
- if (rtex->dcc_offset && need_dcc_decompress) {
+ if (unlikely(sctx->b.log))
+ u_log_printf(sctx->b.log,
+ "\n------------------------------------------------\n"
+ "Decompress Color (levels %u - %u, mask 0x%x)\n\n",
+ first_level, last_level, level_mask);
+
+ if (need_dcc_decompress) {
custom_blend = sctx->custom_blend_dcc_decompress;
+ assert(rtex->dcc_offset);
+
/* disable levels without DCC */
for (int i = first_level; i <= last_level; i++) {
if (!vi_dcc_enabled(rtex, i))
@@ -511,10 +532,8 @@
}
sctx->decompression_enabled = false;
-
- sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
- SI_CONTEXT_INV_GLOBAL_L2 |
- SI_CONTEXT_INV_VMEM_L1;
+ si_make_CB_shader_coherent(sctx, rtex->resource.b.b.nr_samples,
+ vi_dcc_enabled(rtex, first_level));
}
static void
@@ -532,7 +551,7 @@
static void
si_decompress_sampler_color_textures(struct si_context *sctx,
- struct si_textures_info *textures)
+ struct si_samplers *textures)
{
unsigned i;
unsigned mask = textures->needs_color_decompress_mask;
@@ -543,7 +562,7 @@
i = u_bit_scan(&mask);
- view = textures->views.views[i];
+ view = textures->views[i];
assert(view);
tex = (struct r600_texture *)view->texture;
@@ -555,7 +574,7 @@
static void
si_decompress_image_color_textures(struct si_context *sctx,
- struct si_images_info *images)
+ struct si_images *images)
{
unsigned i;
unsigned mask = images->needs_color_decompress_mask;
@@ -607,13 +626,13 @@
}
if (render_feedback)
- r600_texture_disable_dcc(&sctx->b, tex);
+ si_texture_disable_dcc(&sctx->b, tex);
}
static void si_check_render_feedback_textures(struct si_context *sctx,
- struct si_textures_info *textures)
+ struct si_samplers *textures)
{
- uint32_t mask = textures->views.enabled_mask;
+ uint32_t mask = textures->enabled_mask;
while (mask) {
const struct pipe_sampler_view *view;
@@ -621,7 +640,7 @@
unsigned i = u_bit_scan(&mask);
- view = textures->views.views[i];
+ view = textures->views[i];
if(view->texture->target == PIPE_BUFFER)
continue;
@@ -636,7 +655,7 @@
}
static void si_check_render_feedback_images(struct si_context *sctx,
- struct si_images_info *images)
+ struct si_images *images)
{
uint32_t mask = images->enabled_mask;
@@ -755,7 +774,7 @@
}
}
-static void si_decompress_textures(struct si_context *sctx, unsigned shader_mask)
+void si_decompress_textures(struct si_context *sctx, unsigned shader_mask)
{
unsigned compressed_colortex_counter, mask;
@@ -800,16 +819,6 @@
si_check_render_feedback(sctx);
}
-void si_decompress_graphics_textures(struct si_context *sctx)
-{
- si_decompress_textures(sctx, u_bit_consecutive(0, SI_NUM_GRAPHICS_SHADERS));
-}
-
-void si_decompress_compute_textures(struct si_context *sctx)
-{
- si_decompress_textures(sctx, 1 << PIPE_SHADER_COMPUTE);
-}
-
static void si_clear(struct pipe_context *ctx, unsigned buffers,
const union pipe_color_union *color,
double depth, unsigned stencil)
@@ -821,7 +830,7 @@
zsbuf ? (struct r600_texture*)zsbuf->texture : NULL;
if (buffers & PIPE_CLEAR_COLOR) {
- evergreen_do_fast_color_clear(&sctx->b, fb,
+ si_do_fast_color_clear(&sctx->b, fb,
&sctx->framebuffer.atom, &buffers,
&sctx->framebuffer.dirty_cbufs,
color);
@@ -849,8 +858,8 @@
}
}
- if (zstex && zstex->htile_offset &&
- zsbuf->u.tex.level == 0 &&
+ if (zstex &&
+ r600_htile_enabled(zstex, zsbuf->u.tex.level) &&
zsbuf->u.tex.first_layer == 0 &&
zsbuf->u.tex.last_layer == util_max_layer(&zstex->resource.b.b, 0)) {
/* TC-compatible HTILE only supports depth clears to 0 or 1. */
@@ -887,6 +896,21 @@
sctx->db_stencil_clear = true;
si_mark_atom_dirty(sctx, &sctx->db_render_state);
}
+
+ /* TODO: Find out what's wrong here. Fast depth clear leads to
+ * corruption in ARK: Survival Evolved, but that may just be
+ * a coincidence and the root cause is elsewhere.
+ *
+ * The corruption can be fixed by putting the DB flush before
+ * or after the depth clear. (surprisingly)
+ *
+ * https://bugs.freedesktop.org/show_bug.cgi?id=102955 (apitrace)
+ *
+ * This hack decreases back-to-back ClearDepth performance.
+ */
+ if (sctx->screen->clear_db_cache_before_clear) {
+ sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_DB;
+ }
}
si_blitter_begin(ctx, SI_CLEAR);
@@ -959,7 +983,7 @@
if (rtex->db_compatible) {
planes &= PIPE_MASK_Z | PIPE_MASK_S;
- if (!(rtex->surface.flags & RADEON_SURF_SBUFFER))
+ if (!rtex->surface.has_stencil)
planes &= ~PIPE_MASK_S;
/* If we've rendered into the framebuffer and it's a blitting
@@ -1161,7 +1185,7 @@
src_templ.format);
/* Initialize the surface. */
- dst_view = r600_create_surface_custom(ctx, dst, &dst_templ,
+ dst_view = si_create_surface_custom(ctx, dst, &dst_templ,
dst_width0, dst_height0,
dst_width, dst_height);
@@ -1203,9 +1227,7 @@
si_blitter_end(&sctx->b.b);
/* Flush caches for possible texturing. */
- sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
- SI_CONTEXT_INV_GLOBAL_L2 |
- SI_CONTEXT_INV_VMEM_L1;
+ si_make_CB_shader_coherent(sctx, 1, false);
}
static bool do_hardware_msaa_resolve(struct pipe_context *ctx,
@@ -1377,7 +1399,7 @@
info->src.box.z,
info->src.box.z + info->src.box.depth - 1);
- if (sctx->screen->b.debug_flags & DBG_FORCE_DMA &&
+ if (sctx->screen->b.debug_flags & DBG(FORCE_DMA) &&
util_try_blit_via_copy_region(ctx, info))
return;
diff -Nru mesa-17.2.4/src/gallium/drivers/radeonsi/si_compute.c mesa-17.3.3/src/gallium/drivers/radeonsi/si_compute.c
--- mesa-17.2.4/src/gallium/drivers/radeonsi/si_compute.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeonsi/si_compute.c 2018-01-18 21:30:28.000000000 +0000
@@ -151,6 +151,7 @@
struct si_screen *sscreen = (struct si_screen *)ctx->screen;
struct si_compute *program = CALLOC_STRUCT(si_compute);
+ pipe_reference_init(&program->reference, 1);
program->screen = (struct si_screen *)ctx->screen;
program->ir_type = cso->ir_type;
program->local_size = cso->req_local_mem;
@@ -174,7 +175,7 @@
if ((sctx->b.debug.debug_message && !sctx->b.debug.async) ||
sctx->is_debug ||
- r600_can_dump_shader(&sscreen->b, PIPE_SHADER_COMPUTE))
+ si_can_dump_shader(&sscreen->b, PIPE_SHADER_COMPUTE))
si_create_compute_state_async(program, -1);
else
util_queue_add_job(&sscreen->shader_compiler_queue,
@@ -327,7 +328,7 @@
r600_resource_reference(&sctx->compute_scratch_buffer, NULL);
sctx->compute_scratch_buffer = (struct r600_resource*)
- r600_aligned_buffer_create(&sctx->screen->b.b,
+ si_aligned_buffer_create(&sctx->screen->b.b,
R600_RESOURCE_FLAG_UNMAPPABLE,
PIPE_USAGE_DEFAULT,
scratch_needed, 256);
@@ -786,7 +787,7 @@
sctx->b.last_num_draw_calls = sctx->b.num_draw_calls;
}
- si_decompress_compute_textures(sctx);
+ si_decompress_textures(sctx, 1 << PIPE_SHADER_COMPUTE);
/* Add buffer sizes for memory checking in need_cs_space. */
r600_context_add_resource_size(ctx, &program->shader.bo->b.b);
@@ -816,7 +817,7 @@
return;
si_upload_compute_shader_descriptors(sctx);
- si_emit_compute_shader_userdata(sctx);
+ si_emit_compute_shader_pointers(sctx);
if (si_is_atom_dirty(sctx, sctx->atoms.s.render_cond)) {
sctx->atoms.s.render_cond->emit(&sctx->b,
@@ -845,11 +846,12 @@
if (program->ir_type == PIPE_SHADER_IR_TGSI)
si_setup_tgsi_grid(sctx, info);
- si_ce_pre_draw_synchronization(sctx);
-
si_emit_dispatch_packets(sctx, info);
- si_ce_post_draw_synchronization(sctx);
+ if (unlikely(sctx->current_saved_cs)) {
+ si_trace_emit(sctx);
+ si_log_compute_state(sctx, sctx->b.log);
+ }
sctx->compute_is_busy = true;
sctx->b.num_compute_calls++;
@@ -860,20 +862,24 @@
sctx->b.flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;
}
+void si_destroy_compute(struct si_compute *program)
+{
+ if (program->ir_type == PIPE_SHADER_IR_TGSI) {
+ util_queue_drop_job(&program->screen->shader_compiler_queue,
+ &program->ready);
+ util_queue_fence_destroy(&program->ready);
+ }
+
+ si_shader_destroy(&program->shader);
+ FREE(program);
+}
static void si_delete_compute_state(struct pipe_context *ctx, void* state){
struct si_compute *program = (struct si_compute *)state;
struct si_context *sctx = (struct si_context*)ctx;
- if (!state) {
+ if (!state)
return;
- }
-
- if (program->ir_type == PIPE_SHADER_IR_TGSI) {
- util_queue_drop_job(&sctx->screen->shader_compiler_queue,
- &program->ready);
- util_queue_fence_destroy(&program->ready);
- }
if (program == sctx->cs_shader_state.program)
sctx->cs_shader_state.program = NULL;
@@ -881,8 +887,7 @@
if (program == sctx->cs_shader_state.emitted_program)
sctx->cs_shader_state.emitted_program = NULL;
- si_shader_destroy(&program->shader);
- FREE(program);
+ si_compute_reference(&program, NULL);
}
static void si_set_compute_resources(struct pipe_context * ctx_,
diff -Nru mesa-17.2.4/src/gallium/drivers/radeonsi/si_compute.h mesa-17.3.3/src/gallium/drivers/radeonsi/si_compute.h
--- mesa-17.2.4/src/gallium/drivers/radeonsi/si_compute.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeonsi/si_compute.h 2018-01-18 21:30:28.000000000 +0000
@@ -24,11 +24,14 @@
#ifndef SI_COMPUTE_H
#define SI_COMPUTE_H
+#include "util/u_inlines.h"
+
#include "si_shader.h"
#define MAX_GLOBAL_BUFFERS 22
struct si_compute {
+ struct pipe_reference reference;
struct si_screen *screen;
struct tgsi_token *tokens;
struct util_queue_fence ready;
@@ -53,4 +56,15 @@
unsigned uses_bindless_images:1;
};
+void si_destroy_compute(struct si_compute *program);
+
+static inline void
+si_compute_reference(struct si_compute **dst, struct si_compute *src)
+{
+ if (pipe_reference(&(*dst)->reference, &src->reference))
+ si_destroy_compute(*dst);
+
+ *dst = src;
+}
+
#endif /* SI_COMPUTE_H */
diff -Nru mesa-17.2.4/src/gallium/drivers/radeonsi/si_cp_dma.c mesa-17.3.3/src/gallium/drivers/radeonsi/si_cp_dma.c
--- mesa-17.2.4/src/gallium/drivers/radeonsi/si_cp_dma.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeonsi/si_cp_dma.c 2018-01-18 21:30:28.000000000 +0000
@@ -28,6 +28,12 @@
#include "sid.h"
#include "radeon/r600_cs.h"
+/* Recommended maximum sizes for optimal performance.
+ * Fall back to compute or SDMA if the size is greater.
+ */
+#define CP_DMA_COPY_PERF_THRESHOLD (64 * 1024) /* copied from Vulkan */
+#define CP_DMA_CLEAR_PERF_THRESHOLD (32 * 1024) /* guess (clear is much slower) */
+
/* Set this if you want the ME to wait until CP DMA is done.
* It should be set on the last CP DMA packet. */
#define CP_DMA_SYNC (1 << 0)
@@ -230,7 +236,7 @@
(offset % 4 == 0) &&
/* CP DMA is very slow. Always use SDMA for big clears. This
* alone improves DeusEx:MD performance by 70%. */
- (size > 128 * 1024 ||
+ (size > CP_DMA_CLEAR_PERF_THRESHOLD ||
/* Buffers not used by the GFX IB yet will be cleared by SDMA.
* This happens to move most buffer clears to SDMA, including
* DCC and CMASK clears, because pipe->clear clears them before
@@ -309,7 +315,7 @@
sctx->scratch_buffer->b.b.width0 < scratch_size) {
r600_resource_reference(&sctx->scratch_buffer, NULL);
sctx->scratch_buffer = (struct r600_resource*)
- r600_aligned_buffer_create(&sctx->screen->b.b,
+ si_aligned_buffer_create(&sctx->screen->b.b,
R600_RESOURCE_FLAG_UNMAPPABLE,
PIPE_USAGE_DEFAULT,
scratch_size, 256);
@@ -442,43 +448,90 @@
static void cik_prefetch_shader_async(struct si_context *sctx,
struct si_pm4_state *state)
{
- if (state) {
- struct pipe_resource *bo = &state->bo[0]->b.b;
- assert(state->nbo == 1);
+ struct pipe_resource *bo = &state->bo[0]->b.b;
+ assert(state->nbo == 1);
- cik_prefetch_TC_L2_async(sctx, bo, 0, bo->width0);
- }
+ cik_prefetch_TC_L2_async(sctx, bo, 0, bo->width0);
+}
+
+static void cik_prefetch_VBO_descriptors(struct si_context *sctx)
+{
+ if (!sctx->vertex_elements)
+ return;
+
+ cik_prefetch_TC_L2_async(sctx, &sctx->vertex_buffers.buffer->b.b,
+ sctx->vertex_buffers.gpu_address -
+ sctx->vertex_buffers.buffer->gpu_address,
+ sctx->vertex_elements->desc_list_byte_size);
}
-static void cik_emit_prefetch_L2(struct si_context *sctx, struct r600_atom *atom)
+void cik_emit_prefetch_L2(struct si_context *sctx)
{
/* Prefetch shaders and VBO descriptors to TC L2. */
- if (si_pm4_state_changed(sctx, ls))
- cik_prefetch_shader_async(sctx, sctx->queued.named.ls);
- if (si_pm4_state_changed(sctx, hs))
- cik_prefetch_shader_async(sctx, sctx->queued.named.hs);
- if (si_pm4_state_changed(sctx, es))
- cik_prefetch_shader_async(sctx, sctx->queued.named.es);
- if (si_pm4_state_changed(sctx, gs))
- cik_prefetch_shader_async(sctx, sctx->queued.named.gs);
- if (si_pm4_state_changed(sctx, vs))
- cik_prefetch_shader_async(sctx, sctx->queued.named.vs);
-
- /* Vertex buffer descriptors are uploaded uncached, so prefetch
- * them right after the VS binary. */
- if (sctx->vertex_buffer_pointer_dirty) {
- cik_prefetch_TC_L2_async(sctx, &sctx->vertex_buffers.buffer->b.b,
- sctx->vertex_buffers.buffer_offset,
- sctx->vertex_elements->desc_list_byte_size);
+ if (sctx->b.chip_class >= GFX9) {
+ /* Choose the right spot for the VBO prefetch. */
+ if (sctx->tes_shader.cso) {
+ if (sctx->prefetch_L2_mask & SI_PREFETCH_HS)
+ cik_prefetch_shader_async(sctx, sctx->queued.named.hs);
+ if (sctx->prefetch_L2_mask & SI_PREFETCH_VBO_DESCRIPTORS)
+ cik_prefetch_VBO_descriptors(sctx);
+ if (sctx->prefetch_L2_mask & SI_PREFETCH_GS)
+ cik_prefetch_shader_async(sctx, sctx->queued.named.gs);
+ if (sctx->prefetch_L2_mask & SI_PREFETCH_VS)
+ cik_prefetch_shader_async(sctx, sctx->queued.named.vs);
+ } else if (sctx->gs_shader.cso) {
+ if (sctx->prefetch_L2_mask & SI_PREFETCH_GS)
+ cik_prefetch_shader_async(sctx, sctx->queued.named.gs);
+ if (sctx->prefetch_L2_mask & SI_PREFETCH_VBO_DESCRIPTORS)
+ cik_prefetch_VBO_descriptors(sctx);
+ if (sctx->prefetch_L2_mask & SI_PREFETCH_VS)
+ cik_prefetch_shader_async(sctx, sctx->queued.named.vs);
+ } else {
+ if (sctx->prefetch_L2_mask & SI_PREFETCH_VS)
+ cik_prefetch_shader_async(sctx, sctx->queued.named.vs);
+ if (sctx->prefetch_L2_mask & SI_PREFETCH_VBO_DESCRIPTORS)
+ cik_prefetch_VBO_descriptors(sctx);
+ }
+ } else {
+ /* SI-CI-VI */
+ /* Choose the right spot for the VBO prefetch. */
+ if (sctx->tes_shader.cso) {
+ if (sctx->prefetch_L2_mask & SI_PREFETCH_LS)
+ cik_prefetch_shader_async(sctx, sctx->queued.named.ls);
+ if (sctx->prefetch_L2_mask & SI_PREFETCH_VBO_DESCRIPTORS)
+ cik_prefetch_VBO_descriptors(sctx);
+ if (sctx->prefetch_L2_mask & SI_PREFETCH_HS)
+ cik_prefetch_shader_async(sctx, sctx->queued.named.hs);
+ if (sctx->prefetch_L2_mask & SI_PREFETCH_ES)
+ cik_prefetch_shader_async(sctx, sctx->queued.named.es);
+ if (sctx->prefetch_L2_mask & SI_PREFETCH_GS)
+ cik_prefetch_shader_async(sctx, sctx->queued.named.gs);
+ if (sctx->prefetch_L2_mask & SI_PREFETCH_VS)
+ cik_prefetch_shader_async(sctx, sctx->queued.named.vs);
+ } else if (sctx->gs_shader.cso) {
+ if (sctx->prefetch_L2_mask & SI_PREFETCH_ES)
+ cik_prefetch_shader_async(sctx, sctx->queued.named.es);
+ if (sctx->prefetch_L2_mask & SI_PREFETCH_VBO_DESCRIPTORS)
+ cik_prefetch_VBO_descriptors(sctx);
+ if (sctx->prefetch_L2_mask & SI_PREFETCH_GS)
+ cik_prefetch_shader_async(sctx, sctx->queued.named.gs);
+ if (sctx->prefetch_L2_mask & SI_PREFETCH_VS)
+ cik_prefetch_shader_async(sctx, sctx->queued.named.vs);
+ } else {
+ if (sctx->prefetch_L2_mask & SI_PREFETCH_VS)
+ cik_prefetch_shader_async(sctx, sctx->queued.named.vs);
+ if (sctx->prefetch_L2_mask & SI_PREFETCH_VBO_DESCRIPTORS)
+ cik_prefetch_VBO_descriptors(sctx);
+ }
}
- if (si_pm4_state_changed(sctx, ps))
+
+ if (sctx->prefetch_L2_mask & SI_PREFETCH_PS)
cik_prefetch_shader_async(sctx, sctx->queued.named.ps);
+
+ sctx->prefetch_L2_mask = 0;
}
void si_init_cp_dma_functions(struct si_context *sctx)
{
sctx->b.clear_buffer = si_clear_buffer;
-
- si_init_atom(sctx, &sctx->prefetch_L2, &sctx->atoms.s.prefetch_L2,
- cik_emit_prefetch_L2);
}
diff -Nru mesa-17.2.4/src/gallium/drivers/radeonsi/si_debug.c mesa-17.3.3/src/gallium/drivers/radeonsi/si_debug.c
--- mesa-17.2.4/src/gallium/drivers/radeonsi/si_debug.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeonsi/si_debug.c 2018-01-18 21:30:28.000000000 +0000
@@ -30,9 +30,13 @@
#include "gfx9d.h"
#include "sid_tables.h"
#include "ddebug/dd_util.h"
+#include "util/u_log.h"
#include "util/u_memory.h"
#include "ac_debug.h"
+static void si_dump_bo_list(struct si_context *sctx,
+ const struct radeon_saved_cs *saved, FILE *f);
+
DEBUG_GET_ONCE_OPTION(replace_shaders, "RADEON_REPLACE_SHADERS", NULL)
static void si_dump_shader(struct si_screen *sscreen,
@@ -45,24 +49,78 @@
si_shader_dump(sscreen, shader, NULL, processor, f, false);
}
-static void si_dump_gfx_shader(struct si_screen *sscreen,
- const struct si_shader_ctx_state *state, FILE *f)
+struct si_log_chunk_shader {
+ /* The shader destroy code assumes a current context for unlinking of
+ * PM4 packets etc.
+ *
+ * While we should be able to destroy shaders without a context, doing
+ * so would happen only very rarely and be therefore likely to fail
+ * just when you're trying to debug something. Let's just remember the
+ * current context in the chunk.
+ */
+ struct si_context *ctx;
+ struct si_shader *shader;
+ enum pipe_shader_type processor;
+
+ /* For keep-alive reference counts */
+ struct si_shader_selector *sel;
+ struct si_compute *program;
+};
+
+static void
+si_log_chunk_shader_destroy(void *data)
+{
+ struct si_log_chunk_shader *chunk = data;
+ si_shader_selector_reference(chunk->ctx, &chunk->sel, NULL);
+ si_compute_reference(&chunk->program, NULL);
+ FREE(chunk);
+}
+
+static void
+si_log_chunk_shader_print(void *data, FILE *f)
+{
+ struct si_log_chunk_shader *chunk = data;
+ struct si_screen *sscreen = chunk->ctx->screen;
+ si_dump_shader(sscreen, chunk->processor,
+ chunk->shader, f);
+}
+
+static struct u_log_chunk_type si_log_chunk_type_shader = {
+ .destroy = si_log_chunk_shader_destroy,
+ .print = si_log_chunk_shader_print,
+};
+
+static void si_dump_gfx_shader(struct si_context *ctx,
+ const struct si_shader_ctx_state *state,
+ struct u_log_context *log)
{
- const struct si_shader *current = state->current;
+ struct si_shader *current = state->current;
if (!state->cso || !current)
return;
- si_dump_shader(sscreen, state->cso->info.processor, current, f);
+ struct si_log_chunk_shader *chunk = CALLOC_STRUCT(si_log_chunk_shader);
+ chunk->ctx = ctx;
+ chunk->processor = state->cso->info.processor;
+ chunk->shader = current;
+ si_shader_selector_reference(ctx, &chunk->sel, current->selector);
+ u_log_chunk(log, &si_log_chunk_type_shader, chunk);
}
-static void si_dump_compute_shader(struct si_screen *sscreen,
- const struct si_cs_shader_state *state, FILE *f)
+static void si_dump_compute_shader(struct si_context *ctx,
+ struct u_log_context *log)
{
- if (!state->program || state->program != state->emitted_program)
+ const struct si_cs_shader_state *state = &ctx->cs_shader_state;
+
+ if (!state->program)
return;
- si_dump_shader(sscreen, PIPE_SHADER_COMPUTE, &state->program->shader, f);
+ struct si_log_chunk_shader *chunk = CALLOC_STRUCT(si_log_chunk_shader);
+ chunk->ctx = ctx;
+ chunk->processor = PIPE_SHADER_COMPUTE;
+ chunk->shader = &state->program->shader;
+ si_compute_reference(&chunk->program, state->program);
+ u_log_chunk(log, &si_log_chunk_type_shader, chunk);
}
/**
@@ -173,7 +231,7 @@
uint32_t value;
if (ws->read_registers(ws, offset, 1, &value))
- ac_dump_reg(f, offset, value, ~0);
+ ac_dump_reg(f, sctx->b.chip_class, offset, value, ~0);
}
static void si_dump_debug_registers(struct si_context *sctx, FILE *f)
@@ -217,40 +275,154 @@
fprintf(f, "\n");
}
-static void si_dump_last_ib(struct si_context *sctx, FILE *f)
+struct si_log_chunk_cs {
+ struct si_context *ctx;
+ struct si_saved_cs *cs;
+ bool dump_bo_list;
+ unsigned gfx_begin, gfx_end;
+};
+
+static void si_log_chunk_type_cs_destroy(void *data)
{
+ struct si_log_chunk_cs *chunk = data;
+ si_saved_cs_reference(&chunk->cs, NULL);
+ free(chunk);
+}
+
+static void si_parse_current_ib(FILE *f, struct radeon_winsys_cs *cs,
+ unsigned begin, unsigned end,
+ int *last_trace_id, unsigned trace_id_count,
+ const char *name, enum chip_class chip_class)
+{
+ unsigned orig_end = end;
+
+ assert(begin <= end);
+
+ fprintf(f, "------------------ %s begin (dw = %u) ------------------\n",
+ name, begin);
+
+ for (unsigned prev_idx = 0; prev_idx < cs->num_prev; ++prev_idx) {
+ struct radeon_winsys_cs_chunk *chunk = &cs->prev[prev_idx];
+
+ if (begin < chunk->cdw) {
+ ac_parse_ib_chunk(f, chunk->buf + begin,
+ MIN2(end, chunk->cdw) - begin,
+ last_trace_id, trace_id_count,
+ chip_class, NULL, NULL);
+ }
+
+ if (end <= chunk->cdw)
+ return;
+
+ if (begin < chunk->cdw)
+ fprintf(f, "\n---------- Next %s Chunk ----------\n\n",
+ name);
+
+ begin -= MIN2(begin, chunk->cdw);
+ end -= chunk->cdw;
+ }
+
+ assert(end <= cs->current.cdw);
+
+ ac_parse_ib_chunk(f, cs->current.buf + begin, end - begin, last_trace_id,
+ trace_id_count, chip_class, NULL, NULL);
+
+ fprintf(f, "------------------- %s end (dw = %u) -------------------\n\n",
+ name, orig_end);
+}
+
+static void si_log_chunk_type_cs_print(void *data, FILE *f)
+{
+ struct si_log_chunk_cs *chunk = data;
+ struct si_context *ctx = chunk->ctx;
+ struct si_saved_cs *scs = chunk->cs;
int last_trace_id = -1;
- if (!sctx->last_gfx.ib)
+ /* We are expecting that the ddebug pipe has already
+ * waited for the context, so this buffer should be idle.
+ * If the GPU is hung, there is no point in waiting for it.
+ */
+ uint32_t *map = ctx->b.ws->buffer_map(scs->trace_buf->buf,
+ NULL,
+ PIPE_TRANSFER_UNSYNCHRONIZED |
+ PIPE_TRANSFER_READ);
+ if (map)
+ last_trace_id = map[0];
+
+ if (chunk->gfx_end != chunk->gfx_begin) {
+ if (chunk->gfx_begin == 0) {
+ if (ctx->init_config)
+ ac_parse_ib(f, ctx->init_config->pm4, ctx->init_config->ndw,
+ NULL, 0, "IB2: Init config", ctx->b.chip_class,
+ NULL, NULL);
+
+ if (ctx->init_config_gs_rings)
+ ac_parse_ib(f, ctx->init_config_gs_rings->pm4,
+ ctx->init_config_gs_rings->ndw,
+ NULL, 0, "IB2: Init GS rings", ctx->b.chip_class,
+ NULL, NULL);
+ }
+
+ if (scs->flushed) {
+ ac_parse_ib(f, scs->gfx.ib + chunk->gfx_begin,
+ chunk->gfx_end - chunk->gfx_begin,
+ &last_trace_id, map ? 1 : 0, "IB", ctx->b.chip_class,
+ NULL, NULL);
+ } else {
+ si_parse_current_ib(f, ctx->b.gfx.cs, chunk->gfx_begin,
+ chunk->gfx_end, &last_trace_id, map ? 1 : 0,
+ "IB", ctx->b.chip_class);
+ }
+ }
+
+ if (chunk->dump_bo_list) {
+ fprintf(f, "Flushing.\n\n");
+ si_dump_bo_list(ctx, &scs->gfx, f);
+ }
+}
+
+static const struct u_log_chunk_type si_log_chunk_type_cs = {
+ .destroy = si_log_chunk_type_cs_destroy,
+ .print = si_log_chunk_type_cs_print,
+};
+
+static void si_log_cs(struct si_context *ctx, struct u_log_context *log,
+ bool dump_bo_list)
+{
+ assert(ctx->current_saved_cs);
+
+ struct si_saved_cs *scs = ctx->current_saved_cs;
+ unsigned gfx_cur = ctx->b.gfx.cs->prev_dw + ctx->b.gfx.cs->current.cdw;
+
+ if (!dump_bo_list &&
+ gfx_cur == scs->gfx_last_dw)
+ return;
+
+ struct si_log_chunk_cs *chunk = calloc(1, sizeof(*chunk));
+
+ chunk->ctx = ctx;
+ si_saved_cs_reference(&chunk->cs, scs);
+ chunk->dump_bo_list = dump_bo_list;
+
+ chunk->gfx_begin = scs->gfx_last_dw;
+ chunk->gfx_end = gfx_cur;
+ scs->gfx_last_dw = gfx_cur;
+
+ u_log_chunk(log, &si_log_chunk_type_cs, chunk);
+}
+
+void si_auto_log_cs(void *data, struct u_log_context *log)
+{
+ struct si_context *ctx = (struct si_context *)data;
+ si_log_cs(ctx, log, false);
+}
+
+void si_log_hw_flush(struct si_context *sctx)
+{
+ if (!sctx->b.log)
return;
- if (sctx->last_trace_buf) {
- /* We are expecting that the ddebug pipe has already
- * waited for the context, so this buffer should be idle.
- * If the GPU is hung, there is no point in waiting for it.
- */
- uint32_t *map = sctx->b.ws->buffer_map(sctx->last_trace_buf->buf,
- NULL,
- PIPE_TRANSFER_UNSYNCHRONIZED |
- PIPE_TRANSFER_READ);
- if (map)
- last_trace_id = *map;
- }
-
- if (sctx->init_config)
- ac_parse_ib(f, sctx->init_config->pm4, sctx->init_config->ndw,
- -1, "IB2: Init config", sctx->b.chip_class,
- NULL, NULL);
-
- if (sctx->init_config_gs_rings)
- ac_parse_ib(f, sctx->init_config_gs_rings->pm4,
- sctx->init_config_gs_rings->ndw,
- -1, "IB2: Init GS rings", sctx->b.chip_class,
- NULL, NULL);
-
- ac_parse_ib(f, sctx->last_gfx.ib, sctx->last_gfx.num_dw,
- last_trace_id, "IB", sctx->b.chip_class,
- NULL, NULL);
+ si_log_cs(sctx, sctx->b.log, true);
}
static const char *priority_to_string(enum radeon_bo_priority priority)
@@ -356,7 +528,7 @@
" Other buffers can still be allocated there.\n\n");
}
-static void si_dump_framebuffer(struct si_context *sctx, FILE *f)
+static void si_dump_framebuffer(struct si_context *sctx, struct u_log_context *log)
{
struct pipe_framebuffer_state *state = &sctx->framebuffer.state;
struct r600_texture *rtex;
@@ -367,86 +539,171 @@
continue;
rtex = (struct r600_texture*)state->cbufs[i]->texture;
- fprintf(f, COLOR_YELLOW "Color buffer %i:" COLOR_RESET "\n", i);
- r600_print_texture_info(sctx->b.screen, rtex, f);
- fprintf(f, "\n");
+ u_log_printf(log, COLOR_YELLOW "Color buffer %i:" COLOR_RESET "\n", i);
+ si_print_texture_info(sctx->b.screen, rtex, log);
+ u_log_printf(log, "\n");
}
if (state->zsbuf) {
rtex = (struct r600_texture*)state->zsbuf->texture;
- fprintf(f, COLOR_YELLOW "Depth-stencil buffer:" COLOR_RESET "\n");
- r600_print_texture_info(sctx->b.screen, rtex, f);
- fprintf(f, "\n");
+ u_log_printf(log, COLOR_YELLOW "Depth-stencil buffer:" COLOR_RESET "\n");
+ si_print_texture_info(sctx->b.screen, rtex, log);
+ u_log_printf(log, "\n");
}
}
typedef unsigned (*slot_remap_func)(unsigned);
-static void si_dump_descriptor_list(struct si_descriptors *desc,
- const char *shader_name,
- const char *elem_name,
- unsigned element_dw_size,
- unsigned num_elements,
- slot_remap_func slot_remap,
- FILE *f)
+struct si_log_chunk_desc_list {
+ /** Pointer to memory map of buffer where the list is uploader */
+ uint32_t *gpu_list;
+ /** Reference of buffer where the list is uploaded, so that gpu_list
+ * is kept live. */
+ struct r600_resource *buf;
+
+ const char *shader_name;
+ const char *elem_name;
+ slot_remap_func slot_remap;
+ enum chip_class chip_class;
+ unsigned element_dw_size;
+ unsigned num_elements;
+
+ uint32_t list[0];
+};
+
+static void
+si_log_chunk_desc_list_destroy(void *data)
{
- unsigned i, j;
+ struct si_log_chunk_desc_list *chunk = data;
+ r600_resource_reference(&chunk->buf, NULL);
+ FREE(chunk);
+}
- for (i = 0; i < num_elements; i++) {
- unsigned dw_offset = slot_remap(i) * element_dw_size;
- uint32_t *gpu_ptr = desc->gpu_list ? desc->gpu_list : desc->list;
- const char *list_note = desc->gpu_list ? "GPU list" : "CPU list";
- uint32_t *cpu_list = desc->list + dw_offset;
- uint32_t *gpu_list = gpu_ptr + dw_offset;
+static void
+si_log_chunk_desc_list_print(void *data, FILE *f)
+{
+ struct si_log_chunk_desc_list *chunk = data;
+
+ for (unsigned i = 0; i < chunk->num_elements; i++) {
+ unsigned cpu_dw_offset = i * chunk->element_dw_size;
+ unsigned gpu_dw_offset = chunk->slot_remap(i) * chunk->element_dw_size;
+ const char *list_note = chunk->gpu_list ? "GPU list" : "CPU list";
+ uint32_t *cpu_list = chunk->list + cpu_dw_offset;
+ uint32_t *gpu_list = chunk->gpu_list ? chunk->gpu_list + gpu_dw_offset : cpu_list;
fprintf(f, COLOR_GREEN "%s%s slot %u (%s):" COLOR_RESET "\n",
- shader_name, elem_name, i, list_note);
+ chunk->shader_name, chunk->elem_name, i, list_note);
- switch (element_dw_size) {
+ switch (chunk->element_dw_size) {
case 4:
- for (j = 0; j < 4; j++)
- ac_dump_reg(f, R_008F00_SQ_BUF_RSRC_WORD0 + j*4,
+ for (unsigned j = 0; j < 4; j++)
+ ac_dump_reg(f, chunk->chip_class,
+ R_008F00_SQ_BUF_RSRC_WORD0 + j*4,
gpu_list[j], 0xffffffff);
break;
case 8:
- for (j = 0; j < 8; j++)
- ac_dump_reg(f, R_008F10_SQ_IMG_RSRC_WORD0 + j*4,
+ for (unsigned j = 0; j < 8; j++)
+ ac_dump_reg(f, chunk->chip_class,
+ R_008F10_SQ_IMG_RSRC_WORD0 + j*4,
gpu_list[j], 0xffffffff);
fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n");
- for (j = 0; j < 4; j++)
- ac_dump_reg(f, R_008F00_SQ_BUF_RSRC_WORD0 + j*4,
+ for (unsigned j = 0; j < 4; j++)
+ ac_dump_reg(f, chunk->chip_class,
+ R_008F00_SQ_BUF_RSRC_WORD0 + j*4,
gpu_list[4+j], 0xffffffff);
break;
case 16:
- for (j = 0; j < 8; j++)
- ac_dump_reg(f, R_008F10_SQ_IMG_RSRC_WORD0 + j*4,
+ for (unsigned j = 0; j < 8; j++)
+ ac_dump_reg(f, chunk->chip_class,
+ R_008F10_SQ_IMG_RSRC_WORD0 + j*4,
gpu_list[j], 0xffffffff);
fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n");
- for (j = 0; j < 4; j++)
- ac_dump_reg(f, R_008F00_SQ_BUF_RSRC_WORD0 + j*4,
+ for (unsigned j = 0; j < 4; j++)
+ ac_dump_reg(f, chunk->chip_class,
+ R_008F00_SQ_BUF_RSRC_WORD0 + j*4,
gpu_list[4+j], 0xffffffff);
fprintf(f, COLOR_CYAN " FMASK:" COLOR_RESET "\n");
- for (j = 0; j < 8; j++)
- ac_dump_reg(f, R_008F10_SQ_IMG_RSRC_WORD0 + j*4,
+ for (unsigned j = 0; j < 8; j++)
+ ac_dump_reg(f, chunk->chip_class,
+ R_008F10_SQ_IMG_RSRC_WORD0 + j*4,
gpu_list[8+j], 0xffffffff);
fprintf(f, COLOR_CYAN " Sampler state:" COLOR_RESET "\n");
- for (j = 0; j < 4; j++)
- ac_dump_reg(f, R_008F30_SQ_IMG_SAMP_WORD0 + j*4,
+ for (unsigned j = 0; j < 4; j++)
+ ac_dump_reg(f, chunk->chip_class,
+ R_008F30_SQ_IMG_SAMP_WORD0 + j*4,
gpu_list[12+j], 0xffffffff);
break;
}
- if (memcmp(gpu_list, cpu_list, desc->element_dw_size * 4) != 0) {
+ if (memcmp(gpu_list, cpu_list, chunk->element_dw_size * 4) != 0) {
fprintf(f, COLOR_RED "!!!!! This slot was corrupted in GPU memory !!!!!"
COLOR_RESET "\n");
}
fprintf(f, "\n");
}
+
+}
+
+static const struct u_log_chunk_type si_log_chunk_type_descriptor_list = {
+ .destroy = si_log_chunk_desc_list_destroy,
+ .print = si_log_chunk_desc_list_print,
+};
+
+static void si_dump_descriptor_list(struct si_screen *screen,
+ struct si_descriptors *desc,
+ const char *shader_name,
+ const char *elem_name,
+ unsigned element_dw_size,
+ unsigned num_elements,
+ slot_remap_func slot_remap,
+ struct u_log_context *log)
+{
+ if (!desc->list)
+ return;
+
+ /* In some cases, the caller doesn't know how many elements are really
+ * uploaded. Reduce num_elements to fit in the range of active slots. */
+ unsigned active_range_dw_begin =
+ desc->first_active_slot * desc->element_dw_size;
+ unsigned active_range_dw_end =
+ active_range_dw_begin + desc->num_active_slots * desc->element_dw_size;
+
+ while (num_elements > 0) {
+ int i = slot_remap(num_elements - 1);
+ unsigned dw_begin = i * element_dw_size;
+ unsigned dw_end = dw_begin + element_dw_size;
+
+ if (dw_begin >= active_range_dw_begin && dw_end <= active_range_dw_end)
+ break;
+
+ num_elements--;
+ }
+
+ struct si_log_chunk_desc_list *chunk =
+ CALLOC_VARIANT_LENGTH_STRUCT(si_log_chunk_desc_list,
+ 4 * element_dw_size * num_elements);
+ chunk->shader_name = shader_name;
+ chunk->elem_name = elem_name;
+ chunk->element_dw_size = element_dw_size;
+ chunk->num_elements = num_elements;
+ chunk->slot_remap = slot_remap;
+ chunk->chip_class = screen->b.chip_class;
+
+ r600_resource_reference(&chunk->buf, desc->buffer);
+ chunk->gpu_list = desc->gpu_list;
+
+ for (unsigned i = 0; i < num_elements; ++i) {
+ memcpy(&chunk->list[i * element_dw_size],
+ &desc->list[slot_remap(i) * element_dw_size],
+ 4 * element_dw_size);
+ }
+
+ u_log_chunk(log, &si_log_chunk_type_descriptor_list, chunk);
}
static unsigned si_identity(unsigned slot)
@@ -456,7 +713,8 @@
static void si_dump_descriptors(struct si_context *sctx,
enum pipe_shader_type processor,
- const struct tgsi_shader_info *info, FILE *f)
+ const struct tgsi_shader_info *info,
+ struct u_log_context *log)
{
struct si_descriptors *descs =
&sctx->descriptors[SI_DESCS_FIRST_SHADER +
@@ -478,53 +736,57 @@
u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS);
enabled_shaderbuf = util_bitreverse(enabled_shaderbuf) >>
(32 - SI_NUM_SHADER_BUFFERS);
- enabled_samplers = sctx->samplers[processor].views.enabled_mask;
+ enabled_samplers = sctx->samplers[processor].enabled_mask;
enabled_images = sctx->images[processor].enabled_mask;
}
if (processor == PIPE_SHADER_VERTEX) {
assert(info); /* only CS may not have an info struct */
- si_dump_descriptor_list(&sctx->vertex_buffers, name,
+ si_dump_descriptor_list(sctx->screen, &sctx->vertex_buffers, name,
" - Vertex buffer", 4, info->num_inputs,
- si_identity, f);
+ si_identity, log);
}
- si_dump_descriptor_list(&descs[SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS],
+ si_dump_descriptor_list(sctx->screen,
+ &descs[SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS],
name, " - Constant buffer", 4,
util_last_bit(enabled_constbuf),
- si_get_constbuf_slot, f);
- si_dump_descriptor_list(&descs[SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS],
+ si_get_constbuf_slot, log);
+ si_dump_descriptor_list(sctx->screen,
+ &descs[SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS],
name, " - Shader buffer", 4,
util_last_bit(enabled_shaderbuf),
- si_get_shaderbuf_slot, f);
- si_dump_descriptor_list(&descs[SI_SHADER_DESCS_SAMPLERS_AND_IMAGES],
+ si_get_shaderbuf_slot, log);
+ si_dump_descriptor_list(sctx->screen,
+ &descs[SI_SHADER_DESCS_SAMPLERS_AND_IMAGES],
name, " - Sampler", 16,
util_last_bit(enabled_samplers),
- si_get_sampler_slot, f);
- si_dump_descriptor_list(&descs[SI_SHADER_DESCS_SAMPLERS_AND_IMAGES],
+ si_get_sampler_slot, log);
+ si_dump_descriptor_list(sctx->screen,
+ &descs[SI_SHADER_DESCS_SAMPLERS_AND_IMAGES],
name, " - Image", 8,
util_last_bit(enabled_images),
- si_get_image_slot, f);
+ si_get_image_slot, log);
}
static void si_dump_gfx_descriptors(struct si_context *sctx,
const struct si_shader_ctx_state *state,
- FILE *f)
+ struct u_log_context *log)
{
if (!state->cso || !state->current)
return;
- si_dump_descriptors(sctx, state->cso->type, &state->cso->info, f);
+ si_dump_descriptors(sctx, state->cso->type, &state->cso->info, log);
}
-static void si_dump_compute_descriptors(struct si_context *sctx, FILE *f)
+static void si_dump_compute_descriptors(struct si_context *sctx,
+ struct u_log_context *log)
{
- if (!sctx->cs_shader_state.program ||
- sctx->cs_shader_state.program != sctx->cs_shader_state.emitted_program)
+ if (!sctx->cs_shader_state.program)
return;
- si_dump_descriptors(sctx, PIPE_SHADER_COMPUTE, NULL, f);
+ si_dump_descriptors(sctx, PIPE_SHADER_COMPUTE, NULL, log);
}
struct si_shader_inst {
@@ -567,102 +829,12 @@
}
}
-#define MAX_WAVES_PER_CHIP (64 * 40)
-
-struct si_wave_info {
- unsigned se; /* shader engine */
- unsigned sh; /* shader array */
- unsigned cu; /* compute unit */
- unsigned simd;
- unsigned wave;
- uint32_t status;
- uint64_t pc; /* program counter */
- uint32_t inst_dw0;
- uint32_t inst_dw1;
- uint64_t exec;
- bool matched; /* whether the wave is used by a currently-bound shader */
-};
-
-static int compare_wave(const void *p1, const void *p2)
-{
- struct si_wave_info *w1 = (struct si_wave_info *)p1;
- struct si_wave_info *w2 = (struct si_wave_info *)p2;
-
- /* Sort waves according to PC and then SE, SH, CU, etc. */
- if (w1->pc < w2->pc)
- return -1;
- if (w1->pc > w2->pc)
- return 1;
- if (w1->se < w2->se)
- return -1;
- if (w1->se > w2->se)
- return 1;
- if (w1->sh < w2->sh)
- return -1;
- if (w1->sh > w2->sh)
- return 1;
- if (w1->cu < w2->cu)
- return -1;
- if (w1->cu > w2->cu)
- return 1;
- if (w1->simd < w2->simd)
- return -1;
- if (w1->simd > w2->simd)
- return 1;
- if (w1->wave < w2->wave)
- return -1;
- if (w1->wave > w2->wave)
- return 1;
-
- return 0;
-}
-
-/* Return wave information. "waves" should be a large enough array. */
-static unsigned si_get_wave_info(struct si_wave_info waves[MAX_WAVES_PER_CHIP])
-{
- char line[2000];
- unsigned num_waves = 0;
-
- FILE *p = popen("umr -wa", "r");
- if (!p)
- return 0;
-
- if (!fgets(line, sizeof(line), p) ||
- strncmp(line, "SE", 2) != 0) {
- pclose(p);
- return 0;
- }
-
- while (fgets(line, sizeof(line), p)) {
- struct si_wave_info *w;
- uint32_t pc_hi, pc_lo, exec_hi, exec_lo;
-
- assert(num_waves < MAX_WAVES_PER_CHIP);
- w = &waves[num_waves];
-
- if (sscanf(line, "%u %u %u %u %u %x %x %x %x %x %x %x",
- &w->se, &w->sh, &w->cu, &w->simd, &w->wave,
- &w->status, &pc_hi, &pc_lo, &w->inst_dw0,
- &w->inst_dw1, &exec_hi, &exec_lo) == 12) {
- w->pc = ((uint64_t)pc_hi << 32) | pc_lo;
- w->exec = ((uint64_t)exec_hi << 32) | exec_lo;
- w->matched = false;
- num_waves++;
- }
- }
-
- qsort(waves, num_waves, sizeof(struct si_wave_info), compare_wave);
-
- pclose(p);
- return num_waves;
-}
-
/* If the shader is being executed, print its asm instructions, and annotate
* those that are being executed right now with information about waves that
* execute them. This is most useful during a GPU hang.
*/
static void si_print_annotated_shader(struct si_shader *shader,
- struct si_wave_info *waves,
+ struct ac_wave_info *waves,
unsigned num_waves,
FILE *f)
{
@@ -748,8 +920,8 @@
static void si_dump_annotated_shaders(struct si_context *sctx, FILE *f)
{
- struct si_wave_info waves[MAX_WAVES_PER_CHIP];
- unsigned num_waves = si_get_wave_info(waves);
+ struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP];
+ unsigned num_waves = ac_get_wave_info(waves);
fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET
"\n\n", num_waves);
@@ -803,49 +975,51 @@
{
struct si_context *sctx = (struct si_context*)ctx;
- if (flags & PIPE_DUMP_DEVICE_STATUS_REGISTERS)
- si_dump_debug_registers(sctx, f);
+ if (sctx->b.log)
+ u_log_flush(sctx->b.log);
- if (flags & PIPE_DUMP_CURRENT_STATES)
- si_dump_framebuffer(sctx, f);
+ if (flags & PIPE_DUMP_DEVICE_STATUS_REGISTERS) {
+ si_dump_debug_registers(sctx, f);
- if (flags & PIPE_DUMP_CURRENT_SHADERS) {
- si_dump_gfx_shader(sctx->screen, &sctx->vs_shader, f);
- si_dump_gfx_shader(sctx->screen, &sctx->tcs_shader, f);
- si_dump_gfx_shader(sctx->screen, &sctx->tes_shader, f);
- si_dump_gfx_shader(sctx->screen, &sctx->gs_shader, f);
- si_dump_gfx_shader(sctx->screen, &sctx->ps_shader, f);
- si_dump_compute_shader(sctx->screen, &sctx->cs_shader_state, f);
-
- if (flags & PIPE_DUMP_DEVICE_STATUS_REGISTERS) {
- si_dump_annotated_shaders(sctx, f);
- si_dump_command("Active waves (raw data)", "umr -wa | column -t", f);
- si_dump_command("Wave information", "umr -O bits -wa", f);
- }
-
- si_dump_descriptor_list(&sctx->descriptors[SI_DESCS_RW_BUFFERS],
- "", "RW buffers", 4, SI_NUM_RW_BUFFERS,
- si_identity, f);
- si_dump_gfx_descriptors(sctx, &sctx->vs_shader, f);
- si_dump_gfx_descriptors(sctx, &sctx->tcs_shader, f);
- si_dump_gfx_descriptors(sctx, &sctx->tes_shader, f);
- si_dump_gfx_descriptors(sctx, &sctx->gs_shader, f);
- si_dump_gfx_descriptors(sctx, &sctx->ps_shader, f);
- si_dump_compute_descriptors(sctx, f);
- }
-
- if (flags & PIPE_DUMP_LAST_COMMAND_BUFFER) {
- si_dump_bo_list(sctx, &sctx->last_gfx, f);
- si_dump_last_ib(sctx, f);
-
- fprintf(f, "Done.\n");
-
- /* dump only once */
- radeon_clear_saved_cs(&sctx->last_gfx);
- r600_resource_reference(&sctx->last_trace_buf, NULL);
+ si_dump_annotated_shaders(sctx, f);
+ si_dump_command("Active waves (raw data)", "umr -wa | column -t", f);
+ si_dump_command("Wave information", "umr -O bits -wa", f);
}
}
+void si_log_draw_state(struct si_context *sctx, struct u_log_context *log)
+{
+ if (!log)
+ return;
+
+ si_dump_framebuffer(sctx, log);
+
+ si_dump_gfx_shader(sctx, &sctx->vs_shader, log);
+ si_dump_gfx_shader(sctx, &sctx->tcs_shader, log);
+ si_dump_gfx_shader(sctx, &sctx->tes_shader, log);
+ si_dump_gfx_shader(sctx, &sctx->gs_shader, log);
+ si_dump_gfx_shader(sctx, &sctx->ps_shader, log);
+
+ si_dump_descriptor_list(sctx->screen,
+ &sctx->descriptors[SI_DESCS_RW_BUFFERS],
+ "", "RW buffers", 4, SI_NUM_RW_BUFFERS,
+ si_identity, log);
+ si_dump_gfx_descriptors(sctx, &sctx->vs_shader, log);
+ si_dump_gfx_descriptors(sctx, &sctx->tcs_shader, log);
+ si_dump_gfx_descriptors(sctx, &sctx->tes_shader, log);
+ si_dump_gfx_descriptors(sctx, &sctx->gs_shader, log);
+ si_dump_gfx_descriptors(sctx, &sctx->ps_shader, log);
+}
+
+void si_log_compute_state(struct si_context *sctx, struct u_log_context *log)
+{
+ if (!log)
+ return;
+
+ si_dump_compute_shader(sctx, log);
+ si_dump_compute_descriptors(sctx, log);
+}
+
static void si_dump_dma(struct si_context *sctx,
struct radeon_saved_cs *saved, FILE *f)
{
@@ -866,106 +1040,6 @@
fprintf(f, "SDMA Dump Done.\n");
}
-static bool si_vm_fault_occured(struct si_context *sctx, uint64_t *out_addr)
-{
- char line[2000];
- unsigned sec, usec;
- int progress = 0;
- uint64_t timestamp = 0;
- bool fault = false;
-
- FILE *p = popen("dmesg", "r");
- if (!p)
- return false;
-
- while (fgets(line, sizeof(line), p)) {
- char *msg, len;
-
- if (!line[0] || line[0] == '\n')
- continue;
-
- /* Get the timestamp. */
- if (sscanf(line, "[%u.%u]", &sec, &usec) != 2) {
- static bool hit = false;
- if (!hit) {
- fprintf(stderr, "%s: failed to parse line '%s'\n",
- __func__, line);
- hit = true;
- }
- continue;
- }
- timestamp = sec * 1000000ull + usec;
-
- /* If just updating the timestamp. */
- if (!out_addr)
- continue;
-
- /* Process messages only if the timestamp is newer. */
- if (timestamp <= sctx->dmesg_timestamp)
- continue;
-
- /* Only process the first VM fault. */
- if (fault)
- continue;
-
- /* Remove trailing \n */
- len = strlen(line);
- if (len && line[len-1] == '\n')
- line[len-1] = 0;
-
- /* Get the message part. */
- msg = strchr(line, ']');
- if (!msg) {
- assert(0);
- continue;
- }
- msg++;
-
- const char *header_line, *addr_line_prefix, *addr_line_format;
-
- if (sctx->b.chip_class >= GFX9) {
- /* Match this:
- * ..: [gfxhub] VMC page fault (src_id:0 ring:158 vm_id:2 pas_id:0)
- * ..: at page 0x0000000219f8f000 from 27
- * ..: VM_L2_PROTECTION_FAULT_STATUS:0x0020113C
- */
- header_line = "VMC page fault";
- addr_line_prefix = " at page";
- addr_line_format = "%"PRIx64;
- } else {
- header_line = "GPU fault detected:";
- addr_line_prefix = "VM_CONTEXT1_PROTECTION_FAULT_ADDR";
- addr_line_format = "%"PRIX64;
- }
-
- switch (progress) {
- case 0:
- if (strstr(msg, header_line))
- progress = 1;
- break;
- case 1:
- msg = strstr(msg, addr_line_prefix);
- if (msg) {
- msg = strstr(msg, "0x");
- if (msg) {
- msg += 2;
- if (sscanf(msg, addr_line_format, out_addr) == 1)
- fault = true;
- }
- }
- progress = 0;
- break;
- default:
- progress = 0;
- }
- }
- pclose(p);
-
- if (timestamp > sctx->dmesg_timestamp)
- sctx->dmesg_timestamp = timestamp;
- return fault;
-}
-
void si_check_vm_faults(struct r600_common_context *ctx,
struct radeon_saved_cs *saved, enum ring_type ring)
{
@@ -975,7 +1049,8 @@
uint64_t addr;
char cmd_line[4096];
- if (!si_vm_fault_occured(sctx, &addr))
+ if (!ac_vm_fault_occured(sctx->b.chip_class,
+ &sctx->dmesg_timestamp, &addr))
return;
f = dd_get_debug_file(false);
@@ -995,13 +1070,17 @@
sctx->apitrace_call_number);
switch (ring) {
- case RING_GFX:
- si_dump_debug_state(&sctx->b.b, f,
- PIPE_DUMP_CURRENT_STATES |
- PIPE_DUMP_CURRENT_SHADERS |
- PIPE_DUMP_LAST_COMMAND_BUFFER);
- break;
+ case RING_GFX: {
+ struct u_log_context log;
+ u_log_context_init(&log);
+
+ si_log_draw_state(sctx, &log);
+ si_log_compute_state(sctx, &log);
+ u_log_new_page_print(&log, f);
+ u_log_context_destroy(&log);
+ break;
+ }
case RING_DMA:
si_dump_dma(sctx, saved, f);
break;
@@ -1024,6 +1103,7 @@
/* Set the initial dmesg timestamp for this context, so that
* only new messages will be checked for VM faults.
*/
- if (sctx->screen->b.debug_flags & DBG_CHECK_VM)
- si_vm_fault_occured(sctx, NULL);
+ if (sctx->screen->b.debug_flags & DBG(CHECK_VM))
+ ac_vm_fault_occured(sctx->b.chip_class,
+ &sctx->dmesg_timestamp, NULL);
}
diff -Nru mesa-17.2.4/src/gallium/drivers/radeonsi/si_descriptors.c mesa-17.3.3/src/gallium/drivers/radeonsi/si_descriptors.c
--- mesa-17.2.4/src/gallium/drivers/radeonsi/si_descriptors.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeonsi/si_descriptors.c 2018-01-18 21:30:28.000000000 +0000
@@ -61,6 +61,7 @@
#include "gfx9d.h"
#include "util/hash_table.h"
+#include "util/u_idalloc.h"
#include "util/u_format.h"
#include "util/u_memory.h"
#include "util/u_upload_mgr.h"
@@ -96,6 +97,11 @@
* descriptor */
};
+static uint64_t si_desc_extract_buffer_address(uint32_t *desc)
+{
+ return desc[0] | ((uint64_t)G_008F04_BASE_ADDRESS_HI(desc[1]) << 32);
+}
+
static void si_init_descriptor_list(uint32_t *desc_list,
unsigned element_dw_size,
unsigned num_elements,
@@ -111,31 +117,16 @@
}
}
-static void si_init_descriptors(struct si_context *sctx,
- struct si_descriptors *desc,
+static void si_init_descriptors(struct si_descriptors *desc,
unsigned shader_userdata_index,
unsigned element_dw_size,
- unsigned num_elements,
- unsigned first_ce_slot,
- unsigned num_ce_slots,
- unsigned *ce_offset)
+ unsigned num_elements)
{
- assert(num_elements <= sizeof(desc->dirty_mask)*8);
-
desc->list = CALLOC(num_elements, element_dw_size * 4);
desc->element_dw_size = element_dw_size;
desc->num_elements = num_elements;
- desc->first_ce_slot = sctx->ce_ib ? first_ce_slot : 0;
- desc->num_ce_slots = sctx->ce_ib ? num_ce_slots : 0;
- desc->dirty_mask = u_bit_consecutive64(0, num_elements);
desc->shader_userdata_offset = shader_userdata_index * 4;
-
- if (desc->num_ce_slots) {
- desc->uses_ce = true;
- desc->ce_offset = *ce_offset;
-
- *ce_offset += element_dw_size * desc->num_ce_slots * 4;
- }
+ desc->slot_index_to_bind_directly = -1;
}
static void si_release_descriptors(struct si_descriptors *desc)
@@ -144,76 +135,8 @@
FREE(desc->list);
}
-static bool si_ce_upload(struct si_context *sctx, unsigned ce_offset, unsigned size,
- unsigned *out_offset, struct r600_resource **out_buf)
-{
- uint64_t va;
-
- u_suballocator_alloc(sctx->ce_suballocator, size,
- si_optimal_tcc_alignment(sctx, size),
- out_offset,
- (struct pipe_resource**)out_buf);
- if (!out_buf)
- return false;
-
- va = (*out_buf)->gpu_address + *out_offset;
-
- radeon_emit(sctx->ce_ib, PKT3(PKT3_DUMP_CONST_RAM, 3, 0));
- radeon_emit(sctx->ce_ib, ce_offset);
- radeon_emit(sctx->ce_ib, size / 4);
- radeon_emit(sctx->ce_ib, va);
- radeon_emit(sctx->ce_ib, va >> 32);
-
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, *out_buf,
- RADEON_USAGE_READWRITE, RADEON_PRIO_DESCRIPTORS);
-
- sctx->ce_need_synchronization = true;
- return true;
-}
-
-void si_ce_save_all_descriptors_at_ib_end(struct si_context* sctx)
-{
- bool success = si_ce_upload(sctx, 0, sctx->total_ce_ram_allocated,
- &sctx->ce_ram_saved_offset,
- &sctx->ce_ram_saved_buffer);
- (void)success;
- assert(success);
-}
-
-void si_ce_restore_all_descriptors_at_ib_start(struct si_context *sctx)
-{
- if (!sctx->ce_ram_saved_buffer)
- return;
-
- struct radeon_winsys_cs *ib = sctx->ce_preamble_ib;
- if (!ib)
- ib = sctx->ce_ib;
-
- uint64_t va = sctx->ce_ram_saved_buffer->gpu_address +
- sctx->ce_ram_saved_offset;
-
- radeon_emit(ib, PKT3(PKT3_LOAD_CONST_RAM, 3, 0));
- radeon_emit(ib, va);
- radeon_emit(ib, va >> 32);
- radeon_emit(ib, sctx->total_ce_ram_allocated / 4);
- radeon_emit(ib, 0);
-
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
- sctx->ce_ram_saved_buffer,
- RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
-}
-
-void si_ce_enable_loads(struct radeon_winsys_cs *ib)
-{
- radeon_emit(ib, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
- radeon_emit(ib, CONTEXT_CONTROL_LOAD_ENABLE(1) |
- CONTEXT_CONTROL_LOAD_CE_RAM(1));
- radeon_emit(ib, CONTEXT_CONTROL_SHADOW_ENABLE(1));
-}
-
static bool si_upload_descriptors(struct si_context *sctx,
- struct si_descriptors *desc,
- struct r600_atom * atom)
+ struct si_descriptors *desc)
{
unsigned slot_size = desc->element_dw_size * 4;
unsigned first_slot_offset = desc->first_active_slot * slot_size;
@@ -226,59 +149,44 @@
if (!upload_size)
return true;
- if (desc->uses_ce) {
- const uint32_t *list = desc->list +
- desc->first_ce_slot * desc->element_dw_size;
- uint64_t mask = (desc->dirty_mask >> desc->first_ce_slot) &
- u_bit_consecutive64(0, desc->num_ce_slots);
-
-
- while (mask) {
- int begin, count;
- u_bit_scan_consecutive_range64(&mask, &begin, &count);
-
- begin *= desc->element_dw_size;
- count *= desc->element_dw_size;
-
- radeon_emit(sctx->ce_ib,
- PKT3(PKT3_WRITE_CONST_RAM, count, 0));
- radeon_emit(sctx->ce_ib, desc->ce_offset + begin * 4);
- radeon_emit_array(sctx->ce_ib, list + begin, count);
- }
-
- if (!si_ce_upload(sctx,
- desc->ce_offset +
- (first_slot_offset - desc->first_ce_slot * slot_size),
- upload_size, (unsigned*)&desc->buffer_offset,
- &desc->buffer))
- return false;
- } else {
- uint32_t *ptr;
-
- u_upload_alloc(sctx->b.b.const_uploader, 0, upload_size,
- si_optimal_tcc_alignment(sctx, upload_size),
- (unsigned*)&desc->buffer_offset,
- (struct pipe_resource**)&desc->buffer,
- (void**)&ptr);
- if (!desc->buffer)
- return false; /* skip the draw call */
-
- util_memcpy_cpu_to_le32(ptr, (char*)desc->list + first_slot_offset,
- upload_size);
- desc->gpu_list = ptr - first_slot_offset / 4;
+ /* If there is just one active descriptor, bind it directly. */
+ if ((int)desc->first_active_slot == desc->slot_index_to_bind_directly &&
+ desc->num_active_slots == 1) {
+ uint32_t *descriptor = &desc->list[desc->slot_index_to_bind_directly *
+ desc->element_dw_size];
+
+ /* The buffer is already in the buffer list. */
+ r600_resource_reference(&desc->buffer, NULL);
+ desc->gpu_list = NULL;
+ desc->gpu_address = si_desc_extract_buffer_address(descriptor);
+ si_mark_atom_dirty(sctx, &sctx->shader_pointers.atom);
+ return true;
+ }
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer,
- RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
+ uint32_t *ptr;
+ int buffer_offset;
+ u_upload_alloc(sctx->b.b.const_uploader, 0, upload_size,
+ si_optimal_tcc_alignment(sctx, upload_size),
+ (unsigned*)&buffer_offset,
+ (struct pipe_resource**)&desc->buffer,
+ (void**)&ptr);
+ if (!desc->buffer) {
+ desc->gpu_address = 0;
+ return false; /* skip the draw call */
}
- /* The shader pointer should point to slot 0. */
- desc->buffer_offset -= first_slot_offset;
+ util_memcpy_cpu_to_le32(ptr, (char*)desc->list + first_slot_offset,
+ upload_size);
+ desc->gpu_list = ptr - first_slot_offset / 4;
- desc->dirty_mask = 0;
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer,
+ RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
- if (atom)
- si_mark_atom_dirty(sctx, atom);
+ /* The shader pointer should point to slot 0. */
+ buffer_offset -= first_slot_offset;
+ desc->gpu_address = desc->buffer->gpu_address + buffer_offset;
+ si_mark_atom_dirty(sctx, &sctx->shader_pointers.atom);
return true;
}
@@ -307,12 +215,12 @@
return &sctx->descriptors[si_sampler_and_image_descriptors_idx(shader)];
}
-static void si_release_sampler_views(struct si_sampler_views *views)
+static void si_release_sampler_views(struct si_samplers *samplers)
{
int i;
- for (i = 0; i < ARRAY_SIZE(views->views); i++) {
- pipe_sampler_view_reference(&views->views[i], NULL);
+ for (i = 0; i < ARRAY_SIZE(samplers->views); i++) {
+ pipe_sampler_view_reference(&samplers->views[i], NULL);
}
}
@@ -356,14 +264,14 @@
}
static void si_sampler_views_begin_new_cs(struct si_context *sctx,
- struct si_sampler_views *views)
+ struct si_samplers *samplers)
{
- unsigned mask = views->enabled_mask;
+ unsigned mask = samplers->enabled_mask;
/* Add buffers to the CS. */
while (mask) {
int i = u_bit_scan(&mask);
- struct si_sampler_view *sview = (struct si_sampler_view *)views->views[i];
+ struct si_sampler_view *sview = (struct si_sampler_view *)samplers->views[i];
si_sampler_view_add_buffer(sctx, sview->base.texture,
RADEON_USAGE_READ,
@@ -422,6 +330,13 @@
state[1] &= C_008F14_BASE_ADDRESS_HI;
state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);
+ /* Only macrotiled modes can set tile swizzle.
+ * GFX9 doesn't use (legacy) base_level_info.
+ */
+ if (sscreen->b.chip_class >= GFX9 ||
+ base_level_info->mode == RADEON_SURF_MODE_2D)
+ state[0] |= tex->surface.tile_swizzle;
+
if (sscreen->b.chip_class >= VI) {
state[6] &= C_008F28_COMPRESSION_EN;
state[7] = 0;
@@ -430,9 +345,13 @@
meta_va = (!tex->dcc_separate_buffer ? tex->resource.gpu_address : 0) +
tex->dcc_offset;
- if (sscreen->b.chip_class <= VI)
+ if (sscreen->b.chip_class == VI) {
meta_va += base_level_info->dcc_offset;
- } else if (tex->tc_compatible_htile) {
+ assert(base_level_info->mode == RADEON_SURF_MODE_2D);
+ }
+
+ meta_va |= (uint32_t)tex->surface.tile_swizzle << 8;
+ } else if (vi_tc_compat_htile_enabled(tex, first_level)) {
meta_va = tex->resource.gpu_address + tex->htile_offset;
}
@@ -481,6 +400,20 @@
}
}
+static void si_set_sampler_state_desc(struct si_sampler_state *sstate,
+ struct si_sampler_view *sview,
+ struct r600_texture *tex,
+ uint32_t *desc)
+{
+ if (sview && sview->is_integer)
+ memcpy(desc, sstate->integer_val, 4*4);
+ else if (tex && tex->upgraded_depth &&
+ (!sview || !sview->is_stencil_sampler))
+ memcpy(desc, sstate->upgraded_depth_val, 4*4);
+ else
+ memcpy(desc, sstate->val, 4*4);
+}
+
static void si_set_sampler_view_desc(struct si_context *sctx,
struct si_sampler_view *sview,
struct si_sampler_state *sstate,
@@ -492,7 +425,7 @@
if (unlikely(!is_buffer && sview->dcc_incompatible)) {
if (vi_dcc_enabled(rtex, view->u.tex.first_level))
- if (!r600_texture_disable_dcc(&sctx->b, rtex))
+ if (!si_texture_disable_dcc(&sctx->b, rtex))
sctx->b.decompress_dcc(&sctx->b.b, rtex);
sview->dcc_incompatible = false;
@@ -524,41 +457,73 @@
/* Disable FMASK and bind sampler state in [12:15]. */
memcpy(desc + 8, null_texture_descriptor, 4*4);
- if (sstate) {
- if (!is_buffer && rtex->upgraded_depth &&
- !sview->is_stencil_sampler)
- memcpy(desc + 12, sstate->upgraded_depth_val, 4*4);
- else
- memcpy(desc + 12, sstate->val, 4*4);
- }
+ if (sstate)
+ si_set_sampler_state_desc(sstate, sview,
+ is_buffer ? NULL : rtex,
+ desc + 12);
}
}
+static bool color_needs_decompression(struct r600_texture *rtex)
+{
+ return rtex->fmask.size ||
+ (rtex->dirty_level_mask &&
+ (rtex->cmask.size || rtex->dcc_offset));
+}
+
+static bool depth_needs_decompression(struct r600_texture *rtex)
+{
+ /* If the depth/stencil texture is TC-compatible, no decompression
+ * will be done. The decompression function will only flush DB caches
+ * to make it coherent with shaders. That's necessary because the driver
+ * doesn't flush DB caches in any other case.
+ */
+ return rtex->db_compatible;
+}
+
static void si_set_sampler_view(struct si_context *sctx,
unsigned shader,
unsigned slot, struct pipe_sampler_view *view,
bool disallow_early_out)
{
- struct si_sampler_views *views = &sctx->samplers[shader].views;
+ struct si_samplers *samplers = &sctx->samplers[shader];
struct si_sampler_view *rview = (struct si_sampler_view*)view;
struct si_descriptors *descs = si_sampler_and_image_descriptors(sctx, shader);
unsigned desc_slot = si_get_sampler_slot(slot);
uint32_t *desc = descs->list + desc_slot * 16;
- if (views->views[slot] == view && !disallow_early_out)
+ if (samplers->views[slot] == view && !disallow_early_out)
return;
if (view) {
struct r600_texture *rtex = (struct r600_texture *)view->texture;
si_set_sampler_view_desc(sctx, rview,
- views->sampler_states[slot], desc);
+ samplers->sampler_states[slot], desc);
- if (rtex->resource.b.b.target == PIPE_BUFFER)
+ if (rtex->resource.b.b.target == PIPE_BUFFER) {
rtex->resource.bind_history |= PIPE_BIND_SAMPLER_VIEW;
+ samplers->needs_depth_decompress_mask &= ~(1u << slot);
+ samplers->needs_color_decompress_mask &= ~(1u << slot);
+ } else {
+ if (depth_needs_decompression(rtex)) {
+ samplers->needs_depth_decompress_mask |= 1u << slot;
+ } else {
+ samplers->needs_depth_decompress_mask &= ~(1u << slot);
+ }
+ if (color_needs_decompression(rtex)) {
+ samplers->needs_color_decompress_mask |= 1u << slot;
+ } else {
+ samplers->needs_color_decompress_mask &= ~(1u << slot);
+ }
+
+ if (rtex->dcc_offset &&
+ p_atomic_read(&rtex->framebuffers_bound))
+ sctx->need_check_render_feedback = true;
+ }
- pipe_sampler_view_reference(&views->views[slot], view);
- views->enabled_mask |= 1u << slot;
+ pipe_sampler_view_reference(&samplers->views[slot], view);
+ samplers->enabled_mask |= 1u << slot;
/* Since this can flush, it must be done after enabled_mask is
* updated. */
@@ -566,43 +531,27 @@
RADEON_USAGE_READ,
rview->is_stencil_sampler, true);
} else {
- pipe_sampler_view_reference(&views->views[slot], NULL);
+ pipe_sampler_view_reference(&samplers->views[slot], NULL);
memcpy(desc, null_texture_descriptor, 8*4);
/* Only clear the lower dwords of FMASK. */
memcpy(desc + 8, null_texture_descriptor, 4*4);
/* Re-set the sampler state if we are transitioning from FMASK. */
- if (views->sampler_states[slot])
- memcpy(desc + 12,
- views->sampler_states[slot]->val, 4*4);
-
- views->enabled_mask &= ~(1u << slot);
+ if (samplers->sampler_states[slot])
+ si_set_sampler_state_desc(samplers->sampler_states[slot], NULL, NULL,
+ desc + 12);
+
+ samplers->enabled_mask &= ~(1u << slot);
+ samplers->needs_depth_decompress_mask &= ~(1u << slot);
+ samplers->needs_color_decompress_mask &= ~(1u << slot);
}
- descs->dirty_mask |= 1ull << desc_slot;
sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);
}
-static bool color_needs_decompression(struct r600_texture *rtex)
-{
- return rtex->fmask.size ||
- (rtex->dirty_level_mask &&
- (rtex->cmask.size || rtex->dcc_offset));
-}
-
-static bool depth_needs_decompression(struct r600_texture *rtex)
-{
- /* If the depth/stencil texture is TC-compatible, no decompression
- * will be done. The decompression function will only flush DB caches
- * to make it coherent with shaders. That's necessary because the driver
- * doesn't flush DB caches in any other case.
- */
- return rtex->db_compatible;
-}
-
static void si_update_shader_needs_decompress_mask(struct si_context *sctx,
unsigned shader)
{
- struct si_textures_info *samplers = &sctx->samplers[shader];
+ struct si_samplers *samplers = &sctx->samplers[shader];
unsigned shader_bit = 1 << shader;
if (samplers->needs_depth_decompress_mask ||
@@ -619,59 +568,30 @@
struct pipe_sampler_view **views)
{
struct si_context *sctx = (struct si_context *)ctx;
- struct si_textures_info *samplers = &sctx->samplers[shader];
int i;
if (!count || shader >= SI_NUM_SHADERS)
return;
- for (i = 0; i < count; i++) {
- unsigned slot = start + i;
-
- if (!views || !views[i]) {
- samplers->needs_depth_decompress_mask &= ~(1u << slot);
- samplers->needs_color_decompress_mask &= ~(1u << slot);
- si_set_sampler_view(sctx, shader, slot, NULL, false);
- continue;
- }
-
- si_set_sampler_view(sctx, shader, slot, views[i], false);
-
- if (views[i]->texture && views[i]->texture->target != PIPE_BUFFER) {
- struct r600_texture *rtex =
- (struct r600_texture*)views[i]->texture;
-
- if (depth_needs_decompression(rtex)) {
- samplers->needs_depth_decompress_mask |= 1u << slot;
- } else {
- samplers->needs_depth_decompress_mask &= ~(1u << slot);
- }
- if (color_needs_decompression(rtex)) {
- samplers->needs_color_decompress_mask |= 1u << slot;
- } else {
- samplers->needs_color_decompress_mask &= ~(1u << slot);
- }
-
- if (rtex->dcc_offset &&
- p_atomic_read(&rtex->framebuffers_bound))
- sctx->need_check_render_feedback = true;
- } else {
- samplers->needs_depth_decompress_mask &= ~(1u << slot);
- samplers->needs_color_decompress_mask &= ~(1u << slot);
- }
+ if (views) {
+ for (i = 0; i < count; i++)
+ si_set_sampler_view(sctx, shader, start + i, views[i], false);
+ } else {
+ for (i = 0; i < count; i++)
+ si_set_sampler_view(sctx, shader, start + i, NULL, false);
}
si_update_shader_needs_decompress_mask(sctx, shader);
}
static void
-si_samplers_update_needs_color_decompress_mask(struct si_textures_info *samplers)
+si_samplers_update_needs_color_decompress_mask(struct si_samplers *samplers)
{
- unsigned mask = samplers->views.enabled_mask;
+ unsigned mask = samplers->enabled_mask;
while (mask) {
int i = u_bit_scan(&mask);
- struct pipe_resource *res = samplers->views.views[i]->texture;
+ struct pipe_resource *res = samplers->views[i]->texture;
if (res && res->target != PIPE_BUFFER) {
struct r600_texture *rtex = (struct r600_texture *)res;
@@ -688,7 +608,7 @@
/* IMAGE VIEWS */
static void
-si_release_image_views(struct si_images_info *images)
+si_release_image_views(struct si_images *images)
{
unsigned i;
@@ -700,7 +620,7 @@
}
static void
-si_image_views_begin_new_cs(struct si_context *sctx, struct si_images_info *images)
+si_image_views_begin_new_cs(struct si_context *sctx, struct si_images *images)
{
uint mask = images->enabled_mask;
@@ -719,7 +639,7 @@
static void
si_disable_shader_image(struct si_context *ctx, unsigned shader, unsigned slot)
{
- struct si_images_info *images = &ctx->images[shader];
+ struct si_images *images = &ctx->images[shader];
if (images->enabled_mask & (1u << slot)) {
struct si_descriptors *descs = si_sampler_and_image_descriptors(ctx, shader);
@@ -730,8 +650,6 @@
memcpy(descs->list + desc_slot*8, null_image_descriptor, 8*4);
images->enabled_mask &= ~(1u << slot);
- /* two 8-byte images share one 16-byte slot */
- descs->dirty_mask |= 1u << (desc_slot / 2);
ctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);
}
}
@@ -784,7 +702,7 @@
* The decompression is relatively cheap if the surface
* has been decompressed already.
*/
- if (!r600_texture_disable_dcc(&ctx->b, tex))
+ if (!si_texture_disable_dcc(&ctx->b, tex))
ctx->b.decompress_dcc(&ctx->b.b, tex);
}
@@ -830,7 +748,7 @@
unsigned slot, const struct pipe_image_view *view,
bool skip_decompress)
{
- struct si_images_info *images = &ctx->images[shader];
+ struct si_images *images = &ctx->images[shader];
struct si_descriptors *descs = si_sampler_and_image_descriptors(ctx, shader);
struct r600_resource *res;
unsigned desc_slot = si_get_image_slot(slot);
@@ -867,8 +785,6 @@
}
images->enabled_mask |= 1u << slot;
- /* two 8-byte images share one 16-byte slot */
- descs->dirty_mask |= 1u << (desc_slot / 2);
ctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);
/* Since this can flush, it must be done after enabled_mask is updated. */
@@ -906,7 +822,7 @@
}
static void
-si_images_update_needs_color_decompress_mask(struct si_images_info *images)
+si_images_update_needs_color_decompress_mask(struct si_images *images)
{
unsigned mask = images->enabled_mask;
@@ -933,7 +849,7 @@
unsigned start, unsigned count, void **states)
{
struct si_context *sctx = (struct si_context *)ctx;
- struct si_textures_info *samplers = &sctx->samplers[shader];
+ struct si_samplers *samplers = &sctx->samplers[shader];
struct si_descriptors *desc = si_sampler_and_image_descriptors(sctx, shader);
struct si_sampler_state **sstates = (struct si_sampler_state**)states;
int i;
@@ -946,19 +862,19 @@
unsigned desc_slot = si_get_sampler_slot(slot);
if (!sstates[i] ||
- sstates[i] == samplers->views.sampler_states[slot])
+ sstates[i] == samplers->sampler_states[slot])
continue;
#ifdef DEBUG
assert(sstates[i]->magic == SI_SAMPLER_STATE_MAGIC);
#endif
- samplers->views.sampler_states[slot] = sstates[i];
+ samplers->sampler_states[slot] = sstates[i];
/* If FMASK is bound, don't overwrite it.
* The sampler state will be set after FMASK is unbound.
*/
struct si_sampler_view *sview =
- (struct si_sampler_view *)samplers->views.views[slot];
+ (struct si_sampler_view *)samplers->views[slot];
struct r600_texture *tex = NULL;
@@ -969,13 +885,8 @@
if (tex && tex->fmask.size)
continue;
- if (tex && tex->upgraded_depth && !sview->is_stencil_sampler)
- memcpy(desc->list + desc_slot * 16 + 12,
- sstates[i]->upgraded_depth_val, 4*4);
- else
- memcpy(desc->list + desc_slot * 16 + 12,
- sstates[i]->val, 4*4);
- desc->dirty_mask |= 1ull << desc_slot;
+ si_set_sampler_state_desc(sstates[i], sview, tex,
+ desc->list + desc_slot * 16 + 12);
sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);
}
@@ -983,18 +894,14 @@
/* BUFFER RESOURCES */
-static void si_init_buffer_resources(struct si_context *sctx,
- struct si_buffer_resources *buffers,
+static void si_init_buffer_resources(struct si_buffer_resources *buffers,
struct si_descriptors *descs,
unsigned num_buffers,
- unsigned first_ce_slot,
- unsigned num_ce_slots,
unsigned shader_userdata_index,
enum radeon_bo_usage shader_usage,
enum radeon_bo_usage shader_usage_constbuf,
enum radeon_bo_priority priority,
- enum radeon_bo_priority priority_constbuf,
- unsigned *ce_offset)
+ enum radeon_bo_priority priority_constbuf)
{
buffers->shader_usage = shader_usage;
buffers->shader_usage_constbuf = shader_usage_constbuf;
@@ -1002,8 +909,7 @@
buffers->priority_constbuf = priority_constbuf;
buffers->buffers = CALLOC(num_buffers, sizeof(struct pipe_resource*));
- si_init_descriptors(sctx, descs, shader_userdata_index, 4, num_buffers,
- first_ce_slot, num_ce_slots, ce_offset);
+ si_init_descriptors(descs, shader_userdata_index, 4, num_buffers);
}
static void si_release_buffer_resources(struct si_buffer_resources *buffers,
@@ -1110,14 +1016,19 @@
* directly through a staging buffer and don't go through
* the fine-grained upload path.
*/
+ unsigned buffer_offset = 0;
u_upload_alloc(sctx->b.b.const_uploader, 0,
desc_list_byte_size,
si_optimal_tcc_alignment(sctx, desc_list_byte_size),
- (unsigned*)&desc->buffer_offset,
+ &buffer_offset,
(struct pipe_resource**)&desc->buffer, (void**)&ptr);
- if (!desc->buffer)
+ if (!desc->buffer) {
+ desc->gpu_address = 0;
return false;
+ }
+ desc->gpu_address = desc->buffer->gpu_address + buffer_offset;
+ desc->list = ptr;
radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
desc->buffer, RADEON_USAGE_READ,
RADEON_PRIO_DESCRIPTORS);
@@ -1168,11 +1079,10 @@
* on performance (confirmed by testing). New descriptors are always
* uploaded to a fresh new buffer, so I don't think flushing the const
* cache is needed. */
- si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom);
- if (sctx->b.chip_class >= CIK)
- si_mark_atom_dirty(sctx, &sctx->prefetch_L2);
+ si_mark_atom_dirty(sctx, &sctx->shader_pointers.atom);
sctx->vertex_buffers_dirty = false;
sctx->vertex_buffer_pointer_dirty = true;
+ sctx->prefetch_L2_mask |= SI_PREFETCH_VBO_DESCRIPTORS;
return true;
}
@@ -1269,7 +1179,6 @@
buffers->enabled_mask &= ~(1u << slot);
}
- descs->dirty_mask |= 1u << slot;
sctx->descriptors_dirty |= 1u << descriptors_idx;
}
@@ -1331,7 +1240,6 @@
pipe_resource_reference(&buffers->buffers[slot], NULL);
memset(desc, 0, sizeof(uint32_t) * 4);
buffers->enabled_mask &= ~(1u << slot);
- descs->dirty_mask |= 1u << slot;
sctx->descriptors_dirty |=
1u << si_const_and_shader_buffer_descriptors_idx(shader);
continue;
@@ -1358,7 +1266,6 @@
buf->bind_history |= PIPE_BIND_SHADER_BUFFER;
buffers->enabled_mask |= 1u << slot;
- descs->dirty_mask |= 1u << slot;
sctx->descriptors_dirty |=
1u << si_const_and_shader_buffer_descriptors_idx(shader);
@@ -1478,122 +1385,6 @@
buffers->enabled_mask &= ~(1u << slot);
}
- descs->dirty_mask |= 1u << slot;
- sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS;
-}
-
-/* STREAMOUT BUFFERS */
-
-static void si_set_streamout_targets(struct pipe_context *ctx,
- unsigned num_targets,
- struct pipe_stream_output_target **targets,
- const unsigned *offsets)
-{
- struct si_context *sctx = (struct si_context *)ctx;
- struct si_buffer_resources *buffers = &sctx->rw_buffers;
- struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_RW_BUFFERS];
- unsigned old_num_targets = sctx->b.streamout.num_targets;
- unsigned i, bufidx;
-
- /* We are going to unbind the buffers. Mark which caches need to be flushed. */
- if (sctx->b.streamout.num_targets && sctx->b.streamout.begin_emitted) {
- /* Since streamout uses vector writes which go through TC L2
- * and most other clients can use TC L2 as well, we don't need
- * to flush it.
- *
- * The only cases which requires flushing it is VGT DMA index
- * fetching (on <= CIK) and indirect draw data, which are rare
- * cases. Thus, flag the TC L2 dirtiness in the resource and
- * handle it at draw call time.
- */
- for (i = 0; i < sctx->b.streamout.num_targets; i++)
- if (sctx->b.streamout.targets[i])
- r600_resource(sctx->b.streamout.targets[i]->b.buffer)->TC_L2_dirty = true;
-
- /* Invalidate the scalar cache in case a streamout buffer is
- * going to be used as a constant buffer.
- *
- * Invalidate TC L1, because streamout bypasses it (done by
- * setting GLC=1 in the store instruction), but it can contain
- * outdated data of streamout buffers.
- *
- * VS_PARTIAL_FLUSH is required if the buffers are going to be
- * used as an input immediately.
- */
- sctx->b.flags |= SI_CONTEXT_INV_SMEM_L1 |
- SI_CONTEXT_INV_VMEM_L1 |
- SI_CONTEXT_VS_PARTIAL_FLUSH;
- }
-
- /* All readers of the streamout targets need to be finished before we can
- * start writing to the targets.
- */
- if (num_targets)
- sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
- SI_CONTEXT_CS_PARTIAL_FLUSH;
-
- /* Streamout buffers must be bound in 2 places:
- * 1) in VGT by setting the VGT_STRMOUT registers
- * 2) as shader resources
- */
-
- /* Set the VGT regs. */
- r600_set_streamout_targets(ctx, num_targets, targets, offsets);
-
- /* Set the shader resources.*/
- for (i = 0; i < num_targets; i++) {
- bufidx = SI_VS_STREAMOUT_BUF0 + i;
-
- if (targets[i]) {
- struct pipe_resource *buffer = targets[i]->buffer;
- uint64_t va = r600_resource(buffer)->gpu_address;
-
- /* Set the descriptor.
- *
- * On VI, the format must be non-INVALID, otherwise
- * the buffer will be considered not bound and store
- * instructions will be no-ops.
- */
- uint32_t *desc = descs->list + bufidx*4;
- desc[0] = va;
- desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32);
- desc[2] = 0xffffffff;
- desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
- S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
- S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
- S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
- S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
-
- /* Set the resource. */
- pipe_resource_reference(&buffers->buffers[bufidx],
- buffer);
- radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
- (struct r600_resource*)buffer,
- buffers->shader_usage,
- RADEON_PRIO_SHADER_RW_BUFFER,
- true);
- r600_resource(buffer)->bind_history |= PIPE_BIND_STREAM_OUTPUT;
-
- buffers->enabled_mask |= 1u << bufidx;
- } else {
- /* Clear the descriptor and unset the resource. */
- memset(descs->list + bufidx*4, 0,
- sizeof(uint32_t) * 4);
- pipe_resource_reference(&buffers->buffers[bufidx],
- NULL);
- buffers->enabled_mask &= ~(1u << bufidx);
- }
- descs->dirty_mask |= 1u << bufidx;
- }
- for (; i < old_num_targets; i++) {
- bufidx = SI_VS_STREAMOUT_BUF0 + i;
- /* Clear the descriptor and unset the resource. */
- memset(descs->list + bufidx*4, 0, sizeof(uint32_t) * 4);
- pipe_resource_reference(&buffers->buffers[bufidx], NULL);
- buffers->enabled_mask &= ~(1u << bufidx);
- descs->dirty_mask |= 1u << bufidx;
- }
-
sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS;
}
@@ -1602,8 +1393,7 @@
struct pipe_resource *new_buf)
{
/* Retrieve the buffer offset from the descriptor. */
- uint64_t old_desc_va =
- desc[0] | ((uint64_t)G_008F04_BASE_ADDRESS_HI(desc[1]) << 32);
+ uint64_t old_desc_va = si_desc_extract_buffer_address(desc);
assert(old_buf_va <= old_desc_va);
uint64_t offset_within_buffer = old_desc_va - old_buf_va;
@@ -1710,7 +1500,6 @@
si_desc_reset_buffer_offset(&sctx->b.b,
descs->list + i*4,
old_va, buf);
- descs->dirty_mask |= 1u << i;
sctx->descriptors_dirty |= 1u << descriptors_idx;
radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
@@ -1764,7 +1553,6 @@
si_desc_reset_buffer_offset(ctx, descs->list + i*4,
old_va, buf);
- descs->dirty_mask |= 1u << i;
sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS;
radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
@@ -1773,11 +1561,11 @@
true);
/* Update the streamout state. */
- if (sctx->b.streamout.begin_emitted)
- r600_emit_streamout_end(&sctx->b);
- sctx->b.streamout.append_bitmask =
- sctx->b.streamout.enabled_mask;
- r600_streamout_buffers_dirty(&sctx->b);
+ if (sctx->streamout.begin_emitted)
+ si_emit_streamout_end(sctx);
+ sctx->streamout.append_bitmask =
+ sctx->streamout.enabled_mask;
+ si_streamout_buffers_dirty(sctx);
}
}
@@ -1805,21 +1593,20 @@
if (rbuffer->bind_history & PIPE_BIND_SAMPLER_VIEW) {
/* Texture buffers - update bindings. */
for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
- struct si_sampler_views *views = &sctx->samplers[shader].views;
+ struct si_samplers *samplers = &sctx->samplers[shader];
struct si_descriptors *descs =
si_sampler_and_image_descriptors(sctx, shader);
- unsigned mask = views->enabled_mask;
+ unsigned mask = samplers->enabled_mask;
while (mask) {
unsigned i = u_bit_scan(&mask);
- if (views->views[i]->texture == buf) {
+ if (samplers->views[i]->texture == buf) {
unsigned desc_slot = si_get_sampler_slot(i);
si_desc_reset_buffer_offset(ctx,
descs->list +
desc_slot * 16 + 4,
old_va, buf);
- descs->dirty_mask |= 1ull << desc_slot;
sctx->descriptors_dirty |=
1u << si_sampler_and_image_descriptors_idx(shader);
@@ -1835,7 +1622,7 @@
/* Shader images */
if (rbuffer->bind_history & PIPE_BIND_SHADER_IMAGE) {
for (shader = 0; shader < SI_NUM_SHADERS; ++shader) {
- struct si_images_info *images = &sctx->images[shader];
+ struct si_images *images = &sctx->images[shader];
struct si_descriptors *descs =
si_sampler_and_image_descriptors(sctx, shader);
unsigned mask = images->enabled_mask;
@@ -1852,8 +1639,6 @@
si_desc_reset_buffer_offset(
ctx, descs->list + desc_slot * 8 + 4,
old_va, buf);
- /* two 8-byte images share one 16-byte slot */
- descs->dirty_mask |= 1u << (desc_slot / 2);
sctx->descriptors_dirty |=
1u << si_sampler_and_image_descriptors_idx(shader);
@@ -1868,16 +1653,20 @@
/* Bindless texture handles */
if (rbuffer->texture_handle_allocated) {
+ struct si_descriptors *descs = &sctx->bindless_descriptors;
+
util_dynarray_foreach(&sctx->resident_tex_handles,
struct si_texture_handle *, tex_handle) {
struct pipe_sampler_view *view = (*tex_handle)->view;
- struct si_bindless_descriptor *desc = (*tex_handle)->desc;
+ unsigned desc_slot = (*tex_handle)->desc_slot;
if (view->texture == buf) {
si_set_buf_desc_address(rbuffer,
view->u.buf.offset,
- &desc->desc_list[4]);
- desc->dirty = true;
+ descs->list +
+ desc_slot * 16 + 4);
+
+ (*tex_handle)->desc_dirty = true;
sctx->bindless_descriptors_dirty = true;
radeon_add_to_buffer_list_check_mem(
@@ -1890,10 +1679,12 @@
/* Bindless image handles */
if (rbuffer->image_handle_allocated) {
+ struct si_descriptors *descs = &sctx->bindless_descriptors;
+
util_dynarray_foreach(&sctx->resident_img_handles,
struct si_image_handle *, img_handle) {
struct pipe_image_view *view = &(*img_handle)->view;
- struct si_bindless_descriptor *desc = (*img_handle)->desc;
+ unsigned desc_slot = (*img_handle)->desc_slot;
if (view->resource == buf) {
if (view->access & PIPE_IMAGE_ACCESS_WRITE)
@@ -1901,8 +1692,10 @@
si_set_buf_desc_address(rbuffer,
view->u.buf.offset,
- &desc->desc_list[4]);
- desc->dirty = true;
+ descs->list +
+ desc_slot * 16 + 4);
+
+ (*img_handle)->desc_dirty = true;
sctx->bindless_descriptors_dirty = true;
radeon_add_to_buffer_list_check_mem(
@@ -1928,17 +1721,23 @@
uint64_t old_va = rbuffer->gpu_address;
/* Reallocate the buffer in the same pipe_resource. */
- r600_alloc_resource(&sctx->screen->b, rbuffer);
+ si_alloc_resource(&sctx->screen->b, rbuffer);
si_rebind_buffer(ctx, buf, old_va);
}
static void si_upload_bindless_descriptor(struct si_context *sctx,
- struct si_bindless_descriptor *desc)
+ unsigned desc_slot,
+ unsigned num_dwords)
{
+ struct si_descriptors *desc = &sctx->bindless_descriptors;
struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
- uint64_t va = desc->buffer->gpu_address + desc->offset;
- unsigned num_dwords = sizeof(desc->desc_list) / 4;
+ unsigned desc_slot_offset = desc_slot * 16;
+ uint32_t *data;
+ uint64_t va;
+
+ data = desc->list + desc_slot_offset;
+ va = desc->gpu_address + desc_slot_offset * 4;
radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + num_dwords, 0));
radeon_emit(cs, S_370_DST_SEL(V_370_TC_L2) |
@@ -1946,7 +1745,7 @@
S_370_ENGINE_SEL(V_370_ME));
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
- radeon_emit_array(cs, desc->desc_list, num_dwords);
+ radeon_emit_array(cs, data, num_dwords);
}
static void si_upload_bindless_descriptors(struct si_context *sctx)
@@ -1963,24 +1762,24 @@
util_dynarray_foreach(&sctx->resident_tex_handles,
struct si_texture_handle *, tex_handle) {
- struct si_bindless_descriptor *desc = (*tex_handle)->desc;
+ unsigned desc_slot = (*tex_handle)->desc_slot;
- if (!desc->dirty)
+ if (!(*tex_handle)->desc_dirty)
continue;
- si_upload_bindless_descriptor(sctx, desc);
- desc->dirty = false;
+ si_upload_bindless_descriptor(sctx, desc_slot, 16);
+ (*tex_handle)->desc_dirty = false;
}
util_dynarray_foreach(&sctx->resident_img_handles,
struct si_image_handle *, img_handle) {
- struct si_bindless_descriptor *desc = (*img_handle)->desc;
+ unsigned desc_slot = (*img_handle)->desc_slot;
- if (!desc->dirty)
+ if (!(*img_handle)->desc_dirty)
continue;
- si_upload_bindless_descriptor(sctx, desc);
- desc->dirty = false;
+ si_upload_bindless_descriptor(sctx, desc_slot, 8);
+ (*img_handle)->desc_dirty = false;
}
/* Invalidate L1 because it doesn't know that L2 changed. */
@@ -1991,41 +1790,47 @@
}
/* Update mutable image descriptor fields of all resident textures. */
-static void si_update_resident_texture_descriptor(struct si_context *sctx,
+static void si_update_bindless_texture_descriptor(struct si_context *sctx,
struct si_texture_handle *tex_handle)
{
struct si_sampler_view *sview = (struct si_sampler_view *)tex_handle->view;
- struct si_bindless_descriptor *desc = tex_handle->desc;
+ struct si_descriptors *desc = &sctx->bindless_descriptors;
+ unsigned desc_slot_offset = tex_handle->desc_slot * 16;
uint32_t desc_list[16];
if (sview->base.texture->target == PIPE_BUFFER)
return;
- memcpy(desc_list, desc->desc_list, sizeof(desc_list));
+ memcpy(desc_list, desc->list + desc_slot_offset, sizeof(desc_list));
si_set_sampler_view_desc(sctx, sview, &tex_handle->sstate,
- &desc->desc_list[0]);
+ desc->list + desc_slot_offset);
- if (memcmp(desc_list, desc->desc_list, sizeof(desc_list))) {
- desc->dirty = true;
+ if (memcmp(desc_list, desc->list + desc_slot_offset,
+ sizeof(desc_list))) {
+ tex_handle->desc_dirty = true;
sctx->bindless_descriptors_dirty = true;
}
}
-static void si_update_resident_image_descriptor(struct si_context *sctx,
+static void si_update_bindless_image_descriptor(struct si_context *sctx,
struct si_image_handle *img_handle)
{
- struct si_bindless_descriptor *desc = img_handle->desc;
+ struct si_descriptors *desc = &sctx->bindless_descriptors;
+ unsigned desc_slot_offset = img_handle->desc_slot * 16;
struct pipe_image_view *view = &img_handle->view;
- uint32_t desc_list[16];
+ uint32_t desc_list[8];
if (view->resource->target == PIPE_BUFFER)
return;
- memcpy(desc_list, desc->desc_list, sizeof(desc_list));
- si_set_shader_image_desc(sctx, view, true, &desc->desc_list[0]);
-
- if (memcmp(desc_list, desc->desc_list, sizeof(desc_list))) {
- desc->dirty = true;
+ memcpy(desc_list, desc->list + desc_slot_offset,
+ sizeof(desc_list));
+ si_set_shader_image_desc(sctx, view, true,
+ desc->list + desc_slot_offset);
+
+ if (memcmp(desc_list, desc->list + desc_slot_offset,
+ sizeof(desc_list))) {
+ img_handle->desc_dirty = true;
sctx->bindless_descriptors_dirty = true;
}
}
@@ -2034,12 +1839,12 @@
{
util_dynarray_foreach(&sctx->resident_tex_handles,
struct si_texture_handle *, tex_handle) {
- si_update_resident_texture_descriptor(sctx, *tex_handle);
+ si_update_bindless_texture_descriptor(sctx, *tex_handle);
}
util_dynarray_foreach(&sctx->resident_img_handles,
struct si_image_handle *, img_handle) {
- si_update_resident_image_descriptor(sctx, *img_handle);
+ si_update_bindless_image_descriptor(sctx, *img_handle);
}
si_upload_bindless_descriptors(sctx);
@@ -2051,8 +1856,8 @@
unsigned shader;
for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
- struct si_sampler_views *samplers = &sctx->samplers[shader].views;
- struct si_images_info *images = &sctx->images[shader];
+ struct si_samplers *samplers = &sctx->samplers[shader];
+ struct si_images *images = &sctx->images[shader];
unsigned mask;
/* Images. */
@@ -2101,14 +1906,16 @@
if (shader == PIPE_SHADER_VERTEX)
sctx->vertex_buffer_pointer_dirty = sctx->vertex_buffers.buffer != NULL;
- si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom);
+ si_mark_atom_dirty(sctx, &sctx->shader_pointers.atom);
}
-static void si_shader_userdata_begin_new_cs(struct si_context *sctx)
+static void si_shader_pointers_begin_new_cs(struct si_context *sctx)
{
sctx->shader_pointers_dirty = u_bit_consecutive(0, SI_NUM_DESCS);
sctx->vertex_buffer_pointer_dirty = sctx->vertex_buffers.buffer != NULL;
- si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom);
+ si_mark_atom_dirty(sctx, &sctx->shader_pointers.atom);
+ sctx->graphics_bindless_pointer_dirty = sctx->bindless_descriptors.buffer != NULL;
+ sctx->compute_bindless_pointer_dirty = sctx->bindless_descriptors.buffer != NULL;
}
/* Set a base register address for user data constants in the given shader.
@@ -2117,7 +1924,7 @@
static void si_set_user_data_base(struct si_context *sctx,
unsigned shader, uint32_t new_base)
{
- uint32_t *base = &sctx->shader_userdata.sh_base[shader];
+ uint32_t *base = &sctx->shader_pointers.sh_base[shader];
if (*base != new_base) {
*base = new_base;
@@ -2169,73 +1976,101 @@
}
}
+static void si_emit_shader_pointer_head(struct radeon_winsys_cs *cs,
+ struct si_descriptors *desc,
+ unsigned sh_base,
+ unsigned pointer_count)
+{
+ radeon_emit(cs, PKT3(PKT3_SET_SH_REG, pointer_count * 2, 0));
+ radeon_emit(cs, (sh_base + desc->shader_userdata_offset - SI_SH_REG_OFFSET) >> 2);
+}
+
+static void si_emit_shader_pointer_body(struct radeon_winsys_cs *cs,
+ struct si_descriptors *desc)
+{
+ uint64_t va = desc->gpu_address;
+
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+}
+
static void si_emit_shader_pointer(struct si_context *sctx,
struct si_descriptors *desc,
unsigned sh_base)
{
struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
- uint64_t va;
-
- if (!desc->buffer)
- return; /* the pointer is not used by current shaders */
- va = desc->buffer->gpu_address +
- desc->buffer_offset;
-
- radeon_emit(cs, PKT3(PKT3_SET_SH_REG, 2, 0));
- radeon_emit(cs, (sh_base + desc->shader_userdata_offset - SI_SH_REG_OFFSET) >> 2);
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
+ si_emit_shader_pointer_head(cs, desc, sh_base, 1);
+ si_emit_shader_pointer_body(cs, desc);
}
-void si_emit_graphics_shader_userdata(struct si_context *sctx,
- struct r600_atom *atom)
+static void si_emit_consecutive_shader_pointers(struct si_context *sctx,
+ unsigned pointer_mask,
+ unsigned sh_base)
{
- unsigned mask;
- uint32_t *sh_base = sctx->shader_userdata.sh_base;
- struct si_descriptors *descs;
+ if (!sh_base)
+ return;
- descs = &sctx->descriptors[SI_DESCS_RW_BUFFERS];
+ struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
+ unsigned mask = sctx->shader_pointers_dirty & pointer_mask;
- if (sctx->shader_pointers_dirty & (1 << SI_DESCS_RW_BUFFERS)) {
- si_emit_shader_pointer(sctx, descs,
- R_00B030_SPI_SHADER_USER_DATA_PS_0);
- si_emit_shader_pointer(sctx, descs,
- R_00B130_SPI_SHADER_USER_DATA_VS_0);
+ while (mask) {
+ int start, count;
+ u_bit_scan_consecutive_range(&mask, &start, &count);
- if (sctx->b.chip_class >= GFX9) {
- /* GFX9 merged LS-HS and ES-GS.
- * Set RW_BUFFERS in the special registers, so that
- * it's preloaded into s[0:1] instead of s[8:9].
- */
- si_emit_shader_pointer(sctx, descs,
- R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS);
- si_emit_shader_pointer(sctx, descs,
- R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS);
- } else {
- si_emit_shader_pointer(sctx, descs,
- R_00B230_SPI_SHADER_USER_DATA_GS_0);
- si_emit_shader_pointer(sctx, descs,
- R_00B330_SPI_SHADER_USER_DATA_ES_0);
- si_emit_shader_pointer(sctx, descs,
- R_00B430_SPI_SHADER_USER_DATA_HS_0);
- si_emit_shader_pointer(sctx, descs,
- R_00B530_SPI_SHADER_USER_DATA_LS_0);
- }
+ struct si_descriptors *descs = &sctx->descriptors[start];
+
+ si_emit_shader_pointer_head(cs, descs, sh_base, count);
+ for (int i = 0; i < count; i++)
+ si_emit_shader_pointer_body(cs, descs + i);
}
+}
- mask = sctx->shader_pointers_dirty &
- u_bit_consecutive(SI_DESCS_FIRST_SHADER,
- SI_DESCS_FIRST_COMPUTE - SI_DESCS_FIRST_SHADER);
+static void si_emit_global_shader_pointers(struct si_context *sctx,
+ struct si_descriptors *descs)
+{
+ if (sctx->b.chip_class == GFX9) {
+ /* Broadcast it to all shader stages. */
+ si_emit_shader_pointer(sctx, descs,
+ R_00B530_SPI_SHADER_USER_DATA_COMMON_0);
+ return;
+ }
- while (mask) {
- unsigned i = u_bit_scan(&mask);
- unsigned shader = (i - SI_DESCS_FIRST_SHADER) / SI_NUM_SHADER_DESCS;
- unsigned base = sh_base[shader];
+ si_emit_shader_pointer(sctx, descs,
+ R_00B030_SPI_SHADER_USER_DATA_PS_0);
+ si_emit_shader_pointer(sctx, descs,
+ R_00B130_SPI_SHADER_USER_DATA_VS_0);
+ si_emit_shader_pointer(sctx, descs,
+ R_00B330_SPI_SHADER_USER_DATA_ES_0);
+ si_emit_shader_pointer(sctx, descs,
+ R_00B230_SPI_SHADER_USER_DATA_GS_0);
+ si_emit_shader_pointer(sctx, descs,
+ R_00B430_SPI_SHADER_USER_DATA_HS_0);
+ si_emit_shader_pointer(sctx, descs,
+ R_00B530_SPI_SHADER_USER_DATA_LS_0);
+}
- if (base)
- si_emit_shader_pointer(sctx, descs + i, base);
+void si_emit_graphics_shader_pointers(struct si_context *sctx,
+ struct r600_atom *atom)
+{
+ uint32_t *sh_base = sctx->shader_pointers.sh_base;
+
+ if (sctx->shader_pointers_dirty & (1 << SI_DESCS_RW_BUFFERS)) {
+ si_emit_global_shader_pointers(sctx,
+ &sctx->descriptors[SI_DESCS_RW_BUFFERS]);
}
+
+ si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(VERTEX),
+ sh_base[PIPE_SHADER_VERTEX]);
+ si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(TESS_CTRL),
+ sh_base[PIPE_SHADER_TESS_CTRL]);
+ si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(TESS_EVAL),
+ sh_base[PIPE_SHADER_TESS_EVAL]);
+ si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(GEOMETRY),
+ sh_base[PIPE_SHADER_GEOMETRY]);
+ si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(FRAGMENT),
+ sh_base[PIPE_SHADER_FRAGMENT]);
+
sctx->shader_pointers_dirty &=
~u_bit_consecutive(SI_DESCS_RW_BUFFERS, SI_DESCS_FIRST_COMPUTE);
@@ -2244,158 +2079,128 @@
sh_base[PIPE_SHADER_VERTEX]);
sctx->vertex_buffer_pointer_dirty = false;
}
+
+ if (sctx->graphics_bindless_pointer_dirty) {
+ si_emit_global_shader_pointers(sctx,
+ &sctx->bindless_descriptors);
+ sctx->graphics_bindless_pointer_dirty = false;
+ }
}
-void si_emit_compute_shader_userdata(struct si_context *sctx)
+void si_emit_compute_shader_pointers(struct si_context *sctx)
{
unsigned base = R_00B900_COMPUTE_USER_DATA_0;
- struct si_descriptors *descs = sctx->descriptors;
- unsigned compute_mask =
- u_bit_consecutive(SI_DESCS_FIRST_COMPUTE, SI_NUM_SHADER_DESCS);
- unsigned mask = sctx->shader_pointers_dirty & compute_mask;
- while (mask) {
- unsigned i = u_bit_scan(&mask);
-
- si_emit_shader_pointer(sctx, descs + i, base);
+ si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(COMPUTE),
+ R_00B900_COMPUTE_USER_DATA_0);
+ sctx->shader_pointers_dirty &= ~SI_DESCS_SHADER_MASK(COMPUTE);
+
+ if (sctx->compute_bindless_pointer_dirty) {
+ si_emit_shader_pointer(sctx, &sctx->bindless_descriptors, base);
+ sctx->compute_bindless_pointer_dirty = false;
}
- sctx->shader_pointers_dirty &= ~compute_mask;
}
/* BINDLESS */
-struct si_bindless_descriptor_slab
+static void si_init_bindless_descriptors(struct si_context *sctx,
+ struct si_descriptors *desc,
+ unsigned shader_userdata_index,
+ unsigned num_elements)
{
- struct pb_slab base;
- struct r600_resource *buffer;
- struct si_bindless_descriptor *entries;
-};
-
-bool si_bindless_descriptor_can_reclaim_slab(void *priv,
- struct pb_slab_entry *entry)
-{
- /* Do not allow to reclaim any bindless descriptors for now because the
- * GPU might be using them. This should be improved later on.
- */
- return false;
-}
-
-struct pb_slab *si_bindless_descriptor_slab_alloc(void *priv, unsigned heap,
- unsigned entry_size,
- unsigned group_index)
-{
- struct si_context *sctx = priv;
- struct si_screen *sscreen = sctx->screen;
- struct si_bindless_descriptor_slab *slab;
-
- slab = CALLOC_STRUCT(si_bindless_descriptor_slab);
- if (!slab)
- return NULL;
-
- /* Create a buffer in VRAM for 1024 bindless descriptors. */
- slab->buffer = (struct r600_resource *)
- pipe_buffer_create(&sscreen->b.b, 0,
- PIPE_USAGE_DEFAULT, 64 * 1024);
- if (!slab->buffer)
- goto fail;
-
- slab->base.num_entries = slab->buffer->bo_size / entry_size;
- slab->base.num_free = slab->base.num_entries;
- slab->entries = CALLOC(slab->base.num_entries, sizeof(*slab->entries));
- if (!slab->entries)
- goto fail_buffer;
+ MAYBE_UNUSED unsigned desc_slot;
- LIST_INITHEAD(&slab->base.free);
+ si_init_descriptors(desc, shader_userdata_index, 16, num_elements);
+ sctx->bindless_descriptors.num_active_slots = num_elements;
- for (unsigned i = 0; i < slab->base.num_entries; ++i) {
- struct si_bindless_descriptor *desc = &slab->entries[i];
-
- desc->entry.slab = &slab->base;
- desc->entry.group_index = group_index;
- desc->buffer = slab->buffer;
- desc->offset = i * entry_size;
-
- LIST_ADDTAIL(&desc->entry.head, &slab->base.free);
- }
+ /* The first bindless descriptor is stored at slot 1, because 0 is not
+ * considered to be a valid handle.
+ */
+ sctx->num_bindless_descriptors = 1;
- /* Add the descriptor to the per-context list. */
- util_dynarray_append(&sctx->bindless_descriptors,
- struct r600_resource *, slab->buffer);
+ /* Track which bindless slots are used (or not). */
+ util_idalloc_init(&sctx->bindless_used_slots);
+ util_idalloc_resize(&sctx->bindless_used_slots, num_elements);
- return &slab->base;
+ /* Reserve slot 0 because it's an invalid handle for bindless. */
+ desc_slot = util_idalloc_alloc(&sctx->bindless_used_slots);
+ assert(desc_slot == 0);
+}
-fail_buffer:
- r600_resource_reference(&slab->buffer, NULL);
-fail:
- FREE(slab);
- return NULL;
+static void si_release_bindless_descriptors(struct si_context *sctx)
+{
+ si_release_descriptors(&sctx->bindless_descriptors);
+ util_idalloc_fini(&sctx->bindless_used_slots);
}
-void si_bindless_descriptor_slab_free(void *priv, struct pb_slab *pslab)
+static unsigned si_get_first_free_bindless_slot(struct si_context *sctx)
{
- struct si_context *sctx = priv;
- struct si_bindless_descriptor_slab *slab =
- (struct si_bindless_descriptor_slab *)pslab;
+ struct si_descriptors *desc = &sctx->bindless_descriptors;
+ unsigned desc_slot;
+
+ desc_slot = util_idalloc_alloc(&sctx->bindless_used_slots);
+ if (desc_slot >= desc->num_elements) {
+ /* The array of bindless descriptors is full, resize it. */
+ unsigned slot_size = desc->element_dw_size * 4;
+ unsigned new_num_elements = desc->num_elements * 2;
- /* Remove the descriptor from the per-context list. */
- util_dynarray_delete_unordered(&sctx->bindless_descriptors,
- struct r600_resource *, slab->buffer);
+ desc->list = REALLOC(desc->list, desc->num_elements * slot_size,
+ new_num_elements * slot_size);
+ desc->num_elements = new_num_elements;
+ desc->num_active_slots = new_num_elements;
+ }
- r600_resource_reference(&slab->buffer, NULL);
- FREE(slab->entries);
- FREE(slab);
+ assert(desc_slot);
+ return desc_slot;
}
-static struct si_bindless_descriptor *
+static unsigned
si_create_bindless_descriptor(struct si_context *sctx, uint32_t *desc_list,
unsigned size)
{
- struct si_screen *sscreen = sctx->screen;
- struct si_bindless_descriptor *desc;
- struct pb_slab_entry *entry;
- void *ptr;
+ struct si_descriptors *desc = &sctx->bindless_descriptors;
+ unsigned desc_slot, desc_slot_offset;
+
+ /* Find a free slot. */
+ desc_slot = si_get_first_free_bindless_slot(sctx);
- /* Sub-allocate the bindless descriptor from a slab to avoid dealing
- * with a ton of buffers and for reducing the winsys overhead.
+ /* For simplicity, sampler and image bindless descriptors use fixed
+ * 16-dword slots for now. Image descriptors only need 8-dword but this
+ * doesn't really matter because no real apps use image handles.
*/
- entry = pb_slab_alloc(&sctx->bindless_descriptor_slabs, 64, 0);
- if (!entry)
- return NULL;
+ desc_slot_offset = desc_slot * 16;
- desc = NULL;
- desc = container_of(entry, desc, entry);
+ /* Copy the descriptor into the array. */
+ memcpy(desc->list + desc_slot_offset, desc_list, size);
- /* Upload the descriptor directly in VRAM. Because the slabs are
- * currently never reclaimed, we don't need to synchronize the
- * operation.
+ /* Re-upload the whole array of bindless descriptors into a new buffer.
*/
- ptr = sscreen->b.ws->buffer_map(desc->buffer->buf, NULL,
- PIPE_TRANSFER_WRITE |
- PIPE_TRANSFER_UNSYNCHRONIZED);
- util_memcpy_cpu_to_le32(ptr + desc->offset, desc_list, size);
+ if (!si_upload_descriptors(sctx, desc))
+ return 0;
- /* Keep track of the initial descriptor especially for buffers
- * invalidation because we might need to know the previous address.
- */
- memcpy(desc->desc_list, desc_list, sizeof(desc->desc_list));
+ /* Make sure to re-emit the shader pointers for all stages. */
+ sctx->graphics_bindless_pointer_dirty = true;
+ sctx->compute_bindless_pointer_dirty = true;
- return desc;
+ return desc_slot;
}
-static void si_invalidate_bindless_buf_desc(struct si_context *sctx,
- struct si_bindless_descriptor *desc,
- struct pipe_resource *resource,
- uint64_t offset)
+static void si_update_bindless_buffer_descriptor(struct si_context *sctx,
+ unsigned desc_slot,
+ struct pipe_resource *resource,
+ uint64_t offset,
+ bool *desc_dirty)
{
+ struct si_descriptors *desc = &sctx->bindless_descriptors;
struct r600_resource *buf = r600_resource(resource);
- uint32_t *desc_list = desc->desc_list + 4;
+ unsigned desc_slot_offset = desc_slot * 16;
+ uint32_t *desc_list = desc->list + desc_slot_offset + 4;
uint64_t old_desc_va;
assert(resource->target == PIPE_BUFFER);
/* Retrieve the old buffer addr from the descriptor. */
- old_desc_va = desc_list[0];
- old_desc_va |= ((uint64_t)G_008F04_BASE_ADDRESS_HI(desc_list[1]) << 32);
+ old_desc_va = si_desc_extract_buffer_address(desc_list);
if (old_desc_va != buf->gpu_address + offset) {
/* The buffer has been invalidated when the handle wasn't
@@ -2403,8 +2208,7 @@
*/
si_set_buf_desc_address(buf, offset, &desc_list[0]);
- desc->dirty = true;
- sctx->bindless_descriptors_dirty = true;
+ *desc_dirty = true;
}
}
@@ -2436,20 +2240,17 @@
memcpy(&tex_handle->sstate, sstate, sizeof(*sstate));
ctx->delete_sampler_state(ctx, sstate);
- tex_handle->desc = si_create_bindless_descriptor(sctx, desc_list,
- sizeof(desc_list));
- if (!tex_handle->desc) {
+ tex_handle->desc_slot = si_create_bindless_descriptor(sctx, desc_list,
+ sizeof(desc_list));
+ if (!tex_handle->desc_slot) {
FREE(tex_handle);
return 0;
}
- handle = tex_handle->desc->buffer->gpu_address +
- tex_handle->desc->offset;
+ handle = tex_handle->desc_slot;
if (!_mesa_hash_table_insert(sctx->tex_handles, (void *)handle,
tex_handle)) {
- pb_slab_free(&sctx->bindless_descriptor_slabs,
- &tex_handle->desc->entry);
FREE(tex_handle);
return 0;
}
@@ -2473,10 +2274,11 @@
tex_handle = (struct si_texture_handle *)entry->data;
+ /* Allow this descriptor slot to be re-used. */
+ util_idalloc_free(&sctx->bindless_used_slots, tex_handle->desc_slot);
+
pipe_sampler_view_reference(&tex_handle->view, NULL);
_mesa_hash_table_remove(sctx->tex_handles, entry);
- pb_slab_free(&sctx->bindless_descriptor_slabs,
- &tex_handle->desc->entry);
FREE(tex_handle);
}
@@ -2518,18 +2320,21 @@
p_atomic_read(&rtex->framebuffers_bound))
sctx->need_check_render_feedback = true;
- /* Re-upload the descriptor if it has been updated
- * while it wasn't resident.
- */
- si_update_resident_texture_descriptor(sctx, tex_handle);
- if (tex_handle->desc->dirty)
- sctx->bindless_descriptors_dirty = true;
+ si_update_bindless_texture_descriptor(sctx, tex_handle);
} else {
- si_invalidate_bindless_buf_desc(sctx, tex_handle->desc,
- sview->base.texture,
- sview->base.u.buf.offset);
+ si_update_bindless_buffer_descriptor(sctx,
+ tex_handle->desc_slot,
+ sview->base.texture,
+ sview->base.u.buf.offset,
+ &tex_handle->desc_dirty);
}
+ /* Re-upload the descriptor if it has been updated while it
+ * wasn't resident.
+ */
+ if (tex_handle->desc_dirty)
+ sctx->bindless_descriptors_dirty = true;
+
/* Add the texture handle to the per-context list. */
util_dynarray_append(&sctx->resident_tex_handles,
struct si_texture_handle *, tex_handle);
@@ -2537,11 +2342,6 @@
/* Add the buffers to the current CS in case si_begin_new_cs()
* is not going to be called.
*/
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
- tex_handle->desc->buffer,
- RADEON_USAGE_READWRITE,
- RADEON_PRIO_DESCRIPTORS);
-
si_sampler_view_add_buffer(sctx, sview->base.texture,
RADEON_USAGE_READ,
sview->is_stencil_sampler, false);
@@ -2568,7 +2368,7 @@
{
struct si_context *sctx = (struct si_context *)ctx;
struct si_image_handle *img_handle;
- uint32_t desc_list[16];
+ uint32_t desc_list[8];
uint64_t handle;
if (!view || !view->resource)
@@ -2583,20 +2383,17 @@
si_set_shader_image_desc(sctx, view, false, &desc_list[0]);
- img_handle->desc = si_create_bindless_descriptor(sctx, desc_list,
- sizeof(desc_list));
- if (!img_handle->desc) {
+ img_handle->desc_slot = si_create_bindless_descriptor(sctx, desc_list,
+ sizeof(desc_list));
+ if (!img_handle->desc_slot) {
FREE(img_handle);
return 0;
}
- handle = img_handle->desc->buffer->gpu_address +
- img_handle->desc->offset;
+ handle = img_handle->desc_slot;
if (!_mesa_hash_table_insert(sctx->img_handles, (void *)handle,
img_handle)) {
- pb_slab_free(&sctx->bindless_descriptor_slabs,
- &img_handle->desc->entry);
FREE(img_handle);
return 0;
}
@@ -2622,8 +2419,6 @@
util_copy_image_view(&img_handle->view, NULL);
_mesa_hash_table_remove(sctx->img_handles, entry);
- pb_slab_free(&sctx->bindless_descriptor_slabs,
- &img_handle->desc->entry);
FREE(img_handle);
}
@@ -2661,19 +2456,21 @@
p_atomic_read(&rtex->framebuffers_bound))
sctx->need_check_render_feedback = true;
- /* Re-upload the descriptor if it has been updated
- * while it wasn't resident.
- */
- si_update_resident_image_descriptor(sctx, img_handle);
- if (img_handle->desc->dirty)
- sctx->bindless_descriptors_dirty = true;
-
+ si_update_bindless_image_descriptor(sctx, img_handle);
} else {
- si_invalidate_bindless_buf_desc(sctx, img_handle->desc,
- view->resource,
- view->u.buf.offset);
+ si_update_bindless_buffer_descriptor(sctx,
+ img_handle->desc_slot,
+ view->resource,
+ view->u.buf.offset,
+ &img_handle->desc_dirty);
}
+ /* Re-upload the descriptor if it has been updated while it
+ * wasn't resident.
+ */
+ if (img_handle->desc_dirty)
+ sctx->bindless_descriptors_dirty = true;
+
/* Add the image handle to the per-context list. */
util_dynarray_append(&sctx->resident_img_handles,
struct si_image_handle *, img_handle);
@@ -2681,11 +2478,6 @@
/* Add the buffers to the current CS in case si_begin_new_cs()
* is not going to be called.
*/
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
- img_handle->desc->buffer,
- RADEON_USAGE_READWRITE,
- RADEON_PRIO_DESCRIPTORS);
-
si_sampler_view_add_buffer(sctx, view->resource,
(access & PIPE_IMAGE_ACCESS_WRITE) ?
RADEON_USAGE_READWRITE :
@@ -2715,20 +2507,6 @@
num_resident_img_handles = sctx->resident_img_handles.size /
sizeof(struct si_image_handle *);
- /* Skip adding the bindless descriptors when no handles are resident.
- */
- if (!num_resident_tex_handles && !num_resident_img_handles)
- return;
-
- /* Add all bindless descriptors. */
- util_dynarray_foreach(&sctx->bindless_descriptors,
- struct r600_resource *, desc) {
-
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, *desc,
- RADEON_USAGE_READWRITE,
- RADEON_PRIO_DESCRIPTORS);
- }
-
/* Add all resident texture handles. */
util_dynarray_foreach(&sctx->resident_tex_handles,
struct si_texture_handle *, tex_handle) {
@@ -2756,52 +2534,9 @@
/* INIT/DEINIT/UPLOAD */
-/* GFX9 has only 4KB of CE, while previous chips had 32KB. In order
- * to make CE RAM as useful as possible, this defines limits
- * for the number slots that can be in CE RAM on GFX9. If a shader
- * is using more, descriptors will be uploaded to memory directly and
- * CE won't be used.
- *
- * These numbers are based on shader-db.
- */
-static unsigned gfx9_max_ce_samplers[SI_NUM_SHADERS] = {
- [PIPE_SHADER_VERTEX] = 0,
- [PIPE_SHADER_TESS_CTRL] = 0,
- [PIPE_SHADER_TESS_EVAL] = 1,
- [PIPE_SHADER_GEOMETRY] = 0,
- [PIPE_SHADER_FRAGMENT] = 24,
- [PIPE_SHADER_COMPUTE] = 16,
-};
-static unsigned gfx9_max_ce_images[SI_NUM_SHADERS] = {
- /* these must be even due to slot alignment */
- [PIPE_SHADER_VERTEX] = 0,
- [PIPE_SHADER_TESS_CTRL] = 0,
- [PIPE_SHADER_TESS_EVAL] = 0,
- [PIPE_SHADER_GEOMETRY] = 0,
- [PIPE_SHADER_FRAGMENT] = 2,
- [PIPE_SHADER_COMPUTE] = 8,
-};
-static unsigned gfx9_max_ce_const_buffers[SI_NUM_SHADERS] = {
- [PIPE_SHADER_VERTEX] = 9,
- [PIPE_SHADER_TESS_CTRL] = 3,
- [PIPE_SHADER_TESS_EVAL] = 5,
- [PIPE_SHADER_GEOMETRY] = 0,
- [PIPE_SHADER_FRAGMENT] = 8,
- [PIPE_SHADER_COMPUTE] = 6,
-};
-static unsigned gfx9_max_ce_shader_buffers[SI_NUM_SHADERS] = {
- [PIPE_SHADER_VERTEX] = 0,
- [PIPE_SHADER_TESS_CTRL] = 0,
- [PIPE_SHADER_TESS_EVAL] = 0,
- [PIPE_SHADER_GEOMETRY] = 0,
- [PIPE_SHADER_FRAGMENT] = 12,
- [PIPE_SHADER_COMPUTE] = 13,
-};
-
void si_init_all_descriptors(struct si_context *sctx)
{
int i;
- unsigned ce_offset = 0;
STATIC_ASSERT(GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS % 2 == 0);
STATIC_ASSERT(GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS % 2 == 0);
@@ -2811,50 +2546,31 @@
bool gfx9_gs = false;
unsigned num_sampler_slots = SI_NUM_IMAGES / 2 + SI_NUM_SAMPLERS;
unsigned num_buffer_slots = SI_NUM_SHADER_BUFFERS + SI_NUM_CONST_BUFFERS;
+ struct si_descriptors *desc;
- unsigned first_sampler_ce_slot = 0;
- unsigned num_sampler_ce_slots = num_sampler_slots;
-
- unsigned first_buffer_ce_slot = 0;
- unsigned num_buffer_ce_slots = num_buffer_slots;
-
- /* Adjust CE slot ranges based on GFX9 CE RAM limits. */
if (sctx->b.chip_class >= GFX9) {
gfx9_tcs = i == PIPE_SHADER_TESS_CTRL;
gfx9_gs = i == PIPE_SHADER_GEOMETRY;
-
- first_sampler_ce_slot =
- si_get_image_slot(gfx9_max_ce_images[i] - 1) / 2;
- num_sampler_ce_slots = gfx9_max_ce_images[i] / 2 +
- gfx9_max_ce_samplers[i];
-
- first_buffer_ce_slot =
- si_get_shaderbuf_slot(gfx9_max_ce_shader_buffers[i] - 1);
- num_buffer_ce_slots = gfx9_max_ce_shader_buffers[i] +
- gfx9_max_ce_const_buffers[i];
}
- si_init_buffer_resources(sctx, &sctx->const_and_shader_buffers[i],
- si_const_and_shader_buffer_descriptors(sctx, i),
+ desc = si_const_and_shader_buffer_descriptors(sctx, i);
+ si_init_buffer_resources(&sctx->const_and_shader_buffers[i], desc,
num_buffer_slots,
- first_buffer_ce_slot, num_buffer_ce_slots,
gfx9_tcs ? GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS :
gfx9_gs ? GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS :
SI_SGPR_CONST_AND_SHADER_BUFFERS,
RADEON_USAGE_READWRITE,
RADEON_USAGE_READ,
RADEON_PRIO_SHADER_RW_BUFFER,
- RADEON_PRIO_CONST_BUFFER,
- &ce_offset);
+ RADEON_PRIO_CONST_BUFFER);
+ desc->slot_index_to_bind_directly = si_get_constbuf_slot(0);
- struct si_descriptors *desc = si_sampler_and_image_descriptors(sctx, i);
- si_init_descriptors(sctx, desc,
+ desc = si_sampler_and_image_descriptors(sctx, i);
+ si_init_descriptors(desc,
gfx9_tcs ? GFX9_SGPR_TCS_SAMPLERS_AND_IMAGES :
gfx9_gs ? GFX9_SGPR_GS_SAMPLERS_AND_IMAGES :
SI_SGPR_SAMPLERS_AND_IMAGES,
- 16, num_sampler_slots,
- first_sampler_ce_slot, num_sampler_ce_slots,
- &ce_offset);
+ 16, num_sampler_slots);
int j;
for (j = 0; j < SI_NUM_IMAGES; j++)
@@ -2863,27 +2579,28 @@
memcpy(desc->list + j * 8, null_texture_descriptor, 8 * 4);
}
- si_init_buffer_resources(sctx, &sctx->rw_buffers,
+ si_init_buffer_resources(&sctx->rw_buffers,
&sctx->descriptors[SI_DESCS_RW_BUFFERS],
- SI_NUM_RW_BUFFERS, 0, SI_NUM_RW_BUFFERS,
- SI_SGPR_RW_BUFFERS,
+ SI_NUM_RW_BUFFERS, SI_SGPR_RW_BUFFERS,
/* The second set of usage/priority is used by
* const buffers in RW buffer slots. */
RADEON_USAGE_READWRITE, RADEON_USAGE_READ,
- RADEON_PRIO_SHADER_RINGS, RADEON_PRIO_CONST_BUFFER,
- &ce_offset);
+ RADEON_PRIO_SHADER_RINGS, RADEON_PRIO_CONST_BUFFER);
sctx->descriptors[SI_DESCS_RW_BUFFERS].num_active_slots = SI_NUM_RW_BUFFERS;
- si_init_descriptors(sctx, &sctx->vertex_buffers, SI_SGPR_VERTEX_BUFFERS,
- 4, SI_NUM_VERTEX_BUFFERS, 0, 0, NULL);
+ si_init_descriptors(&sctx->vertex_buffers, SI_SGPR_VERTEX_BUFFERS,
+ 4, SI_NUM_VERTEX_BUFFERS);
+ FREE(sctx->vertex_buffers.list); /* not used */
+ sctx->vertex_buffers.list = NULL;
- sctx->descriptors_dirty = u_bit_consecutive(0, SI_NUM_DESCS);
- sctx->total_ce_ram_allocated = ce_offset;
+ /* Initialize an array of 1024 bindless descriptors, when the limit is
+ * reached, just make it larger and re-upload the whole array.
+ */
+ si_init_bindless_descriptors(sctx, &sctx->bindless_descriptors,
+ SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES,
+ 1024);
- if (sctx->b.chip_class >= GFX9)
- assert(ce_offset <= 4096);
- else
- assert(ce_offset <= 32768);
+ sctx->descriptors_dirty = u_bit_consecutive(0, SI_NUM_DESCS);
/* Set pipe_context functions. */
sctx->b.b.bind_sampler_states = si_bind_sampler_states;
@@ -2892,7 +2609,6 @@
sctx->b.b.set_polygon_stipple = si_set_polygon_stipple;
sctx->b.b.set_shader_buffers = si_set_shader_buffers;
sctx->b.b.set_sampler_views = si_set_sampler_views;
- sctx->b.b.set_stream_output_targets = si_set_streamout_targets;
sctx->b.b.create_texture_handle = si_create_texture_handle;
sctx->b.b.delete_texture_handle = si_delete_texture_handle;
sctx->b.b.make_texture_handle_resident = si_make_texture_handle_resident;
@@ -2903,8 +2619,8 @@
sctx->b.rebind_buffer = si_rebind_buffer;
/* Shader user data. */
- si_init_atom(sctx, &sctx->shader_userdata.atom, &sctx->atoms.s.shader_userdata,
- si_emit_graphics_shader_userdata);
+ si_init_atom(sctx, &sctx->shader_pointers.atom, &sctx->atoms.s.shader_pointers,
+ si_emit_graphics_shader_pointers);
/* Set default and immutable mappings. */
si_set_user_data_base(sctx, PIPE_SHADER_VERTEX, R_00B130_SPI_SHADER_USER_DATA_VS_0);
@@ -2923,9 +2639,8 @@
si_set_user_data_base(sctx, PIPE_SHADER_FRAGMENT, R_00B030_SPI_SHADER_USER_DATA_PS_0);
}
-bool si_upload_graphics_shader_descriptors(struct si_context *sctx)
+static bool si_upload_shader_descriptors(struct si_context *sctx, unsigned mask)
{
- const unsigned mask = u_bit_consecutive(0, SI_DESCS_FIRST_COMPUTE);
unsigned dirty = sctx->descriptors_dirty & mask;
/* Assume nothing will go wrong: */
@@ -2934,8 +2649,7 @@
while (dirty) {
unsigned i = u_bit_scan(&dirty);
- if (!si_upload_descriptors(sctx, &sctx->descriptors[i],
- &sctx->shader_userdata.atom))
+ if (!si_upload_descriptors(sctx, &sctx->descriptors[i]))
return false;
}
@@ -2946,6 +2660,12 @@
return true;
}
+bool si_upload_graphics_shader_descriptors(struct si_context *sctx)
+{
+ const unsigned mask = u_bit_consecutive(0, SI_DESCS_FIRST_COMPUTE);
+ return si_upload_shader_descriptors(sctx, mask);
+}
+
bool si_upload_compute_shader_descriptors(struct si_context *sctx)
{
/* Does not update rw_buffers as that is not needed for compute shaders
@@ -2953,23 +2673,7 @@
*/
const unsigned mask = u_bit_consecutive(SI_DESCS_FIRST_COMPUTE,
SI_NUM_DESCS - SI_DESCS_FIRST_COMPUTE);
- unsigned dirty = sctx->descriptors_dirty & mask;
-
- /* Assume nothing will go wrong: */
- sctx->shader_pointers_dirty |= dirty;
-
- while (dirty) {
- unsigned i = u_bit_scan(&dirty);
-
- if (!si_upload_descriptors(sctx, &sctx->descriptors[i], NULL))
- return false;
- }
-
- sctx->descriptors_dirty &= ~mask;
-
- si_upload_bindless_descriptors(sctx);
-
- return true;
+ return si_upload_shader_descriptors(sctx, mask);
}
void si_release_all_descriptors(struct si_context *sctx)
@@ -2979,7 +2683,7 @@
for (i = 0; i < SI_NUM_SHADERS; i++) {
si_release_buffer_resources(&sctx->const_and_shader_buffers[i],
si_const_and_shader_buffer_descriptors(sctx, i));
- si_release_sampler_views(&sctx->samplers[i].views);
+ si_release_sampler_views(&sctx->samplers[i]);
si_release_image_views(&sctx->images[i]);
}
si_release_buffer_resources(&sctx->rw_buffers,
@@ -2989,7 +2693,10 @@
for (i = 0; i < SI_NUM_DESCS; ++i)
si_release_descriptors(&sctx->descriptors[i]);
+
+ sctx->vertex_buffers.list = NULL; /* points into a mapped buffer */
si_release_descriptors(&sctx->vertex_buffers);
+ si_release_bindless_descriptors(sctx);
}
void si_all_descriptors_begin_new_cs(struct si_context *sctx)
@@ -2998,7 +2705,7 @@
for (i = 0; i < SI_NUM_SHADERS; i++) {
si_buffer_resources_begin_new_cs(sctx, &sctx->const_and_shader_buffers[i]);
- si_sampler_views_begin_new_cs(sctx, &sctx->samplers[i].views);
+ si_sampler_views_begin_new_cs(sctx, &sctx->samplers[i]);
si_image_views_begin_new_cs(sctx, &sctx->images[i]);
}
si_buffer_resources_begin_new_cs(sctx, &sctx->rw_buffers);
@@ -3006,8 +2713,9 @@
for (i = 0; i < SI_NUM_DESCS; ++i)
si_descriptors_begin_new_cs(sctx, &sctx->descriptors[i]);
+ si_descriptors_begin_new_cs(sctx, &sctx->bindless_descriptors);
- si_shader_userdata_begin_new_cs(sctx);
+ si_shader_pointers_begin_new_cs(sctx);
}
void si_set_active_descriptors(struct si_context *sctx, unsigned desc_idx,
@@ -3030,26 +2738,6 @@
first + count > desc->first_active_slot + desc->num_active_slots)
sctx->descriptors_dirty |= 1u << desc_idx;
- /* Enable or disable CE for this descriptor array. */
- bool used_ce = desc->uses_ce;
- desc->uses_ce = desc->first_ce_slot <= first &&
- desc->first_ce_slot + desc->num_ce_slots >= first + count;
-
- if (desc->uses_ce != used_ce) {
- /* Upload or dump descriptors if we're disabling or enabling CE,
- * respectively. */
- sctx->descriptors_dirty |= 1u << desc_idx;
-
- /* If we're enabling CE, re-upload all descriptors to CE RAM.
- * When CE was disabled, uploads to CE RAM stopped.
- */
- if (desc->uses_ce) {
- desc->dirty_mask |=
- u_bit_consecutive64(desc->first_ce_slot,
- desc->num_ce_slots);
- }
- }
-
desc->first_active_slot = first;
desc->num_active_slots = count;
}
diff -Nru mesa-17.2.4/src/gallium/drivers/radeonsi/si_dma.c mesa-17.3.3/src/gallium/drivers/radeonsi/si_dma.c
--- mesa-17.2.4/src/gallium/drivers/radeonsi/si_dma.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeonsi/si_dma.c 2018-01-18 21:30:28.000000000 +0000
@@ -62,7 +62,7 @@
}
ncopy = DIV_ROUND_UP(size, max_size);
- r600_need_dma_space(&ctx->b, ncopy * 5, rdst, rsrc);
+ si_need_dma_space(&ctx->b, ncopy * 5, rdst, rsrc);
for (i = 0; i < ncopy; i++) {
count = MIN2(size, max_size);
@@ -104,7 +104,7 @@
/* the same maximum size as for copying */
ncopy = DIV_ROUND_UP(size, SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE);
- r600_need_dma_space(&sctx->b, ncopy * 4, rdst, NULL);
+ si_need_dma_space(&sctx->b, ncopy * 4, rdst, NULL);
for (i = 0; i < ncopy; i++) {
csize = MIN2(size, SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE);
@@ -193,7 +193,7 @@
mt = G_009910_MICRO_TILE_MODE(tile_mode);
size = copy_height * pitch;
ncopy = DIV_ROUND_UP(size, SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE);
- r600_need_dma_space(&ctx->b, ncopy * 9, &rdst->resource, &rsrc->resource);
+ si_need_dma_space(&ctx->b, ncopy * 9, &rdst->resource, &rsrc->resource);
for (i = 0; i < ncopy; i++) {
cheight = copy_height;
@@ -261,7 +261,7 @@
goto fallback;
if (src_box->depth > 1 ||
- !r600_prepare_for_dma_blit(&sctx->b, rdst, dst_level, dstx, dsty,
+ !si_prepare_for_dma_blit(&sctx->b, rdst, dst_level, dstx, dsty,
dstz, rsrc, src_level, src_box))
goto fallback;
diff -Nru mesa-17.2.4/src/gallium/drivers/radeonsi/si_driinfo.h mesa-17.3.3/src/gallium/drivers/radeonsi/si_driinfo.h
--- mesa-17.2.4/src/gallium/drivers/radeonsi/si_driinfo.h 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeonsi/si_driinfo.h 2018-01-18 21:31:14.000000000 +0000
@@ -0,0 +1,49 @@
+// DO NOT EDIT - this file is automatically generated by merge_driinfo.py
+
+/*
+Use as:
+
+#include "xmlpool.h"
+
+static const char driinfo_xml[] =
+#include "this_file"
+;
+*/
+
+DRI_CONF_BEGIN
+ DRI_CONF_SECTION_PERFORMANCE
+ DRI_CONF_MESA_GLTHREAD("false")
+ DRI_CONF_MESA_NO_ERROR("false")
+ DRI_CONF_DISABLE_EXT_BUFFER_AGE("false")
+ DRI_CONF_DISABLE_OML_SYNC_CONTROL("false")
+ DRI_CONF_RADEONSI_ENABLE_SISCHED("false")
+ DRI_CONF_RADEONSI_ASSUME_NO_Z_FIGHTS("false")
+ DRI_CONF_RADEONSI_COMMUTATIVE_BLEND_ADD("false")
+ DRI_CONF_SECTION_END
+ DRI_CONF_SECTION_QUALITY
+ DRI_CONF_PP_CELSHADE(0)
+ DRI_CONF_PP_NORED(0)
+ DRI_CONF_PP_NOGREEN(0)
+ DRI_CONF_PP_NOBLUE(0)
+ DRI_CONF_PP_JIMENEZMLAA(0, 0, 32)
+ DRI_CONF_PP_JIMENEZMLAA_COLOR(0, 0, 32)
+ DRI_CONF_SECTION_END
+ DRI_CONF_SECTION_DEBUG
+ DRI_CONF_FORCE_GLSL_EXTENSIONS_WARN("false")
+ DRI_CONF_DISABLE_GLSL_LINE_CONTINUATIONS("false")
+ DRI_CONF_DISABLE_BLEND_FUNC_EXTENDED("false")
+ DRI_CONF_DISABLE_SHADER_BIT_ENCODING("false")
+ DRI_CONF_FORCE_GLSL_VERSION(0)
+ DRI_CONF_ALLOW_GLSL_EXTENSION_DIRECTIVE_MIDSHADER("false")
+ DRI_CONF_ALLOW_GLSL_BUILTIN_VARIABLE_REDECLARATION("false")
+ DRI_CONF_ALLOW_GLSL_CROSS_STAGE_INTERPOLATION_MISMATCH("false")
+ DRI_CONF_ALLOW_HIGHER_COMPAT_VERSION("false")
+ DRI_CONF_FORCE_GLSL_ABS_SQRT("false")
+ DRI_CONF_GLSL_CORRECT_DERIVATIVES_AFTER_DISCARD("false")
+ DRI_CONF_RADEONSI_CLEAR_DB_CACHE_BEFORE_CLEAR("false")
+ DRI_CONF_SECTION_END
+ DRI_CONF_SECTION_MISCELLANEOUS
+ DRI_CONF_ALWAYS_HAVE_DEPTH_BUFFER("false")
+ DRI_CONF_GLSL_ZERO_INIT("false")
+ DRI_CONF_SECTION_END
+DRI_CONF_END
diff -Nru mesa-17.2.4/src/gallium/drivers/radeonsi/si_hw_context.c mesa-17.3.3/src/gallium/drivers/radeonsi/si_hw_context.c
--- mesa-17.2.4/src/gallium/drivers/radeonsi/si_hw_context.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeonsi/si_hw_context.c 2018-01-18 21:30:28.000000000 +0000
@@ -27,41 +27,17 @@
#include "si_pipe.h"
#include "radeon/r600_cs.h"
-static unsigned si_descriptor_list_cs_space(unsigned count, unsigned element_size)
+void si_destroy_saved_cs(struct si_saved_cs *scs)
{
- /* Ensure we have enough space to start a new range in a hole */
- assert(element_size >= 3);
-
- /* 5 dwords for write to L2 + 3 bytes for the packet header of
- * every disjoint range written to CE RAM.
- */
- return 5 + (3 * count / 2) + count * element_size;
-}
-
-static unsigned si_ce_needed_cs_space(void)
-{
- unsigned space = 0;
-
- space += si_descriptor_list_cs_space(SI_NUM_SHADER_BUFFERS +
- SI_NUM_CONST_BUFFERS, 4);
- /* two 8-byte images share one 16-byte slot */
- space += si_descriptor_list_cs_space(SI_NUM_IMAGES / 2 +
- SI_NUM_SAMPLERS, 16);
- space *= SI_NUM_SHADERS;
-
- space += si_descriptor_list_cs_space(SI_NUM_RW_BUFFERS, 4);
-
- /* Increment CE counter packet */
- space += 2;
-
- return space;
+ si_clear_saved_cs(&scs->gfx);
+ r600_resource_reference(&scs->trace_buf, NULL);
+ free(scs);
}
/* initialize */
void si_need_cs_space(struct si_context *ctx)
{
struct radeon_winsys_cs *cs = ctx->b.gfx.cs;
- struct radeon_winsys_cs *ce_ib = ctx->ce_ib;
/* There is no need to flush the DMA IB here, because
* r600_need_dma_space always flushes the GFX IB if there is
@@ -87,8 +63,7 @@
/* If the CS is sufficiently large, don't count the space needed
* and just flush if there is not enough space left.
*/
- if (!ctx->b.ws->cs_check_space(cs, 2048) ||
- (ce_ib && !ctx->b.ws->cs_check_space(ce_ib, si_ce_needed_cs_space())))
+ if (!ctx->b.ws->cs_check_space(cs, 2048))
ctx->b.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
}
@@ -105,10 +80,10 @@
if (!radeon_emitted(cs, ctx->b.initial_gfx_cs_size))
return;
- if (r600_check_device_reset(&ctx->b))
+ if (si_check_device_reset(&ctx->b))
return;
- if (ctx->screen->b.debug_flags & DBG_CHECK_VM)
+ if (ctx->screen->b.debug_flags & DBG(CHECK_VM))
flags &= ~RADEON_FLUSH_ASYNC;
/* If the state tracker is flushing the GFX IB, r600_flush_from_st is
@@ -123,11 +98,13 @@
ctx->gfx_flush_in_progress = true;
- /* This CE dump should be done in parallel with the last draw. */
- if (ctx->ce_ib)
- si_ce_save_all_descriptors_at_ib_end(ctx);
+ si_preflush_suspend_features(&ctx->b);
- r600_preflush_suspend_features(&ctx->b);
+ ctx->streamout.suspended = false;
+ if (ctx->streamout.begin_emitted) {
+ si_emit_streamout_end(ctx);
+ ctx->streamout.suspended = true;
+ }
ctx->b.flags |= SI_CONTEXT_CS_PARTIAL_FLUSH |
SI_CONTEXT_PS_PARTIAL_FLUSH;
@@ -139,15 +116,13 @@
si_emit_cache_flush(ctx);
- if (ctx->trace_buf)
+ if (ctx->current_saved_cs) {
si_trace_emit(ctx);
+ si_log_hw_flush(ctx);
- if (ctx->is_debug) {
/* Save the IB for debug contexts. */
- radeon_clear_saved_cs(&ctx->last_gfx);
- radeon_save_cs(ws, cs, &ctx->last_gfx);
- r600_resource_reference(&ctx->last_trace_buf, ctx->trace_buf);
- r600_resource_reference(&ctx->trace_buf, NULL);
+ si_save_cs(ws, cs, &ctx->current_saved_cs->gfx, true);
+ ctx->current_saved_cs->flushed = true;
}
/* Flush the CS. */
@@ -157,37 +132,56 @@
ctx->b.num_gfx_cs_flushes++;
/* Check VM faults if needed. */
- if (ctx->screen->b.debug_flags & DBG_CHECK_VM) {
+ if (ctx->screen->b.debug_flags & DBG(CHECK_VM)) {
/* Use conservative timeout 800ms, after which we won't wait any
* longer and assume the GPU is hung.
*/
ctx->b.ws->fence_wait(ctx->b.ws, ctx->b.last_gfx_fence, 800*1000*1000);
- si_check_vm_faults(&ctx->b, &ctx->last_gfx, RING_GFX);
+ si_check_vm_faults(&ctx->b, &ctx->current_saved_cs->gfx, RING_GFX);
}
+ if (ctx->current_saved_cs)
+ si_saved_cs_reference(&ctx->current_saved_cs, NULL);
+
si_begin_new_cs(ctx);
ctx->gfx_flush_in_progress = false;
}
-void si_begin_new_cs(struct si_context *ctx)
+static void si_begin_cs_debug(struct si_context *ctx)
{
- if (ctx->is_debug) {
- uint32_t zero = 0;
+ static const uint32_t zeros[1];
+ assert(!ctx->current_saved_cs);
+
+ ctx->current_saved_cs = calloc(1, sizeof(*ctx->current_saved_cs));
+ if (!ctx->current_saved_cs)
+ return;
+
+ pipe_reference_init(&ctx->current_saved_cs->reference, 1);
- /* Create a buffer used for writing trace IDs and initialize it to 0. */
- assert(!ctx->trace_buf);
- ctx->trace_buf = (struct r600_resource*)
+ ctx->current_saved_cs->trace_buf = (struct r600_resource*)
pipe_buffer_create(ctx->b.b.screen, 0,
- PIPE_USAGE_STAGING, 4);
- if (ctx->trace_buf)
- pipe_buffer_write_nooverlap(&ctx->b.b, &ctx->trace_buf->b.b,
- 0, sizeof(zero), &zero);
- ctx->trace_id = 0;
+ PIPE_USAGE_STAGING, 8);
+ if (!ctx->current_saved_cs->trace_buf) {
+ free(ctx->current_saved_cs);
+ ctx->current_saved_cs = NULL;
+ return;
}
- if (ctx->trace_buf)
- si_trace_emit(ctx);
+ pipe_buffer_write_nooverlap(&ctx->b.b, &ctx->current_saved_cs->trace_buf->b.b,
+ 0, sizeof(zeros), zeros);
+ ctx->current_saved_cs->trace_id = 0;
+
+ si_trace_emit(ctx);
+
+ radeon_add_to_buffer_list(&ctx->b, &ctx->b.gfx, ctx->current_saved_cs->trace_buf,
+ RADEON_USAGE_READWRITE, RADEON_PRIO_TRACE);
+}
+
+void si_begin_new_cs(struct si_context *ctx)
+{
+ if (ctx->is_debug)
+ si_begin_cs_debug(ctx);
/* Flush read caches at the beginning of CS not flushed by the kernel. */
if (ctx->b.chip_class >= CIK)
@@ -206,42 +200,65 @@
if (ctx->init_config_gs_rings)
si_pm4_emit(ctx, ctx->init_config_gs_rings);
- if (ctx->ce_preamble_ib)
- si_ce_enable_loads(ctx->ce_preamble_ib);
- else if (ctx->ce_ib)
- si_ce_enable_loads(ctx->ce_ib);
-
- if (ctx->ce_ib)
- si_ce_restore_all_descriptors_at_ib_start(ctx);
-
- if (ctx->b.chip_class >= CIK)
- si_mark_atom_dirty(ctx, &ctx->prefetch_L2);
-
- ctx->framebuffer.dirty_cbufs = (1 << 8) - 1;
- ctx->framebuffer.dirty_zsbuf = true;
+ if (ctx->queued.named.ls)
+ ctx->prefetch_L2_mask |= SI_PREFETCH_LS;
+ if (ctx->queued.named.hs)
+ ctx->prefetch_L2_mask |= SI_PREFETCH_HS;
+ if (ctx->queued.named.es)
+ ctx->prefetch_L2_mask |= SI_PREFETCH_ES;
+ if (ctx->queued.named.gs)
+ ctx->prefetch_L2_mask |= SI_PREFETCH_GS;
+ if (ctx->queued.named.vs)
+ ctx->prefetch_L2_mask |= SI_PREFETCH_VS;
+ if (ctx->queued.named.ps)
+ ctx->prefetch_L2_mask |= SI_PREFETCH_PS;
+ if (ctx->vertex_buffers.buffer && ctx->vertex_elements)
+ ctx->prefetch_L2_mask |= SI_PREFETCH_VBO_DESCRIPTORS;
+
+ /* CLEAR_STATE disables all colorbuffers, so only enable bound ones. */
+ bool has_clear_state = ctx->screen->has_clear_state;
+ if (has_clear_state) {
+ ctx->framebuffer.dirty_cbufs =
+ u_bit_consecutive(0, ctx->framebuffer.state.nr_cbufs);
+ /* CLEAR_STATE disables the zbuffer, so only enable it if it's bound. */
+ ctx->framebuffer.dirty_zsbuf = ctx->framebuffer.state.zsbuf != NULL;
+ } else {
+ ctx->framebuffer.dirty_cbufs = u_bit_consecutive(0, 8);
+ ctx->framebuffer.dirty_zsbuf = true;
+ }
+ /* This should always be marked as dirty to set the framebuffer scissor
+ * at least. */
si_mark_atom_dirty(ctx, &ctx->framebuffer.atom);
si_mark_atom_dirty(ctx, &ctx->clip_regs);
- si_mark_atom_dirty(ctx, &ctx->clip_state.atom);
+ /* CLEAR_STATE sets zeros. */
+ if (!has_clear_state || ctx->clip_state.any_nonzeros)
+ si_mark_atom_dirty(ctx, &ctx->clip_state.atom);
ctx->msaa_sample_locs.nr_samples = 0;
si_mark_atom_dirty(ctx, &ctx->msaa_sample_locs.atom);
si_mark_atom_dirty(ctx, &ctx->msaa_config);
- si_mark_atom_dirty(ctx, &ctx->sample_mask.atom);
+ /* CLEAR_STATE sets 0xffff. */
+ if (!has_clear_state || ctx->sample_mask.sample_mask != 0xffff)
+ si_mark_atom_dirty(ctx, &ctx->sample_mask.atom);
si_mark_atom_dirty(ctx, &ctx->cb_render_state);
- si_mark_atom_dirty(ctx, &ctx->blend_color.atom);
+ /* CLEAR_STATE sets zeros. */
+ if (!has_clear_state || ctx->blend_color.any_nonzeros)
+ si_mark_atom_dirty(ctx, &ctx->blend_color.atom);
si_mark_atom_dirty(ctx, &ctx->db_render_state);
+ if (ctx->b.chip_class >= GFX9)
+ si_mark_atom_dirty(ctx, &ctx->dpbb_state);
si_mark_atom_dirty(ctx, &ctx->stencil_ref.atom);
si_mark_atom_dirty(ctx, &ctx->spi_map);
- si_mark_atom_dirty(ctx, &ctx->b.streamout.enable_atom);
+ si_mark_atom_dirty(ctx, &ctx->streamout.enable_atom);
si_mark_atom_dirty(ctx, &ctx->b.render_cond_atom);
si_all_descriptors_begin_new_cs(ctx);
si_all_resident_buffers_begin_new_cs(ctx);
- ctx->b.scissors.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
- ctx->b.viewports.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
- ctx->b.viewports.depth_range_dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
- si_mark_atom_dirty(ctx, &ctx->b.scissors.atom);
- si_mark_atom_dirty(ctx, &ctx->b.viewports.atom);
+ ctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
+ ctx->viewports.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
+ ctx->viewports.depth_range_dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
+ si_mark_atom_dirty(ctx, &ctx->scissors.atom);
+ si_mark_atom_dirty(ctx, &ctx->viewports.atom);
si_mark_atom_dirty(ctx, &ctx->scratch_state);
if (ctx->scratch_buffer) {
@@ -249,7 +266,12 @@
&ctx->scratch_buffer->b.b);
}
- r600_postflush_resume_features(&ctx->b);
+ if (ctx->streamout.suspended) {
+ ctx->streamout.append_bitmask = ctx->streamout.enabled_mask;
+ si_streamout_buffers_dirty(ctx);
+ }
+
+ si_postflush_resume_features(&ctx->b);
assert(!ctx->b.gfx.cs->prev_dw);
ctx->b.initial_gfx_cs_size = ctx->b.gfx.cs->current.cdw;
diff -Nru mesa-17.2.4/src/gallium/drivers/radeonsi/si_perfcounter.c mesa-17.3.3/src/gallium/drivers/radeonsi/si_perfcounter.c
--- mesa-17.2.4/src/gallium/drivers/radeonsi/si_perfcounter.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeonsi/si_perfcounter.c 2018-01-18 21:30:28.000000000 +0000
@@ -403,6 +403,30 @@
};
+static struct si_pc_block groups_gfx9[] = {
+ { &cik_CB, 438, 4 },
+ { &cik_CPF, 32 },
+ { &cik_DB, 328, 4 },
+ { &cik_GRBM, 38 },
+ { &cik_GRBMSE, 16 },
+ { &cik_PA_SU, 292 },
+ { &cik_PA_SC, 491 },
+ { &cik_SPI, 196 },
+ { &cik_SQ, 374 },
+ { &cik_SX, 208 },
+ { &cik_TA, 119, 16 },
+ { &cik_TCA, 35, 2 },
+ { &cik_TCC, 256, 16 },
+ { &cik_TD, 57, 16 },
+ { &cik_TCP, 85, 16 },
+ { &cik_GDS, 121 },
+ { &cik_VGT, 148 },
+ { &cik_IA, 32 },
+ { &cik_WD, 58 },
+ { &cik_CPG, 59 },
+ { &cik_CPC, 35 },
+};
+
static void si_pc_get_size(struct r600_perfcounter_block *group,
unsigned count, unsigned *selectors,
unsigned *num_select_dw, unsigned *num_read_dw)
@@ -578,7 +602,7 @@
radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL,
S_036020_PERFMON_STATE(V_036020_DISABLE_AND_RESET));
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_PERFCOUNTER_START) | EVENT_INDEX(0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_PERFCOUNTER_START) | EVENT_INDEX(0));
radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL,
S_036020_PERFMON_STATE(V_036020_START_COUNTING));
}
@@ -590,14 +614,15 @@
{
struct radeon_winsys_cs *cs = ctx->gfx.cs;
- r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS, 0, 1,
- buffer, va, 1, 0);
- r600_gfx_wait_fence(ctx, va, 0, 0xffffffff);
+ si_gfx_write_event_eop(ctx, V_028A90_BOTTOM_OF_PIPE_TS, 0,
+ EOP_DATA_SEL_VALUE_32BIT,
+ buffer, va, 0, R600_NOT_QUERY);
+ si_gfx_wait_fence(ctx, va, 0, 0xffffffff);
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_PERFCOUNTER_SAMPLE) | EVENT_INDEX(0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_PERFCOUNTER_SAMPLE) | EVENT_INDEX(0));
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_PERFCOUNTER_STOP) | EVENT_INDEX(0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_PERFCOUNTER_STOP) | EVENT_INDEX(0));
radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL,
S_036020_PERFMON_STATE(V_036020_STOP_COUNTING) |
S_036020_PERFMON_SAMPLE_ENABLE(1));
@@ -651,7 +676,7 @@
static void si_pc_cleanup(struct r600_common_screen *rscreen)
{
- r600_perfcounters_do_destroy(rscreen->perfcounters);
+ si_perfcounters_do_destroy(rscreen->perfcounters);
rscreen->perfcounters = NULL;
}
@@ -671,8 +696,11 @@
blocks = groups_VI;
num_blocks = ARRAY_SIZE(groups_VI);
break;
- case SI:
case GFX9:
+ blocks = groups_gfx9;
+ num_blocks = ARRAY_SIZE(groups_gfx9);
+ break;
+ case SI:
default:
return; /* not implemented */
}
@@ -689,7 +717,7 @@
return;
pc->num_start_cs_dwords = 14;
- pc->num_stop_cs_dwords = 14 + r600_gfx_write_fence_dwords(&screen->b);
+ pc->num_stop_cs_dwords = 14 + si_gfx_write_fence_dwords(&screen->b);
pc->num_instance_cs_dwords = 3;
pc->num_shaders_cs_dwords = 4;
@@ -706,7 +734,7 @@
pc->emit_read = si_pc_emit_read;
pc->cleanup = si_pc_cleanup;
- if (!r600_perfcounters_init(pc, num_blocks))
+ if (!si_perfcounters_init(pc, num_blocks))
goto error;
for (i = 0; i < num_blocks; ++i) {
@@ -718,7 +746,7 @@
instances = 2;
}
- r600_perfcounters_add_block(&screen->b, pc,
+ si_perfcounters_add_block(&screen->b, pc,
block->b->name,
block->b->flags,
block->b->num_counters,
@@ -731,5 +759,5 @@
return;
error:
- r600_perfcounters_do_destroy(pc);
+ si_perfcounters_do_destroy(pc);
}
diff -Nru mesa-17.2.4/src/gallium/drivers/radeonsi/si_pipe.c mesa-17.3.3/src/gallium/drivers/radeonsi/si_pipe.c
--- mesa-17.2.4/src/gallium/drivers/radeonsi/si_pipe.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeonsi/si_pipe.c 2018-01-18 21:30:28.000000000 +0000
@@ -28,12 +28,16 @@
#include "radeon/radeon_uvd.h"
#include "util/hash_table.h"
+#include "util/u_log.h"
#include "util/u_memory.h"
#include "util/u_suballoc.h"
#include "util/u_tests.h"
+#include "util/xmlconfig.h"
#include "vl/vl_decoder.h"
#include "../ddebug/dd_util.h"
+#include "compiler/nir/nir.h"
+
/*
* pipe_context
*/
@@ -51,10 +55,6 @@
si_release_all_descriptors(sctx);
- if (sctx->ce_suballocator)
- u_suballocator_destroy(sctx->ce_suballocator);
-
- r600_resource_reference(&sctx->ce_ram_saved_buffer, NULL);
pipe_resource_reference(&sctx->esgs_ring, NULL);
pipe_resource_reference(&sctx->gsvs_ring, NULL);
pipe_resource_reference(&sctx->tf_ring, NULL);
@@ -84,20 +84,25 @@
sctx->b.b.delete_blend_state(&sctx->b.b, sctx->custom_blend_eliminate_fastclear);
if (sctx->custom_blend_dcc_decompress)
sctx->b.b.delete_blend_state(&sctx->b.b, sctx->custom_blend_dcc_decompress);
+ if (sctx->vs_blit_pos)
+ sctx->b.b.delete_vs_state(&sctx->b.b, sctx->vs_blit_pos);
+ if (sctx->vs_blit_pos_layered)
+ sctx->b.b.delete_vs_state(&sctx->b.b, sctx->vs_blit_pos_layered);
+ if (sctx->vs_blit_color)
+ sctx->b.b.delete_vs_state(&sctx->b.b, sctx->vs_blit_color);
+ if (sctx->vs_blit_color_layered)
+ sctx->b.b.delete_vs_state(&sctx->b.b, sctx->vs_blit_color_layered);
+ if (sctx->vs_blit_texcoord)
+ sctx->b.b.delete_vs_state(&sctx->b.b, sctx->vs_blit_texcoord);
if (sctx->blitter)
util_blitter_destroy(sctx->blitter);
- r600_common_context_cleanup(&sctx->b);
+ si_common_context_cleanup(&sctx->b);
LLVMDisposeTargetMachine(sctx->tm);
- r600_resource_reference(&sctx->trace_buf, NULL);
- r600_resource_reference(&sctx->last_trace_buf, NULL);
- radeon_clear_saved_cs(&sctx->last_gfx);
-
- pb_slabs_deinit(&sctx->bindless_descriptor_slabs);
- util_dynarray_fini(&sctx->bindless_descriptors);
+ si_saved_cs_reference(&sctx->current_saved_cs, NULL);
_mesa_hash_table_destroy(sctx->tex_handles, NULL);
_mesa_hash_table_destroy(sctx->img_handles, NULL);
@@ -132,6 +137,9 @@
struct si_context *sctx = (struct si_context *)ctx;
dd_parse_apitrace_marker(string, len, &sctx->apitrace_call_number);
+
+ if (sctx->b.log)
+ u_log_printf(sctx->b.log, "\nString marker: %*s\n", len, string);
}
static LLVMTargetMachineRef
@@ -144,16 +152,26 @@
"+DumpCode,+vgpr-spilling,-fp32-denormals,+fp64-denormals%s%s%s",
sscreen->b.chip_class >= GFX9 ? ",+xnack" : ",-xnack",
sscreen->llvm_has_working_vgpr_indexing ? "" : ",-promote-alloca",
- sscreen->b.debug_flags & DBG_SI_SCHED ? ",+si-scheduler" : "");
+ sscreen->b.debug_flags & DBG(SI_SCHED) ? ",+si-scheduler" : "");
return LLVMCreateTargetMachine(ac_get_llvm_target(triple), triple,
- r600_get_llvm_processor_name(sscreen->b.family),
+ si_get_llvm_processor_name(sscreen->b.family),
features,
LLVMCodeGenLevelDefault,
LLVMRelocDefault,
LLVMCodeModelDefault);
}
+static void si_set_log_context(struct pipe_context *ctx,
+ struct u_log_context *log)
+{
+ struct si_context *sctx = (struct si_context *)ctx;
+ sctx->b.log = log;
+
+ if (log)
+ u_log_add_auto_logger(log, si_auto_log_cs, sctx);
+}
+
static struct pipe_context *si_create_context(struct pipe_screen *screen,
unsigned flags)
{
@@ -172,11 +190,12 @@
sctx->b.b.priv = NULL;
sctx->b.b.destroy = si_destroy_context;
sctx->b.b.emit_string_marker = si_emit_string_marker;
+ sctx->b.b.set_log_context = si_set_log_context;
sctx->b.set_atom_dirty = (void *)si_set_atom_dirty;
sctx->screen = sscreen; /* Easy accessing of screen/winsys. */
sctx->is_debug = (flags & PIPE_CONTEXT_DEBUG) != 0;
- if (!r600_common_context_init(&sctx->b, &sscreen->b, flags))
+ if (!si_common_context_init(&sctx->b, &sscreen->b, flags))
goto fail;
if (sscreen->b.info.drm_major == 3)
@@ -186,6 +205,8 @@
si_init_compute_functions(sctx);
si_init_cp_dma_functions(sctx);
si_init_debug_functions(sctx);
+ si_init_msaa_functions(sctx);
+ si_init_streamout_functions(sctx);
if (sscreen->b.info.has_hw_decode) {
sctx->b.b.create_video_codec = si_uvd_create_decoder;
@@ -197,45 +218,6 @@
sctx->b.gfx.cs = ws->cs_create(sctx->b.ctx, RING_GFX,
si_context_gfx_flush, sctx);
-
- bool enable_ce = sscreen->b.chip_class != SI && /* SI hangs */
- /* These can't use CE due to a power gating bug in the kernel. */
- sscreen->b.family != CHIP_CARRIZO &&
- sscreen->b.family != CHIP_STONEY;
-
- /* CE is currently disabled by default, because it makes s_load latency
- * worse, because CE IB doesn't run in lockstep with DE.
- * Remove this line after that performance issue has been resolved.
- */
- enable_ce = false;
-
- /* Apply CE overrides. */
- if (sscreen->b.debug_flags & DBG_NO_CE)
- enable_ce = false;
- else if (sscreen->b.debug_flags & DBG_CE)
- enable_ce = true;
-
- if (ws->cs_add_const_ib && enable_ce) {
- sctx->ce_ib = ws->cs_add_const_ib(sctx->b.gfx.cs);
- if (!sctx->ce_ib)
- goto fail;
-
- if (ws->cs_add_const_preamble_ib) {
- sctx->ce_preamble_ib =
- ws->cs_add_const_preamble_ib(sctx->b.gfx.cs);
-
- if (!sctx->ce_preamble_ib)
- goto fail;
- }
-
- sctx->ce_suballocator =
- u_suballocator_create(&sctx->b.b, 1024 * 1024, 0,
- PIPE_USAGE_DEFAULT,
- R600_RESOURCE_FLAG_UNMAPPABLE, false);
- if (!sctx->ce_suballocator)
- goto fail;
- }
-
sctx->b.gfx.flush = si_context_gfx_flush;
/* Border colors. */
@@ -260,6 +242,7 @@
si_init_all_descriptors(sctx);
si_init_state_functions(sctx);
si_init_shader_functions(sctx);
+ si_init_viewport_functions(sctx);
si_init_ia_multi_vgt_param_table(sctx);
if (sctx->b.chip_class >= CIK)
@@ -267,13 +250,14 @@
else
si_init_dma_functions(sctx);
- if (sscreen->b.debug_flags & DBG_FORCE_DMA)
+ if (sscreen->b.debug_flags & DBG(FORCE_DMA))
sctx->b.b.resource_copy_region = sctx->b.dma_copy;
sctx->blitter = util_blitter_create(&sctx->b.b);
if (sctx->blitter == NULL)
goto fail;
- sctx->blitter->draw_rectangle = r600_draw_rectangle;
+ sctx->blitter->draw_rectangle = si_draw_rectangle;
+ sctx->blitter->skip_viewport_restore = true;
sctx->sample_mask.sample_mask = 0xffff;
@@ -301,7 +285,7 @@
* if NUM_RECORDS == 0). We need to use a dummy buffer instead. */
if (sctx->b.chip_class == CIK) {
sctx->null_const_buf.buffer =
- r600_aligned_buffer_create(screen,
+ si_aligned_buffer_create(screen,
R600_RESOURCE_FLAG_UNMAPPABLE,
PIPE_USAGE_DEFAULT, 16,
sctx->screen->b.info.tcc_cache_line_size);
@@ -355,15 +339,6 @@
sctx->tm = si_create_llvm_target_machine(sscreen);
- /* Create a slab allocator for all bindless descriptors. */
- if (!pb_slabs_init(&sctx->bindless_descriptor_slabs, 6, 6, 1, sctx,
- si_bindless_descriptor_can_reclaim_slab,
- si_bindless_descriptor_slab_alloc,
- si_bindless_descriptor_slab_free))
- goto fail;
-
- util_dynarray_init(&sctx->bindless_descriptors, NULL);
-
/* Bindless handles. */
sctx->tex_handles = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
_mesa_key_pointer_equal);
@@ -389,7 +364,7 @@
struct si_screen *sscreen = (struct si_screen *)screen;
struct pipe_context *ctx;
- if (sscreen->b.debug_flags & DBG_CHECK_VM)
+ if (sscreen->b.debug_flags & DBG(CHECK_VM))
flags |= PIPE_CONTEXT_DEBUG;
ctx = si_create_context(screen, flags);
@@ -410,12 +385,11 @@
/* When shaders are logged to stderr, asynchronous compilation is
* disabled too. */
- if (sscreen->b.debug_flags & (DBG_VS | DBG_TCS | DBG_TES | DBG_GS |
- DBG_PS | DBG_CS))
+ if (sscreen->b.debug_flags & DBG_ALL_SHADERS)
return ctx;
return threaded_context_create(ctx, &sscreen->b.pool_transfers,
- r600_replace_buffer_storage,
+ si_replace_buffer_storage,
&((struct si_context*)ctx)->b.tc);
}
@@ -523,13 +497,16 @@
case PIPE_CAP_BINDLESS_TEXTURE:
case PIPE_CAP_QUERY_TIMESTAMP:
case PIPE_CAP_QUERY_TIME_ELAPSED:
- return 1;
-
+ case PIPE_CAP_NIR_SAMPLERS_AS_DEREF:
+ case PIPE_CAP_QUERY_SO_OVERFLOW:
+ case PIPE_CAP_MEMOBJ:
+ case PIPE_CAP_LOAD_CONSTBUF:
case PIPE_CAP_INT64:
case PIPE_CAP_INT64_DIVMOD:
case PIPE_CAP_TGSI_CLOCK:
case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX:
case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
+ case PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS:
return 1;
case PIPE_CAP_TGSI_VOTE:
@@ -565,6 +542,8 @@
return 4;
case PIPE_CAP_GLSL_FEATURE_LEVEL:
+ if (sscreen->b.debug_flags & DBG(NIR))
+ return 140; /* no geometry and tessellation shaders yet */
if (si_have_tgsi_compute(sscreen))
return 450;
return 420;
@@ -604,14 +583,17 @@
case PIPE_CAP_VERTEXID_NOBASE:
case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES:
case PIPE_CAP_MAX_WINDOW_RECTANGLES:
- case PIPE_CAP_NATIVE_FENCE_FD:
case PIPE_CAP_TGSI_FS_FBFETCH:
case PIPE_CAP_TGSI_MUL_ZERO_WINS:
case PIPE_CAP_UMA:
case PIPE_CAP_POLYGON_MODE_FILL_RECTANGLE:
case PIPE_CAP_POST_DEPTH_COVERAGE:
+ case PIPE_CAP_TILE_RASTER_ORDER:
return 0;
+ case PIPE_CAP_NATIVE_FENCE_FD:
+ return sscreen->b.info.has_sync_file;
+
case PIPE_CAP_QUERY_BUFFER_OBJECT:
return si_have_tgsi_compute(sscreen);
@@ -654,7 +636,7 @@
/* Viewports and render targets. */
case PIPE_CAP_MAX_VIEWPORTS:
- return R600_MAX_VIEWPORTS;
+ return SI_MAX_VIEWPORTS;
case PIPE_CAP_VIEWPORT_SUBPIXEL_BITS:
case PIPE_CAP_MAX_RENDER_TARGETS:
return 8;
@@ -762,9 +744,13 @@
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
case PIPE_SHADER_CAP_PREFERRED_IR:
+ if (sscreen->b.debug_flags & DBG(NIR) &&
+ (shader == PIPE_SHADER_VERTEX ||
+ shader == PIPE_SHADER_FRAGMENT))
+ return PIPE_SHADER_IR_NIR;
return PIPE_SHADER_IR_TGSI;
case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD:
- return 3;
+ return 4;
/* Supported boolean features. */
case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
@@ -772,9 +758,14 @@
case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
case PIPE_SHADER_CAP_INTEGERS:
+ case PIPE_SHADER_CAP_INT64_ATOMICS:
+ case PIPE_SHADER_CAP_FP16:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS:
+ case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
return 1;
case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
@@ -794,13 +785,42 @@
/* Unsupported boolean features. */
case PIPE_SHADER_CAP_SUBROUTINES:
case PIPE_SHADER_CAP_SUPPORTED_IRS:
- case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
- case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
return 0;
}
return 0;
}
+static const struct nir_shader_compiler_options nir_options = {
+ .vertex_id_zero_based = true,
+ .lower_scmp = true,
+ .lower_flrp32 = true,
+ .lower_fsat = true,
+ .lower_fdiv = true,
+ .lower_sub = true,
+ .lower_ffma = true,
+ .lower_pack_snorm_2x16 = true,
+ .lower_pack_snorm_4x8 = true,
+ .lower_pack_unorm_2x16 = true,
+ .lower_pack_unorm_4x8 = true,
+ .lower_unpack_snorm_2x16 = true,
+ .lower_unpack_snorm_4x8 = true,
+ .lower_unpack_unorm_2x16 = true,
+ .lower_unpack_unorm_4x8 = true,
+ .lower_extract_byte = true,
+ .lower_extract_word = true,
+ .max_unroll_iterations = 32,
+ .native_integers = true,
+};
+
+static const void *
+si_get_compiler_options(struct pipe_screen *screen,
+ enum pipe_shader_ir ir,
+ enum pipe_shader_type shader)
+{
+ assert(ir == PIPE_SHADER_IR_NIR);
+ return &nir_options;
+}
+
static void si_destroy_screen(struct pipe_screen* pscreen)
{
struct si_screen *sscreen = (struct si_screen *)pscreen;
@@ -833,13 +853,13 @@
struct si_shader_part *part = parts[i];
parts[i] = part->next;
- radeon_shader_binary_clean(&part->binary);
+ si_radeon_shader_binary_clean(&part->binary);
FREE(part);
}
}
mtx_destroy(&sscreen->shader_parts_mutex);
si_destroy_shader_cache(sscreen);
- r600_destroy_common_screen(&sscreen->b);
+ si_destroy_common_screen(&sscreen->b);
}
static bool si_init_gs_info(struct si_screen *sscreen)
@@ -883,7 +903,7 @@
return;
for (i = CHIP_TAHITI; i < CHIP_LAST; i++) {
- if (!strcmp(family, r600_get_llvm_processor_name(i))) {
+ if (!strcmp(family, si_get_llvm_processor_name(i))) {
/* Override family and chip_class. */
sscreen->b.family = sscreen->b.info.family = i;
@@ -920,25 +940,37 @@
r600_resource(buf)->gpu_address = 0; /* cause a VM fault */
- if (sscreen->b.debug_flags & DBG_TEST_VMFAULT_CP) {
+ if (sscreen->b.debug_flags & DBG(TEST_VMFAULT_CP)) {
si_copy_buffer(sctx, buf, buf, 0, 4, 4, 0);
ctx->flush(ctx, NULL, 0);
puts("VM fault test: CP - done.");
}
- if (sscreen->b.debug_flags & DBG_TEST_VMFAULT_SDMA) {
+ if (sscreen->b.debug_flags & DBG(TEST_VMFAULT_SDMA)) {
sctx->b.dma_clear_buffer(ctx, buf, 0, 4, 0);
ctx->flush(ctx, NULL, 0);
puts("VM fault test: SDMA - done.");
}
- if (sscreen->b.debug_flags & DBG_TEST_VMFAULT_SHADER) {
+ if (sscreen->b.debug_flags & DBG(TEST_VMFAULT_SHADER)) {
util_test_constant_buffer(ctx, buf);
puts("VM fault test: Shader - done.");
}
exit(0);
}
+static void radeonsi_get_driver_uuid(struct pipe_screen *pscreen, char *uuid)
+{
+ ac_compute_driver_uuid(uuid, PIPE_UUID_SIZE);
+}
+
+static void radeonsi_get_device_uuid(struct pipe_screen *pscreen, char *uuid)
+{
+ struct r600_common_screen *rscreen = (struct r600_common_screen *)pscreen;
+
+ ac_compute_device_uuid(&rscreen->info, uuid, PIPE_UUID_SIZE);
+}
+
struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
- unsigned flags)
+ const struct pipe_screen_config *config)
{
struct si_screen *sscreen = CALLOC_STRUCT(si_screen);
unsigned num_threads, num_compiler_threads, num_compiler_threads_lowprio, i;
@@ -952,11 +984,23 @@
sscreen->b.b.destroy = si_destroy_screen;
sscreen->b.b.get_param = si_get_param;
sscreen->b.b.get_shader_param = si_get_shader_param;
- sscreen->b.b.resource_create = r600_resource_create_common;
+ sscreen->b.b.get_compiler_options = si_get_compiler_options;
+ sscreen->b.b.get_device_uuid = radeonsi_get_device_uuid;
+ sscreen->b.b.get_driver_uuid = radeonsi_get_driver_uuid;
+ sscreen->b.b.resource_create = si_resource_create_common;
si_init_screen_state_functions(sscreen);
- if (!r600_common_screen_init(&sscreen->b, ws, flags) ||
+ /* Set these flags in debug_flags early, so that the shader cache takes
+ * them into account.
+ */
+ if (driQueryOptionb(config->options,
+ "glsl_correct_derivatives_after_discard"))
+ sscreen->b.debug_flags |= DBG(FS_CORRECT_DERIVS_AFTER_KILL);
+ if (driQueryOptionb(config->options, "radeonsi_enable_sisched"))
+ sscreen->b.debug_flags |= DBG(SI_SCHED);
+
+ if (!si_common_screen_init(&sscreen->b, ws) ||
!si_init_gs_info(sscreen) ||
!si_init_shader_cache(sscreen)) {
FREE(sscreen);
@@ -1001,6 +1045,10 @@
sscreen->tess_offchip_block_dw_size =
sscreen->b.family == CHIP_HAWAII ? 4096 : 8192;
+ /* The mere presence of CLEAR_STATE in the IB causes random GPU hangs
+ * on SI. */
+ sscreen->has_clear_state = sscreen->b.chip_class >= CIK;
+
sscreen->has_distributed_tess =
sscreen->b.chip_class >= VI &&
sscreen->b.info.max_se >= 2;
@@ -1017,11 +1065,35 @@
sscreen->b.info.pfp_fw_version >= 79 &&
sscreen->b.info.me_fw_version >= 142);
- sscreen->has_ds_bpermute = sscreen->b.chip_class >= VI;
+ sscreen->has_out_of_order_rast = sscreen->b.chip_class >= VI &&
+ sscreen->b.info.max_se >= 2 &&
+ !(sscreen->b.debug_flags & DBG(NO_OUT_OF_ORDER));
+ sscreen->assume_no_z_fights =
+ driQueryOptionb(config->options, "radeonsi_assume_no_z_fights");
+ sscreen->commutative_blend_add =
+ driQueryOptionb(config->options, "radeonsi_commutative_blend_add");
+ sscreen->clear_db_cache_before_clear =
+ driQueryOptionb(config->options, "radeonsi_clear_db_cache_before_clear");
sscreen->has_msaa_sample_loc_bug = (sscreen->b.family >= CHIP_POLARIS10 &&
sscreen->b.family <= CHIP_POLARIS12) ||
sscreen->b.family == CHIP_VEGA10 ||
sscreen->b.family == CHIP_RAVEN;
+
+ if (sscreen->b.debug_flags & DBG(DPBB)) {
+ sscreen->dpbb_allowed = true;
+ } else {
+ /* Only enable primitive binning on Raven by default. */
+ sscreen->dpbb_allowed = sscreen->b.family == CHIP_RAVEN &&
+ !(sscreen->b.debug_flags & DBG(NO_DPBB));
+ }
+
+ if (sscreen->b.debug_flags & DBG(DFSM)) {
+ sscreen->dfsm_allowed = sscreen->dpbb_allowed;
+ } else {
+ sscreen->dfsm_allowed = sscreen->dpbb_allowed &&
+ !(sscreen->b.debug_flags & DBG(NO_DFSM));
+ }
+
/* While it would be nice not to have this flag, we are constrained
* by the reality that LLVM 5.0 doesn't have working VGPR indexing
* on GFX9.
@@ -1039,24 +1111,26 @@
sscreen->b.has_rbplus = true;
sscreen->b.rbplus_allowed =
- !(sscreen->b.debug_flags & DBG_NO_RB_PLUS) &&
+ !(sscreen->b.debug_flags & DBG(NO_RB_PLUS)) &&
(sscreen->b.family == CHIP_STONEY ||
sscreen->b.family == CHIP_RAVEN);
}
(void) mtx_init(&sscreen->shader_parts_mutex, mtx_plain);
sscreen->use_monolithic_shaders =
- (sscreen->b.debug_flags & DBG_MONOLITHIC_SHADERS) != 0;
+ (sscreen->b.debug_flags & DBG(MONOLITHIC_SHADERS)) != 0;
sscreen->b.barrier_flags.cp_to_L2 = SI_CONTEXT_INV_SMEM_L1 |
SI_CONTEXT_INV_VMEM_L1;
- if (sscreen->b.chip_class <= VI)
+ if (sscreen->b.chip_class <= VI) {
sscreen->b.barrier_flags.cp_to_L2 |= SI_CONTEXT_INV_GLOBAL_L2;
+ sscreen->b.barrier_flags.L2_to_cp |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
+ }
sscreen->b.barrier_flags.compute_to_L2 = SI_CONTEXT_CS_PARTIAL_FLUSH;
if (debug_get_bool_option("RADEON_DUMP_SHADERS", false))
- sscreen->b.debug_flags |= DBG_FS | DBG_VS | DBG_GS | DBG_PS | DBG_CS;
+ sscreen->b.debug_flags |= DBG_ALL_SHADERS;
for (i = 0; i < num_compiler_threads; i++)
sscreen->tm[i] = si_create_llvm_target_machine(sscreen);
@@ -1066,12 +1140,12 @@
/* Create the auxiliary context. This must be done last. */
sscreen->b.aux_context = si_create_context(&sscreen->b.b, 0);
- if (sscreen->b.debug_flags & DBG_TEST_DMA)
- r600_test_dma(&sscreen->b);
+ if (sscreen->b.debug_flags & DBG(TEST_DMA))
+ si_test_dma(&sscreen->b);
- if (sscreen->b.debug_flags & (DBG_TEST_VMFAULT_CP |
- DBG_TEST_VMFAULT_SDMA |
- DBG_TEST_VMFAULT_SHADER))
+ if (sscreen->b.debug_flags & (DBG(TEST_VMFAULT_CP) |
+ DBG(TEST_VMFAULT_SDMA) |
+ DBG(TEST_VMFAULT_SHADER)))
si_test_vmfault(sscreen);
return &sscreen->b.b;
diff -Nru mesa-17.2.4/src/gallium/drivers/radeonsi/si_pipe.h mesa-17.3.3/src/gallium/drivers/radeonsi/si_pipe.h
--- mesa-17.2.4/src/gallium/drivers/radeonsi/si_pipe.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeonsi/si_pipe.h 2018-01-18 21:30:28.000000000 +0000
@@ -29,6 +29,7 @@
#include "si_shader.h"
#include "util/u_dynarray.h"
+#include "util/u_idalloc.h"
#ifdef PIPE_ARCH_BIG_ENDIAN
#define SI_BIG_ENDIAN 1
@@ -57,7 +58,9 @@
/* Write dirty L2 lines back to memory (shader and CP DMA stores), but don't
* invalidate L2. SI-CIK can't do it, so they will do complete invalidation. */
#define SI_CONTEXT_WRITEBACK_GLOBAL_L2 (R600_CONTEXT_PRIVATE_FLAG << 4)
-/* gaps */
+/* Writeback & invalidate the L2 metadata cache. It can only be coupled with
+ * a CB or DB flush. */
+#define SI_CONTEXT_INV_L2_METADATA (R600_CONTEXT_PRIVATE_FLAG << 5)
/* Framebuffer caches. */
#define SI_CONTEXT_FLUSH_AND_INV_DB (R600_CONTEXT_PRIVATE_FLAG << 6)
#define SI_CONTEXT_FLUSH_AND_INV_DB_META (R600_CONTEXT_PRIVATE_FLAG << 7)
@@ -69,7 +72,16 @@
#define SI_CONTEXT_VGT_FLUSH (R600_CONTEXT_PRIVATE_FLAG << 12)
#define SI_CONTEXT_VGT_STREAMOUT_SYNC (R600_CONTEXT_PRIVATE_FLAG << 13)
+#define SI_PREFETCH_VBO_DESCRIPTORS (1 << 0)
+#define SI_PREFETCH_LS (1 << 1)
+#define SI_PREFETCH_HS (1 << 2)
+#define SI_PREFETCH_ES (1 << 3)
+#define SI_PREFETCH_GS (1 << 4)
+#define SI_PREFETCH_VS (1 << 5)
+#define SI_PREFETCH_PS (1 << 6)
+
#define SI_MAX_BORDER_COLORS 4096
+#define SI_MAX_VIEWPORTS 16
#define SIX_BITS 0x3F
struct si_compute;
@@ -80,10 +92,16 @@
struct r600_common_screen b;
unsigned gs_table_depth;
unsigned tess_offchip_block_dw_size;
+ bool has_clear_state;
bool has_distributed_tess;
bool has_draw_indirect_multi;
- bool has_ds_bpermute;
+ bool has_out_of_order_rast;
+ bool assume_no_z_fights;
+ bool commutative_blend_add;
+ bool clear_db_cache_before_clear;
bool has_msaa_sample_loc_bug;
+ bool dpbb_allowed;
+ bool dfsm_allowed;
bool llvm_has_working_vgpr_indexing;
/* Whether shaders are monolithic (1-part) or separate (3-part). */
@@ -128,6 +146,7 @@
struct si_blend_color {
struct r600_atom atom;
struct pipe_blend_color state;
+ bool any_nonzeros;
};
struct si_sampler_view {
@@ -140,6 +159,7 @@
ubyte base_level;
ubyte block_width;
bool is_stencil_sampler;
+ bool is_integer;
bool dcc_incompatible;
};
@@ -150,6 +170,7 @@
unsigned magic;
#endif
uint32_t val[4];
+ uint32_t integer_val[4];
uint32_t upgraded_depth_val[4];
};
@@ -161,13 +182,17 @@
bool uses_scratch;
};
-struct si_textures_info {
- struct si_sampler_views views;
+struct si_samplers {
+ struct pipe_sampler_view *views[SI_NUM_SAMPLERS];
+ struct si_sampler_state *sampler_states[SI_NUM_SAMPLERS];
+
+ /* The i-th bit is set if that element is enabled (non-NULL resource). */
+ unsigned enabled_mask;
uint32_t needs_depth_decompress_mask;
uint32_t needs_color_decompress_mask;
};
-struct si_images_info {
+struct si_images {
struct pipe_image_view views[SI_NUM_IMAGES];
uint32_t needs_color_decompress_mask;
unsigned enabled_mask;
@@ -189,11 +214,35 @@
ubyte dirty_cbufs;
bool dirty_zsbuf;
bool any_dst_linear;
+ bool CB_has_shader_readable_metadata;
+ bool DB_has_shader_readable_metadata;
+};
+
+struct si_signed_scissor {
+ int minx;
+ int miny;
+ int maxx;
+ int maxy;
+};
+
+struct si_scissors {
+ struct r600_atom atom;
+ unsigned dirty_mask;
+ struct pipe_scissor_state states[SI_MAX_VIEWPORTS];
+};
+
+struct si_viewports {
+ struct r600_atom atom;
+ unsigned dirty_mask;
+ unsigned depth_range_dirty_mask;
+ struct pipe_viewport_state states[SI_MAX_VIEWPORTS];
+ struct si_signed_scissor as_scissor[SI_MAX_VIEWPORTS];
};
struct si_clip_state {
struct r600_atom atom;
struct pipe_clip_state state;
+ bool any_nonzeros;
};
struct si_sample_locs {
@@ -206,6 +255,43 @@
uint16_t sample_mask;
};
+struct si_streamout_target {
+ struct pipe_stream_output_target b;
+
+ /* The buffer where BUFFER_FILLED_SIZE is stored. */
+ struct r600_resource *buf_filled_size;
+ unsigned buf_filled_size_offset;
+ bool buf_filled_size_valid;
+
+ unsigned stride_in_dw;
+};
+
+struct si_streamout {
+ struct r600_atom begin_atom;
+ bool begin_emitted;
+
+ unsigned enabled_mask;
+ unsigned num_targets;
+ struct si_streamout_target *targets[PIPE_MAX_SO_BUFFERS];
+
+ unsigned append_bitmask;
+ bool suspended;
+
+ /* External state which comes from the vertex shader;
+ * it must be set explicitly when binding a shader. */
+ uint16_t *stride_in_dw;
+ unsigned enabled_stream_buffers_mask; /* stream0 buffers0-3 in 4 LSB */
+
+ /* The state of VGT_STRMOUT_BUFFER_(CONFIG|EN). */
+ unsigned hw_enabled_mask;
+
+ /* The state of VGT_STRMOUT_(CONFIG|EN). */
+ struct r600_atom enable_atom;
+ bool streamout_enabled;
+ bool prims_gen_query_enabled;
+ int num_prims_gen_queries;
+};
+
/* A shader state consists of the shader selector, which is a constant state
* object shared by multiple contexts and shouldn't be modified, and
* the current shader variant selected for this context.
@@ -237,28 +323,32 @@
uint32_t index;
};
-struct si_bindless_descriptor
-{
- struct pb_slab_entry entry;
- struct r600_resource *buffer;
- unsigned offset;
- uint32_t desc_list[16];
- bool dirty;
-};
-
struct si_texture_handle
{
- struct si_bindless_descriptor *desc;
+ unsigned desc_slot;
+ bool desc_dirty;
struct pipe_sampler_view *view;
struct si_sampler_state sstate;
};
struct si_image_handle
{
- struct si_bindless_descriptor *desc;
+ unsigned desc_slot;
+ bool desc_dirty;
struct pipe_image_view view;
};
+struct si_saved_cs {
+ struct pipe_reference reference;
+ struct si_context *ctx;
+ struct radeon_saved_cs gfx;
+ struct r600_resource *trace_buf;
+ unsigned trace_id;
+
+ unsigned gfx_last_dw;
+ bool flushed;
+};
+
struct si_context {
struct r600_common_context b;
struct blitter_context *blitter;
@@ -267,19 +357,17 @@
void *custom_blend_fmask_decompress;
void *custom_blend_eliminate_fastclear;
void *custom_blend_dcc_decompress;
+ void *vs_blit_pos;
+ void *vs_blit_pos_layered;
+ void *vs_blit_color;
+ void *vs_blit_color_layered;
+ void *vs_blit_texcoord;
struct si_screen *screen;
LLVMTargetMachineRef tm; /* only non-threaded compilation */
struct si_shader_ctx_state fixed_func_tcs_shader;
struct r600_resource *wait_mem_scratch;
unsigned wait_mem_number;
-
- struct radeon_winsys_cs *ce_ib;
- struct radeon_winsys_cs *ce_preamble_ib;
- struct r600_resource *ce_ram_saved_buffer;
- struct u_suballocator *ce_suballocator;
- unsigned ce_ram_saved_offset;
- uint16_t total_ce_ram_allocated;
- bool ce_need_synchronization:1;
+ uint16_t prefetch_L2_mask;
bool gfx_flush_in_progress:1;
bool compute_is_busy:1;
@@ -293,10 +381,10 @@
union si_state emitted;
/* Atom declarations. */
- struct r600_atom prefetch_L2;
struct si_framebuffer framebuffer;
struct si_sample_locs msaa_sample_locs;
struct r600_atom db_render_state;
+ struct r600_atom dpbb_state;
struct r600_atom msaa_config;
struct si_sample_mask sample_mask;
struct r600_atom cb_render_state;
@@ -304,9 +392,12 @@
struct si_blend_color blend_color;
struct r600_atom clip_regs;
struct si_clip_state clip_state;
- struct si_shader_data shader_userdata;
+ struct si_shader_data shader_pointers;
struct si_stencil_ref stencil_ref;
struct r600_atom spi_map;
+ struct si_scissors scissors;
+ struct si_streamout streamout;
+ struct si_viewports viewports;
/* Precomputed states. */
struct si_pm4_state *init_config;
@@ -336,8 +427,8 @@
unsigned shader_needs_decompress_mask;
struct si_buffer_resources rw_buffers;
struct si_buffer_resources const_and_shader_buffers[SI_NUM_SHADERS];
- struct si_textures_info samplers[SI_NUM_SHADERS];
- struct si_images_info images[SI_NUM_SHADERS];
+ struct si_samplers samplers[SI_NUM_SHADERS];
+ struct si_images images[SI_NUM_SHADERS];
/* other shader resources */
struct pipe_constant_buffer null_const_buf; /* used for set_constant_buffer(NULL) on CIK */
@@ -349,6 +440,8 @@
struct r600_resource *border_color_buffer;
union pipe_color_union *border_color_map; /* in VRAM (slow access), little endian */
unsigned border_color_count;
+ unsigned num_vs_blit_sgprs;
+ uint32_t vs_blit_sh_data[SI_VS_BLIT_SGPRS_POS_TEXCOORD];
/* Vertex and index buffers. */
bool vertex_buffers_dirty;
@@ -375,6 +468,7 @@
/* Emitted draw state. */
bool gs_tri_strip_adj_fix:1;
+ bool ls_vgpr_fix:1;
int last_index_size;
int last_base_vertex;
int last_start_instance;
@@ -389,6 +483,7 @@
unsigned last_sc_line_stipple;
unsigned current_vs_state;
unsigned last_vs_state;
+ enum pipe_prim_type current_rast_prim; /* primitive type after TES, GS */
/* Scratch buffer */
struct r600_atom scratch_state;
@@ -409,10 +504,7 @@
/* Debug state. */
bool is_debug;
- struct radeon_saved_cs last_gfx;
- struct r600_resource *last_trace_buf;
- struct r600_resource *trace_buf;
- unsigned trace_id;
+ struct si_saved_cs *current_saved_cs;
uint64_t dmesg_timestamp;
unsigned apitrace_call_number;
@@ -420,16 +512,20 @@
bool need_check_render_feedback;
bool decompression_enabled;
+ bool vs_writes_viewport_index;
+ bool vs_disables_clipping_viewport;
+
/* Precomputed IA_MULTI_VGT_PARAM */
union si_vgt_param_key ia_multi_vgt_param_key;
unsigned ia_multi_vgt_param[SI_NUM_VGT_PARAM_STATES];
- /* Slab allocator for bindless descriptors. */
- struct pb_slabs bindless_descriptor_slabs;
-
/* Bindless descriptors. */
- struct util_dynarray bindless_descriptors;
+ struct si_descriptors bindless_descriptors;
+ struct util_idalloc bindless_used_slots;
+ unsigned num_bindless_descriptors;
bool bindless_descriptors_dirty;
+ bool graphics_bindless_pointer_dirty;
+ bool compute_bindless_pointer_dirty;
/* Allocated bindless handles */
struct hash_table *tex_handles;
@@ -447,6 +543,15 @@
/* Bindless state */
bool uses_bindless_samplers;
bool uses_bindless_images;
+
+ /* MSAA sample locations.
+ * The first index is the sample index.
+ * The second index is the coordinate: X, Y. */
+ float sample_locations_1x[1][2];
+ float sample_locations_2x[2][2];
+ float sample_locations_4x[4][2];
+ float sample_locations_8x[8][2];
+ float sample_locations_16x[16][2];
};
/* cik_sdma.c */
@@ -454,8 +559,7 @@
/* si_blit.c */
void si_init_blit_functions(struct si_context *sctx);
-void si_decompress_graphics_textures(struct si_context *sctx);
-void si_decompress_compute_textures(struct si_context *sctx);
+void si_decompress_textures(struct si_context *sctx, unsigned shader_mask);
void si_resource_copy_region(struct pipe_context *ctx,
struct pipe_resource *dst,
unsigned dst_level,
@@ -482,9 +586,14 @@
unsigned user_flags);
void cik_prefetch_TC_L2_async(struct si_context *sctx, struct pipe_resource *buf,
uint64_t offset, unsigned size);
+void cik_emit_prefetch_L2(struct si_context *sctx);
void si_init_cp_dma_functions(struct si_context *sctx);
/* si_debug.c */
+void si_auto_log_cs(void *data, struct u_log_context *log);
+void si_log_hw_flush(struct si_context *sctx);
+void si_log_draw_state(struct si_context *sctx, struct u_log_context *log);
+void si_log_compute_state(struct si_context *sctx, struct u_log_context *log);
void si_init_debug_functions(struct si_context *sctx);
void si_check_vm_faults(struct r600_common_context *ctx,
struct radeon_saved_cs *saved, enum ring_type ring);
@@ -494,6 +603,7 @@
void si_init_dma_functions(struct si_context *sctx);
/* si_hw_context.c */
+void si_destroy_saved_cs(struct si_saved_cs *scs);
void si_context_gfx_flush(void *context, unsigned flags,
struct pipe_fence_handle **fence);
void si_begin_new_cs(struct si_context *ctx);
@@ -512,6 +622,11 @@
struct pipe_video_buffer *si_video_buffer_create(struct pipe_context *pipe,
const struct pipe_video_buffer *tmpl);
+/* si_viewport.c */
+void si_update_vs_viewport_state(struct si_context *ctx);
+void si_init_viewport_functions(struct si_context *ctx);
+
+
/*
* common helpers
*/
@@ -576,6 +691,12 @@
return vs->current ? vs->current : NULL;
}
+static inline bool si_get_strmout_en(struct si_context *sctx)
+{
+ return sctx->streamout.streamout_enabled ||
+ sctx->streamout.prims_gen_query_enabled;
+}
+
static inline unsigned
si_optimal_tcc_alignment(struct si_context *sctx, unsigned upload_size)
{
@@ -591,4 +712,57 @@
return MIN2(alignment, tcc_cache_line_size);
}
+static inline void
+si_saved_cs_reference(struct si_saved_cs **dst, struct si_saved_cs *src)
+{
+ if (pipe_reference(&(*dst)->reference, &src->reference))
+ si_destroy_saved_cs(*dst);
+
+ *dst = src;
+}
+
+static inline void
+si_make_CB_shader_coherent(struct si_context *sctx, unsigned num_samples,
+ bool shaders_read_metadata)
+{
+ sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
+ SI_CONTEXT_INV_VMEM_L1;
+
+ if (sctx->b.chip_class >= GFX9) {
+ /* Single-sample color is coherent with shaders on GFX9, but
+ * L2 metadata must be flushed if shaders read metadata
+ * (DCC, CMASK).
+ */
+ if (num_samples >= 2)
+ sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
+ else if (shaders_read_metadata)
+ sctx->b.flags |= SI_CONTEXT_INV_L2_METADATA;
+ } else {
+ /* SI-CI-VI */
+ sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
+ }
+}
+
+static inline void
+si_make_DB_shader_coherent(struct si_context *sctx, unsigned num_samples,
+ bool include_stencil, bool shaders_read_metadata)
+{
+ sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_DB |
+ SI_CONTEXT_INV_VMEM_L1;
+
+ if (sctx->b.chip_class >= GFX9) {
+ /* Single-sample depth (not stencil) is coherent with shaders
+ * on GFX9, but L2 metadata must be flushed if shaders read
+ * metadata.
+ */
+ if (num_samples >= 2 || include_stencil)
+ sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
+ else if (shaders_read_metadata)
+ sctx->b.flags |= SI_CONTEXT_INV_L2_METADATA;
+ } else {
+ /* SI-CI-VI */
+ sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
+ }
+}
+
#endif
diff -Nru mesa-17.2.4/src/gallium/drivers/radeonsi/si_public.h mesa-17.3.3/src/gallium/drivers/radeonsi/si_public.h
--- mesa-17.2.4/src/gallium/drivers/radeonsi/si_public.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeonsi/si_public.h 2018-01-18 21:30:28.000000000 +0000
@@ -26,6 +26,6 @@
struct radeon_winsys;
struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
- unsigned flags);
+ const struct pipe_screen_config *config);
#endif
diff -Nru mesa-17.2.4/src/gallium/drivers/radeonsi/si_shader.c mesa-17.3.3/src/gallium/drivers/radeonsi/si_shader.c
--- mesa-17.2.4/src/gallium/drivers/radeonsi/si_shader.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeonsi/si_shader.c 2018-01-18 21:30:28.000000000 +0000
@@ -46,6 +46,7 @@
#include "si_pipe.h"
#include "sid.h"
+#include "compiler/nir/nir.h"
static const char *scratch_rsrc_dword0_symbol =
"SCRATCH_RSRC_DWORD0";
@@ -61,6 +62,22 @@
ubyte vertex_stream[4];
};
+/**
+ * Used to collect types and other info about arguments of the LLVM function
+ * before the function is created.
+ */
+struct si_function_info {
+ LLVMTypeRef types[100];
+ LLVMValueRef *assign[100];
+ unsigned num_sgpr_params;
+ unsigned num_params;
+};
+
+enum si_arg_regfile {
+ ARG_SGPR,
+ ARG_VGPR
+};
+
static void si_init_shader_ctx(struct si_shader_context *ctx,
struct si_screen *sscreen,
LLVMTargetMachineRef tm);
@@ -72,8 +89,6 @@
static void si_dump_shader_key(unsigned processor, const struct si_shader *shader,
FILE *f);
-static unsigned llvm_get_type_size(LLVMTypeRef type);
-
static void si_build_vs_prolog_function(struct si_shader_context *ctx,
union si_shader_part_key *key);
static void si_build_tcs_epilog_function(struct si_shader_context *ctx,
@@ -104,6 +119,50 @@
shader->selector->type == PIPE_SHADER_GEOMETRY;
}
+static void si_init_function_info(struct si_function_info *fninfo)
+{
+ fninfo->num_params = 0;
+ fninfo->num_sgpr_params = 0;
+}
+
+static unsigned add_arg_assign(struct si_function_info *fninfo,
+ enum si_arg_regfile regfile, LLVMTypeRef type,
+ LLVMValueRef *assign)
+{
+ assert(regfile != ARG_SGPR || fninfo->num_sgpr_params == fninfo->num_params);
+
+ unsigned idx = fninfo->num_params++;
+ assert(idx < ARRAY_SIZE(fninfo->types));
+
+ if (regfile == ARG_SGPR)
+ fninfo->num_sgpr_params = fninfo->num_params;
+
+ fninfo->types[idx] = type;
+ fninfo->assign[idx] = assign;
+ return idx;
+}
+
+static unsigned add_arg(struct si_function_info *fninfo,
+ enum si_arg_regfile regfile, LLVMTypeRef type)
+{
+ return add_arg_assign(fninfo, regfile, type, NULL);
+}
+
+static void add_arg_assign_checked(struct si_function_info *fninfo,
+ enum si_arg_regfile regfile, LLVMTypeRef type,
+ LLVMValueRef *assign, unsigned idx)
+{
+ MAYBE_UNUSED unsigned actual = add_arg_assign(fninfo, regfile, type, assign);
+ assert(actual == idx);
+}
+
+static void add_arg_checked(struct si_function_info *fninfo,
+ enum si_arg_regfile regfile, LLVMTypeRef type,
+ unsigned idx)
+{
+ add_arg_assign_checked(fninfo, regfile, type, NULL, idx);
+}
+
/**
* Returns a unique index for a per-patch semantic name and index. The index
* must be less than 32, so that a 32-bit bitmask of used inputs or outputs
@@ -181,21 +240,19 @@
unsigned param, unsigned rshift,
unsigned bitwidth)
{
- struct gallivm_state *gallivm = &ctx->gallivm;
LLVMValueRef value = LLVMGetParam(ctx->main_fn,
param);
if (LLVMGetTypeKind(LLVMTypeOf(value)) == LLVMFloatTypeKind)
- value = bitcast(&ctx->bld_base,
- TGSI_TYPE_UNSIGNED, value);
+ value = ac_to_integer(&ctx->ac, value);
if (rshift)
- value = LLVMBuildLShr(gallivm->builder, value,
+ value = LLVMBuildLShr(ctx->ac.builder, value,
LLVMConstInt(ctx->i32, rshift, 0), "");
if (rshift + bitwidth < 32) {
unsigned mask = (1 << bitwidth) - 1;
- value = LLVMBuildAnd(gallivm->builder, value,
+ value = LLVMBuildAnd(ctx->ac.builder, value,
LLVMConstInt(ctx->i32, mask, 0), "");
}
@@ -245,10 +302,35 @@
return unpack_param(ctx, ctx->param_vs_state_bits, 8, 13);
}
-static LLVMValueRef
-get_tcs_out_patch_stride(struct si_shader_context *ctx)
+static unsigned get_tcs_out_vertex_dw_stride_constant(struct si_shader_context *ctx)
+{
+ assert(ctx->type == PIPE_SHADER_TESS_CTRL);
+
+ if (ctx->shader->key.mono.u.ff_tcs_inputs_to_copy)
+ return util_last_bit64(ctx->shader->key.mono.u.ff_tcs_inputs_to_copy) * 4;
+
+ return util_last_bit64(ctx->shader->selector->outputs_written) * 4;
+}
+
+static LLVMValueRef get_tcs_out_vertex_dw_stride(struct si_shader_context *ctx)
+{
+ unsigned stride = get_tcs_out_vertex_dw_stride_constant(ctx);
+
+ return LLVMConstInt(ctx->i32, stride, 0);
+}
+
+static LLVMValueRef get_tcs_out_patch_stride(struct si_shader_context *ctx)
{
- return unpack_param(ctx, ctx->param_tcs_out_lds_layout, 0, 13);
+ if (ctx->shader->key.mono.u.ff_tcs_inputs_to_copy)
+ return unpack_param(ctx, ctx->param_tcs_out_lds_layout, 0, 13);
+
+ const struct tgsi_shader_info *info = &ctx->shader->selector->info;
+ unsigned tcs_out_vertices = info->properties[TGSI_PROPERTY_TCS_VERTICES_OUT];
+ unsigned vertex_dw_stride = get_tcs_out_vertex_dw_stride_constant(ctx);
+ unsigned num_patch_outputs = util_last_bit64(ctx->shader->selector->patch_outputs_written);
+ unsigned patch_dw_stride = tcs_out_vertices * vertex_dw_stride +
+ num_patch_outputs * 4;
+ return LLVMConstInt(ctx->i32, patch_dw_stride, 0);
}
static LLVMValueRef
@@ -274,23 +356,21 @@
static LLVMValueRef
get_tcs_in_current_patch_offset(struct si_shader_context *ctx)
{
- struct gallivm_state *gallivm = &ctx->gallivm;
LLVMValueRef patch_stride = get_tcs_in_patch_stride(ctx);
LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);
- return LLVMBuildMul(gallivm->builder, patch_stride, rel_patch_id, "");
+ return LLVMBuildMul(ctx->ac.builder, patch_stride, rel_patch_id, "");
}
static LLVMValueRef
get_tcs_out_current_patch_offset(struct si_shader_context *ctx)
{
- struct gallivm_state *gallivm = &ctx->gallivm;
LLVMValueRef patch0_offset = get_tcs_out_patch0_offset(ctx);
LLVMValueRef patch_stride = get_tcs_out_patch_stride(ctx);
LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);
- return LLVMBuildAdd(gallivm->builder, patch0_offset,
- LLVMBuildMul(gallivm->builder, patch_stride,
+ return LLVMBuildAdd(ctx->ac.builder, patch0_offset,
+ LLVMBuildMul(ctx->ac.builder, patch_stride,
rel_patch_id, ""),
"");
}
@@ -298,32 +378,64 @@
static LLVMValueRef
get_tcs_out_current_patch_data_offset(struct si_shader_context *ctx)
{
- struct gallivm_state *gallivm = &ctx->gallivm;
LLVMValueRef patch0_patch_data_offset =
get_tcs_out_patch0_patch_data_offset(ctx);
LLVMValueRef patch_stride = get_tcs_out_patch_stride(ctx);
LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);
- return LLVMBuildAdd(gallivm->builder, patch0_patch_data_offset,
- LLVMBuildMul(gallivm->builder, patch_stride,
+ return LLVMBuildAdd(ctx->ac.builder, patch0_patch_data_offset,
+ LLVMBuildMul(ctx->ac.builder, patch_stride,
rel_patch_id, ""),
"");
}
+static LLVMValueRef get_num_tcs_out_vertices(struct si_shader_context *ctx)
+{
+ unsigned tcs_out_vertices =
+ ctx->shader->selector ?
+ ctx->shader->selector->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT] : 0;
+
+ /* If !tcs_out_vertices, it's either the fixed-func TCS or the TCS epilog. */
+ if (ctx->type == PIPE_SHADER_TESS_CTRL && tcs_out_vertices)
+ return LLVMConstInt(ctx->i32, tcs_out_vertices, 0);
+
+ return unpack_param(ctx, ctx->param_tcs_offchip_layout, 6, 6);
+}
+
+static LLVMValueRef get_tcs_in_vertex_dw_stride(struct si_shader_context *ctx)
+{
+ unsigned stride;
+
+ switch (ctx->type) {
+ case PIPE_SHADER_VERTEX:
+ stride = util_last_bit64(ctx->shader->selector->outputs_written);
+ return LLVMConstInt(ctx->i32, stride * 4, 0);
+
+ case PIPE_SHADER_TESS_CTRL:
+ if (ctx->screen->b.chip_class >= GFX9 &&
+ ctx->shader->is_monolithic) {
+ stride = util_last_bit64(ctx->shader->key.part.tcs.ls->outputs_written);
+ return LLVMConstInt(ctx->i32, stride * 4, 0);
+ }
+ return unpack_param(ctx, ctx->param_vs_state_bits, 24, 8);
+
+ default:
+ assert(0);
+ return NULL;
+ }
+}
+
static LLVMValueRef get_instance_index_for_fetch(
struct si_shader_context *ctx,
unsigned param_start_instance, LLVMValueRef divisor)
{
- struct gallivm_state *gallivm = &ctx->gallivm;
-
- LLVMValueRef result = LLVMGetParam(ctx->main_fn,
- ctx->param_instance_id);
+ LLVMValueRef result = ctx->abi.instance_id;
/* The division must be done before START_INSTANCE is added. */
if (divisor != ctx->i32_1)
- result = LLVMBuildUDiv(gallivm->builder, result, divisor, "");
+ result = LLVMBuildUDiv(ctx->ac.builder, result, divisor, "");
- return LLVMBuildAdd(gallivm->builder, result,
+ return LLVMBuildAdd(ctx->ac.builder, result,
LLVMGetParam(ctx->main_fn, param_start_instance), "");
}
@@ -333,8 +445,8 @@
LLVMValueRef vec4,
unsigned double_index)
{
- LLVMBuilderRef builder = ctx->gallivm.builder;
- LLVMTypeRef f64 = LLVMDoubleTypeInContext(ctx->gallivm.context);
+ LLVMBuilderRef builder = ctx->ac.builder;
+ LLVMTypeRef f64 = LLVMDoubleTypeInContext(ctx->ac.context);
LLVMValueRef dvec2 = LLVMBuildBitCast(builder, vec4,
LLVMVectorType(f64, 2), "");
LLVMValueRef index = LLVMConstInt(ctx->i32, double_index, 0);
@@ -342,13 +454,96 @@
return LLVMBuildFPTrunc(builder, value, ctx->f32, "");
}
-static void declare_input_vs(
+static LLVMValueRef unpack_sint16(struct si_shader_context *ctx,
+ LLVMValueRef i32, unsigned index)
+{
+ assert(index <= 1);
+
+ if (index == 1)
+ return LLVMBuildAShr(ctx->ac.builder, i32,
+ LLVMConstInt(ctx->i32, 16, 0), "");
+
+ return LLVMBuildSExt(ctx->ac.builder,
+ LLVMBuildTrunc(ctx->ac.builder, i32,
+ ctx->ac.i16, ""),
+ ctx->i32, "");
+}
+
+void si_llvm_load_input_vs(
struct si_shader_context *ctx,
unsigned input_index,
- const struct tgsi_full_declaration *decl,
LLVMValueRef out[4])
{
- struct gallivm_state *gallivm = &ctx->gallivm;
+ unsigned vs_blit_property =
+ ctx->shader->selector->info.properties[TGSI_PROPERTY_VS_BLIT_SGPRS];
+
+ if (vs_blit_property) {
+ LLVMValueRef vertex_id = ctx->abi.vertex_id;
+ LLVMValueRef sel_x1 = LLVMBuildICmp(ctx->ac.builder,
+ LLVMIntULE, vertex_id,
+ ctx->i32_1, "");
+ /* Use LLVMIntNE, because we have 3 vertices and only
+ * the middle one should use y2.
+ */
+ LLVMValueRef sel_y1 = LLVMBuildICmp(ctx->ac.builder,
+ LLVMIntNE, vertex_id,
+ ctx->i32_1, "");
+
+ if (input_index == 0) {
+ /* Position: */
+ LLVMValueRef x1y1 = LLVMGetParam(ctx->main_fn,
+ ctx->param_vs_blit_inputs);
+ LLVMValueRef x2y2 = LLVMGetParam(ctx->main_fn,
+ ctx->param_vs_blit_inputs + 1);
+
+ LLVMValueRef x1 = unpack_sint16(ctx, x1y1, 0);
+ LLVMValueRef y1 = unpack_sint16(ctx, x1y1, 1);
+ LLVMValueRef x2 = unpack_sint16(ctx, x2y2, 0);
+ LLVMValueRef y2 = unpack_sint16(ctx, x2y2, 1);
+
+ LLVMValueRef x = LLVMBuildSelect(ctx->ac.builder, sel_x1,
+ x1, x2, "");
+ LLVMValueRef y = LLVMBuildSelect(ctx->ac.builder, sel_y1,
+ y1, y2, "");
+
+ out[0] = LLVMBuildSIToFP(ctx->ac.builder, x, ctx->f32, "");
+ out[1] = LLVMBuildSIToFP(ctx->ac.builder, y, ctx->f32, "");
+ out[2] = LLVMGetParam(ctx->main_fn,
+ ctx->param_vs_blit_inputs + 2);
+ out[3] = ctx->ac.f32_1;
+ return;
+ }
+
+ /* Color or texture coordinates: */
+ assert(input_index == 1);
+
+ if (vs_blit_property == SI_VS_BLIT_SGPRS_POS_COLOR) {
+ for (int i = 0; i < 4; i++) {
+ out[i] = LLVMGetParam(ctx->main_fn,
+ ctx->param_vs_blit_inputs + 3 + i);
+ }
+ } else {
+ assert(vs_blit_property == SI_VS_BLIT_SGPRS_POS_TEXCOORD);
+ LLVMValueRef x1 = LLVMGetParam(ctx->main_fn,
+ ctx->param_vs_blit_inputs + 3);
+ LLVMValueRef y1 = LLVMGetParam(ctx->main_fn,
+ ctx->param_vs_blit_inputs + 4);
+ LLVMValueRef x2 = LLVMGetParam(ctx->main_fn,
+ ctx->param_vs_blit_inputs + 5);
+ LLVMValueRef y2 = LLVMGetParam(ctx->main_fn,
+ ctx->param_vs_blit_inputs + 6);
+
+ out[0] = LLVMBuildSelect(ctx->ac.builder, sel_x1,
+ x1, x2, "");
+ out[1] = LLVMBuildSelect(ctx->ac.builder, sel_y1,
+ y1, y2, "");
+ out[2] = LLVMGetParam(ctx->main_fn,
+ ctx->param_vs_blit_inputs + 7);
+ out[3] = LLVMGetParam(ctx->main_fn,
+ ctx->param_vs_blit_inputs + 8);
+ }
+ return;
+ }
unsigned chan;
unsigned fix_fetch;
@@ -366,7 +561,7 @@
t_offset = LLVMConstInt(ctx->i32, input_index, 0);
- t_list = ac_build_indexed_load_const(&ctx->ac, t_list_ptr, t_offset);
+ t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset);
vertex_index = LLVMGetParam(ctx->main_fn,
ctx->param_vertex_index0 +
@@ -410,7 +605,7 @@
/* Break up the vec4 into individual components */
for (chan = 0; chan < 4; chan++) {
LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, 0);
- out[chan] = LLVMBuildExtractElement(gallivm->builder,
+ out[chan] = LLVMBuildExtractElement(ctx->ac.builder,
input[0], llvm_chan, "");
}
@@ -426,9 +621,9 @@
/* First, recover the sign-extended signed integer value. */
if (fix_fetch == SI_FIX_FETCH_A2_SSCALED)
- tmp = LLVMBuildFPToUI(gallivm->builder, tmp, ctx->i32, "");
+ tmp = LLVMBuildFPToUI(ctx->ac.builder, tmp, ctx->i32, "");
else
- tmp = LLVMBuildBitCast(gallivm->builder, tmp, ctx->i32, "");
+ tmp = ac_to_integer(&ctx->ac, tmp);
/* For the integer-like cases, do a natural sign extension.
*
@@ -436,20 +631,20 @@
* and happen to contain 0, 1, 2, 3 as the two LSBs of the
* exponent.
*/
- tmp = LLVMBuildShl(gallivm->builder, tmp,
+ tmp = LLVMBuildShl(ctx->ac.builder, tmp,
fix_fetch == SI_FIX_FETCH_A2_SNORM ?
LLVMConstInt(ctx->i32, 7, 0) : c30, "");
- tmp = LLVMBuildAShr(gallivm->builder, tmp, c30, "");
+ tmp = LLVMBuildAShr(ctx->ac.builder, tmp, c30, "");
/* Convert back to the right type. */
if (fix_fetch == SI_FIX_FETCH_A2_SNORM) {
LLVMValueRef clamp;
LLVMValueRef neg_one = LLVMConstReal(ctx->f32, -1.0);
- tmp = LLVMBuildSIToFP(gallivm->builder, tmp, ctx->f32, "");
- clamp = LLVMBuildFCmp(gallivm->builder, LLVMRealULT, tmp, neg_one, "");
- tmp = LLVMBuildSelect(gallivm->builder, clamp, neg_one, tmp, "");
+ tmp = LLVMBuildSIToFP(ctx->ac.builder, tmp, ctx->f32, "");
+ clamp = LLVMBuildFCmp(ctx->ac.builder, LLVMRealULT, tmp, neg_one, "");
+ tmp = LLVMBuildSelect(ctx->ac.builder, clamp, neg_one, tmp, "");
} else if (fix_fetch == SI_FIX_FETCH_A2_SSCALED) {
- tmp = LLVMBuildSIToFP(gallivm->builder, tmp, ctx->f32, "");
+ tmp = LLVMBuildSIToFP(ctx->ac.builder, tmp, ctx->f32, "");
}
out[3] = tmp;
@@ -458,11 +653,10 @@
case SI_FIX_FETCH_RGBA_32_UNORM:
case SI_FIX_FETCH_RGBX_32_UNORM:
for (chan = 0; chan < 4; chan++) {
- out[chan] = LLVMBuildBitCast(gallivm->builder, out[chan],
- ctx->i32, "");
- out[chan] = LLVMBuildUIToFP(gallivm->builder,
+ out[chan] = ac_to_integer(&ctx->ac, out[chan]);
+ out[chan] = LLVMBuildUIToFP(ctx->ac.builder,
out[chan], ctx->f32, "");
- out[chan] = LLVMBuildFMul(gallivm->builder, out[chan],
+ out[chan] = LLVMBuildFMul(ctx->ac.builder, out[chan],
LLVMConstReal(ctx->f32, 1.0 / UINT_MAX), "");
}
/* RGBX UINT returns 1 in alpha, which would be rounded to 0 by normalizing. */
@@ -480,11 +674,10 @@
scale = 1.0 / INT_MAX;
for (chan = 0; chan < 4; chan++) {
- out[chan] = LLVMBuildBitCast(gallivm->builder, out[chan],
- ctx->i32, "");
- out[chan] = LLVMBuildSIToFP(gallivm->builder,
+ out[chan] = ac_to_integer(&ctx->ac, out[chan]);
+ out[chan] = LLVMBuildSIToFP(ctx->ac.builder,
out[chan], ctx->f32, "");
- out[chan] = LLVMBuildFMul(gallivm->builder, out[chan],
+ out[chan] = LLVMBuildFMul(ctx->ac.builder, out[chan],
LLVMConstReal(ctx->f32, scale), "");
}
/* RGBX SINT returns 1 in alpha, which would be rounded to 0 by normalizing. */
@@ -495,17 +688,15 @@
}
case SI_FIX_FETCH_RGBA_32_USCALED:
for (chan = 0; chan < 4; chan++) {
- out[chan] = LLVMBuildBitCast(gallivm->builder, out[chan],
- ctx->i32, "");
- out[chan] = LLVMBuildUIToFP(gallivm->builder,
+ out[chan] = ac_to_integer(&ctx->ac, out[chan]);
+ out[chan] = LLVMBuildUIToFP(ctx->ac.builder,
out[chan], ctx->f32, "");
}
break;
case SI_FIX_FETCH_RGBA_32_SSCALED:
for (chan = 0; chan < 4; chan++) {
- out[chan] = LLVMBuildBitCast(gallivm->builder, out[chan],
- ctx->i32, "");
- out[chan] = LLVMBuildSIToFP(gallivm->builder,
+ out[chan] = ac_to_integer(&ctx->ac, out[chan]);
+ out[chan] = LLVMBuildSIToFP(ctx->ac.builder,
out[chan], ctx->f32, "");
}
break;
@@ -533,7 +724,7 @@
case SI_FIX_FETCH_RGB_16:
case SI_FIX_FETCH_RGB_16_INT:
for (chan = 0; chan < 3; chan++) {
- out[chan] = LLVMBuildExtractElement(gallivm->builder,
+ out[chan] = LLVMBuildExtractElement(ctx->ac.builder,
input[chan],
ctx->i32_0, "");
}
@@ -541,18 +732,24 @@
fix_fetch == SI_FIX_FETCH_RGB_16) {
out[3] = LLVMConstReal(ctx->f32, 1);
} else {
- out[3] = LLVMBuildBitCast(gallivm->builder, ctx->i32_1,
- ctx->f32, "");
+ out[3] = ac_to_float(&ctx->ac, ctx->i32_1);
}
break;
}
}
-static LLVMValueRef get_primitive_id(struct lp_build_tgsi_context *bld_base,
- unsigned swizzle)
+static void declare_input_vs(
+ struct si_shader_context *ctx,
+ unsigned input_index,
+ const struct tgsi_full_declaration *decl,
+ LLVMValueRef out[4])
{
- struct si_shader_context *ctx = si_shader_context(bld_base);
+ si_llvm_load_input_vs(ctx, input_index, out);
+}
+static LLVMValueRef get_primitive_id(struct si_shader_context *ctx,
+ unsigned swizzle)
+{
if (swizzle > 0)
return ctx->i32_0;
@@ -579,29 +776,49 @@
* Return the value of tgsi_ind_register for indexing.
* This is the indirect index with the constant offset added to it.
*/
-static LLVMValueRef get_indirect_index(struct si_shader_context *ctx,
- const struct tgsi_ind_register *ind,
- int rel_index)
+LLVMValueRef si_get_indirect_index(struct si_shader_context *ctx,
+ const struct tgsi_ind_register *ind,
+ unsigned addr_mul,
+ int rel_index)
{
- struct gallivm_state *gallivm = &ctx->gallivm;
LLVMValueRef result;
- result = ctx->addrs[ind->Index][ind->Swizzle];
- result = LLVMBuildLoad(gallivm->builder, result, "");
- result = LLVMBuildAdd(gallivm->builder, result,
+ if (ind->File == TGSI_FILE_ADDRESS) {
+ result = ctx->addrs[ind->Index][ind->Swizzle];
+ result = LLVMBuildLoad(ctx->ac.builder, result, "");
+ } else {
+ struct tgsi_full_src_register src = {};
+
+ src.Register.File = ind->File;
+ src.Register.Index = ind->Index;
+
+ /* Set the second index to 0 for constants. */
+ if (ind->File == TGSI_FILE_CONSTANT)
+ src.Register.Dimension = 1;
+
+ result = ctx->bld_base.emit_fetch_funcs[ind->File](&ctx->bld_base, &src,
+ TGSI_TYPE_SIGNED,
+ ind->Swizzle);
+ result = ac_to_integer(&ctx->ac, result);
+ }
+
+ if (addr_mul != 1)
+ result = LLVMBuildMul(ctx->ac.builder, result,
+ LLVMConstInt(ctx->i32, addr_mul, 0), "");
+ result = LLVMBuildAdd(ctx->ac.builder, result,
LLVMConstInt(ctx->i32, rel_index, 0), "");
return result;
}
/**
- * Like get_indirect_index, but restricts the return value to a (possibly
+ * Like si_get_indirect_index, but restricts the return value to a (possibly
* undefined) value inside [0..num).
*/
LLVMValueRef si_get_bounded_indirect_index(struct si_shader_context *ctx,
const struct tgsi_ind_register *ind,
int rel_index, unsigned num)
{
- LLVMValueRef result = get_indirect_index(ctx, ind, rel_index);
+ LLVMValueRef result = si_get_indirect_index(ctx, ind, 1, rel_index);
return si_llvm_bound_index(ctx, result, num);
}
@@ -616,7 +833,6 @@
LLVMValueRef vertex_dw_stride,
LLVMValueRef base_addr)
{
- struct gallivm_state *gallivm = &ctx->gallivm;
struct tgsi_shader_info *info = &ctx->shader->selector->info;
ubyte *name, *index, *array_first;
int first, param;
@@ -641,13 +857,13 @@
LLVMValueRef index;
if (reg.Dimension.Indirect)
- index = get_indirect_index(ctx, &reg.DimIndirect,
- reg.Dimension.Index);
+ index = si_get_indirect_index(ctx, &reg.DimIndirect,
+ 1, reg.Dimension.Index);
else
index = LLVMConstInt(ctx->i32, reg.Dimension.Index, 0);
- base_addr = LLVMBuildAdd(gallivm->builder, base_addr,
- LLVMBuildMul(gallivm->builder, index,
+ base_addr = LLVMBuildAdd(ctx->ac.builder, base_addr,
+ LLVMBuildMul(ctx->ac.builder, index,
vertex_dw_stride, ""), "");
}
@@ -674,11 +890,11 @@
else
first = reg.Register.Index;
- ind_index = get_indirect_index(ctx, &reg.Indirect,
- reg.Register.Index - first);
+ ind_index = si_get_indirect_index(ctx, &reg.Indirect,
+ 1, reg.Register.Index - first);
- base_addr = LLVMBuildAdd(gallivm->builder, base_addr,
- LLVMBuildMul(gallivm->builder, ind_index,
+ base_addr = LLVMBuildAdd(ctx->ac.builder, base_addr,
+ LLVMBuildMul(ctx->ac.builder, ind_index,
LLVMConstInt(ctx->i32, 4, 0), ""), "");
param = reg.Register.Dimension ?
@@ -693,7 +909,7 @@
}
/* Add the base address of the element. */
- return LLVMBuildAdd(gallivm->builder, base_addr,
+ return LLVMBuildAdd(ctx->ac.builder, base_addr,
LLVMConstInt(ctx->i32, param * 4, 0), "");
}
@@ -720,21 +936,20 @@
LLVMValueRef vertex_index,
LLVMValueRef param_index)
{
- struct gallivm_state *gallivm = &ctx->gallivm;
LLVMValueRef base_addr, vertices_per_patch, num_patches, total_vertices;
LLVMValueRef param_stride, constant16;
- vertices_per_patch = unpack_param(ctx, ctx->param_tcs_offchip_layout, 6, 6);
+ vertices_per_patch = get_num_tcs_out_vertices(ctx);
num_patches = unpack_param(ctx, ctx->param_tcs_offchip_layout, 0, 6);
- total_vertices = LLVMBuildMul(gallivm->builder, vertices_per_patch,
+ total_vertices = LLVMBuildMul(ctx->ac.builder, vertices_per_patch,
num_patches, "");
constant16 = LLVMConstInt(ctx->i32, 16, 0);
if (vertex_index) {
- base_addr = LLVMBuildMul(gallivm->builder, rel_patch_id,
+ base_addr = LLVMBuildMul(ctx->ac.builder, rel_patch_id,
vertices_per_patch, "");
- base_addr = LLVMBuildAdd(gallivm->builder, base_addr,
+ base_addr = LLVMBuildAdd(ctx->ac.builder, base_addr,
vertex_index, "");
param_stride = total_vertices;
@@ -743,17 +958,17 @@
param_stride = num_patches;
}
- base_addr = LLVMBuildAdd(gallivm->builder, base_addr,
- LLVMBuildMul(gallivm->builder, param_index,
+ base_addr = LLVMBuildAdd(ctx->ac.builder, base_addr,
+ LLVMBuildMul(ctx->ac.builder, param_index,
param_stride, ""), "");
- base_addr = LLVMBuildMul(gallivm->builder, base_addr, constant16, "");
+ base_addr = LLVMBuildMul(ctx->ac.builder, base_addr, constant16, "");
if (!vertex_index) {
LLVMValueRef patch_data_offset =
unpack_param(ctx, ctx->param_tcs_offchip_layout, 12, 20);
- base_addr = LLVMBuildAdd(gallivm->builder, base_addr,
+ base_addr = LLVMBuildAdd(ctx->ac.builder, base_addr,
patch_data_offset, "");
}
return base_addr;
@@ -764,7 +979,6 @@
const struct tgsi_full_dst_register *dst,
const struct tgsi_full_src_register *src)
{
- struct gallivm_state *gallivm = &ctx->gallivm;
struct tgsi_shader_info *info = &ctx->shader->selector->info;
ubyte *name, *index, *array_first;
struct tgsi_full_src_register reg;
@@ -777,8 +991,8 @@
if (reg.Register.Dimension) {
if (reg.Dimension.Indirect)
- vertex_index = get_indirect_index(ctx, &reg.DimIndirect,
- reg.Dimension.Index);
+ vertex_index = si_get_indirect_index(ctx, &reg.DimIndirect,
+ 1, reg.Dimension.Index);
else
vertex_index = LLVMConstInt(ctx->i32, reg.Dimension.Index, 0);
}
@@ -803,8 +1017,8 @@
else
param_base = reg.Register.Index;
- param_index = get_indirect_index(ctx, &reg.Indirect,
- reg.Register.Index - param_base);
+ param_index = si_get_indirect_index(ctx, &reg.Indirect,
+ 1, reg.Register.Index - param_base);
} else {
param_base = reg.Register.Index;
@@ -815,7 +1029,7 @@
si_shader_io_get_unique_index(name[param_base], index[param_base]) :
si_shader_io_get_unique_index_patch(name[param_base], index[param_base]);
- param_index = LLVMBuildAdd(gallivm->builder, param_index,
+ param_index = LLVMBuildAdd(ctx->ac.builder, param_index,
LLVMConstInt(ctx->i32, param_index_base, 0),
"");
@@ -829,7 +1043,6 @@
LLVMValueRef base, bool can_speculate)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- struct gallivm_state *gallivm = &ctx->gallivm;
LLVMValueRef value, value2;
LLVMTypeRef llvm_type = tgsi2llvmtype(bld_base, type);
LLVMTypeRef vec_type = LLVMVectorType(llvm_type, 4);
@@ -838,15 +1051,15 @@
value = ac_build_buffer_load(&ctx->ac, buffer, 4, NULL, base, offset,
0, 1, 0, can_speculate, false);
- return LLVMBuildBitCast(gallivm->builder, value, vec_type, "");
+ return LLVMBuildBitCast(ctx->ac.builder, value, vec_type, "");
}
if (!tgsi_type_is_64bit(type)) {
value = ac_build_buffer_load(&ctx->ac, buffer, 4, NULL, base, offset,
0, 1, 0, can_speculate, false);
- value = LLVMBuildBitCast(gallivm->builder, value, vec_type, "");
- return LLVMBuildExtractElement(gallivm->builder, value,
+ value = LLVMBuildBitCast(ctx->ac.builder, value, vec_type, "");
+ return LLVMBuildExtractElement(ctx->ac.builder, value,
LLVMConstInt(ctx->i32, swizzle, 0), "");
}
@@ -871,7 +1084,6 @@
LLVMValueRef dw_addr)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- struct gallivm_state *gallivm = &ctx->gallivm;
LLVMValueRef value;
if (swizzle == ~0) {
@@ -880,24 +1092,23 @@
for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++)
values[chan] = lds_load(bld_base, type, chan, dw_addr);
- return lp_build_gather_values(gallivm, values,
+ return lp_build_gather_values(&ctx->gallivm, values,
TGSI_NUM_CHANNELS);
}
dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr,
LLVMConstInt(ctx->i32, swizzle, 0));
- value = ac_build_indexed_load(&ctx->ac, ctx->lds, dw_addr, false);
+ value = ac_build_load(&ctx->ac, ctx->lds, dw_addr);
if (tgsi_type_is_64bit(type)) {
LLVMValueRef value2;
dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr,
ctx->i32_1);
- value2 = ac_build_indexed_load(&ctx->ac, ctx->lds, dw_addr, false);
+ value2 = ac_build_load(&ctx->ac, ctx->lds, dw_addr);
return si_llvm_emit_fetch_64bit(bld_base, type, value, value2);
}
- return LLVMBuildBitCast(gallivm->builder, value,
- tgsi2llvmtype(bld_base, type), "");
+ return bitcast(bld_base, type, value);
}
/**
@@ -912,12 +1123,11 @@
LLVMValueRef value)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- struct gallivm_state *gallivm = &ctx->gallivm;
dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr,
LLVMConstInt(ctx->i32, dw_offset_imm, 0));
- value = LLVMBuildBitCast(gallivm->builder, value, ctx->i32, "");
+ value = ac_to_integer(&ctx->ac, value);
ac_build_indexed_store(&ctx->ac, ctx->lds,
dw_addr, value);
}
@@ -925,7 +1135,7 @@
static LLVMValueRef desc_from_addr_base64k(struct si_shader_context *ctx,
unsigned param)
{
- LLVMBuilderRef builder = ctx->gallivm.builder;
+ LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef addr = LLVMGetParam(ctx->main_fn, param);
addr = LLVMBuildZExt(builder, addr, ctx->i64, "");
@@ -954,7 +1164,7 @@
struct si_shader_context *ctx = si_shader_context(bld_base);
LLVMValueRef dw_addr, stride;
- stride = unpack_param(ctx, ctx->param_vs_state_bits, 24, 8);
+ stride = get_tcs_in_vertex_dw_stride(ctx);
dw_addr = get_tcs_in_current_patch_offset(ctx);
dw_addr = get_dw_address(ctx, NULL, reg, stride, dw_addr);
@@ -970,7 +1180,7 @@
LLVMValueRef dw_addr, stride;
if (reg->Register.Dimension) {
- stride = unpack_param(ctx, ctx->param_tcs_out_lds_layout, 13, 8);
+ stride = get_tcs_out_vertex_dw_stride(ctx);
dw_addr = get_tcs_out_current_patch_offset(ctx);
dw_addr = get_dw_address(ctx, NULL, reg, stride, dw_addr);
} else {
@@ -1000,30 +1210,30 @@
static void store_output_tcs(struct lp_build_tgsi_context *bld_base,
const struct tgsi_full_instruction *inst,
const struct tgsi_opcode_info *info,
+ unsigned index,
LLVMValueRef dst[4])
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- struct gallivm_state *gallivm = &ctx->gallivm;
- const struct tgsi_full_dst_register *reg = &inst->Dst[0];
+ const struct tgsi_full_dst_register *reg = &inst->Dst[index];
const struct tgsi_shader_info *sh_info = &ctx->shader->selector->info;
unsigned chan_index;
LLVMValueRef dw_addr, stride;
LLVMValueRef buffer, base, buf_addr;
LLVMValueRef values[4];
bool skip_lds_store;
- bool is_tess_factor = false;
+ bool is_tess_factor = false, is_tess_inner = false;
/* Only handle per-patch and per-vertex outputs here.
* Vectors will be lowered to scalars and this function will be called again.
*/
if (reg->Register.File != TGSI_FILE_OUTPUT ||
(dst[0] && LLVMGetTypeKind(LLVMTypeOf(dst[0])) == LLVMVectorTypeKind)) {
- si_llvm_emit_store(bld_base, inst, info, dst);
+ si_llvm_emit_store(bld_base, inst, info, index, dst);
return;
}
if (reg->Register.Dimension) {
- stride = unpack_param(ctx, ctx->param_tcs_out_lds_layout, 13, 8);
+ stride = get_tcs_out_vertex_dw_stride(ctx);
dw_addr = get_tcs_out_current_patch_offset(ctx);
dw_addr = get_dw_address(ctx, reg, NULL, stride, dw_addr);
skip_lds_store = !sh_info->reads_pervertex_outputs;
@@ -1038,8 +1248,11 @@
/* Always write tess factors into LDS for the TCS epilog. */
if (name == TGSI_SEMANTIC_TESSINNER ||
name == TGSI_SEMANTIC_TESSOUTER) {
- skip_lds_store = false;
+ /* The epilog doesn't read LDS if invocation 0 defines tess factors. */
+ skip_lds_store = !sh_info->reads_tessfactor_outputs &&
+ ctx->shader->selector->tcs_info.tessfactors_are_def_in_all_invocs;
is_tess_factor = true;
+ is_tess_inner = name == TGSI_SEMANTIC_TESSINNER;
}
}
}
@@ -1049,8 +1262,9 @@
base = LLVMGetParam(ctx->main_fn, ctx->param_tcs_offchip_offset);
buf_addr = get_tcs_tes_buffer_address_from_reg(ctx, reg, NULL);
-
- TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) {
+ uint32_t writemask = reg->Register.WriteMask;
+ while (writemask) {
+ chan_index = u_bit_scan(&writemask);
LLVMValueRef value = dst[chan_index];
if (inst->Instruction.Saturate)
@@ -1060,18 +1274,30 @@
if (!skip_lds_store)
lds_store(bld_base, chan_index, dw_addr, value);
- value = LLVMBuildBitCast(gallivm->builder, value, ctx->i32, "");
+ value = ac_to_integer(&ctx->ac, value);
values[chan_index] = value;
- if (inst->Dst[0].Register.WriteMask != 0xF && !is_tess_factor) {
+ if (reg->Register.WriteMask != 0xF && !is_tess_factor) {
ac_build_buffer_store_dword(&ctx->ac, buffer, value, 1,
buf_addr, base,
4 * chan_index, 1, 0, true, false);
}
+
+ /* Write tess factors into VGPRs for the epilog. */
+ if (is_tess_factor &&
+ ctx->shader->selector->tcs_info.tessfactors_are_def_in_all_invocs) {
+ if (!is_tess_inner) {
+ LLVMBuildStore(ctx->ac.builder, value, /* outer */
+ ctx->invoc0_tess_factors[chan_index]);
+ } else if (chan_index < 2) {
+ LLVMBuildStore(ctx->ac.builder, value, /* inner */
+ ctx->invoc0_tess_factors[4 + chan_index]);
+ }
+ }
}
- if (inst->Dst[0].Register.WriteMask == 0xF && !is_tess_factor) {
- LLVMValueRef value = lp_build_gather_values(gallivm,
+ if (reg->Register.WriteMask == 0xF && !is_tess_factor) {
+ LLVMValueRef value = lp_build_gather_values(&ctx->gallivm,
values, 4);
ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, buf_addr,
base, 0, 1, 0, true, false);
@@ -1087,7 +1313,6 @@
struct si_shader_context *ctx = si_shader_context(bld_base);
struct si_shader *shader = ctx->shader;
struct lp_build_context *uint = &ctx->bld_base.uint_bld;
- struct gallivm_state *gallivm = &ctx->gallivm;
LLVMValueRef vtx_offset, soffset;
struct tgsi_shader_info *info = &shader->selector->info;
unsigned semantic_name = info->input_semantic_name[reg->Register.Index];
@@ -1096,7 +1321,7 @@
LLVMValueRef value;
if (swizzle != ~0 && semantic_name == TGSI_SEMANTIC_PRIMID)
- return get_primitive_id(bld_base, swizzle);
+ return get_primitive_id(ctx, swizzle);
if (!reg->Register.Dimension)
return NULL;
@@ -1125,7 +1350,7 @@
return NULL;
}
- vtx_offset = LLVMBuildAdd(gallivm->builder, vtx_offset,
+ vtx_offset = LLVMBuildAdd(ctx->ac.builder, vtx_offset,
LLVMConstInt(ctx->i32, param * 4, 0), "");
return lds_load(bld_base, type, swizzle, vtx_offset);
}
@@ -1137,7 +1362,7 @@
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
values[chan] = fetch_input_gs(bld_base, reg, type, chan);
}
- return lp_build_gather_values(gallivm, values,
+ return lp_build_gather_values(&ctx->gallivm, values,
TGSI_NUM_CHANNELS);
}
@@ -1168,9 +1393,7 @@
return si_llvm_emit_fetch_64bit(bld_base, type,
value, value2);
}
- return LLVMBuildBitCast(gallivm->builder,
- value,
- tgsi2llvmtype(bld_base, type), "");
+ return bitcast(bld_base, type, value);
}
static int lookup_interp_param_index(unsigned interpolate, unsigned location)
@@ -1245,7 +1468,6 @@
LLVMValueRef face,
LLVMValueRef result[4])
{
- struct gallivm_state *gallivm = &ctx->gallivm;
LLVMValueRef i = NULL, j = NULL;
unsigned chan;
@@ -1265,12 +1487,12 @@
bool interp = interp_param != NULL;
if (interp) {
- interp_param = LLVMBuildBitCast(gallivm->builder, interp_param,
+ interp_param = LLVMBuildBitCast(ctx->ac.builder, interp_param,
LLVMVectorType(ctx->f32, 2), "");
- i = LLVMBuildExtractElement(gallivm->builder, interp_param,
+ i = LLVMBuildExtractElement(ctx->ac.builder, interp_param,
ctx->i32_0, "");
- j = LLVMBuildExtractElement(gallivm->builder, interp_param,
+ j = LLVMBuildExtractElement(ctx->ac.builder, interp_param,
ctx->i32_1, "");
}
@@ -1285,7 +1507,7 @@
if (semantic_index == 1 && colors_read_mask & 0xf)
back_attr_offset += 1;
- is_face_positive = LLVMBuildICmp(gallivm->builder, LLVMIntNE,
+ is_face_positive = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE,
face, ctx->i32_0, "");
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
@@ -1298,7 +1520,7 @@
back_attr_offset, chan,
prim_mask, i, j);
- result[chan] = LLVMBuildSelect(gallivm->builder,
+ result[chan] = LLVMBuildSelect(ctx->ac.builder,
is_face_positive,
front,
back,
@@ -1319,25 +1541,28 @@
}
}
-static void declare_input_fs(
+void si_llvm_load_input_fs(
struct si_shader_context *ctx,
unsigned input_index,
- const struct tgsi_full_declaration *decl,
LLVMValueRef out[4])
{
struct lp_build_context *base = &ctx->bld_base.base;
struct si_shader *shader = ctx->shader;
+ struct tgsi_shader_info *info = &shader->selector->info;
LLVMValueRef main_fn = ctx->main_fn;
LLVMValueRef interp_param = NULL;
int interp_param_idx;
+ enum tgsi_semantic semantic_name = info->input_semantic_name[input_index];
+ unsigned semantic_index = info->input_semantic_index[input_index];
+ enum tgsi_interpolate_mode interp_mode = info->input_interpolate[input_index];
+ enum tgsi_interpolate_loc interp_loc = info->input_interpolate_loc[input_index];
/* Get colors from input VGPRs (set by the prolog). */
- if (decl->Semantic.Name == TGSI_SEMANTIC_COLOR) {
- unsigned i = decl->Semantic.Index;
+ if (semantic_name == TGSI_SEMANTIC_COLOR) {
unsigned colors_read = shader->selector->info.colors_read;
- unsigned mask = colors_read >> (i * 4);
+ unsigned mask = colors_read >> (semantic_index * 4);
unsigned offset = SI_PARAM_POS_FIXED_PT + 1 +
- (i ? util_bitcount(colors_read & 0xf) : 0);
+ (semantic_index ? util_bitcount(colors_read & 0xf) : 0);
out[0] = mask & 0x1 ? LLVMGetParam(main_fn, offset++) : base->undef;
out[1] = mask & 0x2 ? LLVMGetParam(main_fn, offset++) : base->undef;
@@ -1346,22 +1571,30 @@
return;
}
- interp_param_idx = lookup_interp_param_index(decl->Interp.Interpolate,
- decl->Interp.Location);
+ interp_param_idx = lookup_interp_param_index(interp_mode, interp_loc);
if (interp_param_idx == -1)
return;
else if (interp_param_idx) {
interp_param = LLVMGetParam(ctx->main_fn, interp_param_idx);
}
- interp_fs_input(ctx, input_index, decl->Semantic.Name,
- decl->Semantic.Index, 0, /* this param is unused */
+ interp_fs_input(ctx, input_index, semantic_name,
+ semantic_index, 0, /* this param is unused */
shader->selector->info.colors_read, interp_param,
LLVMGetParam(main_fn, SI_PARAM_PRIM_MASK),
LLVMGetParam(main_fn, SI_PARAM_FRONT_FACE),
&out[0]);
}
+static void declare_input_fs(
+ struct si_shader_context *ctx,
+ unsigned input_index,
+ const struct tgsi_full_declaration *decl,
+ LLVMValueRef out[4])
+{
+ si_llvm_load_input_fs(ctx, input_index, out);
+}
+
static LLVMValueRef get_sample_id(struct si_shader_context *ctx)
{
return unpack_param(ctx, SI_PARAM_ANCILLARY, 8, 4);
@@ -1382,15 +1615,13 @@
static LLVMValueRef load_sample_position(struct si_shader_context *ctx, LLVMValueRef sample_id)
{
struct lp_build_context *uint_bld = &ctx->bld_base.uint_bld;
- struct gallivm_state *gallivm = &ctx->gallivm;
- LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef desc = LLVMGetParam(ctx->main_fn, ctx->param_rw_buffers);
LLVMValueRef buf_index = LLVMConstInt(ctx->i32, SI_PS_CONST_SAMPLE_POSITIONS, 0);
- LLVMValueRef resource = ac_build_indexed_load_const(&ctx->ac, desc, buf_index);
+ LLVMValueRef resource = ac_build_load_to_sgpr(&ctx->ac, desc, buf_index);
/* offset = sample_id * 8 (8 = 2 floats containing samplepos.xy) */
LLVMValueRef offset0 = lp_build_mul_imm(uint_bld, sample_id, 8);
- LLVMValueRef offset1 = LLVMBuildAdd(builder, offset0, LLVMConstInt(ctx->i32, 4, 0), "");
+ LLVMValueRef offset1 = LLVMBuildAdd(ctx->ac.builder, offset0, LLVMConstInt(ctx->i32, 4, 0), "");
LLVMValueRef pos[4] = {
buffer_load_const(ctx, resource, offset0),
@@ -1399,31 +1630,27 @@
LLVMConstReal(ctx->f32, 0)
};
- return lp_build_gather_values(gallivm, pos, 4);
+ return lp_build_gather_values(&ctx->gallivm, pos, 4);
}
-static void declare_system_value(struct si_shader_context *ctx,
- unsigned index,
- const struct tgsi_full_declaration *decl)
+void si_load_system_value(struct si_shader_context *ctx,
+ unsigned index,
+ const struct tgsi_full_declaration *decl)
{
struct lp_build_context *bld = &ctx->bld_base.base;
- struct gallivm_state *gallivm = &ctx->gallivm;
LLVMValueRef value = 0;
assert(index < RADEON_LLVM_MAX_SYSTEM_VALUES);
switch (decl->Semantic.Name) {
case TGSI_SEMANTIC_INSTANCEID:
- value = LLVMGetParam(ctx->main_fn,
- ctx->param_instance_id);
+ value = ctx->abi.instance_id;
break;
case TGSI_SEMANTIC_VERTEXID:
- value = LLVMBuildAdd(gallivm->builder,
- LLVMGetParam(ctx->main_fn,
- ctx->param_vertex_id),
- LLVMGetParam(ctx->main_fn,
- ctx->param_base_vertex), "");
+ value = LLVMBuildAdd(ctx->ac.builder,
+ ctx->abi.vertex_id,
+ ctx->abi.base_vertex, "");
break;
case TGSI_SEMANTIC_VERTEXID_NOBASE:
@@ -1441,21 +1668,20 @@
LLVMValueRef vs_state = LLVMGetParam(ctx->main_fn, ctx->param_vs_state_bits);
LLVMValueRef indexed;
- indexed = LLVMBuildLShr(gallivm->builder, vs_state, ctx->i32_1, "");
- indexed = LLVMBuildTrunc(gallivm->builder, indexed, ctx->i1, "");
+ indexed = LLVMBuildLShr(ctx->ac.builder, vs_state, ctx->i32_1, "");
+ indexed = LLVMBuildTrunc(ctx->ac.builder, indexed, ctx->i1, "");
- value = LLVMBuildSelect(gallivm->builder, indexed,
- LLVMGetParam(ctx->main_fn, ctx->param_base_vertex),
- ctx->i32_0, "");
+ value = LLVMBuildSelect(ctx->ac.builder, indexed,
+ ctx->abi.base_vertex, ctx->i32_0, "");
break;
}
case TGSI_SEMANTIC_BASEINSTANCE:
- value = LLVMGetParam(ctx->main_fn, ctx->param_start_instance);
+ value = ctx->abi.start_instance;
break;
case TGSI_SEMANTIC_DRAWID:
- value = LLVMGetParam(ctx->main_fn, ctx->param_draw_id);
+ value = ctx->abi.draw_id;
break;
case TGSI_SEMANTIC_INVOCATIONID:
@@ -1478,12 +1704,12 @@
LLVMGetParam(ctx->main_fn,
SI_PARAM_POS_W_FLOAT)),
};
- value = lp_build_gather_values(gallivm, pos, 4);
+ value = lp_build_gather_values(&ctx->gallivm, pos, 4);
break;
}
case TGSI_SEMANTIC_FACE:
- value = LLVMGetParam(ctx->main_fn, SI_PARAM_FRONT_FACE);
+ value = ctx->abi.front_face;
break;
case TGSI_SEMANTIC_SAMPLEID:
@@ -1501,7 +1727,7 @@
TGSI_OPCODE_FRC, pos[0]);
pos[1] = lp_build_emit_llvm_unary(&ctx->bld_base,
TGSI_OPCODE_FRC, pos[1]);
- value = lp_build_gather_values(gallivm, pos, 4);
+ value = lp_build_gather_values(&ctx->gallivm, pos, 4);
break;
}
@@ -1517,17 +1743,17 @@
LLVMValueRef coord[4] = {
LLVMGetParam(ctx->main_fn, ctx->param_tes_u),
LLVMGetParam(ctx->main_fn, ctx->param_tes_v),
- bld->zero,
- bld->zero
+ ctx->ac.f32_0,
+ ctx->ac.f32_0
};
/* For triangles, the vector should be (u, v, 1-u-v). */
if (ctx->shader->selector->info.properties[TGSI_PROPERTY_TES_PRIM_MODE] ==
PIPE_PRIM_TRIANGLES)
- coord[2] = lp_build_sub(bld, bld->one,
+ coord[2] = lp_build_sub(bld, ctx->ac.f32_1,
lp_build_add(bld, coord[0], coord[1]));
- value = lp_build_gather_values(gallivm, coord, 4);
+ value = lp_build_gather_values(&ctx->gallivm, coord, 4);
break;
}
@@ -1535,7 +1761,7 @@
if (ctx->type == PIPE_SHADER_TESS_CTRL)
value = unpack_param(ctx, ctx->param_tcs_out_lds_layout, 26, 6);
else if (ctx->type == PIPE_SHADER_TESS_EVAL)
- value = unpack_param(ctx, ctx->param_tcs_offchip_layout, 6, 6);
+ value = get_num_tcs_out_vertices(ctx);
else
assert(!"invalid shader stage for TGSI_SEMANTIC_VERTICESIN");
break;
@@ -1566,18 +1792,18 @@
slot = LLVMConstInt(ctx->i32, SI_HS_CONST_DEFAULT_TESS_LEVELS, 0);
buf = LLVMGetParam(ctx->main_fn, ctx->param_rw_buffers);
- buf = ac_build_indexed_load_const(&ctx->ac, buf, slot);
+ buf = ac_build_load_to_sgpr(&ctx->ac, buf, slot);
offset = decl->Semantic.Name == TGSI_SEMANTIC_DEFAULT_TESSINNER_SI ? 4 : 0;
for (i = 0; i < 4; i++)
val[i] = buffer_load_const(ctx, buf,
LLVMConstInt(ctx->i32, (offset + i) * 4, 0));
- value = lp_build_gather_values(gallivm, val, 4);
+ value = lp_build_gather_values(&ctx->gallivm, val, 4);
break;
}
case TGSI_SEMANTIC_PRIMID:
- value = get_primitive_id(&ctx->bld_base, 0);
+ value = get_primitive_id(ctx, 0);
break;
case TGSI_SEMANTIC_GRID_SIZE:
@@ -1600,7 +1826,7 @@
for (i = 0; i < 3; ++i)
values[i] = LLVMConstInt(ctx->i32, sizes[i], 0);
- value = lp_build_gather_values(gallivm, values, 3);
+ value = lp_build_gather_values(&ctx->gallivm, values, 3);
} else {
value = LLVMGetParam(ctx->main_fn, ctx->param_block_size);
}
@@ -1618,7 +1844,7 @@
ctx->param_block_id[i]);
}
}
- value = lp_build_gather_values(gallivm, values, 3);
+ value = lp_build_gather_values(&ctx->gallivm, values, 3);
break;
}
@@ -1627,12 +1853,12 @@
break;
case TGSI_SEMANTIC_HELPER_INVOCATION:
- value = lp_build_intrinsic(gallivm->builder,
+ value = lp_build_intrinsic(ctx->ac.builder,
"llvm.amdgcn.ps.live",
ctx->i1, NULL, 0,
LP_FUNC_ATTR_READNONE);
- value = LLVMBuildNot(gallivm->builder, value, "");
- value = LLVMBuildSExt(gallivm->builder, value, ctx->i32, "");
+ value = LLVMBuildNot(ctx->ac.builder, value, "");
+ value = LLVMBuildSExt(ctx->ac.builder, value, ctx->i32, "");
break;
case TGSI_SEMANTIC_SUBGROUP_SIZE:
@@ -1646,9 +1872,9 @@
case TGSI_SEMANTIC_SUBGROUP_EQ_MASK:
{
LLVMValueRef id = ac_get_thread_id(&ctx->ac);
- id = LLVMBuildZExt(gallivm->builder, id, ctx->i64, "");
- value = LLVMBuildShl(gallivm->builder, LLVMConstInt(ctx->i64, 1, 0), id, "");
- value = LLVMBuildBitCast(gallivm->builder, value, ctx->v2i32, "");
+ id = LLVMBuildZExt(ctx->ac.builder, id, ctx->i64, "");
+ value = LLVMBuildShl(ctx->ac.builder, LLVMConstInt(ctx->i64, 1, 0), id, "");
+ value = LLVMBuildBitCast(ctx->ac.builder, value, ctx->v2i32, "");
break;
}
@@ -1666,12 +1892,12 @@
/* All bits set */
value = LLVMConstInt(ctx->i64, -1, 0);
}
- id = LLVMBuildZExt(gallivm->builder, id, ctx->i64, "");
- value = LLVMBuildShl(gallivm->builder, value, id, "");
+ id = LLVMBuildZExt(ctx->ac.builder, id, ctx->i64, "");
+ value = LLVMBuildShl(ctx->ac.builder, value, id, "");
if (decl->Semantic.Name == TGSI_SEMANTIC_SUBGROUP_LE_MASK ||
decl->Semantic.Name == TGSI_SEMANTIC_SUBGROUP_LT_MASK)
- value = LLVMBuildNot(gallivm->builder, value, "");
- value = LLVMBuildBitCast(gallivm->builder, value, ctx->v2i32, "");
+ value = LLVMBuildNot(ctx->ac.builder, value, "");
+ value = LLVMBuildBitCast(ctx->ac.builder, value, ctx->v2i32, "");
break;
}
@@ -1683,11 +1909,10 @@
ctx->system_values[index] = value;
}
-static void declare_compute_memory(struct si_shader_context *ctx,
- const struct tgsi_full_declaration *decl)
+void si_declare_compute_memory(struct si_shader_context *ctx,
+ const struct tgsi_full_declaration *decl)
{
struct si_shader_selector *sel = ctx->shader->selector;
- struct gallivm_state *gallivm = &ctx->gallivm;
LLVMTypeRef i8p = LLVMPointerType(ctx->i8, LOCAL_ADDR_SPACE);
LLVMValueRef var;
@@ -1696,13 +1921,13 @@
assert(decl->Range.First == decl->Range.Last);
assert(!ctx->shared_memory);
- var = LLVMAddGlobalInAddressSpace(gallivm->module,
+ var = LLVMAddGlobalInAddressSpace(ctx->ac.module,
LLVMArrayType(ctx->i8, sel->local_size),
"compute_lds",
LOCAL_ADDR_SPACE);
LLVMSetAlignment(var, 4);
- ctx->shared_memory = LLVMBuildBitCast(gallivm->builder, var, i8p, "");
+ ctx->shared_memory = LLVMBuildBitCast(ctx->ac.builder, var, i8p, "");
}
static LLVMValueRef load_const_buffer_desc(struct si_shader_context *ctx, int i)
@@ -1710,8 +1935,35 @@
LLVMValueRef list_ptr = LLVMGetParam(ctx->main_fn,
ctx->param_const_and_shader_buffers);
- return ac_build_indexed_load_const(&ctx->ac, list_ptr,
- LLVMConstInt(ctx->i32, si_get_constbuf_slot(i), 0));
+ return ac_build_load_to_sgpr(&ctx->ac, list_ptr,
+ LLVMConstInt(ctx->i32, si_get_constbuf_slot(i), 0));
+}
+
+static LLVMValueRef load_ubo(struct ac_shader_abi *abi, LLVMValueRef index)
+{
+ struct si_shader_context *ctx = si_shader_context_from_abi(abi);
+ LLVMValueRef ptr = LLVMGetParam(ctx->main_fn, ctx->param_const_and_shader_buffers);
+
+ index = si_llvm_bound_index(ctx, index, ctx->num_const_buffers);
+ index = LLVMBuildAdd(ctx->ac.builder, index,
+ LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS, 0), "");
+
+ return ac_build_load_to_sgpr(&ctx->ac, ptr, index);
+}
+
+static LLVMValueRef
+load_ssbo(struct ac_shader_abi *abi, LLVMValueRef index, bool write)
+{
+ struct si_shader_context *ctx = si_shader_context_from_abi(abi);
+ LLVMValueRef rsrc_ptr = LLVMGetParam(ctx->main_fn,
+ ctx->param_const_and_shader_buffers);
+
+ index = si_llvm_bound_index(ctx, index, ctx->num_shader_buffers);
+ index = LLVMBuildSub(ctx->ac.builder,
+ LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS - 1, 0),
+ index, "");
+
+ return ac_build_load_to_sgpr(&ctx->ac, rsrc_ptr, index);
}
static LLVMValueRef fetch_constant(
@@ -1721,12 +1973,11 @@
unsigned swizzle)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- struct lp_build_context *base = &bld_base->base;
+ struct si_shader_selector *sel = ctx->shader->selector;
const struct tgsi_ind_register *ireg = &reg->Indirect;
unsigned buf, idx;
LLVMValueRef addr, bufp;
- LLVMValueRef result;
if (swizzle == LP_CHAN_ALL) {
unsigned chan;
@@ -1737,54 +1988,98 @@
return lp_build_gather_values(&ctx->gallivm, values, 4);
}
- buf = reg->Register.Dimension ? reg->Dimension.Index : 0;
+ /* Split 64-bit loads. */
+ if (tgsi_type_is_64bit(type)) {
+ LLVMValueRef lo, hi;
+
+ lo = fetch_constant(bld_base, reg, TGSI_TYPE_UNSIGNED, swizzle);
+ hi = fetch_constant(bld_base, reg, TGSI_TYPE_UNSIGNED, swizzle + 1);
+ return si_llvm_emit_fetch_64bit(bld_base, type, lo, hi);
+ }
+
idx = reg->Register.Index * 4 + swizzle;
+ if (reg->Register.Indirect) {
+ addr = si_get_indirect_index(ctx, ireg, 16, idx * 4);
+ } else {
+ addr = LLVMConstInt(ctx->i32, idx * 4, 0);
+ }
+
+ /* Fast path when user data SGPRs point to constant buffer 0 directly. */
+ if (sel->info.const_buffers_declared == 1 &&
+ sel->info.shader_buffers_declared == 0) {
+ LLVMValueRef ptr =
+ LLVMGetParam(ctx->main_fn, ctx->param_const_and_shader_buffers);
+
+ /* This enables use of s_load_dword and flat_load_dword for const buffer 0
+ * loads, and up to x4 load opcode merging. However, it leads to horrible
+ * code reducing SIMD wave occupancy from 8 to 2 in many cases.
+ *
+ * Using s_buffer_load_dword (x1) seems to be the best option right now.
+ *
+ * LLVM 5.0 on SI doesn't insert a required s_nop between SALU setting
+ * a descriptor and s_buffer_load_dword using it, so we can't expand
+ * the pointer into a full descriptor like below. We have to use
+ * s_load_dword instead. The only case when LLVM 5.0 would select
+ * s_buffer_load_dword (that we have to prevent) is when we use
+ * a literal offset where we don't need bounds checking.
+ */
+ if (ctx->screen->b.chip_class == SI &&
+ HAVE_LLVM < 0x0600 &&
+ !reg->Register.Indirect) {
+ addr = LLVMBuildLShr(ctx->ac.builder, addr, LLVMConstInt(ctx->i32, 2, 0), "");
+ LLVMValueRef result = ac_build_load_invariant(&ctx->ac, ptr, addr);
+ return bitcast(bld_base, type, result);
+ }
+
+ /* Do the bounds checking with a descriptor, because
+ * doing computation and manual bounds checking of 64-bit
+ * addresses generates horrible VALU code with very high
+ * VGPR usage and very low SIMD occupancy.
+ */
+ ptr = LLVMBuildPtrToInt(ctx->ac.builder, ptr, ctx->i64, "");
+ ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ctx->v2i32, "");
+
+ LLVMValueRef desc_elems[] = {
+ LLVMBuildExtractElement(ctx->ac.builder, ptr, ctx->i32_0, ""),
+ LLVMBuildExtractElement(ctx->ac.builder, ptr, ctx->i32_1, ""),
+ LLVMConstInt(ctx->i32, (sel->info.const_file_max[0] + 1) * 16, 0),
+ LLVMConstInt(ctx->i32,
+ S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
+ S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
+ S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+ S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32), 0)
+ };
+ LLVMValueRef desc = ac_build_gather_values(&ctx->ac, desc_elems, 4);
+ LLVMValueRef result = buffer_load_const(ctx, desc, addr);
+ return bitcast(bld_base, type, result);
+ }
- if (reg->Register.Dimension && reg->Dimension.Indirect) {
+ assert(reg->Register.Dimension);
+ buf = reg->Dimension.Index;
+
+ if (reg->Dimension.Indirect) {
LLVMValueRef ptr = LLVMGetParam(ctx->main_fn, ctx->param_const_and_shader_buffers);
LLVMValueRef index;
index = si_get_bounded_indirect_index(ctx, &reg->DimIndirect,
reg->Dimension.Index,
ctx->num_const_buffers);
- index = LLVMBuildAdd(ctx->gallivm.builder, index,
+ index = LLVMBuildAdd(ctx->ac.builder, index,
LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS, 0), "");
- bufp = ac_build_indexed_load_const(&ctx->ac, ptr, index);
+ bufp = ac_build_load_to_sgpr(&ctx->ac, ptr, index);
} else
bufp = load_const_buffer_desc(ctx, buf);
- if (reg->Register.Indirect) {
- addr = ctx->addrs[ireg->Index][ireg->Swizzle];
- addr = LLVMBuildLoad(base->gallivm->builder, addr, "load addr reg");
- addr = lp_build_mul_imm(&bld_base->uint_bld, addr, 16);
- addr = lp_build_add(&bld_base->uint_bld, addr,
- LLVMConstInt(ctx->i32, idx * 4, 0));
- } else {
- addr = LLVMConstInt(ctx->i32, idx * 4, 0);
- }
-
- result = buffer_load_const(ctx, bufp, addr);
-
- if (!tgsi_type_is_64bit(type))
- result = bitcast(bld_base, type, result);
- else {
- LLVMValueRef addr2, result2;
-
- addr2 = lp_build_add(&bld_base->uint_bld, addr,
- LLVMConstInt(ctx->i32, 4, 0));
- result2 = buffer_load_const(ctx, bufp, addr2);
-
- result = si_llvm_emit_fetch_64bit(bld_base, type,
- result, result2);
- }
- return result;
+ return bitcast(bld_base, type, buffer_load_const(ctx, bufp, addr));
}
/* Upper 16 bits must be zero. */
static LLVMValueRef si_llvm_pack_two_int16(struct si_shader_context *ctx,
LLVMValueRef val[2])
{
- return LLVMBuildOr(ctx->gallivm.builder, val[0],
- LLVMBuildShl(ctx->gallivm.builder, val[1],
+ return LLVMBuildOr(ctx->ac.builder, val[0],
+ LLVMBuildShl(ctx->ac.builder, val[1],
LLVMConstInt(ctx->i32, 16, 0),
""), "");
}
@@ -1794,7 +2089,7 @@
LLVMValueRef val[2])
{
LLVMValueRef v[2] = {
- LLVMBuildAnd(ctx->gallivm.builder, val[0],
+ LLVMBuildAnd(ctx->ac.builder, val[0],
LLVMConstInt(ctx->i32, 0xffff, 0), ""),
val[1],
};
@@ -1809,7 +2104,7 @@
{
struct si_shader_context *ctx = si_shader_context(bld_base);
struct lp_build_context *base = &bld_base->base;
- LLVMBuilderRef builder = ctx->gallivm.builder;
+ LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef val[4];
unsigned spi_shader_col_format = V_028714_SPI_SHADER_32_ABGR;
unsigned chan;
@@ -1878,9 +2173,7 @@
LLVMValueRef packed;
packed = ac_build_cvt_pkrtz_f16(&ctx->ac, pack_args);
- args->out[chan] =
- LLVMBuildBitCast(ctx->gallivm.builder,
- packed, ctx->f32, "");
+ args->out[chan] = ac_to_float(&ctx->ac, packed);
}
break;
@@ -1896,10 +2189,8 @@
}
args->compr = 1; /* COMPR flag */
- args->out[0] = bitcast(bld_base, TGSI_TYPE_FLOAT,
- si_llvm_pack_two_int16(ctx, val));
- args->out[1] = bitcast(bld_base, TGSI_TYPE_FLOAT,
- si_llvm_pack_two_int16(ctx, val+2));
+ args->out[0] = ac_to_float(&ctx->ac, si_llvm_pack_two_int16(ctx, val));
+ args->out[1] = ac_to_float(&ctx->ac, si_llvm_pack_two_int16(ctx, val+2));
break;
case V_028714_SPI_SHADER_SNORM16_ABGR:
@@ -1918,17 +2209,15 @@
val[chan] = LLVMBuildFAdd(builder, val[chan],
LLVMBuildSelect(builder,
LLVMBuildFCmp(builder, LLVMRealOGE,
- val[chan], base->zero, ""),
+ val[chan], ctx->ac.f32_0, ""),
LLVMConstReal(ctx->f32, 0.5),
LLVMConstReal(ctx->f32, -0.5), ""), "");
val[chan] = LLVMBuildFPToSI(builder, val[chan], ctx->i32, "");
}
args->compr = 1; /* COMPR flag */
- args->out[0] = bitcast(bld_base, TGSI_TYPE_FLOAT,
- si_llvm_pack_two_int32_as_int16(ctx, val));
- args->out[1] = bitcast(bld_base, TGSI_TYPE_FLOAT,
- si_llvm_pack_two_int32_as_int16(ctx, val+2));
+ args->out[0] = ac_to_float(&ctx->ac, si_llvm_pack_two_int32_as_int16(ctx, val));
+ args->out[1] = ac_to_float(&ctx->ac, si_llvm_pack_two_int32_as_int16(ctx, val+2));
break;
case V_028714_SPI_SHADER_UINT16_ABGR: {
@@ -1939,17 +2228,15 @@
/* Clamp. */
for (chan = 0; chan < 4; chan++) {
- val[chan] = bitcast(bld_base, TGSI_TYPE_UNSIGNED, values[chan]);
+ val[chan] = ac_to_integer(&ctx->ac, values[chan]);
val[chan] = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_UMIN,
val[chan],
chan == 3 ? max_alpha : max_rgb);
}
args->compr = 1; /* COMPR flag */
- args->out[0] = bitcast(bld_base, TGSI_TYPE_FLOAT,
- si_llvm_pack_two_int16(ctx, val));
- args->out[1] = bitcast(bld_base, TGSI_TYPE_FLOAT,
- si_llvm_pack_two_int16(ctx, val+2));
+ args->out[0] = ac_to_float(&ctx->ac, si_llvm_pack_two_int16(ctx, val));
+ args->out[1] = ac_to_float(&ctx->ac, si_llvm_pack_two_int16(ctx, val+2));
break;
}
@@ -1965,7 +2252,7 @@
/* Clamp. */
for (chan = 0; chan < 4; chan++) {
- val[chan] = bitcast(bld_base, TGSI_TYPE_UNSIGNED, values[chan]);
+ val[chan] = ac_to_integer(&ctx->ac, values[chan]);
val[chan] = lp_build_emit_llvm_binary(bld_base,
TGSI_OPCODE_IMIN,
val[chan], chan == 3 ? max_alpha : max_rgb);
@@ -1975,10 +2262,8 @@
}
args->compr = 1; /* COMPR flag */
- args->out[0] = bitcast(bld_base, TGSI_TYPE_FLOAT,
- si_llvm_pack_two_int32_as_int16(ctx, val));
- args->out[1] = bitcast(bld_base, TGSI_TYPE_FLOAT,
- si_llvm_pack_two_int32_as_int16(ctx, val+2));
+ args->out[0] = ac_to_float(&ctx->ac, si_llvm_pack_two_int32_as_int16(ctx, val));
+ args->out[1] = ac_to_float(&ctx->ac, si_llvm_pack_two_int32_as_int16(ctx, val+2));
break;
}
@@ -2018,26 +2303,25 @@
unsigned samplemask_param)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- struct gallivm_state *gallivm = &ctx->gallivm;
LLVMValueRef coverage;
/* alpha = alpha * popcount(coverage) / SI_NUM_SMOOTH_AA_SAMPLES */
coverage = LLVMGetParam(ctx->main_fn,
samplemask_param);
- coverage = bitcast(bld_base, TGSI_TYPE_SIGNED, coverage);
+ coverage = ac_to_integer(&ctx->ac, coverage);
- coverage = lp_build_intrinsic(gallivm->builder, "llvm.ctpop.i32",
+ coverage = lp_build_intrinsic(ctx->ac.builder, "llvm.ctpop.i32",
ctx->i32,
&coverage, 1, LP_FUNC_ATTR_READNONE);
- coverage = LLVMBuildUIToFP(gallivm->builder, coverage,
+ coverage = LLVMBuildUIToFP(ctx->ac.builder, coverage,
ctx->f32, "");
- coverage = LLVMBuildFMul(gallivm->builder, coverage,
+ coverage = LLVMBuildFMul(ctx->ac.builder, coverage,
LLVMConstReal(ctx->f32,
1.0 / SI_NUM_SMOOTH_AA_SAMPLES), "");
- return LLVMBuildFMul(gallivm->builder, alpha, coverage, "");
+ return LLVMBuildFMul(ctx->ac.builder, alpha, coverage, "");
}
static void si_llvm_emit_clipvertex(struct lp_build_tgsi_context *bld_base,
@@ -2052,7 +2336,7 @@
LLVMValueRef ptr = LLVMGetParam(ctx->main_fn, ctx->param_rw_buffers);
LLVMValueRef constbuf_index = LLVMConstInt(ctx->i32,
SI_VS_CONST_CLIP_PLANES, 0);
- LLVMValueRef const_resource = ac_build_indexed_load_const(&ctx->ac, ptr, constbuf_index);
+ LLVMValueRef const_resource = ac_build_load_to_sgpr(&ctx->ac, ptr, constbuf_index);
for (reg_index = 0; reg_index < 2; reg_index ++) {
struct ac_export_args *args = &pos[2 + reg_index];
@@ -2112,8 +2396,6 @@
struct pipe_stream_output *stream_out,
struct si_shader_output_values *shader_out)
{
- struct gallivm_state *gallivm = &ctx->gallivm;
- LLVMBuilderRef builder = gallivm->builder;
unsigned buf_idx = stream_out->output_buffer;
unsigned start = stream_out->start_component;
unsigned num_comps = stream_out->num_components;
@@ -2127,9 +2409,7 @@
for (int j = 0; j < num_comps; j++) {
assert(stream_out->stream == shader_out->vertex_stream[start + j]);
- out[j] = LLVMBuildBitCast(builder,
- shader_out->values[start + j],
- ctx->i32, "");
+ out[j] = ac_to_integer(&ctx->ac, shader_out->values[start + j]);
}
/* Pack the output. */
@@ -2144,7 +2424,7 @@
case 4: /* as v4i32 */
vdata = LLVMGetUndef(LLVMVectorType(ctx->i32, util_next_power_of_two(num_comps)));
for (int j = 0; j < num_comps; j++) {
- vdata = LLVMBuildInsertElement(builder, vdata, out[j],
+ vdata = LLVMBuildInsertElement(ctx->ac.builder, vdata, out[j],
LLVMConstInt(ctx->i32, j, 0), "");
}
break;
@@ -2167,8 +2447,7 @@
{
struct si_shader_selector *sel = ctx->shader->selector;
struct pipe_stream_output_info *so = &sel->so;
- struct gallivm_state *gallivm = &ctx->gallivm;
- LLVMBuilderRef builder = gallivm->builder;
+ LLVMBuilderRef builder = ctx->ac.builder;
int i;
struct lp_build_if_state if_ctx;
@@ -2185,7 +2464,7 @@
/* Emit the streamout code conditionally. This actually avoids
* out-of-bounds buffer access. The hw tells us via the SGPR
* (so_vtx_count) which threads are allowed to emit streamout data. */
- lp_build_if(&if_ctx, gallivm, can_emit);
+ lp_build_if(&if_ctx, &ctx->gallivm, can_emit);
{
/* The buffer offset is computed as follows:
* ByteOffset = streamout_offset[buffer_id]*4 +
@@ -2214,7 +2493,7 @@
LLVMValueRef offset = LLVMConstInt(ctx->i32,
SI_VS_STREAMOUT_BUF0 + i, 0);
- so_buffers[i] = ac_build_indexed_load_const(&ctx->ac, buf_ptr, offset);
+ so_buffers[i] = ac_build_load_to_sgpr(&ctx->ac, buf_ptr, offset);
LLVMValueRef so_offset = LLVMGetParam(ctx->main_fn,
ctx->param_streamout_offset[i]);
@@ -2306,7 +2585,6 @@
{
struct si_shader_context *ctx = si_shader_context(bld_base);
struct si_shader *shader = ctx->shader;
- struct lp_build_context *base = &bld_base->base;
struct ac_export_args pos_args[4] = {};
LLVMValueRef psize_value = NULL, edgeflag_value = NULL, layer_value = NULL, viewport_index_value = NULL;
unsigned pos_idx;
@@ -2355,10 +2633,10 @@
pos_args[0].done = 0; /* last export? */
pos_args[0].target = V_008DFC_SQ_EXP_POS;
pos_args[0].compr = 0; /* COMPR flag */
- pos_args[0].out[0] = base->zero; /* X */
- pos_args[0].out[1] = base->zero; /* Y */
- pos_args[0].out[2] = base->zero; /* Z */
- pos_args[0].out[3] = base->one; /* W */
+ pos_args[0].out[0] = ctx->ac.f32_0; /* X */
+ pos_args[0].out[1] = ctx->ac.f32_0; /* Y */
+ pos_args[0].out[2] = ctx->ac.f32_0; /* Z */
+ pos_args[0].out[3] = ctx->ac.f32_1; /* W */
}
/* Write the misc vector (point size, edgeflag, layer, viewport). */
@@ -2374,10 +2652,10 @@
pos_args[1].done = 0; /* last export? */
pos_args[1].target = V_008DFC_SQ_EXP_POS + 1;
pos_args[1].compr = 0; /* COMPR flag */
- pos_args[1].out[0] = base->zero; /* X */
- pos_args[1].out[1] = base->zero; /* Y */
- pos_args[1].out[2] = base->zero; /* Z */
- pos_args[1].out[3] = base->zero; /* W */
+ pos_args[1].out[0] = ctx->ac.f32_0; /* X */
+ pos_args[1].out[1] = ctx->ac.f32_0; /* Y */
+ pos_args[1].out[2] = ctx->ac.f32_0; /* Z */
+ pos_args[1].out[3] = ctx->ac.f32_0; /* W */
if (shader->selector->info.writes_psize)
pos_args[1].out[0] = psize_value;
@@ -2385,17 +2663,15 @@
if (shader->selector->info.writes_edgeflag) {
/* The output is a float, but the hw expects an integer
* with the first bit containing the edge flag. */
- edgeflag_value = LLVMBuildFPToUI(ctx->gallivm.builder,
+ edgeflag_value = LLVMBuildFPToUI(ctx->ac.builder,
edgeflag_value,
ctx->i32, "");
- edgeflag_value = lp_build_min(&bld_base->int_bld,
+ edgeflag_value = ac_build_umin(&ctx->ac,
edgeflag_value,
ctx->i32_1);
/* The LLVM intrinsic expects a float. */
- pos_args[1].out[1] = LLVMBuildBitCast(ctx->gallivm.builder,
- edgeflag_value,
- ctx->f32, "");
+ pos_args[1].out[1] = ac_to_float(&ctx->ac, edgeflag_value);
}
if (ctx->screen->b.chip_class >= GFX9) {
@@ -2408,13 +2684,12 @@
if (shader->selector->info.writes_viewport_index) {
LLVMValueRef v = viewport_index_value;
- v = bitcast(bld_base, TGSI_TYPE_UNSIGNED, v);
- v = LLVMBuildShl(ctx->gallivm.builder, v,
+ v = ac_to_integer(&ctx->ac, v);
+ v = LLVMBuildShl(ctx->ac.builder, v,
LLVMConstInt(ctx->i32, 16, 0), "");
- v = LLVMBuildOr(ctx->gallivm.builder, v,
- bitcast(bld_base, TGSI_TYPE_UNSIGNED,
- pos_args[1].out[2]), "");
- pos_args[1].out[2] = bitcast(bld_base, TGSI_TYPE_FLOAT, v);
+ v = LLVMBuildOr(ctx->ac.builder, v,
+ ac_to_integer(&ctx->ac, pos_args[1].out[2]), "");
+ pos_args[1].out[2] = ac_to_float(&ctx->ac, v);
pos_args[1].enabled_channels |= 1 << 2;
}
} else {
@@ -2458,7 +2733,6 @@
static void si_copy_tcs_inputs(struct lp_build_tgsi_context *bld_base)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- struct gallivm_state *gallivm = &ctx->gallivm;
LLVMValueRef invocation_id, buffer, buffer_offset;
LLVMValueRef lds_vertex_stride, lds_vertex_offset, lds_base;
uint64_t inputs;
@@ -2467,17 +2741,17 @@
buffer = desc_from_addr_base64k(ctx, ctx->param_tcs_offchip_addr_base64k);
buffer_offset = LLVMGetParam(ctx->main_fn, ctx->param_tcs_offchip_offset);
- lds_vertex_stride = unpack_param(ctx, ctx->param_vs_state_bits, 24, 8);
- lds_vertex_offset = LLVMBuildMul(gallivm->builder, invocation_id,
+ lds_vertex_stride = get_tcs_in_vertex_dw_stride(ctx);
+ lds_vertex_offset = LLVMBuildMul(ctx->ac.builder, invocation_id,
lds_vertex_stride, "");
lds_base = get_tcs_in_current_patch_offset(ctx);
- lds_base = LLVMBuildAdd(gallivm->builder, lds_base, lds_vertex_offset, "");
+ lds_base = LLVMBuildAdd(ctx->ac.builder, lds_base, lds_vertex_offset, "");
inputs = ctx->shader->key.mono.u.ff_tcs_inputs_to_copy;
while (inputs) {
unsigned i = u_bit_scan64(&inputs);
- LLVMValueRef lds_ptr = LLVMBuildAdd(gallivm->builder, lds_base,
+ LLVMValueRef lds_ptr = LLVMBuildAdd(ctx->ac.builder, lds_base,
LLVMConstInt(ctx->i32, 4 * i, 0),
"");
@@ -2497,10 +2771,11 @@
static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
LLVMValueRef rel_patch_id,
LLVMValueRef invocation_id,
- LLVMValueRef tcs_out_current_patch_data_offset)
+ LLVMValueRef tcs_out_current_patch_data_offset,
+ LLVMValueRef invoc0_tf_outer[4],
+ LLVMValueRef invoc0_tf_inner[2])
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- struct gallivm_state *gallivm = &ctx->gallivm;
struct si_shader *shader = ctx->shader;
unsigned tess_inner_index, tess_outer_index;
LLVMValueRef lds_base, lds_inner, lds_outer, byteoffset, buffer;
@@ -2508,7 +2783,9 @@
unsigned stride, outer_comps, inner_comps, i, offset;
struct lp_build_if_state if_ctx, inner_if_ctx;
- si_llvm_emit_barrier(NULL, bld_base, NULL);
+ /* Add a barrier before loading tess factors from LDS. */
+ if (!shader->key.part.tcs.epilog.invoc0_tess_factors_are_def)
+ si_llvm_emit_barrier(NULL, bld_base, NULL);
/* Do this only for invocation 0, because the tess levels are per-patch,
* not per-vertex.
@@ -2516,8 +2793,8 @@
* This can't jump, because invocation 0 executes this. It should
* at least mask out the loads and stores for other invocations.
*/
- lp_build_if(&if_ctx, gallivm,
- LLVMBuildICmp(gallivm->builder, LLVMIntEQ,
+ lp_build_if(&if_ctx, &ctx->gallivm,
+ LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ,
invocation_id, ctx->i32_0, ""));
/* Determine the layout of one tess factor element in the buffer. */
@@ -2542,32 +2819,32 @@
return;
}
- /* Load tess_inner and tess_outer from LDS.
- * Any invocation can write them, so we can't get them from a temporary.
- */
- tess_inner_index = si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSINNER, 0);
- tess_outer_index = si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSOUTER, 0);
-
- lds_base = tcs_out_current_patch_data_offset;
- lds_inner = LLVMBuildAdd(gallivm->builder, lds_base,
- LLVMConstInt(ctx->i32,
- tess_inner_index * 4, 0), "");
- lds_outer = LLVMBuildAdd(gallivm->builder, lds_base,
- LLVMConstInt(ctx->i32,
- tess_outer_index * 4, 0), "");
-
for (i = 0; i < 4; i++) {
inner[i] = LLVMGetUndef(ctx->i32);
outer[i] = LLVMGetUndef(ctx->i32);
}
- if (shader->key.part.tcs.epilog.prim_mode == PIPE_PRIM_LINES) {
- /* For isolines, the hardware expects tess factors in the
- * reverse order from what GLSL / TGSI specify.
- */
- outer[0] = out[1] = lds_load(bld_base, TGSI_TYPE_SIGNED, 0, lds_outer);
- outer[1] = out[0] = lds_load(bld_base, TGSI_TYPE_SIGNED, 1, lds_outer);
+ if (shader->key.part.tcs.epilog.invoc0_tess_factors_are_def) {
+ /* Tess factors are in VGPRs. */
+ for (i = 0; i < outer_comps; i++)
+ outer[i] = out[i] = invoc0_tf_outer[i];
+ for (i = 0; i < inner_comps; i++)
+ inner[i] = out[outer_comps+i] = invoc0_tf_inner[i];
} else {
+ /* Load tess_inner and tess_outer from LDS.
+ * Any invocation can write them, so we can't get them from a temporary.
+ */
+ tess_inner_index = si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSINNER, 0);
+ tess_outer_index = si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSOUTER, 0);
+
+ lds_base = tcs_out_current_patch_data_offset;
+ lds_inner = LLVMBuildAdd(ctx->ac.builder, lds_base,
+ LLVMConstInt(ctx->i32,
+ tess_inner_index * 4, 0), "");
+ lds_outer = LLVMBuildAdd(ctx->ac.builder, lds_base,
+ LLVMConstInt(ctx->i32,
+ tess_outer_index * 4, 0), "");
+
for (i = 0; i < outer_comps; i++) {
outer[i] = out[i] =
lds_load(bld_base, TGSI_TYPE_SIGNED, i, lds_outer);
@@ -2578,12 +2855,21 @@
}
}
+ if (shader->key.part.tcs.epilog.prim_mode == PIPE_PRIM_LINES) {
+ /* For isolines, the hardware expects tess factors in the
+ * reverse order from what GLSL / TGSI specify.
+ */
+ LLVMValueRef tmp = out[0];
+ out[0] = out[1];
+ out[1] = tmp;
+ }
+
/* Convert the outputs to vectors for stores. */
- vec0 = lp_build_gather_values(gallivm, out, MIN2(stride, 4));
+ vec0 = lp_build_gather_values(&ctx->gallivm, out, MIN2(stride, 4));
vec1 = NULL;
if (stride > 4)
- vec1 = lp_build_gather_values(gallivm, out+4, stride - 4);
+ vec1 = lp_build_gather_values(&ctx->gallivm, out+4, stride - 4);
/* Get the buffer. */
buffer = desc_from_addr_base64k(ctx, ctx->param_tcs_factor_addr_base64k);
@@ -2591,11 +2877,11 @@
/* Get the offset. */
tf_base = LLVMGetParam(ctx->main_fn,
ctx->param_tcs_factor_offset);
- byteoffset = LLVMBuildMul(gallivm->builder, rel_patch_id,
+ byteoffset = LLVMBuildMul(ctx->ac.builder, rel_patch_id,
LLVMConstInt(ctx->i32, 4 * stride, 0), "");
- lp_build_if(&inner_if_ctx, gallivm,
- LLVMBuildICmp(gallivm->builder, LLVMIntEQ,
+ lp_build_if(&inner_if_ctx, &ctx->gallivm,
+ LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ,
rel_patch_id, ctx->i32_0, ""));
/* Store the dynamic HS control word. */
@@ -2634,7 +2920,7 @@
tf_outer_offset = get_tcs_tes_buffer_address(ctx, rel_patch_id, NULL,
LLVMConstInt(ctx->i32, param_outer, 0));
- outer_vec = lp_build_gather_values(gallivm, outer,
+ outer_vec = lp_build_gather_values(&ctx->gallivm, outer,
util_next_power_of_two(outer_comps));
ac_build_buffer_store_dword(&ctx->ac, buf, outer_vec,
@@ -2647,7 +2933,7 @@
LLVMConstInt(ctx->i32, param_inner, 0));
inner_vec = inner_comps == 1 ? inner[0] :
- lp_build_gather_values(gallivm, inner, inner_comps);
+ lp_build_gather_values(&ctx->gallivm, inner, inner_comps);
ac_build_buffer_store_dword(&ctx->ac, buf, inner_vec,
inner_comps, tf_inner_offset,
base, 0, 1, 0, true, false);
@@ -2661,7 +2947,7 @@
si_insert_input_ret(struct si_shader_context *ctx, LLVMValueRef ret,
unsigned param, unsigned return_index)
{
- return LLVMBuildInsertValue(ctx->gallivm.builder, ret,
+ return LLVMBuildInsertValue(ctx->ac.builder, ret,
LLVMGetParam(ctx->main_fn, param),
return_index, "");
}
@@ -2670,11 +2956,11 @@
si_insert_input_ret_float(struct si_shader_context *ctx, LLVMValueRef ret,
unsigned param, unsigned return_index)
{
- LLVMBuilderRef builder = ctx->gallivm.builder;
+ LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef p = LLVMGetParam(ctx->main_fn, param);
return LLVMBuildInsertValue(builder, ret,
- LLVMBuildBitCast(builder, p, ctx->f32, ""),
+ ac_to_float(&ctx->ac, p),
return_index, "");
}
@@ -2682,7 +2968,7 @@
si_insert_input_ptr_as_2xi32(struct si_shader_context *ctx, LLVMValueRef ret,
unsigned param, unsigned return_index)
{
- LLVMBuilderRef builder = ctx->gallivm.builder;
+ LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef ptr, lo, hi;
ptr = LLVMGetParam(ctx->main_fn, param);
@@ -2698,7 +2984,7 @@
static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- LLVMBuilderRef builder = ctx->gallivm.builder;
+ LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef rel_patch_id, invocation_id, tf_lds_offset;
si_copy_tcs_inputs(bld_base);
@@ -2760,13 +3046,30 @@
}
/* VGPRs */
- rel_patch_id = bitcast(bld_base, TGSI_TYPE_FLOAT, rel_patch_id);
- invocation_id = bitcast(bld_base, TGSI_TYPE_FLOAT, invocation_id);
- tf_lds_offset = bitcast(bld_base, TGSI_TYPE_FLOAT, tf_lds_offset);
+ rel_patch_id = ac_to_float(&ctx->ac, rel_patch_id);
+ invocation_id = ac_to_float(&ctx->ac, invocation_id);
+ tf_lds_offset = ac_to_float(&ctx->ac, tf_lds_offset);
+
+ /* Leave a hole corresponding to the two input VGPRs. This ensures that
+ * the invocation_id output does not alias the param_tcs_rel_ids input,
+ * which saves a V_MOV on gfx9.
+ */
+ vgpr += 2;
ret = LLVMBuildInsertValue(builder, ret, rel_patch_id, vgpr++, "");
ret = LLVMBuildInsertValue(builder, ret, invocation_id, vgpr++, "");
- ret = LLVMBuildInsertValue(builder, ret, tf_lds_offset, vgpr++, "");
+
+ if (ctx->shader->selector->tcs_info.tessfactors_are_def_in_all_invocs) {
+ vgpr++; /* skip the tess factor LDS offset */
+ for (unsigned i = 0; i < 6; i++) {
+ LLVMValueRef value =
+ LLVMBuildLoad(builder, ctx->invoc0_tess_factors[i], "");
+ value = ac_to_float(&ctx->ac, value);
+ ret = LLVMBuildInsertValue(builder, ret, value, vgpr++, "");
+ }
+ } else {
+ ret = LLVMBuildInsertValue(builder, ret, tf_lds_offset, vgpr++, "");
+ }
ctx->return_value = ret;
}
@@ -2775,12 +3078,17 @@
{
LLVMValueRef ret = ctx->return_value;
- ret = si_insert_input_ptr_as_2xi32(ctx, ret, ctx->param_rw_buffers, 0);
ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_offset, 2);
ret = si_insert_input_ret(ctx, ret, ctx->param_merged_wave_info, 3);
ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_factor_offset, 4);
ret = si_insert_input_ret(ctx, ret, ctx->param_merged_scratch_offset, 5);
+ ret = si_insert_input_ptr_as_2xi32(ctx, ret, ctx->param_rw_buffers,
+ 8 + SI_SGPR_RW_BUFFERS);
+ ret = si_insert_input_ptr_as_2xi32(ctx, ret,
+ ctx->param_bindless_samplers_and_images,
+ 8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES);
+
ret = si_insert_input_ret(ctx, ret, ctx->param_vs_state_bits,
8 + SI_SGPR_VS_STATE_BITS);
ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_layout,
@@ -2813,12 +3121,16 @@
{
LLVMValueRef ret = ctx->return_value;
- ret = si_insert_input_ptr_as_2xi32(ctx, ret, ctx->param_rw_buffers, 0);
ret = si_insert_input_ret(ctx, ret, ctx->param_gs2vs_offset, 2);
ret = si_insert_input_ret(ctx, ret, ctx->param_merged_wave_info, 3);
-
ret = si_insert_input_ret(ctx, ret, ctx->param_merged_scratch_offset, 5);
+ ret = si_insert_input_ptr_as_2xi32(ctx, ret, ctx->param_rw_buffers,
+ 8 + SI_SGPR_RW_BUFFERS);
+ ret = si_insert_input_ptr_as_2xi32(ctx, ret,
+ ctx->param_bindless_samplers_and_images,
+ 8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES);
+
unsigned desc_param = ctx->param_vs_state_bits + 1;
ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param,
8 + GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS);
@@ -2838,13 +3150,11 @@
struct si_shader_context *ctx = si_shader_context(bld_base);
struct si_shader *shader = ctx->shader;
struct tgsi_shader_info *info = &shader->selector->info;
- struct gallivm_state *gallivm = &ctx->gallivm;
unsigned i, chan;
LLVMValueRef vertex_id = LLVMGetParam(ctx->main_fn,
ctx->param_rel_auto_id);
- LLVMValueRef vertex_dw_stride =
- unpack_param(ctx, ctx->param_vs_state_bits, 24, 8);
- LLVMValueRef base_dw_addr = LLVMBuildMul(gallivm->builder, vertex_id,
+ LLVMValueRef vertex_dw_stride = get_tcs_in_vertex_dw_stride(ctx);
+ LLVMValueRef base_dw_addr = LLVMBuildMul(ctx->ac.builder, vertex_id,
vertex_dw_stride, "");
/* Write outputs to LDS. The next shader (TCS aka HS) will read
@@ -2874,12 +3184,12 @@
continue;
int param = si_shader_io_get_unique_index(name, index);
- LLVMValueRef dw_addr = LLVMBuildAdd(gallivm->builder, base_dw_addr,
+ LLVMValueRef dw_addr = LLVMBuildAdd(ctx->ac.builder, base_dw_addr,
LLVMConstInt(ctx->i32, param * 4, 0), "");
for (chan = 0; chan < 4; chan++) {
lds_store(bld_base, chan, dw_addr,
- LLVMBuildLoad(gallivm->builder, out_ptr[chan], ""));
+ LLVMBuildLoad(ctx->ac.builder, out_ptr[chan], ""));
}
}
@@ -2890,7 +3200,6 @@
static void si_llvm_emit_es_epilogue(struct lp_build_tgsi_context *bld_base)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- struct gallivm_state *gallivm = &ctx->gallivm;
struct si_shader *es = ctx->shader;
struct tgsi_shader_info *info = &es->selector->info;
LLVMValueRef soffset = LLVMGetParam(ctx->main_fn,
@@ -2903,10 +3212,10 @@
unsigned itemsize_dw = es->selector->esgs_itemsize / 4;
LLVMValueRef vertex_idx = ac_get_thread_id(&ctx->ac);
LLVMValueRef wave_idx = unpack_param(ctx, ctx->param_merged_wave_info, 24, 4);
- vertex_idx = LLVMBuildOr(gallivm->builder, vertex_idx,
- LLVMBuildMul(gallivm->builder, wave_idx,
+ vertex_idx = LLVMBuildOr(ctx->ac.builder, vertex_idx,
+ LLVMBuildMul(ctx->ac.builder, wave_idx,
LLVMConstInt(ctx->i32, 64, false), ""), "");
- lds_base = LLVMBuildMul(gallivm->builder, vertex_idx,
+ lds_base = LLVMBuildMul(ctx->ac.builder, vertex_idx,
LLVMConstInt(ctx->i32, itemsize_dw, 0), "");
}
@@ -2922,8 +3231,8 @@
info->output_semantic_index[i]);
for (chan = 0; chan < 4; chan++) {
- LLVMValueRef out_val = LLVMBuildLoad(gallivm->builder, out_ptr[chan], "");
- out_val = LLVMBuildBitCast(gallivm->builder, out_val, ctx->i32, "");
+ LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder, out_ptr[chan], "");
+ out_val = ac_to_integer(&ctx->ac, out_val);
/* GFX9 has the ESGS ring in LDS. */
if (ctx->screen->b.chip_class >= GFX9) {
@@ -2962,15 +3271,17 @@
lp_build_endif(&ctx->merged_wrap_if_state);
}
-static void si_llvm_emit_vs_epilogue(struct lp_build_tgsi_context *bld_base)
+static void si_llvm_emit_vs_epilogue(struct ac_shader_abi *abi,
+ unsigned max_outputs,
+ LLVMValueRef *addrs)
{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- struct gallivm_state *gallivm = &ctx->gallivm;
+ struct si_shader_context *ctx = si_shader_context_from_abi(abi);
struct tgsi_shader_info *info = &ctx->shader->selector->info;
struct si_shader_output_values *outputs = NULL;
int i,j;
assert(!ctx->shader->is_gs_copy_shader);
+ assert(info->num_outputs <= max_outputs);
outputs = MALLOC((info->num_outputs + 1) * sizeof(outputs[0]));
@@ -2995,16 +3306,16 @@
/* The state is in the first bit of the user SGPR. */
cond = LLVMGetParam(ctx->main_fn,
ctx->param_vs_state_bits);
- cond = LLVMBuildTrunc(gallivm->builder, cond,
+ cond = LLVMBuildTrunc(ctx->ac.builder, cond,
ctx->i1, "");
- lp_build_if(&if_ctx, gallivm, cond);
+ lp_build_if(&if_ctx, &ctx->gallivm, cond);
}
for (j = 0; j < 4; j++) {
- addr = ctx->outputs[i][j];
- val = LLVMBuildLoad(gallivm->builder, addr, "");
+ addr = addrs[4 * i + j];
+ val = LLVMBuildLoad(ctx->ac.builder, addr, "");
val = ac_build_clamp(&ctx->ac, val);
- LLVMBuildStore(gallivm->builder, val, addr);
+ LLVMBuildStore(ctx->ac.builder, val, addr);
}
}
@@ -3018,8 +3329,8 @@
for (j = 0; j < 4; j++) {
outputs[i].values[j] =
- LLVMBuildLoad(gallivm->builder,
- ctx->outputs[i][j],
+ LLVMBuildLoad(ctx->ac.builder,
+ addrs[4 * i + j],
"");
outputs[i].vertex_stream[j] =
(info->output_streams[i] >> (2 * j)) & 3;
@@ -3033,8 +3344,7 @@
if (ctx->shader->key.mono.u.vs_export_prim_id) {
outputs[i].semantic_name = TGSI_SEMANTIC_PRIMID;
outputs[i].semantic_index = 0;
- outputs[i].values[0] = bitcast(bld_base, TGSI_TYPE_FLOAT,
- get_primitive_id(bld_base, 0));
+ outputs[i].values[0] = ac_to_float(&ctx->ac, get_primitive_id(ctx, 0));
for (j = 1; j < 4; j++)
outputs[i].values[j] = LLVMConstReal(ctx->f32, 0);
@@ -3043,10 +3353,18 @@
i++;
}
- si_llvm_export_vs(bld_base, outputs, i);
+ si_llvm_export_vs(&ctx->bld_base, outputs, i);
FREE(outputs);
}
+static void si_tgsi_emit_epilogue(struct lp_build_tgsi_context *bld_base)
+{
+ struct si_shader_context *ctx = si_shader_context(bld_base);
+
+ ctx->abi.emit_outputs(&ctx->abi, RADEON_LLVM_MAX_OUTPUTS,
+ &ctx->outputs[0][0]);
+}
+
struct si_ps_exports {
unsigned num;
struct ac_export_args args[10];
@@ -3103,10 +3421,10 @@
if (stencil) {
/* Stencil should be in X[23:16]. */
- stencil = bitcast(bld_base, TGSI_TYPE_UNSIGNED, stencil);
- stencil = LLVMBuildShl(ctx->gallivm.builder, stencil,
+ stencil = ac_to_integer(&ctx->ac, stencil);
+ stencil = LLVMBuildShl(ctx->ac.builder, stencil,
LLVMConstInt(ctx->i32, 16, 0), "");
- args.out[0] = bitcast(bld_base, TGSI_TYPE_FLOAT, stencil);
+ args.out[0] = ac_to_float(&ctx->ac, stencil);
mask |= 0x3;
}
if (samplemask) {
@@ -3148,7 +3466,6 @@
bool is_last, struct si_ps_exports *exp)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- struct lp_build_context *base = &bld_base->base;
int i;
/* Clamp color */
@@ -3158,7 +3475,7 @@
/* Alpha to one */
if (ctx->shader->key.part.ps.epilog.alpha_to_one)
- color[3] = base->one;
+ color[3] = ctx->ac.f32_1;
/* Alpha test */
if (index == 0 &&
@@ -3248,12 +3565,14 @@
*
* The alpha-ref SGPR is returned via its original location.
*/
-static void si_llvm_return_fs_outputs(struct lp_build_tgsi_context *bld_base)
+static void si_llvm_return_fs_outputs(struct ac_shader_abi *abi,
+ unsigned max_outputs,
+ LLVMValueRef *addrs)
{
- struct si_shader_context *ctx = si_shader_context(bld_base);
+ struct si_shader_context *ctx = si_shader_context_from_abi(abi);
struct si_shader *shader = ctx->shader;
struct tgsi_shader_info *info = &shader->selector->info;
- LLVMBuilderRef builder = ctx->gallivm.builder;
+ LLVMBuilderRef builder = ctx->ac.builder;
unsigned i, j, first_vgpr, vgpr;
LLVMValueRef color[8][4] = {};
@@ -3272,22 +3591,22 @@
case TGSI_SEMANTIC_COLOR:
assert(semantic_index < 8);
for (j = 0; j < 4; j++) {
- LLVMValueRef ptr = ctx->outputs[i][j];
+ LLVMValueRef ptr = addrs[4 * i + j];
LLVMValueRef result = LLVMBuildLoad(builder, ptr, "");
color[semantic_index][j] = result;
}
break;
case TGSI_SEMANTIC_POSITION:
depth = LLVMBuildLoad(builder,
- ctx->outputs[i][2], "");
+ addrs[4 * i + 2], "");
break;
case TGSI_SEMANTIC_STENCIL:
stencil = LLVMBuildLoad(builder,
- ctx->outputs[i][1], "");
+ addrs[4 * i + 1], "");
break;
case TGSI_SEMANTIC_SAMPLEMASK:
samplemask = LLVMBuildLoad(builder,
- ctx->outputs[i][0], "");
+ addrs[4 * i + 0], "");
break;
default:
fprintf(stderr, "Warning: SI unhandled fs output type:%d\n",
@@ -3300,9 +3619,9 @@
/* Set SGPRs. */
ret = LLVMBuildInsertValue(builder, ret,
- bitcast(bld_base, TGSI_TYPE_SIGNED,
- LLVMGetParam(ctx->main_fn,
- SI_PARAM_ALPHA_REF)),
+ ac_to_integer(&ctx->ac,
+ LLVMGetParam(ctx->main_fn,
+ SI_PARAM_ALPHA_REF)),
SI_SGPR_ALPHA_REF, "");
/* Set VGPRs */
@@ -3331,55 +3650,12 @@
ctx->return_value = ret;
}
-/* Prevent optimizations (at least of memory accesses) across the current
- * point in the program by emitting empty inline assembly that is marked as
- * having side effects.
- *
- * Optionally, a value can be passed through the inline assembly to prevent
- * LLVM from hoisting calls to ReadNone functions.
- */
-static void emit_optimization_barrier(struct si_shader_context *ctx,
- LLVMValueRef *pvgpr)
-{
- static int counter = 0;
-
- LLVMBuilderRef builder = ctx->gallivm.builder;
- char code[16];
-
- snprintf(code, sizeof(code), "; %d", p_atomic_inc_return(&counter));
-
- if (!pvgpr) {
- LLVMTypeRef ftype = LLVMFunctionType(ctx->voidt, NULL, 0, false);
- LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, "", true, false);
- LLVMBuildCall(builder, inlineasm, NULL, 0, "");
- } else {
- LLVMTypeRef ftype = LLVMFunctionType(ctx->i32, &ctx->i32, 1, false);
- LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, "=v,0", true, false);
- LLVMValueRef vgpr = *pvgpr;
- LLVMTypeRef vgpr_type = LLVMTypeOf(vgpr);
- unsigned vgpr_size = llvm_get_type_size(vgpr_type);
- LLVMValueRef vgpr0;
-
- assert(vgpr_size % 4 == 0);
-
- vgpr = LLVMBuildBitCast(builder, vgpr, LLVMVectorType(ctx->i32, vgpr_size / 4), "");
- vgpr0 = LLVMBuildExtractElement(builder, vgpr, ctx->i32_0, "");
- vgpr0 = LLVMBuildCall(builder, inlineasm, &vgpr0, 1, "");
- vgpr = LLVMBuildInsertElement(builder, vgpr, vgpr0, ctx->i32_0, "");
- vgpr = LLVMBuildBitCast(builder, vgpr, vgpr_type, "");
-
- *pvgpr = vgpr;
- }
-}
-
void si_emit_waitcnt(struct si_shader_context *ctx, unsigned simm16)
{
- struct gallivm_state *gallivm = &ctx->gallivm;
- LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef args[1] = {
LLVMConstInt(ctx->i32, simm16, 0)
};
- lp_build_intrinsic(builder, "llvm.amdgcn.s.waitcnt",
+ lp_build_intrinsic(ctx->ac.builder, "llvm.amdgcn.s.waitcnt",
ctx->voidt, args, 1, 0);
}
@@ -3414,17 +3690,16 @@
struct lp_build_emit_data *emit_data)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- struct gallivm_state *gallivm = &ctx->gallivm;
LLVMValueRef tmp;
- tmp = lp_build_intrinsic(gallivm->builder, "llvm.readcyclecounter",
+ tmp = lp_build_intrinsic(ctx->ac.builder, "llvm.readcyclecounter",
ctx->i64, NULL, 0, 0);
- tmp = LLVMBuildBitCast(gallivm->builder, tmp, ctx->v2i32, "");
+ tmp = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->v2i32, "");
emit_data->output[0] =
- LLVMBuildExtractElement(gallivm->builder, tmp, ctx->i32_0, "");
+ LLVMBuildExtractElement(ctx->ac.builder, tmp, ctx->i32_0, "");
emit_data->output[1] =
- LLVMBuildExtractElement(gallivm->builder, tmp, ctx->i32_1, "");
+ LLVMBuildExtractElement(ctx->ac.builder, tmp, ctx->i32_1, "");
}
LLVMTypeRef si_const_array(LLVMTypeRef elem_type, int num_elements)
@@ -3439,7 +3714,6 @@
struct lp_build_emit_data *emit_data)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- struct gallivm_state *gallivm = &ctx->gallivm;
unsigned opcode = emit_data->info->opcode;
LLVMValueRef val;
int idx;
@@ -3455,9 +3729,8 @@
/* for DDX we want to next X pixel, DDY next Y pixel. */
idx = (opcode == TGSI_OPCODE_DDX || opcode == TGSI_OPCODE_DDX_FINE) ? 1 : 2;
- val = LLVMBuildBitCast(gallivm->builder, emit_data->args[0], ctx->i32, "");
- val = ac_build_ddxy(&ctx->ac, ctx->screen->has_ds_bpermute,
- mask, idx, val);
+ val = ac_to_integer(&ctx->ac, emit_data->args[0]);
+ val = ac_build_ddxy(&ctx->ac, mask, idx, val);
emit_data->output[emit_data->chan] = val;
}
@@ -3471,18 +3744,17 @@
LLVMValueRef interp_ij)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- struct gallivm_state *gallivm = &ctx->gallivm;
LLVMValueRef result[4], a;
unsigned i;
for (i = 0; i < 2; i++) {
- a = LLVMBuildExtractElement(gallivm->builder, interp_ij,
+ a = LLVMBuildExtractElement(ctx->ac.builder, interp_ij,
LLVMConstInt(ctx->i32, i, 0), "");
result[i] = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_DDX, a);
result[2+i] = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_DDY, a);
}
- return lp_build_gather_values(gallivm, result, 4);
+ return lp_build_gather_values(&ctx->gallivm, result, 4);
}
static void interp_fetch_args(
@@ -3490,7 +3762,6 @@
struct lp_build_emit_data *emit_data)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- struct gallivm_state *gallivm = &ctx->gallivm;
const struct tgsi_full_instruction *inst = emit_data->inst;
if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET) {
@@ -3512,19 +3783,44 @@
*/
sample_id = lp_build_emit_fetch(bld_base,
emit_data->inst, 1, TGSI_CHAN_X);
- sample_id = LLVMBuildBitCast(gallivm->builder, sample_id,
- ctx->i32, "");
- sample_position = load_sample_position(ctx, sample_id);
+ sample_id = ac_to_integer(&ctx->ac, sample_id);
+
+ /* Section 8.13.2 (Interpolation Functions) of the OpenGL Shading
+ * Language 4.50 spec says about interpolateAtSample:
+ *
+ * "Returns the value of the input interpolant variable at
+ * the location of sample number sample. If multisample
+ * buffers are not available, the input variable will be
+ * evaluated at the center of the pixel. If sample sample
+ * does not exist, the position used to interpolate the
+ * input variable is undefined."
+ *
+ * This means that sample_id values outside of the valid are
+ * in fact valid input, and the usual mechanism for loading the
+ * sample position doesn't work.
+ */
+ if (ctx->shader->key.mono.u.ps.interpolate_at_sample_force_center) {
+ LLVMValueRef center[4] = {
+ LLVMConstReal(ctx->f32, 0.5),
+ LLVMConstReal(ctx->f32, 0.5),
+ ctx->ac.f32_0,
+ ctx->ac.f32_0,
+ };
+
+ sample_position = lp_build_gather_values(&ctx->gallivm, center, 4);
+ } else {
+ sample_position = load_sample_position(ctx, sample_id);
+ }
- emit_data->args[0] = LLVMBuildExtractElement(gallivm->builder,
+ emit_data->args[0] = LLVMBuildExtractElement(ctx->ac.builder,
sample_position,
ctx->i32_0, "");
- emit_data->args[0] = LLVMBuildFSub(gallivm->builder, emit_data->args[0], halfval, "");
- emit_data->args[1] = LLVMBuildExtractElement(gallivm->builder,
+ emit_data->args[0] = LLVMBuildFSub(ctx->ac.builder, emit_data->args[0], halfval, "");
+ emit_data->args[1] = LLVMBuildExtractElement(ctx->ac.builder,
sample_position,
ctx->i32_1, "");
- emit_data->args[1] = LLVMBuildFSub(gallivm->builder, emit_data->args[1], halfval, "");
+ emit_data->args[1] = LLVMBuildFSub(ctx->ac.builder, emit_data->args[1], halfval, "");
emit_data->arg_count = 2;
}
}
@@ -3535,7 +3831,6 @@
{
struct si_shader_context *ctx = si_shader_context(bld_base);
struct si_shader *shader = ctx->shader;
- struct gallivm_state *gallivm = &ctx->gallivm;
const struct tgsi_shader_info *info = &shader->selector->info;
LLVMValueRef interp_param;
const struct tgsi_full_instruction *inst = emit_data->inst;
@@ -3562,8 +3857,8 @@
input_array_size = info->num_inputs - input_base;
}
- array_idx = get_indirect_index(ctx, &input->Indirect,
- input->Register.Index - input_base);
+ array_idx = si_get_indirect_index(ctx, &input->Indirect,
+ 1, input->Register.Index - input_base);
} else {
input_base = inst->Src[0].Register.Index;
input_array_size = 1;
@@ -3602,32 +3897,29 @@
for (i = 0; i < 2; i++) {
LLVMValueRef ix_ll = LLVMConstInt(ctx->i32, i, 0);
LLVMValueRef iy_ll = LLVMConstInt(ctx->i32, i + 2, 0);
- LLVMValueRef ddx_el = LLVMBuildExtractElement(gallivm->builder,
+ LLVMValueRef ddx_el = LLVMBuildExtractElement(ctx->ac.builder,
ddxy_out, ix_ll, "");
- LLVMValueRef ddy_el = LLVMBuildExtractElement(gallivm->builder,
+ LLVMValueRef ddy_el = LLVMBuildExtractElement(ctx->ac.builder,
ddxy_out, iy_ll, "");
- LLVMValueRef interp_el = LLVMBuildExtractElement(gallivm->builder,
+ LLVMValueRef interp_el = LLVMBuildExtractElement(ctx->ac.builder,
interp_param, ix_ll, "");
LLVMValueRef temp1, temp2;
- interp_el = LLVMBuildBitCast(gallivm->builder, interp_el,
- ctx->f32, "");
+ interp_el = ac_to_float(&ctx->ac, interp_el);
- temp1 = LLVMBuildFMul(gallivm->builder, ddx_el, emit_data->args[0], "");
+ temp1 = LLVMBuildFMul(ctx->ac.builder, ddx_el, emit_data->args[0], "");
- temp1 = LLVMBuildFAdd(gallivm->builder, temp1, interp_el, "");
+ temp1 = LLVMBuildFAdd(ctx->ac.builder, temp1, interp_el, "");
- temp2 = LLVMBuildFMul(gallivm->builder, ddy_el, emit_data->args[1], "");
+ temp2 = LLVMBuildFMul(ctx->ac.builder, ddy_el, emit_data->args[1], "");
- ij_out[i] = LLVMBuildFAdd(gallivm->builder, temp2, temp1, "");
+ ij_out[i] = LLVMBuildFAdd(ctx->ac.builder, temp2, temp1, "");
}
- interp_param = lp_build_gather_values(gallivm, ij_out, 2);
+ interp_param = lp_build_gather_values(&ctx->gallivm, ij_out, 2);
}
- if (interp_param) {
- interp_param = LLVMBuildBitCast(gallivm->builder,
- interp_param, LLVMVectorType(ctx->f32, 2), "");
- }
+ if (interp_param)
+ interp_param = ac_to_float(&ctx->ac, interp_param);
for (chan = 0; chan < 4; chan++) {
LLVMValueRef gather = LLVMGetUndef(LLVMVectorType(ctx->f32, input_array_size));
@@ -3637,67 +3929,33 @@
LLVMValueRef v, i = NULL, j = NULL;
if (interp_param) {
- interp_param = LLVMBuildBitCast(gallivm->builder,
- interp_param, LLVMVectorType(ctx->f32, 2), "");
i = LLVMBuildExtractElement(
- gallivm->builder, interp_param, ctx->i32_0, "");
+ ctx->ac.builder, interp_param, ctx->i32_0, "");
j = LLVMBuildExtractElement(
- gallivm->builder, interp_param, ctx->i32_1, "");
+ ctx->ac.builder, interp_param, ctx->i32_1, "");
}
v = si_build_fs_interp(ctx, input_base + idx, schan,
prim_mask, i, j);
- gather = LLVMBuildInsertElement(gallivm->builder,
+ gather = LLVMBuildInsertElement(ctx->ac.builder,
gather, v, LLVMConstInt(ctx->i32, idx, false), "");
}
emit_data->output[chan] = LLVMBuildExtractElement(
- gallivm->builder, gather, array_idx, "");
+ ctx->ac.builder, gather, array_idx, "");
}
}
-static LLVMValueRef si_emit_ballot(struct si_shader_context *ctx,
- LLVMValueRef value)
-{
- struct gallivm_state *gallivm = &ctx->gallivm;
- LLVMValueRef args[3] = {
- value,
- ctx->i32_0,
- LLVMConstInt(ctx->i32, LLVMIntNE, 0)
- };
-
- /* We currently have no other way to prevent LLVM from lifting the icmp
- * calls to a dominating basic block.
- */
- emit_optimization_barrier(ctx, &args[0]);
-
- if (LLVMTypeOf(args[0]) != ctx->i32)
- args[0] = LLVMBuildBitCast(gallivm->builder, args[0], ctx->i32, "");
-
- return lp_build_intrinsic(gallivm->builder,
- "llvm.amdgcn.icmp.i32",
- ctx->i64, args, 3,
- LP_FUNC_ATTR_NOUNWIND |
- LP_FUNC_ATTR_READNONE |
- LP_FUNC_ATTR_CONVERGENT);
-}
-
static void vote_all_emit(
const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- struct gallivm_state *gallivm = &ctx->gallivm;
- LLVMValueRef active_set, vote_set;
- LLVMValueRef tmp;
-
- active_set = si_emit_ballot(ctx, ctx->i32_1);
- vote_set = si_emit_ballot(ctx, emit_data->args[0]);
- tmp = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, vote_set, active_set, "");
+ LLVMValueRef tmp = ac_build_vote_all(&ctx->ac, emit_data->args[0]);
emit_data->output[emit_data->chan] =
- LLVMBuildSExt(gallivm->builder, tmp, ctx->i32, "");
+ LLVMBuildSExt(ctx->ac.builder, tmp, ctx->i32, "");
}
static void vote_any_emit(
@@ -3706,16 +3964,10 @@
struct lp_build_emit_data *emit_data)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- struct gallivm_state *gallivm = &ctx->gallivm;
- LLVMValueRef vote_set;
- LLVMValueRef tmp;
-
- vote_set = si_emit_ballot(ctx, emit_data->args[0]);
- tmp = LLVMBuildICmp(gallivm->builder, LLVMIntNE,
- vote_set, LLVMConstInt(ctx->i64, 0, 0), "");
+ LLVMValueRef tmp = ac_build_vote_any(&ctx->ac, emit_data->args[0]);
emit_data->output[emit_data->chan] =
- LLVMBuildSExt(gallivm->builder, tmp, ctx->i32, "");
+ LLVMBuildSExt(ctx->ac.builder, tmp, ctx->i32, "");
}
static void vote_eq_emit(
@@ -3724,19 +3976,10 @@
struct lp_build_emit_data *emit_data)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- struct gallivm_state *gallivm = &ctx->gallivm;
- LLVMValueRef active_set, vote_set;
- LLVMValueRef all, none, tmp;
-
- active_set = si_emit_ballot(ctx, ctx->i32_1);
- vote_set = si_emit_ballot(ctx, emit_data->args[0]);
-
- all = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, vote_set, active_set, "");
- none = LLVMBuildICmp(gallivm->builder, LLVMIntEQ,
- vote_set, LLVMConstInt(ctx->i64, 0, 0), "");
- tmp = LLVMBuildOr(gallivm->builder, all, none, "");
+
+ LLVMValueRef tmp = ac_build_vote_eq(&ctx->ac, emit_data->args[0]);
emit_data->output[emit_data->chan] =
- LLVMBuildSExt(gallivm->builder, tmp, ctx->i32, "");
+ LLVMBuildSExt(ctx->ac.builder, tmp, ctx->i32, "");
}
static void ballot_emit(
@@ -3745,11 +3988,11 @@
struct lp_build_emit_data *emit_data)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- LLVMBuilderRef builder = ctx->gallivm.builder;
+ LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef tmp;
tmp = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X);
- tmp = si_emit_ballot(ctx, tmp);
+ tmp = ac_build_ballot(&ctx->ac, tmp);
tmp = LLVMBuildBitCast(builder, tmp, ctx->v2i32, "");
emit_data->output[0] = LLVMBuildExtractElement(builder, tmp, ctx->i32_0, "");
@@ -3775,17 +4018,14 @@
struct lp_build_emit_data *emit_data)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- LLVMBuilderRef builder = ctx->gallivm.builder;
/* We currently have no other way to prevent LLVM from lifting the icmp
* calls to a dominating basic block.
*/
- emit_optimization_barrier(ctx, &emit_data->args[0]);
+ ac_build_optimization_barrier(&ctx->ac, &emit_data->args[0]);
- for (unsigned i = 0; i < emit_data->arg_count; ++i) {
- emit_data->args[i] = LLVMBuildBitCast(builder, emit_data->args[i],
- ctx->i32, "");
- }
+ for (unsigned i = 0; i < emit_data->arg_count; ++i)
+ emit_data->args[i] = ac_to_integer(&ctx->ac, emit_data->args[i]);
emit_data->output[emit_data->chan] =
ac_build_intrinsic(&ctx->ac, action->intr_name,
@@ -3819,7 +4059,6 @@
struct lp_build_context *uint = &bld_base->uint_bld;
struct si_shader *shader = ctx->shader;
struct tgsi_shader_info *info = &shader->selector->info;
- struct gallivm_state *gallivm = &ctx->gallivm;
struct lp_build_if_state if_state;
LLVMValueRef soffset = LLVMGetParam(ctx->main_fn,
ctx->param_gs2vs_offset);
@@ -3832,7 +4071,7 @@
stream = si_llvm_get_stream(bld_base, emit_data);
/* Write vertex attribute values to GSVS ring */
- gs_next_vertex = LLVMBuildLoad(gallivm->builder,
+ gs_next_vertex = LLVMBuildLoad(ctx->ac.builder,
ctx->gs_next_vertex[stream],
"");
@@ -3844,7 +4083,7 @@
* further memory loads and may allow LLVM to skip to the end
* altogether.
*/
- can_emit = LLVMBuildICmp(gallivm->builder, LLVMIntULT, gs_next_vertex,
+ can_emit = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, gs_next_vertex,
LLVMConstInt(ctx->i32,
shader->selector->gs_max_out_vertices, 0), "");
@@ -3856,7 +4095,7 @@
ac_build_kill(&ctx->ac, kill);
} else {
- lp_build_if(&if_state, gallivm, can_emit);
+ lp_build_if(&if_state, &ctx->gallivm, can_emit);
}
offset = 0;
@@ -3868,7 +4107,7 @@
((info->output_streams[i] >> (2 * chan)) & 3) != stream)
continue;
- LLVMValueRef out_val = LLVMBuildLoad(gallivm->builder, out_ptr[chan], "");
+ LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder, out_ptr[chan], "");
LLVMValueRef voffset =
LLVMConstInt(ctx->i32, offset *
shader->selector->gs_max_out_vertices, 0);
@@ -3877,7 +4116,7 @@
voffset = lp_build_add(uint, voffset, gs_next_vertex);
voffset = lp_build_mul_imm(uint, voffset, 4);
- out_val = LLVMBuildBitCast(gallivm->builder, out_val, ctx->i32, "");
+ out_val = ac_to_integer(&ctx->ac, out_val);
ac_build_buffer_store_dword(&ctx->ac,
ctx->gsvs_ring[stream],
@@ -3890,7 +4129,7 @@
gs_next_vertex = lp_build_add(uint, gs_next_vertex,
ctx->i32_1);
- LLVMBuildStore(gallivm->builder, gs_next_vertex, ctx->gs_next_vertex[stream]);
+ LLVMBuildStore(ctx->ac.builder, gs_next_vertex, ctx->gs_next_vertex[stream]);
/* Signal vertex emission */
ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (stream << 8),
@@ -3919,7 +4158,6 @@
struct lp_build_emit_data *emit_data)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- struct gallivm_state *gallivm = &ctx->gallivm;
/* SI only (thanks to a hw bug workaround):
* The real barrier instruction isn’t needed, because an entire patch
@@ -3931,7 +4169,7 @@
return;
}
- lp_build_intrinsic(gallivm->builder,
+ lp_build_intrinsic(ctx->ac.builder,
"llvm.amdgcn.s.barrier",
ctx->voidt, NULL, 0, LP_FUNC_ATTR_CONVERGENT);
}
@@ -3944,16 +4182,16 @@
static void si_create_function(struct si_shader_context *ctx,
const char *name,
LLVMTypeRef *returns, unsigned num_returns,
- LLVMTypeRef *params, unsigned num_params,
- int last_sgpr, unsigned max_workgroup_size)
+ struct si_function_info *fninfo,
+ unsigned max_workgroup_size)
{
int i;
si_llvm_create_func(ctx, name, returns, num_returns,
- params, num_params);
+ fninfo->types, fninfo->num_params);
ctx->return_value = LLVMGetUndef(ctx->return_type);
- for (i = 0; i <= last_sgpr; ++i) {
+ for (i = 0; i < fninfo->num_sgpr_params; ++i) {
LLVMValueRef P = LLVMGetParam(ctx->main_fn, i);
/* The combination of:
@@ -3971,6 +4209,11 @@
lp_add_function_attr(ctx->main_fn, i + 1, LP_FUNC_ATTR_INREG);
}
+ for (i = 0; i < fninfo->num_params; ++i) {
+ if (fninfo->assign[i])
+ *fninfo->assign[i] = LLVMGetParam(ctx->main_fn, i);
+ }
+
if (max_workgroup_size) {
si_llvm_add_attribute(ctx->main_fn, "amdgpu-max-work-group-size",
max_workgroup_size);
@@ -3979,7 +4222,7 @@
"no-signed-zeros-fp-math",
"true");
- if (ctx->screen->b.debug_flags & DBG_UNSAFE_MATH) {
+ if (ctx->screen->b.debug_flags & DBG(UNSAFE_MATH)) {
/* These were copied from some LLVM test. */
LLVMAddTargetDependentFunctionAttr(ctx->main_fn,
"less-precise-fpmad",
@@ -3998,58 +4241,32 @@
static void declare_streamout_params(struct si_shader_context *ctx,
struct pipe_stream_output_info *so,
- LLVMTypeRef *params, LLVMTypeRef i32,
- unsigned *num_params)
+ struct si_function_info *fninfo)
{
int i;
/* Streamout SGPRs. */
if (so->num_outputs) {
if (ctx->type != PIPE_SHADER_TESS_EVAL)
- params[ctx->param_streamout_config = (*num_params)++] = i32;
+ ctx->param_streamout_config = add_arg(fninfo, ARG_SGPR, ctx->ac.i32);
else
- ctx->param_streamout_config = *num_params - 1;
+ ctx->param_streamout_config = fninfo->num_params - 1;
- params[ctx->param_streamout_write_index = (*num_params)++] = i32;
+ ctx->param_streamout_write_index = add_arg(fninfo, ARG_SGPR, ctx->ac.i32);
}
/* A streamout buffer offset is loaded if the stride is non-zero. */
for (i = 0; i < 4; i++) {
if (!so->stride[i])
continue;
- params[ctx->param_streamout_offset[i] = (*num_params)++] = i32;
- }
-}
-
-static unsigned llvm_get_type_size(LLVMTypeRef type)
-{
- LLVMTypeKind kind = LLVMGetTypeKind(type);
-
- switch (kind) {
- case LLVMIntegerTypeKind:
- return LLVMGetIntTypeWidth(type) / 8;
- case LLVMFloatTypeKind:
- return 4;
- case LLVMPointerTypeKind:
- return 8;
- case LLVMVectorTypeKind:
- return LLVMGetVectorSize(type) *
- llvm_get_type_size(LLVMGetElementType(type));
- case LLVMArrayTypeKind:
- return LLVMGetArrayLength(type) *
- llvm_get_type_size(LLVMGetElementType(type));
- default:
- assert(0);
- return 0;
+ ctx->param_streamout_offset[i] = add_arg(fninfo, ARG_SGPR, ctx->ac.i32);
}
}
static void declare_lds_as_pointer(struct si_shader_context *ctx)
{
- struct gallivm_state *gallivm = &ctx->gallivm;
-
unsigned lds_size = ctx->screen->b.chip_class >= CIK ? 65536 : 32768;
- ctx->lds = LLVMBuildIntToPtr(gallivm->builder, ctx->i32_0,
+ ctx->lds = LLVMBuildIntToPtr(ctx->ac.builder, ctx->i32_0,
LLVMPointerType(LLVMArrayType(ctx->i32, lds_size / 4), LOCAL_ADDR_SPACE),
"lds");
}
@@ -4088,74 +4305,84 @@
}
static void declare_per_stage_desc_pointers(struct si_shader_context *ctx,
- LLVMTypeRef *params,
- unsigned *num_params,
+ struct si_function_info *fninfo,
bool assign_params)
{
- params[(*num_params)++] = si_const_array(ctx->v4i32,
- SI_NUM_SHADER_BUFFERS + SI_NUM_CONST_BUFFERS);
- params[(*num_params)++] = si_const_array(ctx->v8i32,
- SI_NUM_IMAGES + SI_NUM_SAMPLERS * 2);
+ LLVMTypeRef const_shader_buf_type;
+
+ if (ctx->shader->selector->info.const_buffers_declared == 1 &&
+ ctx->shader->selector->info.shader_buffers_declared == 0)
+ const_shader_buf_type = ctx->f32;
+ else
+ const_shader_buf_type = ctx->v4i32;
+
+ unsigned const_and_shader_buffers =
+ add_arg(fninfo, ARG_SGPR,
+ si_const_array(const_shader_buf_type, 0));
+
+ unsigned samplers_and_images =
+ add_arg(fninfo, ARG_SGPR,
+ si_const_array(ctx->v8i32,
+ SI_NUM_IMAGES + SI_NUM_SAMPLERS * 2));
if (assign_params) {
- ctx->param_const_and_shader_buffers = *num_params - 2;
- ctx->param_samplers_and_images = *num_params - 1;
+ ctx->param_const_and_shader_buffers = const_and_shader_buffers;
+ ctx->param_samplers_and_images = samplers_and_images;
}
}
-static void declare_default_desc_pointers(struct si_shader_context *ctx,
- LLVMTypeRef *params,
- unsigned *num_params)
+static void declare_global_desc_pointers(struct si_shader_context *ctx,
+ struct si_function_info *fninfo)
{
- params[ctx->param_rw_buffers = (*num_params)++] =
- si_const_array(ctx->v4i32, SI_NUM_RW_BUFFERS);
- declare_per_stage_desc_pointers(ctx, params, num_params, true);
+ ctx->param_rw_buffers = add_arg(fninfo, ARG_SGPR,
+ si_const_array(ctx->v4i32, SI_NUM_RW_BUFFERS));
+ ctx->param_bindless_samplers_and_images = add_arg(fninfo, ARG_SGPR,
+ si_const_array(ctx->v8i32, 0));
}
static void declare_vs_specific_input_sgprs(struct si_shader_context *ctx,
- LLVMTypeRef *params,
- unsigned *num_params)
+ struct si_function_info *fninfo)
{
- params[ctx->param_vertex_buffers = (*num_params)++] =
- si_const_array(ctx->v4i32, SI_NUM_VERTEX_BUFFERS);
- params[ctx->param_base_vertex = (*num_params)++] = ctx->i32;
- params[ctx->param_start_instance = (*num_params)++] = ctx->i32;
- params[ctx->param_draw_id = (*num_params)++] = ctx->i32;
- params[ctx->param_vs_state_bits = (*num_params)++] = ctx->i32;
+ ctx->param_vertex_buffers = add_arg(fninfo, ARG_SGPR,
+ si_const_array(ctx->v4i32, SI_NUM_VERTEX_BUFFERS));
+ add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.base_vertex);
+ add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.start_instance);
+ add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.draw_id);
+ ctx->param_vs_state_bits = add_arg(fninfo, ARG_SGPR, ctx->i32);
}
static void declare_vs_input_vgprs(struct si_shader_context *ctx,
- LLVMTypeRef *params, unsigned *num_params,
+ struct si_function_info *fninfo,
unsigned *num_prolog_vgprs)
{
struct si_shader *shader = ctx->shader;
- params[ctx->param_vertex_id = (*num_params)++] = ctx->i32;
+ add_arg_assign(fninfo, ARG_VGPR, ctx->i32, &ctx->abi.vertex_id);
if (shader->key.as_ls) {
- params[ctx->param_rel_auto_id = (*num_params)++] = ctx->i32;
- params[ctx->param_instance_id = (*num_params)++] = ctx->i32;
+ ctx->param_rel_auto_id = add_arg(fninfo, ARG_VGPR, ctx->i32);
+ add_arg_assign(fninfo, ARG_VGPR, ctx->i32, &ctx->abi.instance_id);
} else {
- params[ctx->param_instance_id = (*num_params)++] = ctx->i32;
- params[ctx->param_vs_prim_id = (*num_params)++] = ctx->i32;
+ add_arg_assign(fninfo, ARG_VGPR, ctx->i32, &ctx->abi.instance_id);
+ ctx->param_vs_prim_id = add_arg(fninfo, ARG_VGPR, ctx->i32);
}
- params[(*num_params)++] = ctx->i32; /* unused */
+ add_arg(fninfo, ARG_VGPR, ctx->i32); /* unused */
if (!shader->is_gs_copy_shader) {
/* Vertex load indices. */
- ctx->param_vertex_index0 = (*num_params);
+ ctx->param_vertex_index0 = fninfo->num_params;
for (unsigned i = 0; i < shader->selector->info.num_inputs; i++)
- params[(*num_params)++] = ctx->i32;
+ add_arg(fninfo, ARG_VGPR, ctx->i32);
*num_prolog_vgprs += shader->selector->info.num_inputs;
}
}
static void declare_tes_input_vgprs(struct si_shader_context *ctx,
- LLVMTypeRef *params, unsigned *num_params)
+ struct si_function_info *fninfo)
{
- params[ctx->param_tes_u = (*num_params)++] = ctx->f32;
- params[ctx->param_tes_v = (*num_params)++] = ctx->f32;
- params[ctx->param_tes_rel_patch_id = (*num_params)++] = ctx->i32;
- params[ctx->param_tes_patch_id = (*num_params)++] = ctx->i32;
+ ctx->param_tes_u = add_arg(fninfo, ARG_VGPR, ctx->f32);
+ ctx->param_tes_v = add_arg(fninfo, ARG_VGPR, ctx->f32);
+ ctx->param_tes_rel_patch_id = add_arg(fninfo, ARG_VGPR, ctx->i32);
+ ctx->param_tes_patch_id = add_arg(fninfo, ARG_VGPR, ctx->i32);
}
enum {
@@ -4166,15 +4393,17 @@
static void create_function(struct si_shader_context *ctx)
{
- struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
- struct gallivm_state *gallivm = &ctx->gallivm;
struct si_shader *shader = ctx->shader;
- LLVMTypeRef params[100]; /* just make it large enough */
+ struct si_function_info fninfo;
LLVMTypeRef returns[16+32*4];
- unsigned i, last_sgpr, num_params = 0, num_return_sgprs;
+ unsigned i, num_return_sgprs;
unsigned num_returns = 0;
unsigned num_prolog_vgprs = 0;
unsigned type = ctx->type;
+ unsigned vs_blit_property =
+ shader->selector->info.properties[TGSI_PROPERTY_VS_BLIT_SGPRS];
+
+ si_init_function_info(&fninfo);
/* Set MERGED shaders. */
if (ctx->screen->b.chip_class >= GFX9) {
@@ -4188,88 +4417,114 @@
switch (type) {
case PIPE_SHADER_VERTEX:
- declare_default_desc_pointers(ctx, params, &num_params);
- declare_vs_specific_input_sgprs(ctx, params, &num_params);
+ declare_global_desc_pointers(ctx, &fninfo);
+
+ if (vs_blit_property) {
+ ctx->param_vs_blit_inputs = fninfo.num_params;
+ add_arg(&fninfo, ARG_SGPR, ctx->i32); /* i16 x1, y1 */
+ add_arg(&fninfo, ARG_SGPR, ctx->i32); /* i16 x2, y2 */
+ add_arg(&fninfo, ARG_SGPR, ctx->f32); /* depth */
+
+ if (vs_blit_property == SI_VS_BLIT_SGPRS_POS_COLOR) {
+ add_arg(&fninfo, ARG_SGPR, ctx->f32); /* color0 */
+ add_arg(&fninfo, ARG_SGPR, ctx->f32); /* color1 */
+ add_arg(&fninfo, ARG_SGPR, ctx->f32); /* color2 */
+ add_arg(&fninfo, ARG_SGPR, ctx->f32); /* color3 */
+ } else if (vs_blit_property == SI_VS_BLIT_SGPRS_POS_TEXCOORD) {
+ add_arg(&fninfo, ARG_SGPR, ctx->f32); /* texcoord.x1 */
+ add_arg(&fninfo, ARG_SGPR, ctx->f32); /* texcoord.y1 */
+ add_arg(&fninfo, ARG_SGPR, ctx->f32); /* texcoord.x2 */
+ add_arg(&fninfo, ARG_SGPR, ctx->f32); /* texcoord.y2 */
+ add_arg(&fninfo, ARG_SGPR, ctx->f32); /* texcoord.z */
+ add_arg(&fninfo, ARG_SGPR, ctx->f32); /* texcoord.w */
+ }
+
+ /* VGPRs */
+ declare_vs_input_vgprs(ctx, &fninfo, &num_prolog_vgprs);
+ break;
+ }
+
+ declare_per_stage_desc_pointers(ctx, &fninfo, true);
+ declare_vs_specific_input_sgprs(ctx, &fninfo);
if (shader->key.as_es) {
- params[ctx->param_es2gs_offset = num_params++] = ctx->i32;
+ assert(!shader->selector->nir);
+ ctx->param_es2gs_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
} else if (shader->key.as_ls) {
+ assert(!shader->selector->nir);
/* no extra parameters */
} else {
- if (shader->is_gs_copy_shader)
- num_params = ctx->param_rw_buffers + 1;
+ if (shader->is_gs_copy_shader) {
+ fninfo.num_params = ctx->param_rw_buffers + 1;
+ fninfo.num_sgpr_params = fninfo.num_params;
+ }
/* The locations of the other parameters are assigned dynamically. */
declare_streamout_params(ctx, &shader->selector->so,
- params, ctx->i32, &num_params);
+ &fninfo);
}
- last_sgpr = num_params-1;
-
/* VGPRs */
- declare_vs_input_vgprs(ctx, params, &num_params,
- &num_prolog_vgprs);
+ declare_vs_input_vgprs(ctx, &fninfo, &num_prolog_vgprs);
break;
case PIPE_SHADER_TESS_CTRL: /* SI-CI-VI */
- declare_default_desc_pointers(ctx, params, &num_params);
- params[ctx->param_tcs_offchip_layout = num_params++] = ctx->i32;
- params[ctx->param_tcs_out_lds_offsets = num_params++] = ctx->i32;
- params[ctx->param_tcs_out_lds_layout = num_params++] = ctx->i32;
- params[ctx->param_vs_state_bits = num_params++] = ctx->i32;
- params[ctx->param_tcs_offchip_addr_base64k = num_params++] = ctx->i32;
- params[ctx->param_tcs_factor_addr_base64k = num_params++] = ctx->i32;
- params[ctx->param_tcs_offchip_offset = num_params++] = ctx->i32;
- params[ctx->param_tcs_factor_offset = num_params++] = ctx->i32;
- last_sgpr = num_params - 1;
+ declare_global_desc_pointers(ctx, &fninfo);
+ declare_per_stage_desc_pointers(ctx, &fninfo, true);
+ ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_tcs_out_lds_offsets = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_tcs_out_lds_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_vs_state_bits = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_tcs_offchip_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_tcs_factor_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_tcs_factor_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
/* VGPRs */
- params[ctx->param_tcs_patch_id = num_params++] = ctx->i32;
- params[ctx->param_tcs_rel_ids = num_params++] = ctx->i32;
+ ctx->param_tcs_patch_id = add_arg(&fninfo, ARG_VGPR, ctx->i32);
+ ctx->param_tcs_rel_ids = add_arg(&fninfo, ARG_VGPR, ctx->i32);
/* param_tcs_offchip_offset and param_tcs_factor_offset are
* placed after the user SGPRs.
*/
for (i = 0; i < GFX6_TCS_NUM_USER_SGPR + 2; i++)
returns[num_returns++] = ctx->i32; /* SGPRs */
- for (i = 0; i < 3; i++)
+ for (i = 0; i < 11; i++)
returns[num_returns++] = ctx->f32; /* VGPRs */
break;
case SI_SHADER_MERGED_VERTEX_TESSCTRL:
/* Merged stages have 8 system SGPRs at the beginning. */
- params[ctx->param_rw_buffers = num_params++] = /* SPI_SHADER_USER_DATA_ADDR_LO_HS */
- si_const_array(ctx->v4i32, SI_NUM_RW_BUFFERS);
- params[ctx->param_tcs_offchip_offset = num_params++] = ctx->i32;
- params[ctx->param_merged_wave_info = num_params++] = ctx->i32;
- params[ctx->param_tcs_factor_offset = num_params++] = ctx->i32;
- params[ctx->param_merged_scratch_offset = num_params++] = ctx->i32;
- params[num_params++] = ctx->i32; /* unused */
- params[num_params++] = ctx->i32; /* unused */
-
- params[num_params++] = ctx->i32; /* unused */
- params[num_params++] = ctx->i32; /* unused */
- declare_per_stage_desc_pointers(ctx, params, &num_params,
+ add_arg(&fninfo, ARG_SGPR, ctx->i32); /* SPI_SHADER_USER_DATA_ADDR_LO_HS */
+ add_arg(&fninfo, ARG_SGPR, ctx->i32); /* SPI_SHADER_USER_DATA_ADDR_HI_HS */
+ ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_merged_wave_info = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_tcs_factor_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_merged_scratch_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
+ add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
+
+ declare_global_desc_pointers(ctx, &fninfo);
+ declare_per_stage_desc_pointers(ctx, &fninfo,
ctx->type == PIPE_SHADER_VERTEX);
- declare_vs_specific_input_sgprs(ctx, params, &num_params);
+ declare_vs_specific_input_sgprs(ctx, &fninfo);
- params[ctx->param_tcs_offchip_layout = num_params++] = ctx->i32;
- params[ctx->param_tcs_out_lds_offsets = num_params++] = ctx->i32;
- params[ctx->param_tcs_out_lds_layout = num_params++] = ctx->i32;
- params[ctx->param_tcs_offchip_addr_base64k = num_params++] = ctx->i32;
- params[ctx->param_tcs_factor_addr_base64k = num_params++] = ctx->i32;
- params[num_params++] = ctx->i32; /* unused */
+ ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_tcs_out_lds_offsets = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_tcs_out_lds_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_tcs_offchip_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_tcs_factor_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
- declare_per_stage_desc_pointers(ctx, params, &num_params,
+ declare_per_stage_desc_pointers(ctx, &fninfo,
ctx->type == PIPE_SHADER_TESS_CTRL);
- last_sgpr = num_params - 1;
/* VGPRs (first TCS, then VS) */
- params[ctx->param_tcs_patch_id = num_params++] = ctx->i32;
- params[ctx->param_tcs_rel_ids = num_params++] = ctx->i32;
+ ctx->param_tcs_patch_id = add_arg(&fninfo, ARG_VGPR, ctx->i32);
+ ctx->param_tcs_rel_ids = add_arg(&fninfo, ARG_VGPR, ctx->i32);
if (ctx->type == PIPE_SHADER_VERTEX) {
- declare_vs_input_vgprs(ctx, params, &num_params,
+ declare_vs_input_vgprs(ctx, &fninfo,
&num_prolog_vgprs);
/* LS return values are inputs to the TCS main shader part. */
@@ -4286,56 +4541,54 @@
*/
for (i = 0; i <= 8 + GFX9_SGPR_TCS_FACTOR_ADDR_BASE64K; i++)
returns[num_returns++] = ctx->i32; /* SGPRs */
- for (i = 0; i < 3; i++)
+ for (i = 0; i < 11; i++)
returns[num_returns++] = ctx->f32; /* VGPRs */
}
break;
case SI_SHADER_MERGED_VERTEX_OR_TESSEVAL_GEOMETRY:
/* Merged stages have 8 system SGPRs at the beginning. */
- params[ctx->param_rw_buffers = num_params++] = /* SPI_SHADER_USER_DATA_ADDR_LO_GS */
- si_const_array(ctx->v4i32, SI_NUM_RW_BUFFERS);
- params[ctx->param_gs2vs_offset = num_params++] = ctx->i32;
- params[ctx->param_merged_wave_info = num_params++] = ctx->i32;
- params[ctx->param_tcs_offchip_offset = num_params++] = ctx->i32;
- params[ctx->param_merged_scratch_offset = num_params++] = ctx->i32;
- params[num_params++] = ctx->i32; /* unused (SPI_SHADER_PGM_LO/HI_GS << 8) */
- params[num_params++] = ctx->i32; /* unused (SPI_SHADER_PGM_LO/HI_GS >> 24) */
-
- params[num_params++] = ctx->i32; /* unused */
- params[num_params++] = ctx->i32; /* unused */
- declare_per_stage_desc_pointers(ctx, params, &num_params,
+ add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused (SPI_SHADER_USER_DATA_ADDR_LO_GS) */
+ add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused (SPI_SHADER_USER_DATA_ADDR_HI_GS) */
+ ctx->param_gs2vs_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_merged_wave_info = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_merged_scratch_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused (SPI_SHADER_PGM_LO/HI_GS << 8) */
+ add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused (SPI_SHADER_PGM_LO/HI_GS >> 24) */
+
+ declare_global_desc_pointers(ctx, &fninfo);
+ declare_per_stage_desc_pointers(ctx, &fninfo,
(ctx->type == PIPE_SHADER_VERTEX ||
ctx->type == PIPE_SHADER_TESS_EVAL));
if (ctx->type == PIPE_SHADER_VERTEX) {
- declare_vs_specific_input_sgprs(ctx, params, &num_params);
+ declare_vs_specific_input_sgprs(ctx, &fninfo);
} else {
/* TESS_EVAL (and also GEOMETRY):
* Declare as many input SGPRs as the VS has. */
- params[ctx->param_tcs_offchip_layout = num_params++] = ctx->i32;
- params[ctx->param_tcs_offchip_addr_base64k = num_params++] = ctx->i32;
- params[num_params++] = ctx->i32; /* unused */
- params[num_params++] = ctx->i32; /* unused */
- params[num_params++] = ctx->i32; /* unused */
- params[ctx->param_vs_state_bits = num_params++] = ctx->i32; /* unused */
+ ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_tcs_offchip_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
+ add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
+ add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
+ ctx->param_vs_state_bits = add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
}
- declare_per_stage_desc_pointers(ctx, params, &num_params,
+ declare_per_stage_desc_pointers(ctx, &fninfo,
ctx->type == PIPE_SHADER_GEOMETRY);
- last_sgpr = num_params - 1;
/* VGPRs (first GS, then VS/TES) */
- params[ctx->param_gs_vtx01_offset = num_params++] = ctx->i32;
- params[ctx->param_gs_vtx23_offset = num_params++] = ctx->i32;
- params[ctx->param_gs_prim_id = num_params++] = ctx->i32;
- params[ctx->param_gs_instance_id = num_params++] = ctx->i32;
- params[ctx->param_gs_vtx45_offset = num_params++] = ctx->i32;
+ ctx->param_gs_vtx01_offset = add_arg(&fninfo, ARG_VGPR, ctx->i32);
+ ctx->param_gs_vtx23_offset = add_arg(&fninfo, ARG_VGPR, ctx->i32);
+ ctx->param_gs_prim_id = add_arg(&fninfo, ARG_VGPR, ctx->i32);
+ ctx->param_gs_instance_id = add_arg(&fninfo, ARG_VGPR, ctx->i32);
+ ctx->param_gs_vtx45_offset = add_arg(&fninfo, ARG_VGPR, ctx->i32);
if (ctx->type == PIPE_SHADER_VERTEX) {
- declare_vs_input_vgprs(ctx, params, &num_params,
+ declare_vs_input_vgprs(ctx, &fninfo,
&num_prolog_vgprs);
} else if (ctx->type == PIPE_SHADER_TESS_EVAL) {
- declare_tes_input_vgprs(ctx, params, &num_params);
+ declare_tes_input_vgprs(ctx, &fninfo);
}
if (ctx->type == PIPE_SHADER_VERTEX ||
@@ -4349,76 +4602,83 @@
break;
case PIPE_SHADER_TESS_EVAL:
- declare_default_desc_pointers(ctx, params, &num_params);
- params[ctx->param_tcs_offchip_layout = num_params++] = ctx->i32;
- params[ctx->param_tcs_offchip_addr_base64k = num_params++] = ctx->i32;
+ declare_global_desc_pointers(ctx, &fninfo);
+ declare_per_stage_desc_pointers(ctx, &fninfo, true);
+ ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_tcs_offchip_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32);
if (shader->key.as_es) {
- params[ctx->param_tcs_offchip_offset = num_params++] = ctx->i32;
- params[num_params++] = ctx->i32;
- params[ctx->param_es2gs_offset = num_params++] = ctx->i32;
+ ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_es2gs_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
} else {
- params[num_params++] = ctx->i32;
+ add_arg(&fninfo, ARG_SGPR, ctx->i32);
declare_streamout_params(ctx, &shader->selector->so,
- params, ctx->i32, &num_params);
- params[ctx->param_tcs_offchip_offset = num_params++] = ctx->i32;
+ &fninfo);
+ ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
}
- last_sgpr = num_params - 1;
/* VGPRs */
- declare_tes_input_vgprs(ctx, params, &num_params);
+ declare_tes_input_vgprs(ctx, &fninfo);
break;
case PIPE_SHADER_GEOMETRY:
- declare_default_desc_pointers(ctx, params, &num_params);
- params[ctx->param_gs2vs_offset = num_params++] = ctx->i32;
- params[ctx->param_gs_wave_id = num_params++] = ctx->i32;
- last_sgpr = num_params - 1;
+ declare_global_desc_pointers(ctx, &fninfo);
+ declare_per_stage_desc_pointers(ctx, &fninfo, true);
+ ctx->param_gs2vs_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_gs_wave_id = add_arg(&fninfo, ARG_SGPR, ctx->i32);
/* VGPRs */
- params[ctx->param_gs_vtx0_offset = num_params++] = ctx->i32;
- params[ctx->param_gs_vtx1_offset = num_params++] = ctx->i32;
- params[ctx->param_gs_prim_id = num_params++] = ctx->i32;
- params[ctx->param_gs_vtx2_offset = num_params++] = ctx->i32;
- params[ctx->param_gs_vtx3_offset = num_params++] = ctx->i32;
- params[ctx->param_gs_vtx4_offset = num_params++] = ctx->i32;
- params[ctx->param_gs_vtx5_offset = num_params++] = ctx->i32;
- params[ctx->param_gs_instance_id = num_params++] = ctx->i32;
+ ctx->param_gs_vtx0_offset = add_arg(&fninfo, ARG_VGPR, ctx->i32);
+ ctx->param_gs_vtx1_offset = add_arg(&fninfo, ARG_VGPR, ctx->i32);
+ ctx->param_gs_prim_id = add_arg(&fninfo, ARG_VGPR, ctx->i32);
+ ctx->param_gs_vtx2_offset = add_arg(&fninfo, ARG_VGPR, ctx->i32);
+ ctx->param_gs_vtx3_offset = add_arg(&fninfo, ARG_VGPR, ctx->i32);
+ ctx->param_gs_vtx4_offset = add_arg(&fninfo, ARG_VGPR, ctx->i32);
+ ctx->param_gs_vtx5_offset = add_arg(&fninfo, ARG_VGPR, ctx->i32);
+ ctx->param_gs_instance_id = add_arg(&fninfo, ARG_VGPR, ctx->i32);
break;
case PIPE_SHADER_FRAGMENT:
- declare_default_desc_pointers(ctx, params, &num_params);
- params[SI_PARAM_ALPHA_REF] = ctx->f32;
- params[SI_PARAM_PRIM_MASK] = ctx->i32;
- last_sgpr = SI_PARAM_PRIM_MASK;
- params[SI_PARAM_PERSP_SAMPLE] = ctx->v2i32;
- params[SI_PARAM_PERSP_CENTER] = ctx->v2i32;
- params[SI_PARAM_PERSP_CENTROID] = ctx->v2i32;
- params[SI_PARAM_PERSP_PULL_MODEL] = v3i32;
- params[SI_PARAM_LINEAR_SAMPLE] = ctx->v2i32;
- params[SI_PARAM_LINEAR_CENTER] = ctx->v2i32;
- params[SI_PARAM_LINEAR_CENTROID] = ctx->v2i32;
- params[SI_PARAM_LINE_STIPPLE_TEX] = ctx->f32;
- params[SI_PARAM_POS_X_FLOAT] = ctx->f32;
- params[SI_PARAM_POS_Y_FLOAT] = ctx->f32;
- params[SI_PARAM_POS_Z_FLOAT] = ctx->f32;
- params[SI_PARAM_POS_W_FLOAT] = ctx->f32;
- params[SI_PARAM_FRONT_FACE] = ctx->i32;
+ declare_global_desc_pointers(ctx, &fninfo);
+ declare_per_stage_desc_pointers(ctx, &fninfo, true);
+ add_arg_checked(&fninfo, ARG_SGPR, ctx->f32, SI_PARAM_ALPHA_REF);
+ add_arg_checked(&fninfo, ARG_SGPR, ctx->i32, SI_PARAM_PRIM_MASK);
+
+ add_arg_checked(&fninfo, ARG_VGPR, ctx->v2i32, SI_PARAM_PERSP_SAMPLE);
+ add_arg_checked(&fninfo, ARG_VGPR, ctx->v2i32, SI_PARAM_PERSP_CENTER);
+ add_arg_checked(&fninfo, ARG_VGPR, ctx->v2i32, SI_PARAM_PERSP_CENTROID);
+ add_arg_checked(&fninfo, ARG_VGPR, v3i32, SI_PARAM_PERSP_PULL_MODEL);
+ add_arg_checked(&fninfo, ARG_VGPR, ctx->v2i32, SI_PARAM_LINEAR_SAMPLE);
+ add_arg_checked(&fninfo, ARG_VGPR, ctx->v2i32, SI_PARAM_LINEAR_CENTER);
+ add_arg_checked(&fninfo, ARG_VGPR, ctx->v2i32, SI_PARAM_LINEAR_CENTROID);
+ add_arg_checked(&fninfo, ARG_VGPR, ctx->f32, SI_PARAM_LINE_STIPPLE_TEX);
+ add_arg_assign_checked(&fninfo, ARG_VGPR, ctx->f32,
+ &ctx->abi.frag_pos[0], SI_PARAM_POS_X_FLOAT);
+ add_arg_assign_checked(&fninfo, ARG_VGPR, ctx->f32,
+ &ctx->abi.frag_pos[1], SI_PARAM_POS_Y_FLOAT);
+ add_arg_assign_checked(&fninfo, ARG_VGPR, ctx->f32,
+ &ctx->abi.frag_pos[2], SI_PARAM_POS_Z_FLOAT);
+ add_arg_assign_checked(&fninfo, ARG_VGPR, ctx->f32,
+ &ctx->abi.frag_pos[3], SI_PARAM_POS_W_FLOAT);
+ add_arg_assign_checked(&fninfo, ARG_VGPR, ctx->i32,
+ &ctx->abi.front_face, SI_PARAM_FRONT_FACE);
shader->info.face_vgpr_index = 20;
- params[SI_PARAM_ANCILLARY] = ctx->i32;
+ add_arg_assign_checked(&fninfo, ARG_VGPR, ctx->i32,
+ &ctx->abi.ancillary, SI_PARAM_ANCILLARY);
shader->info.ancillary_vgpr_index = 21;
- params[SI_PARAM_SAMPLE_COVERAGE] = ctx->f32;
- params[SI_PARAM_POS_FIXED_PT] = ctx->i32;
- num_params = SI_PARAM_POS_FIXED_PT+1;
+ add_arg_assign_checked(&fninfo, ARG_VGPR, ctx->f32,
+ &ctx->abi.sample_coverage, SI_PARAM_SAMPLE_COVERAGE);
+ add_arg_checked(&fninfo, ARG_VGPR, ctx->i32, SI_PARAM_POS_FIXED_PT);
/* Color inputs from the prolog. */
if (shader->selector->info.colors_read) {
unsigned num_color_elements =
util_bitcount(shader->selector->info.colors_read);
- assert(num_params + num_color_elements <= ARRAY_SIZE(params));
+ assert(fninfo.num_params + num_color_elements <= ARRAY_SIZE(fninfo.types));
for (i = 0; i < num_color_elements; i++)
- params[num_params++] = ctx->f32;
+ add_arg(&fninfo, ARG_VGPR, ctx->f32);
num_prolog_vgprs += num_color_elements;
}
@@ -4444,30 +4704,27 @@
break;
case PIPE_SHADER_COMPUTE:
- declare_default_desc_pointers(ctx, params, &num_params);
+ declare_global_desc_pointers(ctx, &fninfo);
+ declare_per_stage_desc_pointers(ctx, &fninfo, true);
if (shader->selector->info.uses_grid_size)
- params[ctx->param_grid_size = num_params++] = v3i32;
+ ctx->param_grid_size = add_arg(&fninfo, ARG_SGPR, v3i32);
if (shader->selector->info.uses_block_size)
- params[ctx->param_block_size = num_params++] = v3i32;
+ ctx->param_block_size = add_arg(&fninfo, ARG_SGPR, v3i32);
for (i = 0; i < 3; i++) {
ctx->param_block_id[i] = -1;
if (shader->selector->info.uses_block_id[i])
- params[ctx->param_block_id[i] = num_params++] = ctx->i32;
+ ctx->param_block_id[i] = add_arg(&fninfo, ARG_SGPR, ctx->i32);
}
- last_sgpr = num_params - 1;
- params[ctx->param_thread_id = num_params++] = v3i32;
+ ctx->param_thread_id = add_arg(&fninfo, ARG_VGPR, v3i32);
break;
default:
assert(0 && "unimplemented shader");
return;
}
- assert(num_params <= ARRAY_SIZE(params));
-
- si_create_function(ctx, "main", returns, num_returns, params,
- num_params, last_sgpr,
+ si_create_function(ctx, "main", returns, num_returns, &fninfo,
si_get_max_workgroup_size(shader));
/* Reserve register locations for VGPR inputs the PS prolog may need. */
@@ -4489,11 +4746,11 @@
shader->info.num_input_sgprs = 0;
shader->info.num_input_vgprs = 0;
- for (i = 0; i <= last_sgpr; ++i)
- shader->info.num_input_sgprs += llvm_get_type_size(params[i]) / 4;
+ for (i = 0; i < fninfo.num_sgpr_params; ++i)
+ shader->info.num_input_sgprs += ac_get_type_size(fninfo.types[i]) / 4;
- for (; i < num_params; ++i)
- shader->info.num_input_vgprs += llvm_get_type_size(params[i]) / 4;
+ for (; i < fninfo.num_params; ++i)
+ shader->info.num_input_vgprs += ac_get_type_size(fninfo.types[i]) / 4;
assert(shader->info.num_input_vgprs >= num_prolog_vgprs);
shader->info.num_input_vgprs -= num_prolog_vgprs;
@@ -4513,8 +4770,7 @@
*/
static void preload_ring_buffers(struct si_shader_context *ctx)
{
- struct gallivm_state *gallivm = &ctx->gallivm;
- LLVMBuilderRef builder = gallivm->builder;
+ LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef buf_ptr = LLVMGetParam(ctx->main_fn,
ctx->param_rw_buffers);
@@ -4527,20 +4783,20 @@
LLVMValueRef offset = LLVMConstInt(ctx->i32, ring, 0);
ctx->esgs_ring =
- ac_build_indexed_load_const(&ctx->ac, buf_ptr, offset);
+ ac_build_load_to_sgpr(&ctx->ac, buf_ptr, offset);
}
if (ctx->shader->is_gs_copy_shader) {
LLVMValueRef offset = LLVMConstInt(ctx->i32, SI_RING_GSVS, 0);
ctx->gsvs_ring[0] =
- ac_build_indexed_load_const(&ctx->ac, buf_ptr, offset);
+ ac_build_load_to_sgpr(&ctx->ac, buf_ptr, offset);
} else if (ctx->type == PIPE_SHADER_GEOMETRY) {
const struct si_shader_selector *sel = ctx->shader->selector;
LLVMValueRef offset = LLVMConstInt(ctx->i32, SI_RING_GSVS, 0);
LLVMValueRef base_ring;
- base_ring = ac_build_indexed_load_const(&ctx->ac, buf_ptr, offset);
+ base_ring = ac_build_load_to_sgpr(&ctx->ac, buf_ptr, offset);
/* The conceptual layout of the GSVS ring is
* v0c0 .. vLv0 v0c1 .. vLc1 ..
@@ -4611,8 +4867,7 @@
LLVMValueRef param_rw_buffers,
unsigned param_pos_fixed_pt)
{
- struct gallivm_state *gallivm = &ctx->gallivm;
- LLVMBuilderRef builder = gallivm->builder;
+ LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef slot, desc, offset, row, bit, address[2];
/* Use the fixed-point gl_FragCoord input.
@@ -4624,13 +4879,13 @@
/* Load the buffer descriptor. */
slot = LLVMConstInt(ctx->i32, SI_PS_CONST_POLY_STIPPLE, 0);
- desc = ac_build_indexed_load_const(&ctx->ac, param_rw_buffers, slot);
+ desc = ac_build_load_to_sgpr(&ctx->ac, param_rw_buffers, slot);
/* The stipple pattern is 32x32, each row has 32 bits. */
offset = LLVMBuildMul(builder, address[1],
LLVMConstInt(ctx->i32, 4, 0), "");
row = buffer_load_const(ctx, desc, offset);
- row = LLVMBuildBitCast(builder, row, ctx->i32, "");
+ row = ac_to_integer(&ctx->ac, row);
bit = LLVMBuildLShr(builder, row, address[0], "");
bit = LLVMBuildTrunc(builder, bit, ctx->i1, "");
@@ -4885,7 +5140,18 @@
unsigned code_size = si_get_shader_binary_size(shader);
unsigned lds_increment = sscreen->b.chip_class >= CIK ? 512 : 256;
unsigned lds_per_wave = 0;
- unsigned max_simd_waves = 10;
+ unsigned max_simd_waves;
+
+ switch (sscreen->b.family) {
+ /* These always have 8 waves: */
+ case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
+ max_simd_waves = 8;
+ break;
+ default:
+ max_simd_waves = 10;
+ }
/* Compute LDS usage for PS. */
switch (processor) {
@@ -4930,7 +5196,7 @@
max_simd_waves = MIN2(max_simd_waves, 16384 / lds_per_wave);
if (!check_debug_option ||
- r600_can_dump_shader(&sscreen->b, processor)) {
+ si_can_dump_shader(&sscreen->b, processor)) {
if (processor == PIPE_SHADER_FRAGMENT) {
fprintf(file, "*** SHADER CONFIG ***\n"
"SPI_PS_INPUT_ADDR = 0x%04x\n"
@@ -5002,18 +5268,25 @@
FILE *file, bool check_debug_option)
{
if (!check_debug_option ||
- r600_can_dump_shader(&sscreen->b, processor))
+ si_can_dump_shader(&sscreen->b, processor))
si_dump_shader_key(processor, shader, file);
if (!check_debug_option && shader->binary.llvm_ir_string) {
+ if (shader->previous_stage &&
+ shader->previous_stage->binary.llvm_ir_string) {
+ fprintf(file, "\n%s - previous stage - LLVM IR:\n\n",
+ si_get_shader_name(shader, processor));
+ fprintf(file, "%s\n", shader->previous_stage->binary.llvm_ir_string);
+ }
+
fprintf(file, "\n%s - main shader part - LLVM IR:\n\n",
si_get_shader_name(shader, processor));
fprintf(file, "%s\n", shader->binary.llvm_ir_string);
}
if (!check_debug_option ||
- (r600_can_dump_shader(&sscreen->b, processor) &&
- !(sscreen->b.debug_flags & DBG_NO_ASM))) {
+ (si_can_dump_shader(&sscreen->b, processor) &&
+ !(sscreen->b.debug_flags & DBG(NO_ASM)))) {
fprintf(file, "\n%s:\n", si_get_shader_name(shader, processor));
if (shader->prolog)
@@ -5050,10 +5323,10 @@
int r = 0;
unsigned count = p_atomic_inc_return(&sscreen->b.num_compilations);
- if (r600_can_dump_shader(&sscreen->b, processor)) {
+ if (si_can_dump_shader(&sscreen->b, processor)) {
fprintf(stderr, "radeonsi: Compiling shader %d\n", count);
- if (!(sscreen->b.debug_flags & (DBG_NO_IR | DBG_PREOPT_IR))) {
+ if (!(sscreen->b.debug_flags & (DBG(NO_IR) | DBG(PREOPT_IR)))) {
fprintf(stderr, "%s LLVM IR:\n\n", name);
ac_dump_module(mod);
fprintf(stderr, "\n");
@@ -5111,9 +5384,9 @@
static void si_llvm_build_ret(struct si_shader_context *ctx, LLVMValueRef ret)
{
if (LLVMGetTypeKind(LLVMTypeOf(ret)) == LLVMVoidTypeKind)
- LLVMBuildRetVoid(ctx->gallivm.builder);
+ LLVMBuildRetVoid(ctx->ac.builder);
else
- LLVMBuildRet(ctx->gallivm.builder, ret);
+ LLVMBuildRet(ctx->ac.builder, ret);
}
/* Generate code for the hardware VS shader stage to go with a geometry shader */
@@ -5125,7 +5398,6 @@
{
struct si_shader_context ctx;
struct si_shader *shader;
- struct gallivm_state *gallivm = &ctx.gallivm;
LLVMBuilderRef builder;
struct lp_build_tgsi_context *bld_base = &ctx.bld_base;
struct lp_build_context *uint = &bld_base->uint_bld;
@@ -5152,14 +5424,13 @@
ctx.shader = shader;
ctx.type = PIPE_SHADER_VERTEX;
- builder = gallivm->builder;
+ builder = ctx.ac.builder;
create_function(&ctx);
preload_ring_buffers(&ctx);
LLVMValueRef voffset =
- lp_build_mul_imm(uint, LLVMGetParam(ctx.main_fn,
- ctx.param_vertex_id), 4);
+ lp_build_mul_imm(uint, ctx.abi.vertex_id, 4);
/* Fetch the vertex stream ID.*/
LLVMValueRef stream_id;
@@ -5183,7 +5454,7 @@
LLVMBasicBlockRef end_bb;
LLVMValueRef switch_inst;
- end_bb = LLVMAppendBasicBlockInContext(gallivm->context, ctx.main_fn, "end");
+ end_bb = LLVMAppendBasicBlockInContext(ctx.ac.context, ctx.main_fn, "end");
switch_inst = LLVMBuildSwitch(builder, stream_id, end_bb, 4);
for (int stream = 0; stream < 4; stream++) {
@@ -5196,7 +5467,7 @@
if (stream > 0 && !gs_selector->so.num_outputs)
continue;
- bb = LLVMInsertBasicBlockInContext(gallivm->context, end_bb, "out");
+ bb = LLVMInsertBasicBlockInContext(ctx.ac.context, end_bb, "out");
LLVMAddCase(switch_inst, LLVMConstInt(ctx.i32, stream, 0), bb);
LLVMPositionBuilderAtEnd(builder, bb);
@@ -5238,7 +5509,7 @@
LLVMPositionBuilderAtEnd(builder, end_bb);
- LLVMBuildRetVoid(gallivm->builder);
+ LLVMBuildRetVoid(ctx.ac.builder);
ctx.type = PIPE_SHADER_GEOMETRY; /* override for shader dumping */
si_llvm_optimize_module(&ctx);
@@ -5249,7 +5520,7 @@
debug, PIPE_SHADER_GEOMETRY,
"GS Copy Shader");
if (!r) {
- if (r600_can_dump_shader(&sscreen->b, PIPE_SHADER_GEOMETRY))
+ if (si_can_dump_shader(&sscreen->b, PIPE_SHADER_GEOMETRY))
fprintf(stderr, "GS Copy Shader:\n");
si_shader_dump(sscreen, ctx.shader, debug,
PIPE_SHADER_GEOMETRY, stderr, true);
@@ -5275,6 +5546,8 @@
prefix, prolog->instance_divisor_is_one);
fprintf(f, " %s.instance_divisor_is_fetched = %u\n",
prefix, prolog->instance_divisor_is_fetched);
+ fprintf(f, " %s.ls_vgpr_fix = %u\n",
+ prefix, prolog->ls_vgpr_fix);
fprintf(f, " mono.vs.fix_fetch = {");
for (int i = 0; i < SI_MAX_ATTRIBS; i++)
@@ -5438,7 +5711,7 @@
LLVMTypeRef type = LLVMGetElementType(LLVMTypeOf(inst));
/* No idea why LLVM aligns allocas to 4 elements. */
unsigned alignment = LLVMGetAlignment(inst);
- unsigned dw_size = align(llvm_get_type_size(type) / 4, alignment);
+ unsigned dw_size = align(ac_get_type_size(type) / 4, alignment);
ctx->shader->config.private_mem_vgprs += dw_size;
}
bb = LLVMGetNextBasicBlock(bb);
@@ -5448,7 +5721,7 @@
static void si_init_exec_full_mask(struct si_shader_context *ctx)
{
LLVMValueRef full_mask = LLVMConstInt(ctx->i64, ~0ull, 0);
- lp_build_intrinsic(ctx->gallivm.builder,
+ lp_build_intrinsic(ctx->ac.builder,
"llvm.amdgcn.init.exec", ctx->voidt,
&full_mask, 1, LP_FUNC_ATTR_CONVERGENT);
}
@@ -5460,11 +5733,19 @@
LLVMGetParam(ctx->main_fn, param),
LLVMConstInt(ctx->i32, bitoffset, 0),
};
- lp_build_intrinsic(ctx->gallivm.builder,
+ lp_build_intrinsic(ctx->ac.builder,
"llvm.amdgcn.init.exec.from.input",
ctx->voidt, args, 2, LP_FUNC_ATTR_CONVERGENT);
}
+static bool si_vs_needs_prolog(const struct si_shader_selector *sel,
+ const struct si_vs_prolog_bits *key)
+{
+ /* VGPR initialization fixup for Vega10 and Raven is always done in the
+ * VS prolog. */
+ return sel->vs_needs_prolog || key->ls_vgpr_fix;
+}
+
static bool si_compile_tgsi_main(struct si_shader_context *ctx,
bool is_monolithic)
{
@@ -5472,6 +5753,7 @@
struct si_shader_selector *sel = shader->selector;
struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
+ // TODO clean all this up!
switch (ctx->type) {
case PIPE_SHADER_VERTEX:
ctx->load_input = declare_input_vs;
@@ -5479,8 +5761,10 @@
bld_base->emit_epilogue = si_llvm_emit_ls_epilogue;
else if (shader->key.as_es)
bld_base->emit_epilogue = si_llvm_emit_es_epilogue;
- else
- bld_base->emit_epilogue = si_llvm_emit_vs_epilogue;
+ else {
+ ctx->abi.emit_outputs = si_llvm_emit_vs_epilogue;
+ bld_base->emit_epilogue = si_tgsi_emit_epilogue;
+ }
break;
case PIPE_SHADER_TESS_CTRL:
bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_tcs;
@@ -5492,8 +5776,10 @@
bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_tes;
if (shader->key.as_es)
bld_base->emit_epilogue = si_llvm_emit_es_epilogue;
- else
- bld_base->emit_epilogue = si_llvm_emit_vs_epilogue;
+ else {
+ ctx->abi.emit_outputs = si_llvm_emit_vs_epilogue;
+ bld_base->emit_epilogue = si_tgsi_emit_epilogue;
+ }
break;
case PIPE_SHADER_GEOMETRY:
bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_gs;
@@ -5501,16 +5787,19 @@
break;
case PIPE_SHADER_FRAGMENT:
ctx->load_input = declare_input_fs;
- bld_base->emit_epilogue = si_llvm_return_fs_outputs;
+ ctx->abi.emit_outputs = si_llvm_return_fs_outputs;
+ bld_base->emit_epilogue = si_tgsi_emit_epilogue;
break;
case PIPE_SHADER_COMPUTE:
- ctx->declare_memory_region = declare_compute_memory;
break;
default:
assert(!"Unsupported shader type");
return false;
}
+ ctx->abi.load_ubo = load_ubo;
+ ctx->abi.load_ssbo = load_ssbo;
+
create_function(ctx);
preload_ring_buffers(ctx);
@@ -5532,7 +5821,7 @@
(shader->key.as_es || shader->key.as_ls) &&
(ctx->type == PIPE_SHADER_TESS_EVAL ||
(ctx->type == PIPE_SHADER_VERTEX &&
- !sel->vs_needs_prolog))) {
+ !si_vs_needs_prolog(sel, &shader->key.part.vs.prolog)))) {
si_init_exec_from_input(ctx,
ctx->param_merged_wave_info, 0);
} else if (ctx->type == PIPE_SHADER_TESS_CTRL ||
@@ -5553,6 +5842,14 @@
}
}
+ if (ctx->type == PIPE_SHADER_TESS_CTRL &&
+ sel->tcs_info.tessfactors_are_def_in_all_invocs) {
+ for (unsigned i = 0; i < 6; i++) {
+ ctx->invoc0_tess_factors[i] =
+ lp_build_alloca_undef(&ctx->gallivm, ctx->i32, "");
+ }
+ }
+
if (ctx->type == PIPE_SHADER_GEOMETRY) {
int i;
for (i = 0; i < 4; i++) {
@@ -5563,14 +5860,21 @@
}
if (ctx->type == PIPE_SHADER_FRAGMENT && sel->info.uses_kill &&
- ctx->screen->b.debug_flags & DBG_FS_CORRECT_DERIVS_AFTER_KILL) {
+ ctx->screen->b.debug_flags & DBG(FS_CORRECT_DERIVS_AFTER_KILL)) {
/* This is initialized to 0.0 = not kill. */
ctx->postponed_kill = lp_build_alloca(&ctx->gallivm, ctx->f32, "");
}
- if (!lp_build_tgsi_llvm(bld_base, sel->tokens)) {
- fprintf(stderr, "Failed to translate shader from TGSI to LLVM\n");
- return false;
+ if (sel->tokens) {
+ if (!lp_build_tgsi_llvm(bld_base, sel->tokens)) {
+ fprintf(stderr, "Failed to translate shader from TGSI to LLVM\n");
+ return false;
+ }
+ } else {
+ if (!si_nir_build_llvm(ctx, sel->nir)) {
+ fprintf(stderr, "Failed to translate shader from NIR to LLVM\n");
+ return false;
+ }
}
si_llvm_build_ret(ctx, ctx->return_value);
@@ -5598,11 +5902,13 @@
key->vs_prolog.num_input_sgprs = num_input_sgprs;
key->vs_prolog.last_input = MAX2(1, info->num_inputs) - 1;
key->vs_prolog.as_ls = shader_out->key.as_ls;
+ key->vs_prolog.as_es = shader_out->key.as_es;
if (shader_out->selector->type == PIPE_SHADER_TESS_CTRL) {
key->vs_prolog.as_ls = 1;
key->vs_prolog.num_merged_next_stage_vgprs = 2;
} else if (shader_out->selector->type == PIPE_SHADER_GEOMETRY) {
+ key->vs_prolog.as_es = 1;
key->vs_prolog.num_merged_next_stage_vgprs = 5;
}
@@ -5775,12 +6081,13 @@
union si_shader_part_key *key)
{
unsigned num_sgprs, num_vgprs;
- struct gallivm_state *gallivm = &ctx->gallivm;
- LLVMBuilderRef builder = gallivm->builder;
- LLVMTypeRef params[48]; /* 40 SGPRs (maximum) + some VGPRs */
+ struct si_function_info fninfo;
+ LLVMBuilderRef builder = ctx->ac.builder;
LLVMTypeRef returns[48];
LLVMValueRef func, ret;
+ si_init_function_info(&fninfo);
+
if (ctx->screen->b.chip_class >= GFX9) {
num_sgprs = 8 + GFX9_GS_NUM_USER_SGPR;
num_vgprs = 5; /* ES inputs are not needed by GS */
@@ -5790,18 +6097,18 @@
}
for (unsigned i = 0; i < num_sgprs; ++i) {
- params[i] = ctx->i32;
+ add_arg(&fninfo, ARG_SGPR, ctx->i32);
returns[i] = ctx->i32;
}
for (unsigned i = 0; i < num_vgprs; ++i) {
- params[num_sgprs + i] = ctx->i32;
+ add_arg(&fninfo, ARG_VGPR, ctx->i32);
returns[num_sgprs + i] = ctx->f32;
}
/* Create the function. */
si_create_function(ctx, "gs_prolog", returns, num_sgprs + num_vgprs,
- params, num_sgprs + num_vgprs, num_sgprs - 1, 0);
+ &fninfo, 0);
func = ctx->main_fn;
/* Set the full EXEC mask for the prolog, because we are only fiddling
@@ -5821,7 +6128,7 @@
}
for (unsigned i = 0; i < num_vgprs; i++) {
LLVMValueRef p = LLVMGetParam(func, num_sgprs + i);
- p = LLVMBuildBitCast(builder, p, ctx->f32, "");
+ p = ac_to_float(&ctx->ac, p);
ret = LLVMBuildInsertValue(builder, ret, p, num_sgprs + i, "");
}
@@ -5870,7 +6177,7 @@
hi = LLVMBuildShl(builder, vtx_out[i*2+1],
LLVMConstInt(ctx->i32, 16, 0), "");
out = LLVMBuildOr(builder, vtx_out[i*2], hi, "");
- out = LLVMBuildBitCast(builder, out, ctx->f32, "");
+ out = ac_to_float(&ctx->ac, out);
ret = LLVMBuildInsertValue(builder, ret, out,
gfx9_vtx_params[i], "");
}
@@ -5878,7 +6185,7 @@
for (unsigned i = 0; i < 6; i++) {
LLVMValueRef out;
- out = LLVMBuildBitCast(builder, vtx_out[i], ctx->f32, "");
+ out = ac_to_float(&ctx->ac, vtx_out[i]);
ret = LLVMBuildInsertValue(builder, ret, out,
gfx6_vtx_params[i], "");
}
@@ -5898,23 +6205,23 @@
unsigned main_part,
unsigned next_shader_first_part)
{
- struct gallivm_state *gallivm = &ctx->gallivm;
- LLVMBuilderRef builder = ctx->gallivm.builder;
+ LLVMBuilderRef builder = ctx->ac.builder;
/* PS epilog has one arg per color component; gfx9 merged shader
* prologs need to forward 32 user SGPRs.
*/
- LLVMTypeRef param_types[64];
+ struct si_function_info fninfo;
LLVMValueRef initial[64], out[64];
LLVMTypeRef function_type;
- unsigned num_params;
+ unsigned num_first_params;
unsigned num_out, initial_num_out;
MAYBE_UNUSED unsigned num_out_sgpr; /* used in debug checks */
MAYBE_UNUSED unsigned initial_num_out_sgpr; /* used in debug checks */
unsigned num_sgprs, num_vgprs;
- unsigned last_sgpr_param;
unsigned gprs;
struct lp_build_if_state if_state;
+ si_init_function_info(&fninfo);
+
for (unsigned i = 0; i < num_parts; ++i) {
lp_add_function_attr(parts[i], -1, LP_FUNC_ATTR_ALWAYSINLINE);
LLVMSetLinkage(parts[i], LLVMPrivateLinkage);
@@ -5929,32 +6236,26 @@
num_vgprs = 0;
function_type = LLVMGetElementType(LLVMTypeOf(parts[0]));
- num_params = LLVMCountParamTypes(function_type);
+ num_first_params = LLVMCountParamTypes(function_type);
- for (unsigned i = 0; i < num_params; ++i) {
+ for (unsigned i = 0; i < num_first_params; ++i) {
LLVMValueRef param = LLVMGetParam(parts[0], i);
if (ac_is_sgpr_param(param)) {
assert(num_vgprs == 0);
- num_sgprs += llvm_get_type_size(LLVMTypeOf(param)) / 4;
+ num_sgprs += ac_get_type_size(LLVMTypeOf(param)) / 4;
} else {
- num_vgprs += llvm_get_type_size(LLVMTypeOf(param)) / 4;
+ num_vgprs += ac_get_type_size(LLVMTypeOf(param)) / 4;
}
}
- assert(num_vgprs + num_sgprs <= ARRAY_SIZE(param_types));
- num_params = 0;
- last_sgpr_param = 0;
gprs = 0;
while (gprs < num_sgprs + num_vgprs) {
- LLVMValueRef param = LLVMGetParam(parts[main_part], num_params);
- unsigned size;
+ LLVMValueRef param = LLVMGetParam(parts[main_part], fninfo.num_params);
+ LLVMTypeRef type = LLVMTypeOf(param);
+ unsigned size = ac_get_type_size(type) / 4;
- param_types[num_params] = LLVMTypeOf(param);
- if (gprs < num_sgprs)
- last_sgpr_param = num_params;
- size = llvm_get_type_size(param_types[num_params]) / 4;
- num_params++;
+ add_arg(&fninfo, gprs < num_sgprs ? ARG_SGPR : ARG_VGPR, type);
assert(ac_is_sgpr_param(param) == (gprs < num_sgprs));
assert(gprs + size <= num_sgprs + num_vgprs &&
@@ -5963,8 +6264,7 @@
gprs += size;
}
- si_create_function(ctx, "wrapper", NULL, 0, param_types, num_params,
- last_sgpr_param,
+ si_create_function(ctx, "wrapper", NULL, 0, &fninfo,
si_get_max_workgroup_size(ctx->shader));
if (is_merged_shader(ctx->shader))
@@ -5976,11 +6276,11 @@
num_out = 0;
num_out_sgpr = 0;
- for (unsigned i = 0; i < num_params; ++i) {
+ for (unsigned i = 0; i < fninfo.num_params; ++i) {
LLVMValueRef param = LLVMGetParam(ctx->main_fn, i);
LLVMTypeRef param_type = LLVMTypeOf(param);
- LLVMTypeRef out_type = i <= last_sgpr_param ? ctx->i32 : ctx->f32;
- unsigned size = llvm_get_type_size(param_type) / 4;
+ LLVMTypeRef out_type = i < fninfo.num_sgpr_params ? ctx->i32 : ctx->f32;
+ unsigned size = ac_get_type_size(param_type) / 4;
if (size == 1) {
if (param_type != out_type)
@@ -6002,7 +6302,7 @@
builder, param, LLVMConstInt(ctx->i32, j, 0), "");
}
- if (i <= last_sgpr_param)
+ if (i < fninfo.num_sgpr_params)
num_out_sgpr = num_out;
}
@@ -6016,9 +6316,7 @@
LLVMValueRef ret;
LLVMTypeRef ret_type;
unsigned out_idx = 0;
-
- num_params = LLVMCountParams(parts[part]);
- assert(num_params <= ARRAY_SIZE(param_types));
+ unsigned num_params = LLVMCountParams(parts[part]);
/* Merged shaders are executed conditionally depending
* on the number of enabled threads passed in the input SGPRs. */
@@ -6044,7 +6342,7 @@
param = LLVMGetParam(parts[part], param_idx);
param_type = LLVMTypeOf(param);
- param_size = llvm_get_type_size(param_type) / 4;
+ param_size = ac_get_type_size(param_type) / 4;
is_sgpr = ac_is_sgpr_param(param);
if (is_sgpr) {
@@ -6063,7 +6361,7 @@
if (param_size == 1)
arg = out[out_idx];
else
- arg = lp_build_gather_values(gallivm, &out[out_idx], param_size);
+ arg = lp_build_gather_values(&ctx->gallivm, &out[out_idx], param_size);
if (LLVMTypeOf(arg) != param_type) {
if (LLVMGetTypeKind(param_type) == LLVMPointerTypeKind) {
@@ -6138,9 +6436,12 @@
/* Dump TGSI code before doing TGSI->LLVM conversion in case the
* conversion fails. */
- if (r600_can_dump_shader(&sscreen->b, sel->info.processor) &&
- !(sscreen->b.debug_flags & DBG_NO_TGSI)) {
- tgsi_dump(sel->tokens, 0);
+ if (si_can_dump_shader(&sscreen->b, sel->info.processor) &&
+ !(sscreen->b.debug_flags & DBG(NO_TGSI))) {
+ if (sel->tokens)
+ tgsi_dump(sel->tokens, 0);
+ else
+ nir_print_shader(sel->nir, stderr);
si_dump_streamout(&sel->so);
}
@@ -6153,8 +6454,6 @@
shader->info.uses_instanceid = sel->info.uses_instanceid;
- ctx.load_system_value = declare_system_value;
-
if (!si_compile_tgsi_main(&ctx, is_monolithic)) {
si_llvm_dispose(&ctx);
return -1;
@@ -6182,6 +6481,8 @@
if (sscreen->b.chip_class >= GFX9) {
struct si_shader_selector *ls = shader->key.part.tcs.ls;
LLVMValueRef parts[4];
+ bool vs_needs_prolog =
+ si_vs_needs_prolog(ls, &shader->key.part.tcs.ls_prolog);
/* TCS main part */
parts[2] = ctx.main_fn;
@@ -6194,7 +6495,7 @@
parts[3] = ctx.main_fn;
/* VS prolog */
- if (ls->vs_needs_prolog) {
+ if (vs_needs_prolog) {
union si_shader_part_key vs_prolog_key;
si_get_vs_prolog_key(&ls->info,
shader->info.num_input_sgprs,
@@ -6225,9 +6526,9 @@
ctx.type = PIPE_SHADER_TESS_CTRL;
si_build_wrapper_function(&ctx,
- parts + !ls->vs_needs_prolog,
- 4 - !ls->vs_needs_prolog, 0,
- ls->vs_needs_prolog ? 2 : 1);
+ parts + !vs_needs_prolog,
+ 4 - !vs_needs_prolog, 0,
+ vs_needs_prolog ? 2 : 1);
} else {
LLVMValueRef parts[2];
union si_shader_part_key epilog_key;
@@ -6344,7 +6645,7 @@
si_optimize_vs_outputs(&ctx);
if ((debug && debug->debug_message) ||
- r600_can_dump_shader(&sscreen->b, ctx.type))
+ si_can_dump_shader(&sscreen->b, ctx.type))
si_count_scratch_private_memory(&ctx);
/* Compile to bytecode. */
@@ -6481,7 +6782,6 @@
struct si_shader shader = {};
struct si_shader_context ctx;
- struct gallivm_state *gallivm = &ctx.gallivm;
si_init_shader_ctx(&ctx, sscreen, tm);
ctx.shader = &shader;
@@ -6489,6 +6789,8 @@
switch (type) {
case PIPE_SHADER_VERTEX:
+ shader.key.as_ls = key->vs_prolog.as_ls;
+ shader.key.as_es = key->vs_prolog.as_es;
break;
case PIPE_SHADER_TESS_CTRL:
assert(!prolog);
@@ -6513,7 +6815,7 @@
si_llvm_optimize_module(&ctx);
if (si_compile_llvm(sscreen, &result->binary, &result->config, tm,
- gallivm->module, debug, ctx.type, name)) {
+ ctx.ac.module, debug, ctx.type, name)) {
FREE(result);
result = NULL;
goto out;
@@ -6530,15 +6832,19 @@
static LLVMValueRef si_prolog_get_rw_buffers(struct si_shader_context *ctx)
{
- struct gallivm_state *gallivm = &ctx->gallivm;
LLVMValueRef ptr[2], list;
+ bool is_merged_shader =
+ ctx->screen->b.chip_class >= GFX9 &&
+ (ctx->type == PIPE_SHADER_TESS_CTRL ||
+ ctx->type == PIPE_SHADER_GEOMETRY ||
+ ctx->shader->key.as_ls || ctx->shader->key.as_es);
/* Get the pointer to rw buffers. */
- ptr[0] = LLVMGetParam(ctx->main_fn, SI_SGPR_RW_BUFFERS);
- ptr[1] = LLVMGetParam(ctx->main_fn, SI_SGPR_RW_BUFFERS_HI);
- list = lp_build_gather_values(gallivm, ptr, 2);
- list = LLVMBuildBitCast(gallivm->builder, list, ctx->i64, "");
- list = LLVMBuildIntToPtr(gallivm->builder, list,
+ ptr[0] = LLVMGetParam(ctx->main_fn, (is_merged_shader ? 8 : 0) + SI_SGPR_RW_BUFFERS);
+ ptr[1] = LLVMGetParam(ctx->main_fn, (is_merged_shader ? 8 : 0) + SI_SGPR_RW_BUFFERS_HI);
+ list = lp_build_gather_values(&ctx->gallivm, ptr, 2);
+ list = LLVMBuildBitCast(ctx->ac.builder, list, ctx->i64, "");
+ list = LLVMBuildIntToPtr(ctx->ac.builder, list,
si_const_array(ctx->v4i32, SI_NUM_RW_BUFFERS), "");
return list;
}
@@ -6562,38 +6868,33 @@
static void si_build_vs_prolog_function(struct si_shader_context *ctx,
union si_shader_part_key *key)
{
- struct gallivm_state *gallivm = &ctx->gallivm;
- LLVMTypeRef *params, *returns;
+ struct si_function_info fninfo;
+ LLVMTypeRef *returns;
LLVMValueRef ret, func;
- int last_sgpr, num_params, num_returns, i;
- unsigned first_vs_vgpr = key->vs_prolog.num_input_sgprs +
- key->vs_prolog.num_merged_next_stage_vgprs;
+ int num_returns, i;
+ unsigned first_vs_vgpr = key->vs_prolog.num_merged_next_stage_vgprs;
unsigned num_input_vgprs = key->vs_prolog.num_merged_next_stage_vgprs + 4;
+ LLVMValueRef input_vgprs[9];
unsigned num_all_input_regs = key->vs_prolog.num_input_sgprs +
num_input_vgprs;
unsigned user_sgpr_base = key->vs_prolog.num_merged_next_stage_vgprs ? 8 : 0;
- ctx->param_vertex_id = first_vs_vgpr;
- ctx->param_instance_id = first_vs_vgpr + (key->vs_prolog.as_ls ? 2 : 1);
+ si_init_function_info(&fninfo);
/* 4 preloaded VGPRs + vertex load indices as prolog outputs */
- params = alloca(num_all_input_regs * sizeof(LLVMTypeRef));
returns = alloca((num_all_input_regs + key->vs_prolog.last_input + 1) *
sizeof(LLVMTypeRef));
- num_params = 0;
num_returns = 0;
/* Declare input and output SGPRs. */
- num_params = 0;
for (i = 0; i < key->vs_prolog.num_input_sgprs; i++) {
- params[num_params++] = ctx->i32;
+ add_arg(&fninfo, ARG_SGPR, ctx->i32);
returns[num_returns++] = ctx->i32;
}
- last_sgpr = num_params - 1;
/* Preloaded VGPRs (outputs must be floats) */
for (i = 0; i < num_input_vgprs; i++) {
- params[num_params++] = ctx->i32;
+ add_arg_assign(&fninfo, ARG_VGPR, ctx->i32, &input_vgprs[i]);
returns[num_returns++] = ctx->f32;
}
@@ -6602,13 +6903,36 @@
returns[num_returns++] = ctx->f32;
/* Create the function. */
- si_create_function(ctx, "vs_prolog", returns, num_returns, params,
- num_params, last_sgpr, 0);
+ si_create_function(ctx, "vs_prolog", returns, num_returns, &fninfo, 0);
func = ctx->main_fn;
- if (key->vs_prolog.num_merged_next_stage_vgprs &&
- !key->vs_prolog.is_monolithic)
- si_init_exec_from_input(ctx, 3, 0);
+ if (key->vs_prolog.num_merged_next_stage_vgprs) {
+ if (!key->vs_prolog.is_monolithic)
+ si_init_exec_from_input(ctx, 3, 0);
+
+ if (key->vs_prolog.as_ls &&
+ (ctx->screen->b.family == CHIP_VEGA10 ||
+ ctx->screen->b.family == CHIP_RAVEN)) {
+ /* If there are no HS threads, SPI loads the LS VGPRs
+ * starting at VGPR 0. Shift them back to where they
+ * belong.
+ */
+ LLVMValueRef has_hs_threads =
+ LLVMBuildICmp(ctx->ac.builder, LLVMIntNE,
+ unpack_param(ctx, 3, 8, 8),
+ ctx->i32_0, "");
+
+ for (i = 4; i > 0; --i) {
+ input_vgprs[i + 1] =
+ LLVMBuildSelect(ctx->ac.builder, has_hs_threads,
+ input_vgprs[i + 1],
+ input_vgprs[i - 1], "");
+ }
+ }
+ }
+
+ ctx->abi.vertex_id = input_vgprs[first_vs_vgpr];
+ ctx->abi.instance_id = input_vgprs[first_vs_vgpr + (key->vs_prolog.as_ls ? 2 : 1)];
/* Copy inputs to outputs. This should be no-op, as the registers match,
* but it will prevent the compiler from overwriting them unintentionally.
@@ -6616,12 +6940,13 @@
ret = ctx->return_value;
for (i = 0; i < key->vs_prolog.num_input_sgprs; i++) {
LLVMValueRef p = LLVMGetParam(func, i);
- ret = LLVMBuildInsertValue(gallivm->builder, ret, p, i, "");
+ ret = LLVMBuildInsertValue(ctx->ac.builder, ret, p, i, "");
}
- for (; i < num_params; i++) {
- LLVMValueRef p = LLVMGetParam(func, i);
- p = LLVMBuildBitCast(gallivm->builder, p, ctx->f32, "");
- ret = LLVMBuildInsertValue(gallivm->builder, ret, p, i, "");
+ for (i = 0; i < num_input_vgprs; i++) {
+ LLVMValueRef p = input_vgprs[i];
+ p = ac_to_float(&ctx->ac, p);
+ ret = LLVMBuildInsertValue(ctx->ac.builder, ret, p,
+ key->vs_prolog.num_input_sgprs + i, "");
}
/* Compute vertex load indices from instance divisors. */
@@ -6632,7 +6957,7 @@
LLVMValueRef buf_index =
LLVMConstInt(ctx->i32, SI_VS_CONST_INSTANCE_DIVISORS, 0);
instance_divisor_constbuf =
- ac_build_indexed_load_const(&ctx->ac, list, buf_index);
+ ac_build_load_to_sgpr(&ctx->ac, list, buf_index);
}
for (i = 0; i <= key->vs_prolog.last_input; i++) {
@@ -6648,8 +6973,7 @@
if (divisor_is_fetched) {
divisor = buffer_load_const(ctx, instance_divisor_constbuf,
LLVMConstInt(ctx->i32, i * 4, 0));
- divisor = LLVMBuildBitCast(gallivm->builder, divisor,
- ctx->i32, "");
+ divisor = ac_to_integer(&ctx->ac, divisor);
}
/* InstanceID / Divisor + StartInstance */
@@ -6659,15 +6983,15 @@
divisor);
} else {
/* VertexID + BaseVertex */
- index = LLVMBuildAdd(gallivm->builder,
- LLVMGetParam(func, ctx->param_vertex_id),
+ index = LLVMBuildAdd(ctx->ac.builder,
+ ctx->abi.vertex_id,
LLVMGetParam(func, user_sgpr_base +
SI_SGPR_BASE_VERTEX), "");
}
- index = LLVMBuildBitCast(gallivm->builder, index, ctx->f32, "");
- ret = LLVMBuildInsertValue(gallivm->builder, ret, index,
- num_params++, "");
+ index = ac_to_float(&ctx->ac, index);
+ ret = LLVMBuildInsertValue(ctx->ac.builder, ret, index,
+ fninfo.num_params + i, "");
}
si_llvm_build_ret(ctx, ret);
@@ -6682,8 +7006,7 @@
{
struct si_shader_selector *vs = main_part->selector;
- /* The prolog is a no-op if there are no inputs. */
- if (!vs->vs_needs_prolog)
+ if (!si_vs_needs_prolog(vs, key))
return true;
/* Get the prolog. */
@@ -6718,64 +7041,76 @@
static void si_build_tcs_epilog_function(struct si_shader_context *ctx,
union si_shader_part_key *key)
{
- struct gallivm_state *gallivm = &ctx->gallivm;
struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
- LLVMTypeRef params[32];
+ struct si_function_info fninfo;
LLVMValueRef func;
- int last_sgpr, num_params = 0;
+
+ si_init_function_info(&fninfo);
if (ctx->screen->b.chip_class >= GFX9) {
- params[num_params++] = ctx->i64;
- params[ctx->param_tcs_offchip_offset = num_params++] = ctx->i32;
- params[num_params++] = ctx->i32; /* wave info */
- params[ctx->param_tcs_factor_offset = num_params++] = ctx->i32;
- params[num_params++] = ctx->i32;
- params[num_params++] = ctx->i32;
- params[num_params++] = ctx->i32;
- params[num_params++] = ctx->i64;
- params[num_params++] = ctx->i64;
- params[num_params++] = ctx->i64;
- params[num_params++] = ctx->i64;
- params[num_params++] = ctx->i32;
- params[num_params++] = ctx->i32;
- params[num_params++] = ctx->i32;
- params[num_params++] = ctx->i32;
- params[ctx->param_tcs_offchip_layout = num_params++] = ctx->i32;
- params[num_params++] = ctx->i32;
- params[num_params++] = ctx->i32;
- params[ctx->param_tcs_offchip_addr_base64k = num_params++] = ctx->i32;
- params[ctx->param_tcs_factor_addr_base64k = num_params++] = ctx->i32;
+ add_arg(&fninfo, ARG_SGPR, ctx->i64);
+ ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ add_arg(&fninfo, ARG_SGPR, ctx->i32); /* wave info */
+ ctx->param_tcs_factor_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ add_arg(&fninfo, ARG_SGPR, ctx->i64);
+ add_arg(&fninfo, ARG_SGPR, ctx->i64);
+ add_arg(&fninfo, ARG_SGPR, ctx->i64);
+ add_arg(&fninfo, ARG_SGPR, ctx->i64);
+ add_arg(&fninfo, ARG_SGPR, ctx->i64);
+ add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_tcs_offchip_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_tcs_factor_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32);
} else {
- params[num_params++] = ctx->i64;
- params[num_params++] = ctx->i64;
- params[num_params++] = ctx->i64;
- params[ctx->param_tcs_offchip_layout = num_params++] = ctx->i32;
- params[num_params++] = ctx->i32;
- params[num_params++] = ctx->i32;
- params[num_params++] = ctx->i32;
- params[ctx->param_tcs_offchip_addr_base64k = num_params++] = ctx->i32;
- params[ctx->param_tcs_factor_addr_base64k = num_params++] = ctx->i32;
- params[ctx->param_tcs_offchip_offset = num_params++] = ctx->i32;
- params[ctx->param_tcs_factor_offset = num_params++] = ctx->i32;
- }
- last_sgpr = num_params - 1;
-
- params[num_params++] = ctx->i32; /* patch index within the wave (REL_PATCH_ID) */
- params[num_params++] = ctx->i32; /* invocation ID within the patch */
- params[num_params++] = ctx->i32; /* LDS offset where tess factors should be loaded from */
+ add_arg(&fninfo, ARG_SGPR, ctx->i64);
+ add_arg(&fninfo, ARG_SGPR, ctx->i64);
+ add_arg(&fninfo, ARG_SGPR, ctx->i64);
+ add_arg(&fninfo, ARG_SGPR, ctx->i64);
+ ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_tcs_offchip_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_tcs_factor_addr_base64k = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ ctx->param_tcs_factor_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+ }
+
+ add_arg(&fninfo, ARG_VGPR, ctx->i32); /* VGPR gap */
+ add_arg(&fninfo, ARG_VGPR, ctx->i32); /* VGPR gap */
+ unsigned tess_factors_idx =
+ add_arg(&fninfo, ARG_VGPR, ctx->i32); /* patch index within the wave (REL_PATCH_ID) */
+ add_arg(&fninfo, ARG_VGPR, ctx->i32); /* invocation ID within the patch */
+ add_arg(&fninfo, ARG_VGPR, ctx->i32); /* LDS offset where tess factors should be loaded from */
+
+ for (unsigned i = 0; i < 6; i++)
+ add_arg(&fninfo, ARG_VGPR, ctx->i32); /* tess factors */
/* Create the function. */
- si_create_function(ctx, "tcs_epilog", NULL, 0, params, num_params, last_sgpr,
+ si_create_function(ctx, "tcs_epilog", NULL, 0, &fninfo,
ctx->screen->b.chip_class >= CIK ? 128 : 64);
declare_lds_as_pointer(ctx);
func = ctx->main_fn;
+ LLVMValueRef invoc0_tess_factors[6];
+ for (unsigned i = 0; i < 6; i++)
+ invoc0_tess_factors[i] = LLVMGetParam(func, tess_factors_idx + 3 + i);
+
si_write_tess_factors(bld_base,
- LLVMGetParam(func, last_sgpr + 1),
- LLVMGetParam(func, last_sgpr + 2),
- LLVMGetParam(func, last_sgpr + 3));
+ LLVMGetParam(func, tess_factors_idx),
+ LLVMGetParam(func, tess_factors_idx + 1),
+ LLVMGetParam(func, tess_factors_idx + 2),
+ invoc0_tess_factors, invoc0_tess_factors + 4);
- LLVMBuildRetVoid(gallivm->builder);
+ LLVMBuildRetVoid(ctx->ac.builder);
}
/**
@@ -6858,45 +7193,39 @@
static void si_build_ps_prolog_function(struct si_shader_context *ctx,
union si_shader_part_key *key)
{
- struct gallivm_state *gallivm = &ctx->gallivm;
- LLVMTypeRef *params;
+ struct si_function_info fninfo;
LLVMValueRef ret, func;
- int last_sgpr, num_params, num_returns, i, num_color_channels;
+ int num_returns, i, num_color_channels;
assert(si_need_ps_prolog(key));
- /* Number of inputs + 8 color elements. */
- params = alloca((key->ps_prolog.num_input_sgprs +
- key->ps_prolog.num_input_vgprs + 8) *
- sizeof(LLVMTypeRef));
+ si_init_function_info(&fninfo);
/* Declare inputs. */
- num_params = 0;
for (i = 0; i < key->ps_prolog.num_input_sgprs; i++)
- params[num_params++] = ctx->i32;
- last_sgpr = num_params - 1;
+ add_arg(&fninfo, ARG_SGPR, ctx->i32);
for (i = 0; i < key->ps_prolog.num_input_vgprs; i++)
- params[num_params++] = ctx->f32;
+ add_arg(&fninfo, ARG_VGPR, ctx->f32);
/* Declare outputs (same as inputs + add colors if needed) */
- num_returns = num_params;
+ num_returns = fninfo.num_params;
num_color_channels = util_bitcount(key->ps_prolog.colors_read);
for (i = 0; i < num_color_channels; i++)
- params[num_returns++] = ctx->f32;
+ fninfo.types[num_returns++] = ctx->f32;
/* Create the function. */
- si_create_function(ctx, "ps_prolog", params, num_returns, params,
- num_params, last_sgpr, 0);
+ si_create_function(ctx, "ps_prolog", fninfo.types, num_returns,
+ &fninfo, 0);
func = ctx->main_fn;
/* Copy inputs to outputs. This should be no-op, as the registers match,
* but it will prevent the compiler from overwriting them unintentionally.
*/
ret = ctx->return_value;
- for (i = 0; i < num_params; i++) {
+ for (i = 0; i < fninfo.num_params; i++) {
LLVMValueRef p = LLVMGetParam(func, i);
- ret = LLVMBuildInsertValue(gallivm->builder, ret, p, i, "");
+ ret = LLVMBuildInsertValue(ctx->ac.builder, ret, p, i, "");
}
/* Polygon stippling. */
@@ -6921,9 +7250,9 @@
* PRIM_MASK is after user SGPRs.
*/
bc_optimize = LLVMGetParam(func, SI_PS_NUM_USER_SGPR);
- bc_optimize = LLVMBuildLShr(gallivm->builder, bc_optimize,
+ bc_optimize = LLVMBuildLShr(ctx->ac.builder, bc_optimize,
LLVMConstInt(ctx->i32, 31, 0), "");
- bc_optimize = LLVMBuildTrunc(gallivm->builder, bc_optimize,
+ bc_optimize = LLVMBuildTrunc(ctx->ac.builder, bc_optimize,
ctx->i1, "");
if (key->ps_prolog.states.bc_optimize_for_persp) {
@@ -6935,9 +7264,9 @@
centroid[i] = LLVMGetParam(func, base + 4 + i);
/* Select PERSP_CENTROID. */
for (i = 0; i < 2; i++) {
- tmp = LLVMBuildSelect(gallivm->builder, bc_optimize,
+ tmp = LLVMBuildSelect(ctx->ac.builder, bc_optimize,
center[i], centroid[i], "");
- ret = LLVMBuildInsertValue(gallivm->builder, ret,
+ ret = LLVMBuildInsertValue(ctx->ac.builder, ret,
tmp, base + 4 + i, "");
}
}
@@ -6950,9 +7279,9 @@
centroid[i] = LLVMGetParam(func, base + 10 + i);
/* Select LINEAR_CENTROID. */
for (i = 0; i < 2; i++) {
- tmp = LLVMBuildSelect(gallivm->builder, bc_optimize,
+ tmp = LLVMBuildSelect(ctx->ac.builder, bc_optimize,
center[i], centroid[i], "");
- ret = LLVMBuildInsertValue(gallivm->builder, ret,
+ ret = LLVMBuildInsertValue(ctx->ac.builder, ret,
tmp, base + 10 + i, "");
}
}
@@ -6968,11 +7297,11 @@
persp_sample[i] = LLVMGetParam(func, base + i);
/* Overwrite PERSP_CENTER. */
for (i = 0; i < 2; i++)
- ret = LLVMBuildInsertValue(gallivm->builder, ret,
+ ret = LLVMBuildInsertValue(ctx->ac.builder, ret,
persp_sample[i], base + 2 + i, "");
/* Overwrite PERSP_CENTROID. */
for (i = 0; i < 2; i++)
- ret = LLVMBuildInsertValue(gallivm->builder, ret,
+ ret = LLVMBuildInsertValue(ctx->ac.builder, ret,
persp_sample[i], base + 4 + i, "");
}
if (key->ps_prolog.states.force_linear_sample_interp) {
@@ -6984,11 +7313,11 @@
linear_sample[i] = LLVMGetParam(func, base + 6 + i);
/* Overwrite LINEAR_CENTER. */
for (i = 0; i < 2; i++)
- ret = LLVMBuildInsertValue(gallivm->builder, ret,
+ ret = LLVMBuildInsertValue(ctx->ac.builder, ret,
linear_sample[i], base + 8 + i, "");
/* Overwrite LINEAR_CENTROID. */
for (i = 0; i < 2; i++)
- ret = LLVMBuildInsertValue(gallivm->builder, ret,
+ ret = LLVMBuildInsertValue(ctx->ac.builder, ret,
linear_sample[i], base + 10 + i, "");
}
@@ -7002,11 +7331,11 @@
persp_center[i] = LLVMGetParam(func, base + 2 + i);
/* Overwrite PERSP_SAMPLE. */
for (i = 0; i < 2; i++)
- ret = LLVMBuildInsertValue(gallivm->builder, ret,
+ ret = LLVMBuildInsertValue(ctx->ac.builder, ret,
persp_center[i], base + i, "");
/* Overwrite PERSP_CENTROID. */
for (i = 0; i < 2; i++)
- ret = LLVMBuildInsertValue(gallivm->builder, ret,
+ ret = LLVMBuildInsertValue(ctx->ac.builder, ret,
persp_center[i], base + 4 + i, "");
}
if (key->ps_prolog.states.force_linear_center_interp) {
@@ -7018,15 +7347,16 @@
linear_center[i] = LLVMGetParam(func, base + 8 + i);
/* Overwrite LINEAR_SAMPLE. */
for (i = 0; i < 2; i++)
- ret = LLVMBuildInsertValue(gallivm->builder, ret,
+ ret = LLVMBuildInsertValue(ctx->ac.builder, ret,
linear_center[i], base + 6 + i, "");
/* Overwrite LINEAR_CENTROID. */
for (i = 0; i < 2; i++)
- ret = LLVMBuildInsertValue(gallivm->builder, ret,
+ ret = LLVMBuildInsertValue(ctx->ac.builder, ret,
linear_center[i], base + 10 + i, "");
}
/* Interpolate colors. */
+ unsigned color_out_idx = 0;
for (i = 0; i < 2; i++) {
unsigned writemask = (key->ps_prolog.colors_read >> (i * 4)) & 0xf;
unsigned face_vgpr = key->ps_prolog.num_input_sgprs +
@@ -7043,11 +7373,11 @@
key->ps_prolog.color_interp_vgpr_index[i];
/* Get the (i,j) updated by bc_optimize handling. */
- interp[0] = LLVMBuildExtractValue(gallivm->builder, ret,
+ interp[0] = LLVMBuildExtractValue(ctx->ac.builder, ret,
interp_vgpr, "");
- interp[1] = LLVMBuildExtractValue(gallivm->builder, ret,
+ interp[1] = LLVMBuildExtractValue(ctx->ac.builder, ret,
interp_vgpr + 1, "");
- interp_ij = lp_build_gather_values(gallivm, interp, 2);
+ interp_ij = lp_build_gather_values(&ctx->gallivm, interp, 2);
}
/* Use the absolute location of the input. */
@@ -7055,7 +7385,7 @@
if (key->ps_prolog.states.color_two_side) {
face = LLVMGetParam(func, face_vgpr);
- face = LLVMBuildBitCast(gallivm->builder, face, ctx->i32, "");
+ face = ac_to_integer(&ctx->ac, face);
}
interp_fs_input(ctx,
@@ -7067,8 +7397,8 @@
while (writemask) {
unsigned chan = u_bit_scan(&writemask);
- ret = LLVMBuildInsertValue(gallivm->builder, ret, color[chan],
- num_params++, "");
+ ret = LLVMBuildInsertValue(ctx->ac.builder, ret, color[chan],
+ fninfo.num_params + color_out_idx++, "");
}
}
@@ -7106,17 +7436,17 @@
LLVMValueRef sampleid = unpack_param(ctx, ancillary_vgpr, 8, 4);
LLVMValueRef samplemask = LLVMGetParam(func, ancillary_vgpr + 1);
- samplemask = LLVMBuildBitCast(gallivm->builder, samplemask, ctx->i32, "");
+ samplemask = ac_to_integer(&ctx->ac, samplemask);
samplemask = LLVMBuildAnd(
- gallivm->builder,
+ ctx->ac.builder,
samplemask,
- LLVMBuildShl(gallivm->builder,
+ LLVMBuildShl(ctx->ac.builder,
LLVMConstInt(ctx->i32, ps_iter_mask, false),
sampleid, ""),
"");
- samplemask = LLVMBuildBitCast(gallivm->builder, samplemask, ctx->f32, "");
+ samplemask = ac_to_float(&ctx->ac, samplemask);
- ret = LLVMBuildInsertValue(gallivm->builder, ret, samplemask,
+ ret = LLVMBuildInsertValue(ctx->ac.builder, ret, samplemask,
ancillary_vgpr + 1, "");
}
@@ -7136,45 +7466,43 @@
static void si_build_ps_epilog_function(struct si_shader_context *ctx,
union si_shader_part_key *key)
{
- struct gallivm_state *gallivm = &ctx->gallivm;
struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
- LLVMTypeRef params[16+8*4+3];
+ struct si_function_info fninfo;
LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
- int last_sgpr, num_params = 0, i;
+ int i;
struct si_ps_exports exp = {};
+ si_init_function_info(&fninfo);
+
/* Declare input SGPRs. */
- params[ctx->param_rw_buffers = num_params++] = ctx->i64;
- params[ctx->param_const_and_shader_buffers = num_params++] = ctx->i64;
- params[ctx->param_samplers_and_images = num_params++] = ctx->i64;
- assert(num_params == SI_PARAM_ALPHA_REF);
- params[SI_PARAM_ALPHA_REF] = ctx->f32;
- last_sgpr = SI_PARAM_ALPHA_REF;
+ ctx->param_rw_buffers = add_arg(&fninfo, ARG_SGPR, ctx->i64);
+ ctx->param_bindless_samplers_and_images = add_arg(&fninfo, ARG_SGPR, ctx->i64);
+ ctx->param_const_and_shader_buffers = add_arg(&fninfo, ARG_SGPR, ctx->i64);
+ ctx->param_samplers_and_images = add_arg(&fninfo, ARG_SGPR, ctx->i64);
+ add_arg_checked(&fninfo, ARG_SGPR, ctx->f32, SI_PARAM_ALPHA_REF);
/* Declare input VGPRs. */
- num_params = (last_sgpr + 1) +
+ unsigned required_num_params =
+ fninfo.num_sgpr_params +
util_bitcount(key->ps_epilog.colors_written) * 4 +
key->ps_epilog.writes_z +
key->ps_epilog.writes_stencil +
key->ps_epilog.writes_samplemask;
- num_params = MAX2(num_params,
- last_sgpr + 1 + PS_EPILOG_SAMPLEMASK_MIN_LOC + 1);
-
- assert(num_params <= ARRAY_SIZE(params));
+ required_num_params = MAX2(required_num_params,
+ fninfo.num_sgpr_params + PS_EPILOG_SAMPLEMASK_MIN_LOC + 1);
- for (i = last_sgpr + 1; i < num_params; i++)
- params[i] = ctx->f32;
+ while (fninfo.num_params < required_num_params)
+ add_arg(&fninfo, ARG_VGPR, ctx->f32);
/* Create the function. */
- si_create_function(ctx, "ps_epilog", NULL, 0, params, num_params,
- last_sgpr, 0);
+ si_create_function(ctx, "ps_epilog", NULL, 0, &fninfo, 0);
/* Disable elimination of unused inputs. */
si_llvm_add_attribute(ctx->main_fn,
"InitialPSInputAddr", 0xffffff);
/* Process colors. */
- unsigned vgpr = last_sgpr + 1;
+ unsigned vgpr = fninfo.num_sgpr_params;
unsigned colors_written = key->ps_epilog.colors_written;
int last_color_export = -1;
@@ -7206,7 +7534,7 @@
color[i] = LLVMGetParam(ctx->main_fn, vgpr++);
si_export_mrt_color(bld_base, color, mrt,
- num_params - 1,
+ fninfo.num_params - 1,
mrt == last_color_export, &exp);
}
@@ -7227,7 +7555,7 @@
si_emit_ps_exports(ctx, &exp);
/* Compile. */
- LLVMBuildRetVoid(gallivm->builder);
+ LLVMBuildRetVoid(ctx->ac.builder);
}
/**
@@ -7382,7 +7710,7 @@
if (r)
return r;
} else {
- /* The shader consists of 2-3 parts:
+ /* The shader consists of several parts:
*
* - the middle part is the user shader, it has 1 variant only
* and it was compiled during the creation of the shader
@@ -7391,8 +7719,15 @@
* - the epilog part is inserted at the end
*
* The prolog and epilog have many (but simple) variants.
+ *
+ * Starting with gfx9, geometry and tessellation control
+ * shaders also contain the prolog and user shader parts of
+ * the previous shader stage.
*/
+ if (!mainp)
+ return -1;
+
/* Copy the compiled TGSI shader data over. */
shader->is_binary_shared = true;
shader->binary = mainp->binary;
@@ -7499,7 +7834,7 @@
r600_resource_reference(&shader->bo, NULL);
if (!shader->is_binary_shared)
- radeon_shader_binary_clean(&shader->binary);
+ si_radeon_shader_binary_clean(&shader->binary);
free(shader->shader_log);
}
diff -Nru mesa-17.2.4/src/gallium/drivers/radeonsi/si_shader.h mesa-17.3.3/src/gallium/drivers/radeonsi/si_shader.h
--- mesa-17.2.4/src/gallium/drivers/radeonsi/si_shader.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeonsi/si_shader.h 2018-01-18 21:30:28.000000000 +0000
@@ -143,6 +143,8 @@
#include "ac_binary.h"
#include "si_state.h"
+struct nir_shader;
+
#define SI_MAX_VS_OUTPUTS 40
/* Shader IO unique indices are supported for TGSI_SEMANTIC_GENERIC with an
@@ -152,12 +154,11 @@
/* SGPR user data indices */
enum {
- /* GFX9 merged shaders have RW_BUFFERS among the first 8 system SGPRs,
- * and these two are used for other purposes.
- */
SI_SGPR_RW_BUFFERS, /* rings (& stream-out, VS only) */
SI_SGPR_RW_BUFFERS_HI,
- SI_SGPR_CONST_AND_SHADER_BUFFERS,
+ SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES,
+ SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES_HI,
+ SI_SGPR_CONST_AND_SHADER_BUFFERS, /* or just a constant buffer 0 pointer */
SI_SGPR_CONST_AND_SHADER_BUFFERS_HI,
SI_SGPR_SAMPLERS_AND_IMAGES,
SI_SGPR_SAMPLERS_AND_IMAGES_HI,
@@ -172,6 +173,8 @@
SI_SGPR_VS_STATE_BITS,
SI_VS_NUM_USER_SGPR,
+ SI_SGPR_VS_BLIT_DATA = SI_SGPR_CONST_AND_SHADER_BUFFERS,
+
/* TES */
SI_SGPR_TES_OFFCHIP_LAYOUT = SI_NUM_RESOURCE_SGPRS,
SI_SGPR_TES_OFFCHIP_ADDR_BASE64K,
@@ -217,7 +220,7 @@
/* LLVM function parameter indices */
enum {
- SI_NUM_RESOURCE_PARAMS = 3,
+ SI_NUM_RESOURCE_PARAMS = 4,
/* PS only parameters */
SI_PARAM_ALPHA_REF = SI_NUM_RESOURCE_PARAMS,
@@ -259,6 +262,16 @@
TGSI_SEMANTIC_DEFAULT_TESSINNER_SI,
};
+enum {
+ /* Use a property enum that VS wouldn't use. */
+ TGSI_PROPERTY_VS_BLIT_SGPRS = TGSI_PROPERTY_FS_COORD_ORIGIN,
+
+ /* These represent the number of SGPRs the shader uses. */
+ SI_VS_BLIT_SGPRS_POS = 3,
+ SI_VS_BLIT_SGPRS_POS_COLOR = 7,
+ SI_VS_BLIT_SGPRS_POS_TEXCOORD = 9,
+};
+
/* For VS shader key fix_fetch. */
enum {
SI_FIX_FETCH_NONE = 0,
@@ -320,8 +333,10 @@
struct si_shader *gs_copy_shader;
struct tgsi_token *tokens;
+ struct nir_shader *nir;
struct pipe_stream_output_info so;
struct tgsi_shader_info info;
+ struct tgsi_tessctrl_info tcs_info;
/* PIPE_SHADER_[VERTEX|FRAGMENT|...] */
unsigned type;
@@ -393,11 +408,13 @@
*/
uint16_t instance_divisor_is_one; /* bitmask of inputs */
uint16_t instance_divisor_is_fetched; /* bitmask of inputs */
+ unsigned ls_vgpr_fix:1;
};
/* Common TCS bits between the shader key and the epilog key. */
struct si_tcs_epilog_bits {
unsigned prim_mode:3;
+ unsigned invoc0_tess_factors_are_def:1;
unsigned tes_reads_tess_factors:1;
};
@@ -439,6 +456,7 @@
unsigned num_merged_next_stage_vgprs:3;
unsigned last_input:4;
unsigned as_ls:1;
+ unsigned as_es:1;
/* Prologs for monolithic shaders shouldn't set EXEC. */
unsigned is_monolithic:1;
} vs_prolog;
@@ -509,6 +527,9 @@
uint64_t ff_tcs_inputs_to_copy; /* for fixed-func TCS */
/* When PS needs PrimID and GS is disabled. */
unsigned vs_export_prim_id:1;
+ struct {
+ unsigned interpolate_at_sample_force_center:1;
+ } ps;
} u;
} mono;
@@ -632,6 +653,11 @@
bool writes_samplemask);
const char *si_get_shader_name(const struct si_shader *shader, unsigned processor);
+/* si_shader_nir.c */
+void si_nir_scan_shader(const struct nir_shader *nir,
+ struct tgsi_shader_info *info);
+void si_lower_nir(struct si_shader_selector *sel);
+
/* Inline helpers. */
/* Return the pointer to the main shader part's pointer. */
@@ -658,4 +684,18 @@
return selector ? selector->info.uses_bindless_images : false;
}
+void si_destroy_shader_selector(struct si_context *sctx,
+ struct si_shader_selector *sel);
+
+static inline void
+si_shader_selector_reference(struct si_context *sctx,
+ struct si_shader_selector **dst,
+ struct si_shader_selector *src)
+{
+ if (pipe_reference(&(*dst)->reference, &src->reference))
+ si_destroy_shader_selector(sctx, *dst);
+
+ *dst = src;
+}
+
#endif
diff -Nru mesa-17.2.4/src/gallium/drivers/radeonsi/si_shader_internal.h mesa-17.3.3/src/gallium/drivers/radeonsi/si_shader_internal.h
--- mesa-17.2.4/src/gallium/drivers/radeonsi/si_shader_internal.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeonsi/si_shader_internal.h 2018-01-18 21:30:28.000000000 +0000
@@ -29,6 +29,7 @@
#include "gallivm/lp_bld_init.h"
#include "gallivm/lp_bld_tgsi.h"
#include "tgsi/tgsi_parse.h"
+#include "ac_shader_abi.h"
#include "ac_llvm_util.h"
#include "ac_llvm_build.h"
@@ -67,6 +68,8 @@
/* Whether the prolog will be compiled separately. */
bool separate_prolog;
+ struct ac_shader_abi abi;
+
/** This function is responsible for initilizing the inputs array and will be
* called once for each input declared in the TGSI shader.
*/
@@ -75,13 +78,6 @@
const struct tgsi_full_declaration *decl,
LLVMValueRef out[4]);
- void (*load_system_value)(struct si_shader_context *,
- unsigned index,
- const struct tgsi_full_declaration *decl);
-
- void (*declare_memory_region)(struct si_shader_context *,
- const struct tgsi_full_declaration *decl);
-
/** This array contains the input values for the shader. Typically these
* values will be in the form of a target intrinsic that will inform the
* backend how to load the actual inputs to the shader.
@@ -120,18 +116,14 @@
int param_rw_buffers;
int param_const_and_shader_buffers;
int param_samplers_and_images;
+ int param_bindless_samplers_and_images;
/* Common inputs for merged shaders. */
int param_merged_wave_info;
int param_merged_scratch_offset;
/* API VS */
int param_vertex_buffers;
- int param_base_vertex;
- int param_start_instance;
- int param_draw_id;
- int param_vertex_id;
int param_rel_auto_id;
int param_vs_prim_id;
- int param_instance_id;
int param_vertex_index0;
/* VS states and layout of LS outputs / TCS inputs at the end
* [0] = clamp vertex color
@@ -142,6 +134,7 @@
* max = 32*4
*/
int param_vs_state_bits;
+ int param_vs_blit_inputs;
/* HW VS */
int param_streamout_config;
int param_streamout_write_index;
@@ -169,8 +162,6 @@
/* Layout of TCS outputs / TES inputs:
* [0:12] = stride between output patches in DW, num_outputs * num_vertices * 4
* max = 32*32*4 + 32*4
- * [13:20] = stride between output vertices in DW = num_inputs * 4
- * max = 32*4
* [26:31] = gl_PatchVerticesIn, max = 32
*/
int param_tcs_out_lds_layout;
@@ -219,6 +210,7 @@
LLVMValueRef gsvs_ring[4];
LLVMValueRef lds;
+ LLVMValueRef invoc0_tess_factors[6]; /* outer[4], inner[2] */
LLVMValueRef gs_next_vertex[4];
LLVMValueRef postponed_kill;
LLVMValueRef return_value;
@@ -247,6 +239,13 @@
return (struct si_shader_context*)bld_base;
}
+static inline struct si_shader_context *
+si_shader_context_from_abi(struct ac_shader_abi *abi)
+{
+ struct si_shader_context *ctx = NULL;
+ return container_of(abi, ctx, abi);
+}
+
void si_llvm_add_attribute(LLVMValueRef F, const char *name, int value);
unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary,
@@ -291,6 +290,7 @@
void si_llvm_emit_store(struct lp_build_tgsi_context *bld_base,
const struct tgsi_full_instruction *inst,
const struct tgsi_opcode_info *info,
+ unsigned index,
LLVMValueRef dst[4]);
/* Combine these with & instead of |. */
@@ -300,6 +300,9 @@
void si_emit_waitcnt(struct si_shader_context *ctx, unsigned simm16);
+LLVMValueRef si_get_indirect_index(struct si_shader_context *ctx,
+ const struct tgsi_ind_register *ind,
+ unsigned addr_mul, int rel_index);
LLVMValueRef si_get_bounded_indirect_index(struct si_shader_context *ctx,
const struct tgsi_ind_register *ind,
int rel_index, unsigned num);
@@ -309,4 +312,28 @@
void si_shader_context_init_alu(struct lp_build_tgsi_context *bld_base);
void si_shader_context_init_mem(struct si_shader_context *ctx);
+LLVMValueRef si_load_sampler_desc(struct si_shader_context *ctx,
+ LLVMValueRef list, LLVMValueRef index,
+ enum ac_descriptor_type type);
+LLVMValueRef si_load_image_desc(struct si_shader_context *ctx,
+ LLVMValueRef list, LLVMValueRef index,
+ enum ac_descriptor_type desc_type, bool dcc_off);
+
+void si_load_system_value(struct si_shader_context *ctx,
+ unsigned index,
+ const struct tgsi_full_declaration *decl);
+void si_declare_compute_memory(struct si_shader_context *ctx,
+ const struct tgsi_full_declaration *decl);
+
+void si_llvm_load_input_vs(
+ struct si_shader_context *ctx,
+ unsigned input_index,
+ LLVMValueRef out[4]);
+void si_llvm_load_input_fs(
+ struct si_shader_context *ctx,
+ unsigned input_index,
+ LLVMValueRef out[4]);
+
+bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *nir);
+
#endif
diff -Nru mesa-17.2.4/src/gallium/drivers/radeonsi/si_shader_nir.c mesa-17.3.3/src/gallium/drivers/radeonsi/si_shader_nir.c
--- mesa-17.2.4/src/gallium/drivers/radeonsi/si_shader_nir.c 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeonsi/si_shader_nir.c 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,508 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "si_shader.h"
+#include "si_shader_internal.h"
+
+#include "ac_nir_to_llvm.h"
+
+#include "tgsi/tgsi_from_mesa.h"
+
+#include "compiler/nir/nir.h"
+#include "compiler/nir_types.h"
+
+
+/* Size callback handed to nir_lower_io: a type's size in attribute slots. */
+static int type_size(const struct glsl_type *type)
+{
+	return (int)glsl_count_attribute_slots(type, false);
+}
+
+/* Set the tgsi_shader_info flags implied by a single NIR instruction. */
+static void scan_instruction(struct tgsi_shader_info *info, nir_instr *instr)
+{
+	switch (instr->type) {
+	case nir_instr_type_alu:
+		switch (nir_instr_as_alu(instr)->op) {
+		case nir_op_fddx:
+		case nir_op_fddy:
+		case nir_op_fddx_fine:
+		case nir_op_fddy_fine:
+		case nir_op_fddx_coarse:
+		case nir_op_fddy_coarse:
+			info->uses_derivatives = true;
+			break;
+		default:
+			break;
+		}
+		break;
+	case nir_instr_type_tex:
+		switch (nir_instr_as_tex(instr)->op) {
+		case nir_texop_tex:
+		case nir_texop_txb:
+		case nir_texop_lod:
+			info->uses_derivatives = true;
+			break;
+		default:
+			break;
+		}
+		break;
+	case nir_instr_type_intrinsic:
+		switch (nir_instr_as_intrinsic(instr)->intrinsic) {
+		case nir_intrinsic_load_front_face:
+			info->uses_frontface = 1;
+			break;
+		case nir_intrinsic_load_instance_id:
+			info->uses_instanceid = 1;
+			break;
+		case nir_intrinsic_load_vertex_id:
+			info->uses_vertexid = 1;
+			break;
+		case nir_intrinsic_load_vertex_id_zero_base:
+			info->uses_vertexid_nobase = 1;
+			break;
+		case nir_intrinsic_load_base_vertex:
+			info->uses_basevertex = 1;
+			break;
+		case nir_intrinsic_load_primitive_id:
+			info->uses_primid = 1;
+			break;
+		case nir_intrinsic_image_store:
+		case nir_intrinsic_image_atomic_add:
+		case nir_intrinsic_image_atomic_min:
+		case nir_intrinsic_image_atomic_max:
+		case nir_intrinsic_image_atomic_and:
+		case nir_intrinsic_image_atomic_or:
+		case nir_intrinsic_image_atomic_xor:
+		case nir_intrinsic_image_atomic_exchange:
+		case nir_intrinsic_image_atomic_comp_swap:
+		case nir_intrinsic_store_ssbo:
+		case nir_intrinsic_ssbo_atomic_add:
+		case nir_intrinsic_ssbo_atomic_imin:
+		case nir_intrinsic_ssbo_atomic_umin:
+		case nir_intrinsic_ssbo_atomic_imax:
+		case nir_intrinsic_ssbo_atomic_umax:
+		case nir_intrinsic_ssbo_atomic_and:
+		case nir_intrinsic_ssbo_atomic_or:
+		case nir_intrinsic_ssbo_atomic_xor:
+		case nir_intrinsic_ssbo_atomic_exchange:
+		case nir_intrinsic_ssbo_atomic_comp_swap:
+			info->writes_memory = true;
+			break;
+		default:
+			break;
+		}
+		break;
+	default:
+		break;
+	}
+}
+
+void si_nir_scan_shader(const struct nir_shader *nir,
+ struct tgsi_shader_info *info)
+{
+ nir_function *func;
+ unsigned i;
+ /* Fills 'info' from 'nir'; only vertex and fragment shaders are accepted. */
+ assert(nir->info.stage == MESA_SHADER_VERTEX ||
+ nir->info.stage == MESA_SHADER_FRAGMENT);
+
+ info->processor = pipe_shader_type_from_mesa(nir->info.stage);
+ info->num_tokens = 2; /* indicate that the shader is non-empty */
+ info->num_instructions = 2;
+
+ info->num_inputs = nir->num_inputs;
+ info->num_outputs = nir->num_outputs;
+ /* Record semantics and interpolation for every input that is not skipped. */
+ i = 0;
+ nir_foreach_variable(variable, &nir->inputs) {
+ unsigned semantic_name, semantic_index;
+ unsigned attrib_count = glsl_count_attribute_slots(variable->type,
+ nir->info.stage == MESA_SHADER_VERTEX);
+
+ assert(attrib_count == 1 && "not implemented");
+
+ /* Vertex shader inputs don't have semantics. The state
+ * tracker has already mapped them to attributes via
+ * variable->data.driver_location.
+ */
+ if (nir->info.stage == MESA_SHADER_VERTEX)
+ continue;
+
+ /* Fragment shader position is a system value. */
+ if (nir->info.stage == MESA_SHADER_FRAGMENT &&
+ variable->data.location == VARYING_SLOT_POS) {
+ if (variable->data.pixel_center_integer)
+ info->properties[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER] =
+ TGSI_FS_COORD_PIXEL_CENTER_INTEGER;
+ continue;
+ }
+
+ tgsi_get_gl_varying_semantic(variable->data.location, true,
+ &semantic_name, &semantic_index);
+
+ info->input_semantic_name[i] = semantic_name;
+ info->input_semantic_index[i] = semantic_index;
+
+ if (variable->data.sample)
+ info->input_interpolate_loc[i] = TGSI_INTERPOLATE_LOC_SAMPLE;
+ else if (variable->data.centroid)
+ info->input_interpolate_loc[i] = TGSI_INTERPOLATE_LOC_CENTROID;
+ else
+ info->input_interpolate_loc[i] = TGSI_INTERPOLATE_LOC_CENTER;
+
+ enum glsl_base_type base_type =
+ glsl_get_base_type(glsl_without_array(variable->type));
+
+ switch (variable->data.interpolation) {
+ case INTERP_MODE_NONE:
+ if (glsl_base_type_is_integer(base_type)) {
+ info->input_interpolate[i] = TGSI_INTERPOLATE_CONSTANT;
+ break;
+ }
+
+ if (semantic_name == TGSI_SEMANTIC_COLOR) {
+ info->input_interpolate[i] = TGSI_INTERPOLATE_COLOR;
+ goto persp_locations;
+ }
+ /* fall-through */
+ case INTERP_MODE_SMOOTH:
+ assert(!glsl_base_type_is_integer(base_type));
+
+ info->input_interpolate[i] = TGSI_INTERPOLATE_PERSPECTIVE;
+ /* INTERP_MODE_NONE color inputs jump here to share the flags below. */
+ persp_locations:
+ if (variable->data.sample)
+ info->uses_persp_sample = true;
+ else if (variable->data.centroid)
+ info->uses_persp_centroid = true;
+ else
+ info->uses_persp_center = true;
+ break;
+
+ case INTERP_MODE_NOPERSPECTIVE:
+ assert(!glsl_base_type_is_integer(base_type));
+
+ info->input_interpolate[i] = TGSI_INTERPOLATE_LINEAR;
+
+ if (variable->data.sample)
+ info->uses_linear_sample = true;
+ else if (variable->data.centroid)
+ info->uses_linear_centroid = true;
+ else
+ info->uses_linear_center = true;
+ break;
+
+ case INTERP_MODE_FLAT:
+ info->input_interpolate[i] = TGSI_INTERPOLATE_CONSTANT;
+ break;
+ }
+
+ /* TODO make this more precise: for now all 4 bits of a read color are set. */
+ if (variable->data.location == VARYING_SLOT_COL0)
+ info->colors_read |= 0x0f;
+ else if (variable->data.location == VARYING_SLOT_COL1)
+ info->colors_read |= 0xf0;
+
+ i++;
+ }
+ /* Record the semantic of each output and the writes_* flag it implies. */
+ i = 0;
+ nir_foreach_variable(variable, &nir->outputs) {
+ unsigned semantic_name, semantic_index;
+
+ if (nir->info.stage == MESA_SHADER_FRAGMENT) {
+ tgsi_get_gl_frag_result_semantic(variable->data.location,
+ &semantic_name, &semantic_index);
+ } else {
+ tgsi_get_gl_varying_semantic(variable->data.location, true,
+ &semantic_name, &semantic_index);
+ }
+
+ info->output_semantic_name[i] = semantic_name;
+ info->output_semantic_index[i] = semantic_index;
+ info->output_usagemask[i] = TGSI_WRITEMASK_XYZW;
+
+ switch (semantic_name) {
+ case TGSI_SEMANTIC_PRIMID:
+ info->writes_primid = true;
+ break;
+ case TGSI_SEMANTIC_VIEWPORT_INDEX:
+ info->writes_viewport_index = true;
+ break;
+ case TGSI_SEMANTIC_LAYER:
+ info->writes_layer = true;
+ break;
+ case TGSI_SEMANTIC_PSIZE:
+ info->writes_psize = true;
+ break;
+ case TGSI_SEMANTIC_CLIPVERTEX:
+ info->writes_clipvertex = true;
+ break;
+ case TGSI_SEMANTIC_COLOR:
+ info->colors_written |= 1 << semantic_index;
+ break;
+ case TGSI_SEMANTIC_STENCIL:
+ info->writes_stencil = true;
+ break;
+ case TGSI_SEMANTIC_SAMPLEMASK:
+ info->writes_samplemask = true;
+ break;
+ case TGSI_SEMANTIC_EDGEFLAG:
+ info->writes_edgeflag = true;
+ break;
+ case TGSI_SEMANTIC_POSITION:
+ if (info->processor == PIPE_SHADER_FRAGMENT)
+ info->writes_z = true;
+ else
+ info->writes_position = true;
+ break;
+ }
+
+ i++;
+ }
+ /* Collect the binding ranges of sampler and image uniforms into bitmasks. */
+ nir_foreach_variable(variable, &nir->uniforms) {
+ const struct glsl_type *type = variable->type;
+ enum glsl_base_type base_type =
+ glsl_get_base_type(glsl_without_array(type));
+ unsigned aoa_size = MAX2(1, glsl_get_aoa_size(type));
+
+ /* We rely on the fact that nir_lower_samplers_as_deref has
+ * eliminated struct dereferences.
+ */
+ if (base_type == GLSL_TYPE_SAMPLER)
+ info->samplers_declared |=
+ u_bit_consecutive(variable->data.binding, aoa_size);
+ else if (base_type == GLSL_TYPE_IMAGE)
+ info->images_declared |=
+ u_bit_consecutive(variable->data.binding, aoa_size);
+ }
+
+ info->num_written_clipdistance = nir->info.clip_distance_array_size;
+ info->num_written_culldistance = nir->info.cull_distance_array_size;
+ info->clipdist_writemask = u_bit_consecutive(0, info->num_written_clipdistance);
+ info->culldist_writemask = u_bit_consecutive(0, info->num_written_culldistance);
+
+ if (info->processor == PIPE_SHADER_FRAGMENT)
+ info->uses_kill = nir->info.fs.uses_discard;
+
+ /* TODO make this more accurate: every const/shader buffer slot is marked declared. */
+ info->const_buffers_declared = u_bit_consecutive(0, SI_NUM_CONST_BUFFERS);
+ info->shader_buffers_declared = u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS);
+ /* Scan every instruction of the first function in nir->functions. */
+ func = (struct nir_function *)exec_list_get_head_const(&nir->functions);
+ nir_foreach_block(block, func->impl) {
+ nir_foreach_instr(instr, block)
+ scan_instruction(info, instr);
+ }
+}
+
+/**
+ * Perform "lowering" operations on the NIR that are run once when the shader
+ * selector is created. Operates in place on sel->nir.
+ */
+void
+si_lower_nir(struct si_shader_selector* sel)
+{
+ /* Adjust the driver location of inputs and outputs. The state tracker
+ * interprets them as slots, while the ac/nir backend interprets them
+ * as individual components.
+ */
+ nir_foreach_variable(variable, &sel->nir->inputs)
+ variable->data.driver_location *= 4;
+
+ nir_foreach_variable(variable, &sel->nir->outputs) {
+ variable->data.driver_location *= 4;
+
+ if (sel->nir->info.stage == MESA_SHADER_FRAGMENT) {
+ if (variable->data.location == FRAG_RESULT_DEPTH)
+ variable->data.driver_location += 2;
+ else if (variable->data.location == FRAG_RESULT_STENCIL)
+ variable->data.driver_location += 1;
+ }
+ }
+
+ /* Perform lowerings (and optimizations) of code.
+ *
+ * Performance considerations aside, we must:
+ * - lower certain ALU operations
+ * - ensure constant offsets for texture instructions are folded
+ * and copy-propagated
+ */
+ NIR_PASS_V(sel->nir, nir_lower_io, nir_var_uniform, type_size,
+ (nir_lower_io_options)0);
+ NIR_PASS_V(sel->nir, nir_lower_uniforms_to_ubo);
+ /* NOTE(review): sel->nir itself is passed to every pass macro; presumably the macros may replace that pointer in debug builds - confirm before caching it in a local. */
+ NIR_PASS_V(sel->nir, nir_lower_returns);
+ NIR_PASS_V(sel->nir, nir_lower_vars_to_ssa);
+ NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar);
+ NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar);
+ /* lower_txp = ~0u: presumably requests txp lowering for every sampler dim - confirm in nir.h. */
+ static const struct nir_lower_tex_options lower_tex_options = {
+ .lower_txp = ~0u,
+ };
+ NIR_PASS_V(sel->nir, nir_lower_tex, &lower_tex_options);
+ /* Repeat the optimization passes until none of them reports progress. */
+ bool progress;
+ do {
+ progress = false;
+
+ /* (Constant) copy propagation is needed for txf with offsets. */
+ NIR_PASS(progress, sel->nir, nir_copy_prop);
+ NIR_PASS(progress, sel->nir, nir_opt_remove_phis);
+ NIR_PASS(progress, sel->nir, nir_opt_dce);
+ if (nir_opt_trivial_continues(sel->nir)) {
+ progress = true;
+ NIR_PASS(progress, sel->nir, nir_copy_prop);
+ NIR_PASS(progress, sel->nir, nir_opt_dce);
+ }
+ NIR_PASS(progress, sel->nir, nir_opt_if);
+ NIR_PASS(progress, sel->nir, nir_opt_dead_cf);
+ NIR_PASS(progress, sel->nir, nir_opt_cse);
+ NIR_PASS(progress, sel->nir, nir_opt_peephole_select, 8);
+
+ /* Needed for algebraic lowering */
+ NIR_PASS(progress, sel->nir, nir_opt_algebraic);
+ NIR_PASS(progress, sel->nir, nir_opt_constant_folding);
+
+ NIR_PASS(progress, sel->nir, nir_opt_undef);
+ NIR_PASS(progress, sel->nir, nir_opt_conditional_discard);
+ if (sel->nir->options->max_unroll_iterations) {
+ NIR_PASS(progress, sel->nir, nir_opt_loop_unroll, 0);
+ }
+ } while (progress);
+}
+
+static void declare_nir_input_vs(struct si_shader_context *ctx, struct nir_variable *variable,
+				 unsigned rel, LLVMValueRef out[4])
+{
+	const unsigned slot = variable->data.driver_location / 4 + rel; /* location counts components, 4 per slot */
+	si_llvm_load_input_vs(ctx, slot, out);
+}
+
+/* Load one fragment shader input slot into out[0..3]: VARYING_SLOT_POS comes
+ * from the SI_PARAM_POS_* parameters (out[3] = ac_build_fdiv(1.0, POS_W)), any
+ * other slot from si_llvm_load_input_fs, which uses up one *fs_attr_idx. */
+static void declare_nir_input_fs(struct si_shader_context *ctx,
+				 struct nir_variable *variable, unsigned rel,
+				 unsigned *fs_attr_idx, LLVMValueRef out[4])
+{
+	assert(variable->data.location >= VARYING_SLOT_VAR0 || rel == 0);
+
+	if (variable->data.location + rel != VARYING_SLOT_POS) {
+		si_llvm_load_input_fs(ctx, *fs_attr_idx, out);
+		++*fs_attr_idx;
+		return;
+	}
+
+	out[0] = LLVMGetParam(ctx->main_fn, SI_PARAM_POS_X_FLOAT);
+	out[1] = LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Y_FLOAT);
+	out[2] = LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Z_FLOAT);
+	LLVMValueRef pos_w = LLVMGetParam(ctx->main_fn, SI_PARAM_POS_W_FLOAT);
+	out[3] = ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, pos_w);
+}
+
+/* ABI callback (ctx->abi.load_sampler_desc) that loads a sampler or image
+ * descriptor. The slot is dynamic_index (0 when NULL) + base_index +
+ * constant_index. Image slots are reversed (SI_NUM_IMAGES - 1 - slot) and
+ * sampler slots are offset by SI_NUM_IMAGES / 2; a dynamic slot is first
+ * passed through si_llvm_bound_index. Only descriptor set 0 is accepted. */
+static LLVMValueRef
+si_nir_load_sampler_desc(struct ac_shader_abi *abi,
+			 unsigned descriptor_set, unsigned base_index,
+			 unsigned constant_index, LLVMValueRef dynamic_index,
+			 enum ac_descriptor_type desc_type, bool image, bool write)
+{
+	struct si_shader_context *ctx = si_shader_context_from_abi(abi);
+	const unsigned static_slot = base_index + constant_index;
+	LLVMValueRef desc_list = LLVMGetParam(ctx->main_fn, ctx->param_samplers_and_images);
+	LLVMValueRef slot;
+
+	assert(!descriptor_set);
+
+	slot = LLVMBuildAdd(ctx->ac.builder,
+			    dynamic_index ? dynamic_index : ctx->ac.i32_0,
+			    LLVMConstInt(ctx->ac.i32, static_slot, false), "");
+
+	if (!image) {
+		assert(static_slot < ctx->num_samplers);
+
+		if (dynamic_index)
+			slot = si_llvm_bound_index(ctx, slot, ctx->num_samplers);
+
+		slot = LLVMBuildAdd(ctx->gallivm.builder, slot,
+				    LLVMConstInt(ctx->i32, SI_NUM_IMAGES / 2, 0), "");
+		return si_load_sampler_desc(ctx, desc_list, slot, desc_type);
+	}
+
+	assert(desc_type == AC_DESC_IMAGE || desc_type == AC_DESC_BUFFER);
+	assert(static_slot < ctx->num_images);
+
+	if (dynamic_index)
+		slot = si_llvm_bound_index(ctx, slot, ctx->num_images);
+
+	slot = LLVMBuildSub(ctx->gallivm.builder,
+			    LLVMConstInt(ctx->i32, SI_NUM_IMAGES - 1, 0),
+			    slot, "");
+
+	/* TODO: be smarter about when we use dcc_off */
+	return si_load_image_desc(ctx, desc_list, slot, desc_type, write);
+}
+
+/* Load the shader inputs into ctx->inputs, install the ABI hooks and translate
+ * the NIR shader with ac_nir_translate. Always returns true. */
+bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *nir)
+{
+	struct tgsi_shader_info *info = &ctx->shader->selector->info;
+	unsigned fs_attr_idx = 0;
+
+	nir_foreach_variable(variable, &nir->inputs) {
+		unsigned attrib_count = glsl_count_attribute_slots(variable->type,
+			nir->info.stage == MESA_SHADER_VERTEX);
+		unsigned input_idx = variable->data.driver_location;
+
+		for (unsigned i = 0; i < attrib_count; ++i) {
+			LLVMValueRef data[4];
+			if (nir->info.stage == MESA_SHADER_VERTEX)
+				declare_nir_input_vs(ctx, variable, i, data);
+			else if (nir->info.stage == MESA_SHADER_FRAGMENT)
+				declare_nir_input_fs(ctx, variable, i, &fs_attr_idx, data);
+
+			/* 4 entries per slot: offset by i so slot i doesn't overwrite slot 0. */
+			for (unsigned chan = 0; chan < 4; chan++) {
+				ctx->inputs[input_idx + i * 4 + chan] =
+					LLVMBuildBitCast(ctx->ac.builder, data[chan], ctx->ac.i32, "");
+			}
+		}
+	}
+
+	ctx->abi.inputs = &ctx->inputs[0];
+	ctx->abi.load_sampler_desc = si_nir_load_sampler_desc;
+	ctx->abi.clamp_shadow_reference = true;
+	ctx->num_samplers = util_last_bit(info->samplers_declared);
+	ctx->num_images = util_last_bit(info->images_declared);
+
+	ac_nir_translate(&ctx->ac, &ctx->abi, nir, NULL);
+	return true;
+}
diff -Nru mesa-17.2.4/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c mesa-17.3.3/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
--- mesa-17.2.4/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c 2018-01-18 21:30:28.000000000 +0000
@@ -32,15 +32,15 @@
struct lp_build_emit_data *emit_data)
{
const struct tgsi_full_instruction *inst = emit_data->inst;
- struct gallivm_state *gallivm = bld_base->base.gallivm;
- LLVMBuilderRef builder = gallivm->builder;
+ struct si_shader_context *ctx = si_shader_context(bld_base);
+ LLVMBuilderRef builder = ctx->ac.builder;
unsigned i;
LLVMValueRef conds[TGSI_NUM_CHANNELS];
for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
LLVMValueRef value = lp_build_emit_fetch(bld_base, inst, 0, i);
conds[i] = LLVMBuildFCmp(builder, LLVMRealOLT, value,
- bld_base->base.zero, "");
+ ctx->ac.f32_0, "");
}
/* Or the conditions together */
@@ -48,11 +48,11 @@
conds[i - 1] = LLVMBuildOr(builder, conds[i], conds[i - 1], "");
}
- emit_data->dst_type = LLVMVoidTypeInContext(gallivm->context);
+ emit_data->dst_type = ctx->voidt;
emit_data->arg_count = 1;
emit_data->args[0] = LLVMBuildSelect(builder, conds[0],
- lp_build_const_float(gallivm, -1.0f),
- bld_base->base.zero, "");
+ LLVMConstReal(ctx->f32, -1.0f),
+ ctx->ac.f32_0, "");
}
static void kil_emit(const struct lp_build_tgsi_action *action,
@@ -60,7 +60,7 @@
struct lp_build_emit_data *emit_data)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- LLVMBuilderRef builder = ctx->gallivm.builder;
+ LLVMBuilderRef builder = ctx->ac.builder;
if (ctx->postponed_kill) {
if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_KILL_IF) {
@@ -93,8 +93,7 @@
struct lp_build_emit_data *emit_data)
{
unsigned pred;
- LLVMBuilderRef builder = bld_base->base.gallivm->builder;
- LLVMContextRef context = bld_base->base.gallivm->context;
+ struct si_shader_context *ctx = si_shader_context(bld_base);
switch (emit_data->inst->Instruction.Opcode) {
case TGSI_OPCODE_USEQ:
@@ -115,11 +114,10 @@
break;
}
- LLVMValueRef v = LLVMBuildICmp(builder, pred,
+ LLVMValueRef v = LLVMBuildICmp(ctx->ac.builder, pred,
emit_data->args[0], emit_data->args[1],"");
- v = LLVMBuildSExtOrBitCast(builder, v,
- LLVMInt32TypeInContext(context), "");
+ v = LLVMBuildSExtOrBitCast(ctx->ac.builder, v, ctx->i32, "");
emit_data->output[emit_data->chan] = v;
}
@@ -128,37 +126,35 @@
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
- LLVMBuilderRef builder = bld_base->base.gallivm->builder;
-
- LLVMValueRef arg0 = LLVMBuildBitCast(builder, emit_data->args[0],
- bld_base->uint_bld.elem_type, "");
+ struct si_shader_context *ctx = si_shader_context(bld_base);
+ LLVMValueRef arg0 = ac_to_integer(&ctx->ac, emit_data->args[0]);
- LLVMValueRef v = LLVMBuildICmp(builder, LLVMIntNE, arg0,
- bld_base->uint_bld.zero, "");
+ LLVMValueRef v = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, arg0,
+ ctx->i32_0, "");
emit_data->output[emit_data->chan] =
- LLVMBuildSelect(builder, v, emit_data->args[1], emit_data->args[2], "");
+ LLVMBuildSelect(ctx->ac.builder, v, emit_data->args[1], emit_data->args[2], "");
}
static void emit_cmp(const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
- LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+ struct si_shader_context *ctx = si_shader_context(bld_base);
LLVMValueRef cond, *args = emit_data->args;
- cond = LLVMBuildFCmp(builder, LLVMRealOLT, args[0],
- bld_base->base.zero, "");
+ cond = LLVMBuildFCmp(ctx->ac.builder, LLVMRealOLT, args[0],
+ ctx->ac.f32_0, "");
emit_data->output[emit_data->chan] =
- LLVMBuildSelect(builder, cond, args[1], args[2], "");
+ LLVMBuildSelect(ctx->ac.builder, cond, args[1], args[2], "");
}
static void emit_set_cond(const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
- LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+ struct si_shader_context *ctx = si_shader_context(bld_base);
LLVMRealPredicate pred;
LLVMValueRef cond;
@@ -175,19 +171,18 @@
default: assert(!"unknown instruction"); pred = 0; break;
}
- cond = LLVMBuildFCmp(builder,
+ cond = LLVMBuildFCmp(ctx->ac.builder,
pred, emit_data->args[0], emit_data->args[1], "");
- emit_data->output[emit_data->chan] = LLVMBuildSelect(builder,
- cond, bld_base->base.one, bld_base->base.zero, "");
+ emit_data->output[emit_data->chan] = LLVMBuildSelect(ctx->ac.builder,
+ cond, ctx->ac.f32_1, ctx->ac.f32_0, "");
}
static void emit_fcmp(const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
- LLVMBuilderRef builder = bld_base->base.gallivm->builder;
- LLVMContextRef context = bld_base->base.gallivm->context;
+ struct si_shader_context *ctx = si_shader_context(bld_base);
LLVMRealPredicate pred;
/* Use ordered for everything but NE (which is usual for
@@ -201,11 +196,10 @@
default: assert(!"unknown instruction"); pred = 0; break;
}
- LLVMValueRef v = LLVMBuildFCmp(builder, pred,
+ LLVMValueRef v = LLVMBuildFCmp(ctx->ac.builder, pred,
emit_data->args[0], emit_data->args[1],"");
- v = LLVMBuildSExtOrBitCast(builder, v,
- LLVMInt32TypeInContext(context), "");
+ v = LLVMBuildSExtOrBitCast(ctx->ac.builder, v, ctx->i32, "");
emit_data->output[emit_data->chan] = v;
}
@@ -214,8 +208,7 @@
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
- LLVMBuilderRef builder = bld_base->base.gallivm->builder;
- LLVMContextRef context = bld_base->base.gallivm->context;
+ struct si_shader_context *ctx = si_shader_context(bld_base);
LLVMRealPredicate pred;
/* Use ordered for everything but NE (which is usual for
@@ -229,11 +222,10 @@
default: assert(!"unknown instruction"); pred = 0; break;
}
- LLVMValueRef v = LLVMBuildFCmp(builder, pred,
+ LLVMValueRef v = LLVMBuildFCmp(ctx->ac.builder, pred,
emit_data->args[0], emit_data->args[1],"");
- v = LLVMBuildSExtOrBitCast(builder, v,
- LLVMInt32TypeInContext(context), "");
+ v = LLVMBuildSExtOrBitCast(ctx->ac.builder, v, ctx->i32, "");
emit_data->output[emit_data->chan] = v;
}
@@ -242,28 +234,27 @@
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
- LLVMBuilderRef builder = bld_base->base.gallivm->builder;
- LLVMValueRef v = bitcast(bld_base, TGSI_TYPE_UNSIGNED,
- emit_data->args[0]);
- emit_data->output[emit_data->chan] = LLVMBuildNot(builder, v, "");
+ struct si_shader_context *ctx = si_shader_context(bld_base);
+ LLVMValueRef v = ac_to_integer(&ctx->ac, emit_data->args[0]);
+ emit_data->output[emit_data->chan] = LLVMBuildNot(ctx->ac.builder, v, "");
}
static void emit_arl(const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
- LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+ struct si_shader_context *ctx = si_shader_context(bld_base);
LLVMValueRef floor_index = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_FLR, emit_data->args[0]);
- emit_data->output[emit_data->chan] = LLVMBuildFPToSI(builder,
- floor_index, bld_base->base.int_elem_type , "");
+ emit_data->output[emit_data->chan] = LLVMBuildFPToSI(ctx->ac.builder,
+ floor_index, ctx->i32, "");
}
static void emit_and(const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
- LLVMBuilderRef builder = bld_base->base.gallivm->builder;
- emit_data->output[emit_data->chan] = LLVMBuildAnd(builder,
+ struct si_shader_context *ctx = si_shader_context(bld_base);
+ emit_data->output[emit_data->chan] = LLVMBuildAnd(ctx->ac.builder,
emit_data->args[0], emit_data->args[1], "");
}
@@ -271,8 +262,8 @@
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
- LLVMBuilderRef builder = bld_base->base.gallivm->builder;
- emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
+ struct si_shader_context *ctx = si_shader_context(bld_base);
+ emit_data->output[emit_data->chan] = LLVMBuildOr(ctx->ac.builder,
emit_data->args[0], emit_data->args[1], "");
}
@@ -280,8 +271,8 @@
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
- LLVMBuilderRef builder = bld_base->base.gallivm->builder;
- emit_data->output[emit_data->chan] = LLVMBuildAdd(builder,
+ struct si_shader_context *ctx = si_shader_context(bld_base);
+ emit_data->output[emit_data->chan] = LLVMBuildAdd(ctx->ac.builder,
emit_data->args[0], emit_data->args[1], "");
}
@@ -289,8 +280,8 @@
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
- LLVMBuilderRef builder = bld_base->base.gallivm->builder;
- emit_data->output[emit_data->chan] = LLVMBuildUDiv(builder,
+ struct si_shader_context *ctx = si_shader_context(bld_base);
+ emit_data->output[emit_data->chan] = LLVMBuildUDiv(ctx->ac.builder,
emit_data->args[0], emit_data->args[1], "");
}
@@ -298,8 +289,8 @@
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
- LLVMBuilderRef builder = bld_base->base.gallivm->builder;
- emit_data->output[emit_data->chan] = LLVMBuildSDiv(builder,
+ struct si_shader_context *ctx = si_shader_context(bld_base);
+ emit_data->output[emit_data->chan] = LLVMBuildSDiv(ctx->ac.builder,
emit_data->args[0], emit_data->args[1], "");
}
@@ -307,8 +298,8 @@
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
- LLVMBuilderRef builder = bld_base->base.gallivm->builder;
- emit_data->output[emit_data->chan] = LLVMBuildSRem(builder,
+ struct si_shader_context *ctx = si_shader_context(bld_base);
+ emit_data->output[emit_data->chan] = LLVMBuildSRem(ctx->ac.builder,
emit_data->args[0], emit_data->args[1], "");
}
@@ -316,8 +307,8 @@
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
- LLVMBuilderRef builder = bld_base->base.gallivm->builder;
- emit_data->output[emit_data->chan] = LLVMBuildURem(builder,
+ struct si_shader_context *ctx = si_shader_context(bld_base);
+ emit_data->output[emit_data->chan] = LLVMBuildURem(ctx->ac.builder,
emit_data->args[0], emit_data->args[1], "");
}
@@ -325,8 +316,8 @@
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
- LLVMBuilderRef builder = bld_base->base.gallivm->builder;
- emit_data->output[emit_data->chan] = LLVMBuildShl(builder,
+ struct si_shader_context *ctx = si_shader_context(bld_base);
+ emit_data->output[emit_data->chan] = LLVMBuildShl(ctx->ac.builder,
emit_data->args[0], emit_data->args[1], "");
}
@@ -334,16 +325,16 @@
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
- LLVMBuilderRef builder = bld_base->base.gallivm->builder;
- emit_data->output[emit_data->chan] = LLVMBuildLShr(builder,
+ struct si_shader_context *ctx = si_shader_context(bld_base);
+ emit_data->output[emit_data->chan] = LLVMBuildLShr(ctx->ac.builder,
emit_data->args[0], emit_data->args[1], "");
}
static void emit_ishr(const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
- LLVMBuilderRef builder = bld_base->base.gallivm->builder;
- emit_data->output[emit_data->chan] = LLVMBuildAShr(builder,
+ struct si_shader_context *ctx = si_shader_context(bld_base);
+ emit_data->output[emit_data->chan] = LLVMBuildAShr(ctx->ac.builder,
emit_data->args[0], emit_data->args[1], "");
}
@@ -351,8 +342,8 @@
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
- LLVMBuilderRef builder = bld_base->base.gallivm->builder;
- emit_data->output[emit_data->chan] = LLVMBuildXor(builder,
+ struct si_shader_context *ctx = si_shader_context(bld_base);
+ emit_data->output[emit_data->chan] = LLVMBuildXor(ctx->ac.builder,
emit_data->args[0], emit_data->args[1], "");
}
@@ -360,7 +351,8 @@
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
- LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+ struct si_shader_context *ctx = si_shader_context(bld_base);
+ LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef cmp, val;
@@ -368,17 +360,22 @@
cmp = LLVMBuildICmp(builder, LLVMIntSGT, emit_data->args[0], bld_base->int64_bld.zero, "");
val = LLVMBuildSelect(builder, cmp, bld_base->int64_bld.one, emit_data->args[0], "");
cmp = LLVMBuildICmp(builder, LLVMIntSGE, val, bld_base->int64_bld.zero, "");
- val = LLVMBuildSelect(builder, cmp, val, LLVMConstInt(bld_base->int64_bld.elem_type, -1, true), "");
+ val = LLVMBuildSelect(builder, cmp, val, LLVMConstInt(ctx->i64, -1, true), "");
} else if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ISSG) {
- cmp = LLVMBuildICmp(builder, LLVMIntSGT, emit_data->args[0], bld_base->int_bld.zero, "");
- val = LLVMBuildSelect(builder, cmp, bld_base->int_bld.one, emit_data->args[0], "");
- cmp = LLVMBuildICmp(builder, LLVMIntSGE, val, bld_base->int_bld.zero, "");
- val = LLVMBuildSelect(builder, cmp, val, LLVMConstInt(bld_base->int_bld.elem_type, -1, true), "");
+ cmp = LLVMBuildICmp(builder, LLVMIntSGT, emit_data->args[0], ctx->i32_0, "");
+ val = LLVMBuildSelect(builder, cmp, ctx->i32_1, emit_data->args[0], "");
+ cmp = LLVMBuildICmp(builder, LLVMIntSGE, val, ctx->i32_0, "");
+ val = LLVMBuildSelect(builder, cmp, val, LLVMConstInt(ctx->i32, -1, true), "");
+ } else if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_DSSG) {
+ cmp = LLVMBuildFCmp(builder, LLVMRealOGT, emit_data->args[0], bld_base->dbl_bld.zero, "");
+ val = LLVMBuildSelect(builder, cmp, bld_base->dbl_bld.one, emit_data->args[0], "");
+ cmp = LLVMBuildFCmp(builder, LLVMRealOGE, val, bld_base->dbl_bld.zero, "");
+ val = LLVMBuildSelect(builder, cmp, val, LLVMConstReal(bld_base->dbl_bld.elem_type, -1), "");
} else { // float SSG
- cmp = LLVMBuildFCmp(builder, LLVMRealOGT, emit_data->args[0], bld_base->base.zero, "");
- val = LLVMBuildSelect(builder, cmp, bld_base->base.one, emit_data->args[0], "");
- cmp = LLVMBuildFCmp(builder, LLVMRealOGE, val, bld_base->base.zero, "");
- val = LLVMBuildSelect(builder, cmp, val, LLVMConstReal(bld_base->base.elem_type, -1), "");
+ cmp = LLVMBuildFCmp(builder, LLVMRealOGT, emit_data->args[0], ctx->ac.f32_0, "");
+ val = LLVMBuildSelect(builder, cmp, ctx->ac.f32_1, emit_data->args[0], "");
+ cmp = LLVMBuildFCmp(builder, LLVMRealOGE, val, ctx->ac.f32_0, "");
+ val = LLVMBuildSelect(builder, cmp, val, LLVMConstReal(ctx->f32, -1), "");
}
emit_data->output[emit_data->chan] = val;
@@ -388,8 +385,8 @@
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
- LLVMBuilderRef builder = bld_base->base.gallivm->builder;
- emit_data->output[emit_data->chan] = LLVMBuildNeg(builder,
+ struct si_shader_context *ctx = si_shader_context(bld_base);
+ emit_data->output[emit_data->chan] = LLVMBuildNeg(ctx->ac.builder,
emit_data->args[0], "");
}
@@ -397,8 +394,8 @@
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
- LLVMBuilderRef builder = bld_base->base.gallivm->builder;
- emit_data->output[emit_data->chan] = LLVMBuildFNeg(builder,
+ struct si_shader_context *ctx = si_shader_context(bld_base);
+ emit_data->output[emit_data->chan] = LLVMBuildFNeg(ctx->ac.builder,
emit_data->args[0], "");
}
@@ -406,7 +403,7 @@
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
- LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+ struct si_shader_context *ctx = si_shader_context(bld_base);
char *intr;
if (emit_data->info->opcode == TGSI_OPCODE_FRC)
@@ -418,10 +415,10 @@
return;
}
- LLVMValueRef floor = lp_build_intrinsic(builder, intr, emit_data->dst_type,
+ LLVMValueRef floor = lp_build_intrinsic(ctx->ac.builder, intr, emit_data->dst_type,
&emit_data->args[0], 1,
LP_FUNC_ATTR_READNONE);
- emit_data->output[emit_data->chan] = LLVMBuildFSub(builder,
+ emit_data->output[emit_data->chan] = LLVMBuildFSub(ctx->ac.builder,
emit_data->args[0], floor, "");
}
@@ -429,36 +426,36 @@
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
- LLVMBuilderRef builder = bld_base->base.gallivm->builder;
- emit_data->output[emit_data->chan] = LLVMBuildFPToSI(builder,
- emit_data->args[0], bld_base->int_bld.elem_type, "");
+ struct si_shader_context *ctx = si_shader_context(bld_base);
+ emit_data->output[emit_data->chan] = LLVMBuildFPToSI(ctx->ac.builder,
+ emit_data->args[0], ctx->i32, "");
}
static void emit_f2u(const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
- LLVMBuilderRef builder = bld_base->base.gallivm->builder;
- emit_data->output[emit_data->chan] = LLVMBuildFPToUI(builder,
- emit_data->args[0], bld_base->uint_bld.elem_type, "");
+ struct si_shader_context *ctx = si_shader_context(bld_base);
+ emit_data->output[emit_data->chan] = LLVMBuildFPToUI(ctx->ac.builder,
+ emit_data->args[0], ctx->i32, "");
}
static void emit_i2f(const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
- LLVMBuilderRef builder = bld_base->base.gallivm->builder;
- emit_data->output[emit_data->chan] = LLVMBuildSIToFP(builder,
- emit_data->args[0], bld_base->base.elem_type, "");
+ struct si_shader_context *ctx = si_shader_context(bld_base);
+ emit_data->output[emit_data->chan] = LLVMBuildSIToFP(ctx->ac.builder,
+ emit_data->args[0], ctx->f32, "");
}
static void emit_u2f(const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
- LLVMBuilderRef builder = bld_base->base.gallivm->builder;
- emit_data->output[emit_data->chan] = LLVMBuildUIToFP(builder,
- emit_data->args[0], bld_base->base.elem_type, "");
+ struct si_shader_context *ctx = si_shader_context(bld_base);
+ emit_data->output[emit_data->chan] = LLVMBuildUIToFP(ctx->ac.builder,
+ emit_data->args[0], ctx->f32, "");
}
static void
@@ -466,9 +463,9 @@
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
- struct lp_build_context *base = &bld_base->base;
+ struct si_shader_context *ctx = si_shader_context(bld_base);
emit_data->output[emit_data->chan] =
- lp_build_intrinsic(base->gallivm->builder, action->intr_name,
+ lp_build_intrinsic(ctx->ac.builder, action->intr_name,
emit_data->dst_type, emit_data->args,
emit_data->arg_count, LP_FUNC_ATTR_READNONE);
}
@@ -477,8 +474,8 @@
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
- struct gallivm_state *gallivm = bld_base->base.gallivm;
- LLVMBuilderRef builder = gallivm->builder;
+ struct si_shader_context *ctx = si_shader_context(bld_base);
+ LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef bfi_args[3];
LLVMValueRef bfi_sm5;
LLVMValueRef cond;
@@ -487,9 +484,9 @@
bfi_args[0] = LLVMBuildShl(builder,
LLVMBuildSub(builder,
LLVMBuildShl(builder,
- bld_base->int_bld.one,
+ ctx->i32_1,
emit_data->args[3], ""),
- bld_base->int_bld.one, ""),
+ ctx->i32_1, ""),
emit_data->args[2], "");
bfi_args[1] = LLVMBuildShl(builder, emit_data->args[1],
@@ -512,7 +509,7 @@
* and disagrees with GLSL semantics when bits (src3) is 32.
*/
cond = LLVMBuildICmp(builder, LLVMIntUGE, emit_data->args[3],
- lp_build_const_int32(gallivm, 32), "");
+ LLVMConstInt(ctx->i32, 32, 0), "");
emit_data->output[emit_data->chan] =
LLVMBuildSelect(builder, cond, emit_data->args[1], bfi_sm5, "");
}
@@ -522,8 +519,6 @@
struct lp_build_emit_data *emit_data)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- struct gallivm_state *gallivm = &ctx->gallivm;
- LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef bfe_sm5;
LLVMValueRef cond;
@@ -532,10 +527,10 @@
emit_data->info->opcode == TGSI_OPCODE_IBFE);
/* Correct for GLSL semantics. */
- cond = LLVMBuildICmp(builder, LLVMIntUGE, emit_data->args[2],
+ cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntUGE, emit_data->args[2],
LLVMConstInt(ctx->i32, 32, 0), "");
emit_data->output[emit_data->chan] =
- LLVMBuildSelect(builder, cond, emit_data->args[0], bfe_sm5, "");
+ LLVMBuildSelect(ctx->ac.builder, cond, emit_data->args[0], bfe_sm5, "");
}
/* this is ffs in C */
@@ -543,8 +538,7 @@
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
- struct gallivm_state *gallivm = bld_base->base.gallivm;
- LLVMBuilderRef builder = gallivm->builder;
+ struct si_shader_context *ctx = si_shader_context(bld_base);
LLVMValueRef args[2] = {
emit_data->args[0],
@@ -555,21 +549,21 @@
* in [0, 31], but GLSL expects that ffs(0) = -1, so
* a conditional assignment to handle 0 is still required.
*/
- LLVMConstInt(LLVMInt1TypeInContext(gallivm->context), 1, 0)
+ LLVMConstInt(ctx->i1, 1, 0)
};
LLVMValueRef lsb =
- lp_build_intrinsic(gallivm->builder, "llvm.cttz.i32",
+ lp_build_intrinsic(ctx->ac.builder, "llvm.cttz.i32",
emit_data->dst_type, args, ARRAY_SIZE(args),
LP_FUNC_ATTR_READNONE);
/* TODO: We need an intrinsic to skip this conditional. */
/* Check for zero: */
emit_data->output[emit_data->chan] =
- LLVMBuildSelect(builder,
- LLVMBuildICmp(builder, LLVMIntEQ, args[0],
- bld_base->uint_bld.zero, ""),
- lp_build_const_int32(gallivm, -1), lsb, "");
+ LLVMBuildSelect(ctx->ac.builder,
+ LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, args[0],
+ ctx->i32_0, ""),
+ LLVMConstInt(ctx->i32, -1, 0), lsb, "");
}
/* Find the last bit set. */
@@ -598,12 +592,12 @@
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
- LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+ struct si_shader_context *ctx = si_shader_context(bld_base);
emit_data->output[emit_data->chan] =
lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_IMAX,
emit_data->args[0],
- LLVMBuildNeg(builder,
+ LLVMBuildNeg(ctx->ac.builder,
emit_data->args[0], ""));
}
@@ -611,7 +605,7 @@
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
- LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+ struct si_shader_context *ctx = si_shader_context(bld_base);
LLVMIntPredicate op;
switch (emit_data->info->opcode) {
@@ -636,8 +630,8 @@
}
emit_data->output[emit_data->chan] =
- LLVMBuildSelect(builder,
- LLVMBuildICmp(builder, op, emit_data->args[0],
+ LLVMBuildSelect(ctx->ac.builder,
+ LLVMBuildICmp(ctx->ac.builder, op, emit_data->args[0],
emit_data->args[1], ""),
emit_data->args[0],
emit_data->args[1], "");
@@ -656,27 +650,14 @@
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
- LLVMBuilderRef builder = bld_base->base.gallivm->builder;
- LLVMContextRef context = bld_base->base.gallivm->context;
- struct lp_build_context *uint_bld = &bld_base->uint_bld;
- LLVMTypeRef fp16, i16;
- LLVMValueRef const16, comp[2];
- unsigned i;
-
- fp16 = LLVMHalfTypeInContext(context);
- i16 = LLVMInt16TypeInContext(context);
- const16 = lp_build_const_int32(uint_bld->gallivm, 16);
-
- for (i = 0; i < 2; i++) {
- comp[i] = LLVMBuildFPTrunc(builder, emit_data->args[i], fp16, "");
- comp[i] = LLVMBuildBitCast(builder, comp[i], i16, "");
- comp[i] = LLVMBuildZExt(builder, comp[i], uint_bld->elem_type, "");
- }
-
- comp[1] = LLVMBuildShl(builder, comp[1], const16, "");
- comp[0] = LLVMBuildOr(builder, comp[0], comp[1], "");
-
- emit_data->output[emit_data->chan] = comp[0];
+ /* From the GLSL 4.50 spec:
+ * "The rounding mode cannot be set and is undefined."
+ *
+ * v_cvt_pkrtz_f16 rounds to zero, but it's fastest.
+ */
+ emit_data->output[emit_data->chan] =
+ ac_build_cvt_pkrtz_f16(&si_shader_context(bld_base)->ac,
+ emit_data->args);
}
static void up2h_fetch_args(struct lp_build_tgsi_context *bld_base,
@@ -690,24 +671,20 @@
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
- LLVMBuilderRef builder = bld_base->base.gallivm->builder;
- LLVMContextRef context = bld_base->base.gallivm->context;
- struct lp_build_context *uint_bld = &bld_base->uint_bld;
- LLVMTypeRef fp16, i16;
+ struct si_shader_context *ctx = si_shader_context(bld_base);
+ LLVMTypeRef i16;
LLVMValueRef const16, input, val;
unsigned i;
- fp16 = LLVMHalfTypeInContext(context);
- i16 = LLVMInt16TypeInContext(context);
- const16 = lp_build_const_int32(uint_bld->gallivm, 16);
+ i16 = LLVMInt16TypeInContext(ctx->ac.context);
+ const16 = LLVMConstInt(ctx->i32, 16, 0);
input = emit_data->args[0];
for (i = 0; i < 2; i++) {
- val = i == 1 ? LLVMBuildLShr(builder, input, const16, "") : input;
- val = LLVMBuildTrunc(builder, val, i16, "");
- val = LLVMBuildBitCast(builder, val, fp16, "");
- emit_data->output[i] =
- LLVMBuildFPExt(builder, val, bld_base->base.elem_type, "");
+ val = i == 1 ? LLVMBuildLShr(ctx->ac.builder, input, const16, "") : input;
+ val = LLVMBuildTrunc(ctx->ac.builder, val, i16, "");
+ val = ac_to_float(&ctx->ac, val);
+ emit_data->output[i] = LLVMBuildFPExt(ctx->ac.builder, val, ctx->f32, "");
}
}
@@ -718,7 +695,7 @@
struct si_shader_context *ctx = si_shader_context(bld_base);
emit_data->output[emit_data->chan] =
- LLVMBuildFDiv(ctx->gallivm.builder,
+ LLVMBuildFDiv(ctx->ac.builder,
emit_data->args[0], emit_data->args[1], "");
/* Use v_rcp_f32 instead of precise division. */
@@ -733,13 +710,36 @@
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
+ struct si_shader_context *ctx = si_shader_context(bld_base);
+
LLVMValueRef sqrt =
lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_SQRT,
emit_data->args[0]);
emit_data->output[emit_data->chan] =
lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_DIV,
- bld_base->base.one, sqrt);
+ ctx->ac.f32_1, sqrt);
+}
+
+static void dfracexp_fetch_args(struct lp_build_tgsi_context *bld_base,
+ struct lp_build_emit_data *emit_data)
+{
+ emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X);
+ emit_data->arg_count = 1;
+}
+
+static void dfracexp_emit(const struct lp_build_tgsi_action *action,
+ struct lp_build_tgsi_context *bld_base,
+ struct lp_build_emit_data *emit_data)
+{
+ struct si_shader_context *ctx = si_shader_context(bld_base);
+
+ emit_data->output[emit_data->chan] =
+ lp_build_intrinsic(ctx->ac.builder, "llvm.amdgcn.frexp.mant.f64",
+ ctx->ac.f64, &emit_data->args[0], 1, 0);
+ emit_data->output1[emit_data->chan] =
+ lp_build_intrinsic(ctx->ac.builder, "llvm.amdgcn.frexp.exp.i32.f64",
+ ctx->ac.i32, &emit_data->args[0], 1, 0);
}
void si_shader_context_init_alu(struct lp_build_tgsi_context *bld_base)
@@ -758,19 +758,32 @@
bld_base->op_actions[TGSI_OPCODE_COS].intr_name = "llvm.cos.f32";
bld_base->op_actions[TGSI_OPCODE_DABS].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_DABS].intr_name = "llvm.fabs.f64";
+ bld_base->op_actions[TGSI_OPCODE_DCEIL].emit = build_tgsi_intrinsic_nomem;
+ bld_base->op_actions[TGSI_OPCODE_DCEIL].intr_name = "llvm.ceil.f64";
+ bld_base->op_actions[TGSI_OPCODE_DFLR].emit = build_tgsi_intrinsic_nomem;
+ bld_base->op_actions[TGSI_OPCODE_DFLR].intr_name = "llvm.floor.f64";
bld_base->op_actions[TGSI_OPCODE_DFMA].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_DFMA].intr_name = "llvm.fma.f64";
bld_base->op_actions[TGSI_OPCODE_DFRAC].emit = emit_frac;
bld_base->op_actions[TGSI_OPCODE_DIV].emit = emit_fdiv;
bld_base->op_actions[TGSI_OPCODE_DNEG].emit = emit_dneg;
+ bld_base->op_actions[TGSI_OPCODE_DROUND].emit = build_tgsi_intrinsic_nomem;
+ bld_base->op_actions[TGSI_OPCODE_DROUND].intr_name = "llvm.rint.f64";
bld_base->op_actions[TGSI_OPCODE_DSEQ].emit = emit_dcmp;
bld_base->op_actions[TGSI_OPCODE_DSGE].emit = emit_dcmp;
bld_base->op_actions[TGSI_OPCODE_DSLT].emit = emit_dcmp;
bld_base->op_actions[TGSI_OPCODE_DSNE].emit = emit_dcmp;
+ bld_base->op_actions[TGSI_OPCODE_DSSG].emit = emit_ssg;
bld_base->op_actions[TGSI_OPCODE_DRSQ].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_DRSQ].intr_name = "llvm.amdgcn.rsq.f64";
bld_base->op_actions[TGSI_OPCODE_DSQRT].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_DSQRT].intr_name = "llvm.sqrt.f64";
+ bld_base->op_actions[TGSI_OPCODE_DTRUNC].emit = build_tgsi_intrinsic_nomem;
+ bld_base->op_actions[TGSI_OPCODE_DTRUNC].intr_name = "llvm.trunc.f64";
+ bld_base->op_actions[TGSI_OPCODE_DFRACEXP].fetch_args = dfracexp_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_DFRACEXP].emit = dfracexp_emit;
+ bld_base->op_actions[TGSI_OPCODE_DLDEXP].emit = build_tgsi_intrinsic_nomem;
+ bld_base->op_actions[TGSI_OPCODE_DLDEXP].intr_name = "llvm.amdgcn.ldexp.f64";
bld_base->op_actions[TGSI_OPCODE_EX2].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_EX2].intr_name = "llvm.exp2.f32";
bld_base->op_actions[TGSI_OPCODE_FLR].emit = build_tgsi_intrinsic_nomem;
@@ -799,6 +812,8 @@
bld_base->op_actions[TGSI_OPCODE_KILL_IF].fetch_args = kill_if_fetch_args;
bld_base->op_actions[TGSI_OPCODE_KILL_IF].emit = kil_emit;
bld_base->op_actions[TGSI_OPCODE_KILL].emit = kil_emit;
+ bld_base->op_actions[TGSI_OPCODE_LDEXP].emit = build_tgsi_intrinsic_nomem;
+ bld_base->op_actions[TGSI_OPCODE_LDEXP].intr_name = "llvm.amdgcn.ldexp.f32";
bld_base->op_actions[TGSI_OPCODE_LSB].emit = emit_lsb;
bld_base->op_actions[TGSI_OPCODE_LG2].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_LG2].intr_name = "llvm.log2.f32";
diff -Nru mesa-17.2.4/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c mesa-17.3.3/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
--- mesa-17.2.4/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c 2018-01-18 21:30:28.000000000 +0000
@@ -37,13 +37,6 @@
static const struct lp_build_tgsi_action tex_action;
-enum desc_type {
- DESC_IMAGE,
- DESC_BUFFER,
- DESC_FMASK,
- DESC_SAMPLER,
-};
-
/**
* Given a v8i32 resource descriptor for a buffer, extract the size of the
* buffer in number of elements and return it as an i32.
@@ -53,8 +46,7 @@
LLVMValueRef descriptor)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- struct gallivm_state *gallivm = &ctx->gallivm;
- LLVMBuilderRef builder = gallivm->builder;
+ LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef size =
LLVMBuildExtractElement(builder, descriptor,
LLVMConstInt(ctx->i32, 2, 0), "");
@@ -80,36 +72,22 @@
static LLVMValueRef
shader_buffer_fetch_rsrc(struct si_shader_context *ctx,
- const struct tgsi_full_src_register *reg)
+ const struct tgsi_full_src_register *reg,
+ bool ubo)
{
LLVMValueRef index;
- LLVMValueRef rsrc_ptr = LLVMGetParam(ctx->main_fn,
- ctx->param_const_and_shader_buffers);
if (!reg->Register.Indirect) {
- index = LLVMConstInt(ctx->i32,
- si_get_shaderbuf_slot(reg->Register.Index), 0);
+ index = LLVMConstInt(ctx->i32, reg->Register.Index, false);
} else {
- index = si_get_bounded_indirect_index(ctx, &reg->Indirect,
- reg->Register.Index,
- ctx->num_shader_buffers);
- index = LLVMBuildSub(ctx->gallivm.builder,
- LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS - 1, 0),
- index, "");
+ index = si_get_indirect_index(ctx, &reg->Indirect,
+ 1, reg->Register.Index);
}
- return ac_build_indexed_load_const(&ctx->ac, rsrc_ptr, index);
-}
-
-static bool tgsi_is_array_sampler(unsigned target)
-{
- return target == TGSI_TEXTURE_1D_ARRAY ||
- target == TGSI_TEXTURE_SHADOW1D_ARRAY ||
- target == TGSI_TEXTURE_2D_ARRAY ||
- target == TGSI_TEXTURE_SHADOW2D_ARRAY ||
- target == TGSI_TEXTURE_CUBE_ARRAY ||
- target == TGSI_TEXTURE_SHADOWCUBE_ARRAY ||
- target == TGSI_TEXTURE_2D_ARRAY_MSAA;
+ if (ubo)
+ return ctx->abi.load_ubo(&ctx->abi, index);
+ else
+ return ctx->abi.load_ssbo(&ctx->abi, index, false);
}
static bool tgsi_is_array_image(unsigned target)
@@ -139,33 +117,38 @@
if (ctx->screen->b.chip_class <= CIK) {
return rsrc;
} else {
- LLVMBuilderRef builder = ctx->gallivm.builder;
LLVMValueRef i32_6 = LLVMConstInt(ctx->i32, 6, 0);
LLVMValueRef i32_C = LLVMConstInt(ctx->i32, C_008F28_COMPRESSION_EN, 0);
LLVMValueRef tmp;
- tmp = LLVMBuildExtractElement(builder, rsrc, i32_6, "");
- tmp = LLVMBuildAnd(builder, tmp, i32_C, "");
- return LLVMBuildInsertElement(builder, rsrc, tmp, i32_6, "");
+ tmp = LLVMBuildExtractElement(ctx->ac.builder, rsrc, i32_6, "");
+ tmp = LLVMBuildAnd(ctx->ac.builder, tmp, i32_C, "");
+ return LLVMBuildInsertElement(ctx->ac.builder, rsrc, tmp, i32_6, "");
}
}
-static LLVMValueRef load_image_desc(struct si_shader_context *ctx,
- LLVMValueRef list, LLVMValueRef index,
- unsigned target)
+LLVMValueRef si_load_image_desc(struct si_shader_context *ctx,
+ LLVMValueRef list, LLVMValueRef index,
+ enum ac_descriptor_type desc_type, bool dcc_off)
{
- LLVMBuilderRef builder = ctx->gallivm.builder;
+ LLVMBuilderRef builder = ctx->ac.builder;
+ LLVMValueRef rsrc;
- if (target == TGSI_TEXTURE_BUFFER) {
+ if (desc_type == AC_DESC_BUFFER) {
index = LLVMBuildMul(builder, index,
LLVMConstInt(ctx->i32, 2, 0), "");
index = LLVMBuildAdd(builder, index,
ctx->i32_1, "");
list = LLVMBuildPointerCast(builder, list,
si_const_array(ctx->v4i32, 0), "");
+ } else {
+ assert(desc_type == AC_DESC_IMAGE);
}
- return ac_build_indexed_load_const(&ctx->ac, list, index);
+ rsrc = ac_build_load_to_sgpr(&ctx->ac, list, index);
+ if (desc_type == AC_DESC_IMAGE && dcc_off)
+ rsrc = force_dcc_off(ctx, rsrc);
+ return rsrc;
}
/**
@@ -207,26 +190,30 @@
index = si_get_bounded_indirect_index(ctx, &image->Indirect,
image->Register.Index,
ctx->num_images);
- index = LLVMBuildSub(ctx->gallivm.builder,
+ index = LLVMBuildSub(ctx->ac.builder,
LLVMConstInt(ctx->i32, SI_NUM_IMAGES - 1, 0),
index, "");
}
if (image->Register.File != TGSI_FILE_IMAGE) {
- struct gallivm_state *gallivm = &ctx->gallivm;
- LLVMBuilderRef builder = gallivm->builder;
+ /* Bindless descriptors are accessible from a different pair of
+ * user SGPR indices.
+ */
+ rsrc_ptr = LLVMGetParam(ctx->main_fn,
+ ctx->param_bindless_samplers_and_images);
+ index = lp_build_emit_fetch_src(bld_base, image,
+ TGSI_TYPE_UNSIGNED, 0);
- LLVMValueRef ptr =
- lp_build_emit_fetch_src(bld_base, image,
- TGSI_TYPE_UNSIGNED64, 0);
- rsrc_ptr = LLVMBuildIntToPtr(builder, ptr,
- si_const_array(ctx->v8i32, 0), "");
- index = LLVMConstInt(ctx->i32, 0, 0);
+ /* For simplicity, bindless image descriptors use fixed
+ * 16-dword slots for now.
+ */
+ index = LLVMBuildMul(ctx->ac.builder, index,
+ LLVMConstInt(ctx->i32, 2, 0), "");
}
- *rsrc = load_image_desc(ctx, rsrc_ptr, index, target);
- if (dcc_off && target != TGSI_TEXTURE_BUFFER)
- *rsrc = force_dcc_off(ctx, *rsrc);
+ *rsrc = si_load_image_desc(ctx, rsrc_ptr, index,
+ target == TGSI_TEXTURE_BUFFER ? AC_DESC_BUFFER : AC_DESC_IMAGE,
+ dcc_off);
}
static LLVMValueRef image_fetch_coords(
@@ -235,8 +222,7 @@
unsigned src, LLVMValueRef desc)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- struct gallivm_state *gallivm = &ctx->gallivm;
- LLVMBuilderRef builder = gallivm->builder;
+ LLVMBuilderRef builder = ctx->ac.builder;
unsigned target = inst->Memory.Texture;
unsigned num_coords = tgsi_util_get_texture_coord_dim(target);
LLVMValueRef coords[4];
@@ -245,7 +231,7 @@
for (chan = 0; chan < num_coords; ++chan) {
tmp = lp_build_emit_fetch(bld_base, inst, src, chan);
- tmp = LLVMBuildBitCast(builder, tmp, ctx->i32, "");
+ tmp = ac_to_integer(&ctx->ac, tmp);
coords[chan] = tmp;
}
@@ -285,7 +271,7 @@
num_coords = 4;
}
- return lp_build_gather_values(gallivm, coords, num_coords);
+ return lp_build_gather_values(&ctx->gallivm, coords, num_coords);
}
/**
@@ -364,22 +350,22 @@
struct lp_build_emit_data * emit_data)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- struct gallivm_state *gallivm = &ctx->gallivm;
const struct tgsi_full_instruction * inst = emit_data->inst;
unsigned target = inst->Memory.Texture;
LLVMValueRef rsrc;
emit_data->dst_type = ctx->v4f32;
- if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
- LLVMBuilderRef builder = gallivm->builder;
+ if (inst->Src[0].Register.File == TGSI_FILE_BUFFER ||
+ inst->Src[0].Register.File == TGSI_FILE_CONSTBUF) {
LLVMValueRef offset;
LLVMValueRef tmp;
- rsrc = shader_buffer_fetch_rsrc(ctx, &inst->Src[0]);
+ bool ubo = inst->Src[0].Register.File == TGSI_FILE_CONSTBUF;
+ rsrc = shader_buffer_fetch_rsrc(ctx, &inst->Src[0], ubo);
tmp = lp_build_emit_fetch(bld_base, inst, 1, 0);
- offset = LLVMBuildBitCast(builder, tmp, ctx->i32, "");
+ offset = ac_to_integer(&ctx->ac, tmp);
buffer_append_args(ctx, emit_data, rsrc, ctx->i32_0,
offset, false, false);
@@ -422,7 +408,7 @@
static void load_emit_buffer(struct si_shader_context *ctx,
struct lp_build_emit_data *emit_data,
- bool can_speculate)
+ bool can_speculate, bool allow_smem)
{
const struct tgsi_full_instruction *inst = emit_data->inst;
uint writemask = inst->Dst[0].Register.WriteMask;
@@ -449,20 +435,19 @@
args[2], NULL, 0,
LLVMConstIntGetZExtValue(args[3]),
LLVMConstIntGetZExtValue(args[4]),
- can_speculate, false);
+ can_speculate, allow_smem);
}
static LLVMValueRef get_memory_ptr(struct si_shader_context *ctx,
const struct tgsi_full_instruction *inst,
LLVMTypeRef type, int arg)
{
- struct gallivm_state *gallivm = &ctx->gallivm;
- LLVMBuilderRef builder = gallivm->builder;
+ LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef offset, ptr;
int addr_space;
offset = lp_build_emit_fetch(&ctx->bld_base, inst, arg, 0);
- offset = LLVMBuildBitCast(builder, offset, ctx->i32, "");
+ offset = ac_to_integer(&ctx->ac, offset);
ptr = ctx->shared_memory;
ptr = LLVMBuildGEP(builder, ptr, &offset, 1, "");
@@ -477,8 +462,6 @@
struct lp_build_emit_data *emit_data)
{
const struct tgsi_full_instruction *inst = emit_data->inst;
- struct gallivm_state *gallivm = &ctx->gallivm;
- LLVMBuilderRef builder = gallivm->builder;
unsigned writemask = inst->Dst[0].Register.WriteMask;
LLVMValueRef channels[4], ptr, derived_ptr, index;
int chan;
@@ -492,10 +475,10 @@
}
index = LLVMConstInt(ctx->i32, chan, 0);
- derived_ptr = LLVMBuildGEP(builder, ptr, &index, 1, "");
- channels[chan] = LLVMBuildLoad(builder, derived_ptr, "");
+ derived_ptr = LLVMBuildGEP(ctx->ac.builder, ptr, &index, 1, "");
+ channels[chan] = LLVMBuildLoad(ctx->ac.builder, derived_ptr, "");
}
- emit_data->output[emit_data->chan] = lp_build_gather_values(gallivm, channels, 4);
+ emit_data->output[emit_data->chan] = lp_build_gather_values(&ctx->gallivm, channels, 4);
}
/**
@@ -567,8 +550,7 @@
struct lp_build_emit_data *emit_data)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- struct gallivm_state *gallivm = &ctx->gallivm;
- LLVMBuilderRef builder = gallivm->builder;
+ LLVMBuilderRef builder = ctx->ac.builder;
const struct tgsi_full_instruction * inst = emit_data->inst;
const struct tgsi_shader_info *info = &ctx->shader->selector->info;
char intrinsic_name[64];
@@ -579,6 +561,11 @@
return;
}
+ if (inst->Src[0].Register.File == TGSI_FILE_CONSTBUF) {
+ load_emit_buffer(ctx, emit_data, true, true);
+ return;
+ }
+
if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE)
si_emit_waitcnt(ctx, VM_CNT);
@@ -590,7 +577,7 @@
info->images_atomic);
if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
- load_emit_buffer(ctx, emit_data, can_speculate);
+ load_emit_buffer(ctx, emit_data, can_speculate, false);
return;
}
@@ -620,8 +607,6 @@
struct lp_build_emit_data * emit_data)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- struct gallivm_state *gallivm = &ctx->gallivm;
- LLVMBuilderRef builder = gallivm->builder;
const struct tgsi_full_instruction * inst = emit_data->inst;
struct tgsi_full_src_register memory;
LLVMValueRef chans[4];
@@ -629,12 +614,12 @@
LLVMValueRef rsrc;
unsigned chan;
- emit_data->dst_type = LLVMVoidTypeInContext(gallivm->context);
+ emit_data->dst_type = ctx->voidt;
for (chan = 0; chan < 4; ++chan) {
chans[chan] = lp_build_emit_fetch(bld_base, inst, 1, chan);
}
- data = lp_build_gather_values(gallivm, chans, 4);
+ data = lp_build_gather_values(&ctx->gallivm, chans, 4);
emit_data->args[emit_data->arg_count++] = data;
@@ -644,10 +629,10 @@
LLVMValueRef offset;
LLVMValueRef tmp;
- rsrc = shader_buffer_fetch_rsrc(ctx, &memory);
+ rsrc = shader_buffer_fetch_rsrc(ctx, &memory, false);
tmp = lp_build_emit_fetch(bld_base, inst, 0, 0);
- offset = LLVMBuildBitCast(builder, tmp, ctx->i32, "");
+ offset = ac_to_integer(&ctx->ac, tmp);
buffer_append_args(ctx, emit_data, rsrc, ctx->i32_0,
offset, false, false);
@@ -687,8 +672,7 @@
bool writeonly_memory)
{
const struct tgsi_full_instruction *inst = emit_data->inst;
- struct gallivm_state *gallivm = &ctx->gallivm;
- LLVMBuilderRef builder = gallivm->builder;
+ LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef base_data = emit_data->args[0];
LLVMValueRef base_offset = emit_data->args[3];
unsigned writemask = inst->Dst[0].Register.WriteMask;
@@ -759,8 +743,7 @@
struct lp_build_emit_data *emit_data)
{
const struct tgsi_full_instruction *inst = emit_data->inst;
- struct gallivm_state *gallivm = &ctx->gallivm;
- LLVMBuilderRef builder = gallivm->builder;
+ LLVMBuilderRef builder = ctx->ac.builder;
unsigned writemask = inst->Dst[0].Register.WriteMask;
LLVMValueRef ptr, derived_ptr, data, index;
int chan;
@@ -784,8 +767,7 @@
struct lp_build_emit_data *emit_data)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- struct gallivm_state *gallivm = &ctx->gallivm;
- LLVMBuilderRef builder = gallivm->builder;
+ LLVMBuilderRef builder = ctx->ac.builder;
const struct tgsi_full_instruction * inst = emit_data->inst;
const struct tgsi_shader_info *info = &ctx->shader->selector->info;
unsigned target = inst->Memory.Texture;
@@ -837,8 +819,6 @@
struct lp_build_emit_data * emit_data)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- struct gallivm_state *gallivm = &ctx->gallivm;
- LLVMBuilderRef builder = gallivm->builder;
const struct tgsi_full_instruction * inst = emit_data->inst;
LLVMValueRef data1, data2;
LLVMValueRef rsrc;
@@ -847,11 +827,11 @@
emit_data->dst_type = ctx->f32;
tmp = lp_build_emit_fetch(bld_base, inst, 2, 0);
- data1 = LLVMBuildBitCast(builder, tmp, ctx->i32, "");
+ data1 = ac_to_integer(&ctx->ac, tmp);
if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
tmp = lp_build_emit_fetch(bld_base, inst, 3, 0);
- data2 = LLVMBuildBitCast(builder, tmp, ctx->i32, "");
+ data2 = ac_to_integer(&ctx->ac, tmp);
}
/* llvm.amdgcn.image/buffer.atomic.cmpswap reflect the hardware order
@@ -864,10 +844,10 @@
if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
LLVMValueRef offset;
- rsrc = shader_buffer_fetch_rsrc(ctx, &inst->Src[0]);
+ rsrc = shader_buffer_fetch_rsrc(ctx, &inst->Src[0], false);
tmp = lp_build_emit_fetch(bld_base, inst, 1, 0);
- offset = LLVMBuildBitCast(builder, tmp, ctx->i32, "");
+ offset = ac_to_integer(&ctx->ac, tmp);
buffer_append_args(ctx, emit_data, rsrc, ctx->i32_0,
offset, true, false);
@@ -893,22 +873,21 @@
static void atomic_emit_memory(struct si_shader_context *ctx,
struct lp_build_emit_data *emit_data) {
- struct gallivm_state *gallivm = &ctx->gallivm;
- LLVMBuilderRef builder = gallivm->builder;
+ LLVMBuilderRef builder = ctx->ac.builder;
const struct tgsi_full_instruction * inst = emit_data->inst;
LLVMValueRef ptr, result, arg;
ptr = get_memory_ptr(ctx, inst, ctx->i32, 1);
arg = lp_build_emit_fetch(&ctx->bld_base, inst, 2, 0);
- arg = LLVMBuildBitCast(builder, arg, ctx->i32, "");
+ arg = ac_to_integer(&ctx->ac, arg);
if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
LLVMValueRef new_data;
new_data = lp_build_emit_fetch(&ctx->bld_base,
inst, 3, 0);
- new_data = LLVMBuildBitCast(builder, new_data, ctx->i32, "");
+ new_data = ac_to_integer(&ctx->ac, new_data);
result = LLVMBuildAtomicCmpXchg(builder, ptr, arg, new_data,
LLVMAtomicOrderingSequentiallyConsistent,
@@ -964,8 +943,7 @@
struct lp_build_emit_data *emit_data)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- struct gallivm_state *gallivm = &ctx->gallivm;
- LLVMBuilderRef builder = gallivm->builder;
+ LLVMBuilderRef builder = ctx->ac.builder;
const struct tgsi_full_instruction * inst = emit_data->inst;
char intrinsic_name[40];
LLVMValueRef tmp;
@@ -997,8 +975,7 @@
tmp = lp_build_intrinsic(
builder, intrinsic_name, ctx->i32,
emit_data->args, emit_data->arg_count, 0);
- emit_data->output[emit_data->chan] =
- LLVMBuildBitCast(builder, tmp, ctx->f32, "");
+ emit_data->output[emit_data->chan] = ac_to_float(&ctx->ac, tmp);
}
static void set_tex_fetch_args(struct si_shader_context *ctx,
@@ -1008,7 +985,6 @@
LLVMValueRef *param, unsigned count,
unsigned dmask)
{
- struct gallivm_state *gallivm = &ctx->gallivm;
struct ac_image_args args = {};
/* Pad to power of two vector */
@@ -1016,7 +992,7 @@
param[count++] = LLVMGetUndef(ctx->i32);
if (count > 1)
- args.addr = lp_build_gather_values(gallivm, param, count);
+ args.addr = lp_build_gather_values(&ctx->gallivm, param, count);
else
args.addr = param[0];
@@ -1035,7 +1011,7 @@
static LLVMValueRef fix_resinfo(struct si_shader_context *ctx,
unsigned target, LLVMValueRef out)
{
- LLVMBuilderRef builder = ctx->gallivm.builder;
+ LLVMBuilderRef builder = ctx->ac.builder;
/* 1D textures are allocated and used as 2D on GFX9. */
if (ctx->screen->b.chip_class >= GFX9 &&
@@ -1072,7 +1048,7 @@
emit_data->dst_type = ctx->v4i32;
if (reg->Register.File == TGSI_FILE_BUFFER) {
- emit_data->args[0] = shader_buffer_fetch_rsrc(ctx, reg);
+ emit_data->args[0] = shader_buffer_fetch_rsrc(ctx, reg, false);
emit_data->arg_count = 1;
} else if (inst->Memory.Texture == TGSI_TEXTURE_BUFFER) {
image_fetch_rsrc(bld_base, reg, false, inst->Memory.Texture,
@@ -1101,8 +1077,7 @@
struct lp_build_emit_data *emit_data)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- struct gallivm_state *gallivm = &ctx->gallivm;
- LLVMBuilderRef builder = gallivm->builder;
+ LLVMBuilderRef builder = ctx->ac.builder;
const struct tgsi_full_instruction *inst = emit_data->inst;
LLVMValueRef out;
@@ -1127,31 +1102,30 @@
/**
* Load an image view, fmask view. or sampler state descriptor.
*/
-static LLVMValueRef load_sampler_desc(struct si_shader_context *ctx,
- LLVMValueRef list, LLVMValueRef index,
- enum desc_type type)
+LLVMValueRef si_load_sampler_desc(struct si_shader_context *ctx,
+ LLVMValueRef list, LLVMValueRef index,
+ enum ac_descriptor_type type)
{
- struct gallivm_state *gallivm = &ctx->gallivm;
- LLVMBuilderRef builder = gallivm->builder;
+ LLVMBuilderRef builder = ctx->ac.builder;
switch (type) {
- case DESC_IMAGE:
+ case AC_DESC_IMAGE:
/* The image is at [0:7]. */
index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 2, 0), "");
break;
- case DESC_BUFFER:
+ case AC_DESC_BUFFER:
/* The buffer is in [4:7]. */
index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 4, 0), "");
index = LLVMBuildAdd(builder, index, ctx->i32_1, "");
list = LLVMBuildPointerCast(builder, list,
si_const_array(ctx->v4i32, 0), "");
break;
- case DESC_FMASK:
+ case AC_DESC_FMASK:
/* The FMASK is at [8:15]. */
index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 2, 0), "");
index = LLVMBuildAdd(builder, index, ctx->i32_1, "");
break;
- case DESC_SAMPLER:
+ case AC_DESC_SAMPLER:
/* The sampler state is at [12:15]. */
index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 4, 0), "");
index = LLVMBuildAdd(builder, index, LLVMConstInt(ctx->i32, 3, 0), "");
@@ -1160,7 +1134,7 @@
break;
}
- return ac_build_indexed_load_const(&ctx->ac, list, index);
+ return ac_build_load_to_sgpr(&ctx->ac, list, index);
}
/* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL.
@@ -1177,18 +1151,17 @@
static LLVMValueRef sici_fix_sampler_aniso(struct si_shader_context *ctx,
LLVMValueRef res, LLVMValueRef samp)
{
- LLVMBuilderRef builder = ctx->gallivm.builder;
LLVMValueRef img7, samp0;
if (ctx->screen->b.chip_class >= VI)
return samp;
- img7 = LLVMBuildExtractElement(builder, res,
+ img7 = LLVMBuildExtractElement(ctx->ac.builder, res,
LLVMConstInt(ctx->i32, 7, 0), "");
- samp0 = LLVMBuildExtractElement(builder, samp,
+ samp0 = LLVMBuildExtractElement(ctx->ac.builder, samp,
ctx->i32_0, "");
- samp0 = LLVMBuildAnd(builder, samp0, img7, "");
- return LLVMBuildInsertElement(builder, samp, samp0,
+ samp0 = LLVMBuildAnd(ctx->ac.builder, samp0, img7, "");
+ return LLVMBuildInsertElement(ctx->ac.builder, samp, samp0,
ctx->i32_0, "");
}
@@ -1213,7 +1186,7 @@
&reg->Indirect,
reg->Register.Index,
ctx->num_samplers);
- index = LLVMBuildAdd(ctx->gallivm.builder, index,
+ index = LLVMBuildAdd(ctx->ac.builder, index,
LLVMConstInt(ctx->i32, SI_NUM_IMAGES / 2, 0), "");
} else {
index = LLVMConstInt(ctx->i32,
@@ -1221,21 +1194,19 @@
}
if (reg->Register.File != TGSI_FILE_SAMPLER) {
- struct gallivm_state *gallivm = &ctx->gallivm;
- LLVMBuilderRef builder = gallivm->builder;
-
- LLVMValueRef ptr =
- lp_build_emit_fetch_src(bld_base, reg,
- TGSI_TYPE_UNSIGNED64, 0);
- list = LLVMBuildIntToPtr(builder, ptr,
- si_const_array(ctx->v8i32, 0), "");
- index = LLVMConstInt(ctx->i32, 0, 0);
+ /* Bindless descriptors are accessible from a different pair of
+ * user SGPR indices.
+ */
+ list = LLVMGetParam(ctx->main_fn,
+ ctx->param_bindless_samplers_and_images);
+ index = lp_build_emit_fetch_src(bld_base, reg,
+ TGSI_TYPE_UNSIGNED, 0);
}
if (target == TGSI_TEXTURE_BUFFER)
- *res_ptr = load_sampler_desc(ctx, list, index, DESC_BUFFER);
+ *res_ptr = si_load_sampler_desc(ctx, list, index, AC_DESC_BUFFER);
else
- *res_ptr = load_sampler_desc(ctx, list, index, DESC_IMAGE);
+ *res_ptr = si_load_sampler_desc(ctx, list, index, AC_DESC_IMAGE);
if (samp_ptr)
*samp_ptr = NULL;
@@ -1245,12 +1216,12 @@
if (target == TGSI_TEXTURE_2D_MSAA ||
target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
if (fmask_ptr)
- *fmask_ptr = load_sampler_desc(ctx, list, index,
- DESC_FMASK);
+ *fmask_ptr = si_load_sampler_desc(ctx, list, index,
+ AC_DESC_FMASK);
} else if (target != TGSI_TEXTURE_BUFFER) {
if (samp_ptr) {
- *samp_ptr = load_sampler_desc(ctx, list, index,
- DESC_SAMPLER);
+ *samp_ptr = si_load_sampler_desc(ctx, list, index,
+ AC_DESC_SAMPLER);
*samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr);
}
}
@@ -1308,7 +1279,6 @@
struct lp_build_emit_data *emit_data)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- struct gallivm_state *gallivm = &ctx->gallivm;
const struct tgsi_full_instruction *inst = emit_data->inst;
unsigned opcode = inst->Instruction.Opcode;
unsigned target = inst->Texture.Texture;
@@ -1336,7 +1306,7 @@
/* Fetch and project texture coordinates */
coords[3] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W);
- for (chan = 0; chan < 3; chan++ ) {
+ for (chan = 0; chan < 3; chan++) {
coords[chan] = lp_build_emit_fetch(bld_base,
emit_data->inst, 0,
chan);
@@ -1348,7 +1318,7 @@
}
if (opcode == TGSI_OPCODE_TXP)
- coords[3] = bld_base->base.one;
+ coords[3] = ctx->ac.f32_1;
/* Pack offsets. */
if (has_offset &&
@@ -1364,15 +1334,15 @@
for (chan = 0; chan < 3; chan++) {
offset[chan] = lp_build_emit_fetch_texoffset(bld_base,
emit_data->inst, 0, chan);
- offset[chan] = LLVMBuildAnd(gallivm->builder, offset[chan],
+ offset[chan] = LLVMBuildAnd(ctx->ac.builder, offset[chan],
LLVMConstInt(ctx->i32, 0x3f, 0), "");
if (chan)
- offset[chan] = LLVMBuildShl(gallivm->builder, offset[chan],
+ offset[chan] = LLVMBuildShl(ctx->ac.builder, offset[chan],
LLVMConstInt(ctx->i32, chan*8, 0), "");
}
- pack = LLVMBuildOr(gallivm->builder, offset[0], offset[1], "");
- pack = LLVMBuildOr(gallivm->builder, pack, offset[2], "");
+ pack = LLVMBuildOr(ctx->ac.builder, offset[0], offset[1], "");
+ pack = LLVMBuildOr(ctx->ac.builder, pack, offset[2], "");
address[count++] = pack;
}
@@ -1407,13 +1377,13 @@
if (ctx->screen->b.chip_class >= VI) {
LLVMValueRef upgraded;
LLVMValueRef clamped;
- upgraded = LLVMBuildExtractElement(gallivm->builder, samp_ptr,
+ upgraded = LLVMBuildExtractElement(ctx->ac.builder, samp_ptr,
LLVMConstInt(ctx->i32, 3, false), "");
- upgraded = LLVMBuildLShr(gallivm->builder, upgraded,
+ upgraded = LLVMBuildLShr(ctx->ac.builder, upgraded,
LLVMConstInt(ctx->i32, 29, false), "");
- upgraded = LLVMBuildTrunc(gallivm->builder, upgraded, ctx->i1, "");
+ upgraded = LLVMBuildTrunc(ctx->ac.builder, upgraded, ctx->i1, "");
clamped = ac_build_clamp(&ctx->ac, z);
- z = LLVMBuildSelect(gallivm->builder, upgraded, clamped, z, "");
+ z = LLVMBuildSelect(ctx->ac.builder, upgraded, clamped, z, "");
}
address[count++] = z;
@@ -1476,7 +1446,7 @@
for (chan = num_src_deriv_channels;
chan < num_dst_deriv_channels; chan++)
derivs[param * num_dst_deriv_channels + chan] =
- bld_base->base.zero;
+ ctx->ac.f32_0;
}
}
@@ -1544,10 +1514,8 @@
count = 16;
}
- for (chan = 0; chan < count; chan++ ) {
- address[chan] = LLVMBuildBitCast(gallivm->builder,
- address[chan], ctx->i32, "");
- }
+ for (chan = 0; chan < count; chan++)
+ address[chan] = ac_to_integer(&ctx->ac, address[chan]);
/* Adjust the sample index according to FMASK.
*
@@ -1589,39 +1557,39 @@
/* Apply the formula. */
LLVMValueRef fmask =
- LLVMBuildExtractElement(gallivm->builder,
+ LLVMBuildExtractElement(ctx->ac.builder,
txf_emit_data.output[0],
ctx->i32_0, "");
unsigned sample_chan = txf_count; /* the sample index is last */
LLVMValueRef sample_index4 =
- LLVMBuildMul(gallivm->builder, address[sample_chan], four, "");
+ LLVMBuildMul(ctx->ac.builder, address[sample_chan], four, "");
LLVMValueRef shifted_fmask =
- LLVMBuildLShr(gallivm->builder, fmask, sample_index4, "");
+ LLVMBuildLShr(ctx->ac.builder, fmask, sample_index4, "");
LLVMValueRef final_sample =
- LLVMBuildAnd(gallivm->builder, shifted_fmask, F, "");
+ LLVMBuildAnd(ctx->ac.builder, shifted_fmask, F, "");
/* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK
* resource descriptor is 0 (invalid),
*/
LLVMValueRef fmask_desc =
- LLVMBuildBitCast(gallivm->builder, fmask_ptr,
+ LLVMBuildBitCast(ctx->ac.builder, fmask_ptr,
ctx->v8i32, "");
LLVMValueRef fmask_word1 =
- LLVMBuildExtractElement(gallivm->builder, fmask_desc,
+ LLVMBuildExtractElement(ctx->ac.builder, fmask_desc,
ctx->i32_1, "");
LLVMValueRef word1_is_nonzero =
- LLVMBuildICmp(gallivm->builder, LLVMIntNE,
+ LLVMBuildICmp(ctx->ac.builder, LLVMIntNE,
fmask_word1, ctx->i32_0, "");
/* Replace the MSAA sample index. */
address[sample_chan] =
- LLVMBuildSelect(gallivm->builder, word1_is_nonzero,
+ LLVMBuildSelect(ctx->ac.builder, word1_is_nonzero,
final_sample, address[sample_chan], "");
}
@@ -1713,7 +1681,7 @@
unsigned target,
enum tgsi_return_type return_type)
{
- LLVMBuilderRef builder = ctx->gallivm.builder;
+ LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef wa_8888 = NULL;
LLVMValueRef coord = args->addr;
LLVMValueRef half_texel[2];
@@ -1813,9 +1781,9 @@
LLVMValueRef index = LLVMConstInt(ctx->i32, coord_vgpr_index + c, 0);
tmp = LLVMBuildExtractElement(builder, coord, index, "");
- tmp = LLVMBuildBitCast(builder, tmp, ctx->f32, "");
+ tmp = ac_to_float(&ctx->ac, tmp);
tmp = LLVMBuildFAdd(builder, tmp, half_texel[c], "");
- tmp = LLVMBuildBitCast(builder, tmp, ctx->i32, "");
+ tmp = ac_to_integer(&ctx->ac, tmp);
coord = LLVMBuildInsertElement(builder, coord, tmp, index, "");
}
@@ -1833,7 +1801,7 @@
enum tgsi_return_type return_type,
LLVMValueRef wa)
{
- LLVMBuilderRef builder = ctx->gallivm.builder;
+ LLVMBuilderRef builder = ctx->ac.builder;
assert(return_type == TGSI_RETURN_TYPE_SINT ||
return_type == TGSI_RETURN_TYPE_UINT);
@@ -1849,7 +1817,7 @@
wa_value = LLVMBuildFPToUI(builder, value, ctx->i32, "");
else
wa_value = LLVMBuildFPToSI(builder, value, ctx->i32, "");
- wa_value = LLVMBuildBitCast(builder, wa_value, ctx->f32, "");
+ wa_value = ac_to_float(&ctx->ac, wa_value);
value = LLVMBuildSelect(builder, wa, wa_value, value, "");
result = LLVMBuildInsertElement(builder, result, value, chanv, "");
@@ -1962,8 +1930,6 @@
struct lp_build_emit_data *emit_data)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- struct gallivm_state *gallivm = &ctx->gallivm;
- LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef res, samples;
LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL;
@@ -1971,15 +1937,14 @@
/* Read the samples from the descriptor directly. */
- res = LLVMBuildBitCast(builder, res_ptr, ctx->v8i32, "");
- samples = LLVMBuildExtractElement(
- builder, res,
- LLVMConstInt(ctx->i32, 3, 0), "");
- samples = LLVMBuildLShr(builder, samples,
+ res = LLVMBuildBitCast(ctx->ac.builder, res_ptr, ctx->v8i32, "");
+ samples = LLVMBuildExtractElement(ctx->ac.builder, res,
+ LLVMConstInt(ctx->i32, 3, 0), "");
+ samples = LLVMBuildLShr(ctx->ac.builder, samples,
LLVMConstInt(ctx->i32, 16, 0), "");
- samples = LLVMBuildAnd(builder, samples,
+ samples = LLVMBuildAnd(ctx->ac.builder, samples,
LLVMConstInt(ctx->i32, 0xf, 0), "");
- samples = LLVMBuildShl(builder, ctx->i32_1,
+ samples = LLVMBuildShl(ctx->ac.builder, ctx->i32_1,
samples, "");
emit_data->output[emit_data->chan] = samples;
diff -Nru mesa-17.2.4/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c mesa-17.3.3/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
--- mesa-17.2.4/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c 2018-01-18 21:30:28.000000000 +0000
@@ -165,20 +165,20 @@
LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
enum tgsi_opcode_type type)
{
- LLVMContextRef ctx = bld_base->base.gallivm->context;
+ struct si_shader_context *ctx = si_shader_context(bld_base);
switch (type) {
case TGSI_TYPE_UNSIGNED:
case TGSI_TYPE_SIGNED:
- return LLVMInt32TypeInContext(ctx);
+ return ctx->i32;
case TGSI_TYPE_UNSIGNED64:
case TGSI_TYPE_SIGNED64:
- return LLVMInt64TypeInContext(ctx);
+ return ctx->i64;
case TGSI_TYPE_DOUBLE:
- return LLVMDoubleTypeInContext(ctx);
+ return LLVMDoubleTypeInContext(ctx->ac.context);
case TGSI_TYPE_UNTYPED:
case TGSI_TYPE_FLOAT:
- return LLVMFloatTypeInContext(ctx);
+ return ctx->f32;
default: break;
}
return 0;
@@ -187,11 +187,11 @@
LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base,
enum tgsi_opcode_type type, LLVMValueRef value)
{
- LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+ struct si_shader_context *ctx = si_shader_context(bld_base);
LLVMTypeRef dst_type = tgsi2llvmtype(bld_base, type);
if (dst_type)
- return LLVMBuildBitCast(builder, value, dst_type, "");
+ return LLVMBuildBitCast(ctx->ac.builder, value, dst_type, "");
else
return value;
}
@@ -204,8 +204,7 @@
LLVMValueRef index,
unsigned num)
{
- struct gallivm_state *gallivm = &ctx->gallivm;
- LLVMBuilderRef builder = gallivm->builder;
+ LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef c_max = LLVMConstInt(ctx->i32, num - 1, 0);
LLVMValueRef cc;
@@ -271,16 +270,15 @@
unsigned swizzle_z,
unsigned swizzle_w)
{
+ struct si_shader_context *ctx = si_shader_context(bld_base);
LLVMValueRef swizzles[4];
- LLVMTypeRef i32t =
- LLVMInt32TypeInContext(bld_base->base.gallivm->context);
- swizzles[0] = LLVMConstInt(i32t, swizzle_x, 0);
- swizzles[1] = LLVMConstInt(i32t, swizzle_y, 0);
- swizzles[2] = LLVMConstInt(i32t, swizzle_z, 0);
- swizzles[3] = LLVMConstInt(i32t, swizzle_w, 0);
+ swizzles[0] = LLVMConstInt(ctx->i32, swizzle_x, 0);
+ swizzles[1] = LLVMConstInt(ctx->i32, swizzle_y, 0);
+ swizzles[2] = LLVMConstInt(ctx->i32, swizzle_z, 0);
+ swizzles[3] = LLVMConstInt(ctx->i32, swizzle_w, 0);
- return LLVMBuildShuffleVector(bld_base->base.gallivm->builder,
+ return LLVMBuildShuffleVector(ctx->ac.builder,
value,
LLVMGetUndef(LLVMTypeOf(value)),
LLVMConstVector(swizzles, 4), "");
@@ -331,20 +329,6 @@
return range;
}
-static LLVMValueRef
-emit_array_index(struct si_shader_context *ctx,
- const struct tgsi_ind_register *reg,
- unsigned offset)
-{
- struct gallivm_state *gallivm = &ctx->gallivm;
-
- if (!reg) {
- return LLVMConstInt(ctx->i32, offset, 0);
- }
- LLVMValueRef addr = LLVMBuildLoad(gallivm->builder, ctx->addrs[reg->Index][reg->Swizzle], "");
- return LLVMBuildAdd(gallivm->builder, addr, LLVMConstInt(ctx->i32, offset, 0), "");
-}
-
/**
* For indirect registers, construct a pointer directly to the requested
* element using getelementptr if possible.
@@ -361,8 +345,7 @@
{
unsigned array_id;
struct tgsi_array_info *array;
- struct gallivm_state *gallivm = &ctx->gallivm;
- LLVMBuilderRef builder = gallivm->builder;
+ LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef idxs[2];
LLVMValueRef index;
LLVMValueRef alloca;
@@ -383,8 +366,8 @@
if (!(array->writemask & (1 << swizzle)))
return ctx->undef_alloca;
- index = emit_array_index(ctx, reg_indirect,
- reg_index - ctx->temp_arrays[array_id - 1].range.First);
+ index = si_get_indirect_index(ctx, reg_indirect, 1,
+ reg_index - ctx->temp_arrays[array_id - 1].range.First);
/* Ensure that the index is within a valid range, to guard against
* VM faults and overwriting critical data (e.g. spilled resource
@@ -411,7 +394,7 @@
"");
idxs[0] = ctx->i32_0;
idxs[1] = index;
- return LLVMBuildGEP(builder, alloca, idxs, 2, "");
+ return LLVMBuildGEP(ctx->ac.builder, alloca, idxs, 2, "");
}
LLVMValueRef
@@ -420,19 +403,19 @@
LLVMValueRef ptr,
LLVMValueRef ptr2)
{
- LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+ struct si_shader_context *ctx = si_shader_context(bld_base);
LLVMValueRef result;
- result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2));
+ result = LLVMGetUndef(LLVMVectorType(ctx->i32, 2));
- result = LLVMBuildInsertElement(builder,
+ result = LLVMBuildInsertElement(ctx->ac.builder,
result,
- bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr),
- bld_base->int_bld.zero, "");
- result = LLVMBuildInsertElement(builder,
+ ac_to_integer(&ctx->ac, ptr),
+ ctx->i32_0, "");
+ result = LLVMBuildInsertElement(ctx->ac.builder,
result,
- bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr2),
- bld_base->int_bld.one, "");
+ ac_to_integer(&ctx->ac, ptr2),
+ ctx->i32_1, "");
return bitcast(bld_base, type, result);
}
@@ -443,9 +426,6 @@
unsigned swizzle)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
-
- LLVMBuilderRef builder = ctx->gallivm.builder;
-
unsigned i, size = range.Last - range.First + 1;
LLVMTypeRef vec = LLVMVectorType(tgsi2llvmtype(bld_base, type), size);
LLVMValueRef result = LLVMGetUndef(vec);
@@ -456,7 +436,7 @@
for (i = 0; i < size; ++i) {
tmp_reg.Register.Index = i + range.First;
LLVMValueRef temp = si_llvm_emit_fetch(bld_base, &tmp_reg, type, swizzle);
- result = LLVMBuildInsertElement(builder, result, temp,
+ result = LLVMBuildInsertElement(ctx->ac.builder, result, temp,
LLVMConstInt(ctx->i32, i, 0), "array_vector");
}
return result;
@@ -471,8 +451,7 @@
const struct tgsi_ind_register *reg_indirect)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- struct gallivm_state *gallivm = &ctx->gallivm;
- LLVMBuilderRef builder = gallivm->builder;
+ LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef ptr;
ptr = get_pointer_into_array(ctx, file, swizzle, reg_index, reg_indirect);
@@ -490,7 +469,7 @@
struct tgsi_declaration_range range =
get_array_range(bld_base, file, reg_index, reg_indirect);
LLVMValueRef index =
- emit_array_index(ctx, reg_indirect, reg_index - range.First);
+ si_get_indirect_index(ctx, reg_indirect, 1, reg_index - range.First);
LLVMValueRef array =
emit_array_fetch(bld_base, file, type, range, swizzle);
return LLVMBuildExtractElement(builder, array, index, "");
@@ -506,8 +485,7 @@
const struct tgsi_ind_register *reg_indirect)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- struct gallivm_state *gallivm = &ctx->gallivm;
- LLVMBuilderRef builder = gallivm->builder;
+ LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef ptr;
ptr = get_pointer_into_array(ctx, file, chan_index, reg_index, reg_indirect);
@@ -516,7 +494,7 @@
} else {
unsigned i, size;
struct tgsi_declaration_range range = get_array_range(bld_base, file, reg_index, reg_indirect);
- LLVMValueRef index = emit_array_index(ctx, reg_indirect, reg_index - range.First);
+ LLVMValueRef index = si_get_indirect_index(ctx, reg_indirect, 1, reg_index - range.First);
LLVMValueRef array =
emit_array_fetch(bld_base, file, TGSI_TYPE_FLOAT, range, chan_index);
LLVMValueRef temp_ptr;
@@ -575,7 +553,7 @@
unsigned swizzle)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- LLVMBuilderRef builder = ctx->gallivm.builder;
+ LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef result = NULL, ptr, ptr2;
if (swizzle == ~0) {
@@ -598,7 +576,7 @@
case TGSI_FILE_IMMEDIATE: {
LLVMTypeRef ctype = tgsi2llvmtype(bld_base, type);
if (tgsi_type_is_64bit(type)) {
- result = LLVMGetUndef(LLVMVectorType(ctx->i32, bld_base->base.type.length * 2));
+ result = LLVMGetUndef(LLVMVectorType(ctx->i32, 2));
result = LLVMConstInsertElement(result,
ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle],
ctx->i32_0);
@@ -673,7 +651,7 @@
unsigned swizzle)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- LLVMBuilderRef builder = ctx->gallivm.builder;
+ LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef cval = ctx->system_values[reg->Register.Index];
if (tgsi_type_is_64bit(type)) {
@@ -703,7 +681,7 @@
const struct tgsi_full_declaration *decl)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- LLVMBuilderRef builder = ctx->gallivm.builder;
+ LLVMBuilderRef builder = ctx->ac.builder;
unsigned first, last, i;
switch(decl->Declaration.File) {
case TGSI_FILE_ADDRESS:
@@ -842,7 +820,7 @@
{
unsigned idx;
for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
- ctx->load_system_value(ctx, idx, decl);
+ si_load_system_value(ctx, idx, decl);
}
}
break;
@@ -870,7 +848,7 @@
}
case TGSI_FILE_MEMORY:
- ctx->declare_memory_region(ctx, decl);
+ si_declare_compute_memory(ctx, decl);
break;
default:
@@ -881,16 +859,15 @@
void si_llvm_emit_store(struct lp_build_tgsi_context *bld_base,
const struct tgsi_full_instruction *inst,
const struct tgsi_opcode_info *info,
+ unsigned index,
LLVMValueRef dst[4])
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- struct gallivm_state *gallivm = &ctx->gallivm;
- const struct tgsi_full_dst_register *reg = &inst->Dst[0];
- LLVMBuilderRef builder = ctx->gallivm.builder;
+ const struct tgsi_full_dst_register *reg = &inst->Dst[index];
+ LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef temp_ptr, temp_ptr2 = NULL;
- unsigned chan, chan_index;
bool is_vec_store = false;
- enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
+ enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
if (dst[0]) {
LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0]));
@@ -899,16 +876,20 @@
if (is_vec_store) {
LLVMValueRef values[4] = {};
- TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan) {
+ uint32_t writemask = reg->Register.WriteMask;
+ while (writemask) {
+ unsigned chan = u_bit_scan(&writemask);
LLVMValueRef index = LLVMConstInt(ctx->i32, chan, 0);
- values[chan] = LLVMBuildExtractElement(gallivm->builder,
+ values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
dst[0], index, "");
}
- bld_base->emit_store(bld_base, inst, info, values);
+ bld_base->emit_store(bld_base, inst, info, index, values);
return;
}
- TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ uint32_t writemask = reg->Register.WriteMask;
+ while (writemask) {
+ unsigned chan_index = u_bit_scan(&writemask);
LLVMValueRef value = dst[chan_index];
if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
@@ -923,7 +904,7 @@
}
if (!tgsi_type_is_64bit(dtype))
- value = bitcast(bld_base, TGSI_TYPE_FLOAT, value);
+ value = ac_to_float(&ctx->ac, value);
if (reg->Register.Indirect) {
unsigned file = reg->Register.File;
@@ -963,8 +944,8 @@
val2 = LLVMBuildExtractElement(builder, ptr,
ctx->i32_1, "");
- LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, value), temp_ptr);
- LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, val2), temp_ptr2);
+ LLVMBuildStore(builder, ac_to_float(&ctx->ac, value), temp_ptr);
+ LLVMBuildStore(builder, ac_to_float(&ctx->ac, val2), temp_ptr2);
}
}
}
@@ -986,18 +967,16 @@
static LLVMBasicBlockRef append_basic_block(struct si_shader_context *ctx,
const char *name)
{
- struct gallivm_state *gallivm = &ctx->gallivm;
-
assert(ctx->flow_depth >= 1);
if (ctx->flow_depth >= 2) {
struct si_llvm_flow *flow = &ctx->flow[ctx->flow_depth - 2];
- return LLVMInsertBasicBlockInContext(gallivm->context,
+ return LLVMInsertBasicBlockInContext(ctx->ac.context,
flow->next_block, name);
}
- return LLVMAppendBasicBlockInContext(gallivm->context, ctx->main_fn, name);
+ return LLVMAppendBasicBlockInContext(ctx->ac.context, ctx->main_fn, name);
}
/* Emit a branch to the given default target for the current block if
@@ -1015,13 +994,12 @@
struct lp_build_emit_data *emit_data)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- struct gallivm_state *gallivm = &ctx->gallivm;
struct si_llvm_flow *flow = push_flow(ctx);
flow->loop_entry_block = append_basic_block(ctx, "LOOP");
flow->next_block = append_basic_block(ctx, "ENDLOOP");
set_basicblock_name(flow->loop_entry_block, "loop", bld_base->pc);
- LLVMBuildBr(gallivm->builder, flow->loop_entry_block);
- LLVMPositionBuilderAtEnd(gallivm->builder, flow->loop_entry_block);
+ LLVMBuildBr(ctx->ac.builder, flow->loop_entry_block);
+ LLVMPositionBuilderAtEnd(ctx->ac.builder, flow->loop_entry_block);
}
static void brk_emit(const struct lp_build_tgsi_action *action,
@@ -1029,10 +1007,9 @@
struct lp_build_emit_data *emit_data)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- struct gallivm_state *gallivm = &ctx->gallivm;
struct si_llvm_flow *flow = get_innermost_loop(ctx);
- LLVMBuildBr(gallivm->builder, flow->next_block);
+ LLVMBuildBr(ctx->ac.builder, flow->next_block);
}
static void cont_emit(const struct lp_build_tgsi_action *action,
@@ -1040,10 +1017,9 @@
struct lp_build_emit_data *emit_data)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- struct gallivm_state *gallivm = &ctx->gallivm;
struct si_llvm_flow *flow = get_innermost_loop(ctx);
- LLVMBuildBr(gallivm->builder, flow->loop_entry_block);
+ LLVMBuildBr(ctx->ac.builder, flow->loop_entry_block);
}
static void else_emit(const struct lp_build_tgsi_action *action,
@@ -1051,16 +1027,15 @@
struct lp_build_emit_data *emit_data)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- struct gallivm_state *gallivm = &ctx->gallivm;
struct si_llvm_flow *current_branch = get_current_flow(ctx);
LLVMBasicBlockRef endif_block;
assert(!current_branch->loop_entry_block);
endif_block = append_basic_block(ctx, "ENDIF");
- emit_default_branch(gallivm->builder, endif_block);
+ emit_default_branch(ctx->ac.builder, endif_block);
- LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->next_block);
+ LLVMPositionBuilderAtEnd(ctx->ac.builder, current_branch->next_block);
set_basicblock_name(current_branch->next_block, "else", bld_base->pc);
current_branch->next_block = endif_block;
@@ -1071,13 +1046,12 @@
struct lp_build_emit_data *emit_data)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- struct gallivm_state *gallivm = &ctx->gallivm;
struct si_llvm_flow *current_branch = get_current_flow(ctx);
assert(!current_branch->loop_entry_block);
- emit_default_branch(gallivm->builder, current_branch->next_block);
- LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->next_block);
+ emit_default_branch(ctx->ac.builder, current_branch->next_block);
+ LLVMPositionBuilderAtEnd(ctx->ac.builder, current_branch->next_block);
set_basicblock_name(current_branch->next_block, "endif", bld_base->pc);
ctx->flow_depth--;
@@ -1088,14 +1062,13 @@
struct lp_build_emit_data *emit_data)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- struct gallivm_state *gallivm = &ctx->gallivm;
struct si_llvm_flow *current_loop = get_current_flow(ctx);
assert(current_loop->loop_entry_block);
- emit_default_branch(gallivm->builder, current_loop->loop_entry_block);
+ emit_default_branch(ctx->ac.builder, current_loop->loop_entry_block);
- LLVMPositionBuilderAtEnd(gallivm->builder, current_loop->next_block);
+ LLVMPositionBuilderAtEnd(ctx->ac.builder, current_loop->next_block);
set_basicblock_name(current_loop->next_block, "endloop", bld_base->pc);
ctx->flow_depth--;
}
@@ -1106,27 +1079,26 @@
LLVMValueRef cond)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- struct gallivm_state *gallivm = &ctx->gallivm;
struct si_llvm_flow *flow = push_flow(ctx);
LLVMBasicBlockRef if_block;
if_block = append_basic_block(ctx, "IF");
flow->next_block = append_basic_block(ctx, "ELSE");
set_basicblock_name(if_block, "if", bld_base->pc);
- LLVMBuildCondBr(gallivm->builder, cond, if_block, flow->next_block);
- LLVMPositionBuilderAtEnd(gallivm->builder, if_block);
+ LLVMBuildCondBr(ctx->ac.builder, cond, if_block, flow->next_block);
+ LLVMPositionBuilderAtEnd(ctx->ac.builder, if_block);
}
static void if_emit(const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
- struct gallivm_state *gallivm = bld_base->base.gallivm;
+ struct si_shader_context *ctx = si_shader_context(bld_base);
LLVMValueRef cond;
- cond = LLVMBuildFCmp(gallivm->builder, LLVMRealUNE,
+ cond = LLVMBuildFCmp(ctx->ac.builder, LLVMRealUNE,
emit_data->args[0],
- bld_base->base.zero, "");
+ ctx->ac.f32_0, "");
if_cond_emit(action, bld_base, emit_data, cond);
}
@@ -1135,12 +1107,11 @@
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
- struct gallivm_state *gallivm = bld_base->base.gallivm;
+ struct si_shader_context *ctx = si_shader_context(bld_base);
LLVMValueRef cond;
- cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE,
- bitcast(bld_base, TGSI_TYPE_UNSIGNED, emit_data->args[0]),
- bld_base->int_bld.zero, "");
+ cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE,
+ ac_to_integer(&ctx->ac, emit_data->args[0]), ctx->i32_0, "");
if_cond_emit(action, bld_base, emit_data, cond);
}
@@ -1185,7 +1156,7 @@
LLVMDisposeTargetData(data_layout);
LLVMDisposeMessage(data_layout_str);
- bool unsafe_fpmath = (sscreen->b.debug_flags & DBG_UNSAFE_MATH) != 0;
+ bool unsafe_fpmath = (sscreen->b.debug_flags & DBG(UNSAFE_MATH)) != 0;
enum lp_float_mode float_mode =
unsafe_fpmath ? LP_FLOAT_MODE_UNSAFE_FP_MATH :
LP_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH;
@@ -1220,10 +1191,10 @@
bld_base->emit_immediate = emit_immediate;
/* metadata allowing 2.5 ULP */
- ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->gallivm.context,
+ ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->ac.context,
"fpmath", 6);
- LLVMValueRef arg = lp_build_const_float(&ctx->gallivm, 2.5);
- ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->gallivm.context,
+ LLVMValueRef arg = LLVMConstReal(ctx->ac.f32, 2.5);
+ ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->ac.context,
&arg, 1);
bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
@@ -1238,13 +1209,13 @@
si_shader_context_init_alu(&ctx->bld_base);
si_shader_context_init_mem(ctx);
- ctx->voidt = LLVMVoidTypeInContext(ctx->gallivm.context);
- ctx->i1 = LLVMInt1TypeInContext(ctx->gallivm.context);
- ctx->i8 = LLVMInt8TypeInContext(ctx->gallivm.context);
- ctx->i32 = LLVMInt32TypeInContext(ctx->gallivm.context);
- ctx->i64 = LLVMInt64TypeInContext(ctx->gallivm.context);
- ctx->i128 = LLVMIntTypeInContext(ctx->gallivm.context, 128);
- ctx->f32 = LLVMFloatTypeInContext(ctx->gallivm.context);
+ ctx->voidt = LLVMVoidTypeInContext(ctx->ac.context);
+ ctx->i1 = LLVMInt1TypeInContext(ctx->ac.context);
+ ctx->i8 = LLVMInt8TypeInContext(ctx->ac.context);
+ ctx->i32 = LLVMInt32TypeInContext(ctx->ac.context);
+ ctx->i64 = LLVMInt64TypeInContext(ctx->ac.context);
+ ctx->i128 = LLVMIntTypeInContext(ctx->ac.context, 128);
+ ctx->f32 = LLVMFloatTypeInContext(ctx->ac.context);
ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
@@ -1331,19 +1302,19 @@
unsigned real_shader_type;
if (num_return_elems)
- ret_type = LLVMStructTypeInContext(ctx->gallivm.context,
+ ret_type = LLVMStructTypeInContext(ctx->ac.context,
return_types,
num_return_elems, true);
else
- ret_type = LLVMVoidTypeInContext(ctx->gallivm.context);
+ ret_type = ctx->voidt;
/* Setup the function */
ctx->return_type = ret_type;
main_fn_type = LLVMFunctionType(ret_type, ParamTypes, ParamCount, 0);
ctx->main_fn = LLVMAddFunction(ctx->gallivm.module, name, main_fn_type);
- main_fn_body = LLVMAppendBasicBlockInContext(ctx->gallivm.context,
+ main_fn_body = LLVMAppendBasicBlockInContext(ctx->ac.context,
ctx->main_fn, "main_body");
- LLVMPositionBuilderAtEnd(ctx->gallivm.builder, main_fn_body);
+ LLVMPositionBuilderAtEnd(ctx->ac.builder, main_fn_body);
real_shader_type = ctx->type;
@@ -1387,8 +1358,8 @@
LLVMTargetLibraryInfoRef target_library_info;
/* Dump LLVM IR before any optimization passes */
- if (ctx->screen->b.debug_flags & DBG_PREOPT_IR &&
- r600_can_dump_shader(&ctx->screen->b, ctx->type))
+ if (ctx->screen->b.debug_flags & DBG(PREOPT_IR) &&
+ si_can_dump_shader(&ctx->screen->b, ctx->type))
LLVMDumpModule(ctx->gallivm.module);
/* Create the pass manager */
@@ -1397,7 +1368,7 @@
target_library_info = gallivm_create_target_library_info(triple);
LLVMAddTargetLibraryInfo(target_library_info, gallivm->passmgr);
- if (r600_extra_shader_checks(&ctx->screen->b, ctx->type))
+ if (si_extra_shader_checks(&ctx->screen->b, ctx->type))
LLVMAddVerifierPass(gallivm->passmgr);
LLVMAddAlwaysInlinerPass(gallivm->passmgr);
@@ -1419,7 +1390,7 @@
/* Run the pass */
LLVMRunPassManager(gallivm->passmgr, ctx->gallivm.module);
- LLVMDisposeBuilder(gallivm->builder);
+ LLVMDisposeBuilder(ctx->ac.builder);
LLVMDisposePassManager(gallivm->passmgr);
gallivm_dispose_target_library_info(target_library_info);
}
diff -Nru mesa-17.2.4/src/gallium/drivers/radeonsi/si_state_binning.c mesa-17.3.3/src/gallium/drivers/radeonsi/si_state_binning.c
--- mesa-17.2.4/src/gallium/drivers/radeonsi/si_state_binning.c 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeonsi/si_state_binning.c 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,448 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* This file handles register programming of primitive binning. */
+
+#include "si_pipe.h"
+#include "sid.h"
+#include "gfx9d.h"
+#include "radeon/r600_cs.h"
+
+struct uvec2 { /* Pair of unsigned values; used in this file to carry a bin size (x, y). */
+ unsigned x, y;
+};
+
+struct si_bin_size_map { /* One row of a bin-size lookup table. */
+ unsigned start; /* lowest "sum" this row applies to; UINT_MAX marks the end of a subtable */
+ unsigned bin_size_x; /* bin size in X for this row; 0 makes si_emit_dpbb_state disable binning */
+ unsigned bin_size_y; /* bin size in Y for this row; 0 makes si_emit_dpbb_state disable binning */
+};
+
+typedef struct si_bin_size_map si_bin_size_subtable[3][9]; /* [log2(shader engines)][row]; tables add an outer log2(RBs per SE) index */
+
+/* Look up the bin size for "sum": the row with start <= sum < next row's start. */
+static struct uvec2 si_find_bin_size(struct si_screen *sscreen,
+				     const si_bin_size_subtable table[],
+				     unsigned sum)
+{
+	/* Table indices: log2 (rounded up) of RBs per SE and of the SE count. */
+	unsigned se_count = sscreen->b.info.max_se;
+	unsigned rb_idx = util_logbase2_ceil(sscreen->b.info.num_render_backends /
+					     se_count);
+	unsigned se_idx = util_logbase2_ceil(se_count);
+
+	/* Scan the chip-specific rows; the UINT_MAX terminator row ends the scan. */
+	const struct si_bin_size_map *row = table[rb_idx][se_idx];
+
+	while (row->start != UINT_MAX) {
+		if (row[0].start <= sum && sum < row[1].start)
+			break;
+		row++;
+	}
+
+	struct uvec2 result = {row->bin_size_x, row->bin_size_y};
+	return result;
+}
+
+/* Return the bin size for the enabled color buffers: sum the bytes per element
+ * (surface.bpe) of every color buffer whose 4-bit group in cb_target_enabled_4bit
+ * is non-zero, scale by the sample count, and look the sum up in the table. */
+static struct uvec2 si_get_color_bin_size(struct si_context *sctx,
+					  unsigned cb_target_enabled_4bit)
+{
+	unsigned nr_samples = sctx->framebuffer.nr_samples;
+	unsigned sum = 0;
+
+	/* Compute the sum of all Bpp. */
+	for (unsigned i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
+		/* Unsigned mask: 0xf << 28 (i == 7) would overflow a signed int. */
+		if (!(cb_target_enabled_4bit & (0xfu << (i * 4))))
+			continue;
+
+		struct r600_texture *rtex =
+			(struct r600_texture*)sctx->framebuffer.state.cbufs[i]->texture;
+		sum += rtex->surface.bpe;
+	}
+
+	/* Multiply the sum by some function of the number of samples. */
+	if (nr_samples >= 2)
+		sum *= sctx->ps_iter_samples >= 2 ? nr_samples : 2;
+
+	static const si_bin_size_subtable table[] = {
+		{
+			/* One RB / SE */
+			{
+				/* One shader engine */
+				{ 0, 128, 128 },
+				{ 1, 64, 128 },
+				{ 2, 32, 128 },
+				{ 3, 16, 128 },
+				{ 17, 0, 0 },
+				{ UINT_MAX, 0, 0 },
+			},
+			{
+				/* Two shader engines */
+				{ 0, 128, 128 },
+				{ 2, 64, 128 },
+				{ 3, 32, 128 },
+				{ 5, 16, 128 },
+				{ 17, 0, 0 },
+				{ UINT_MAX, 0, 0 },
+			},
+			{
+				/* Four shader engines */
+				{ 0, 128, 128 },
+				{ 3, 64, 128 },
+				{ 5, 16, 128 },
+				{ 17, 0, 0 },
+				{ UINT_MAX, 0, 0 },
+			},
+		},
+		{
+			/* Two RB / SE */
+			{
+				/* One shader engine */
+				{ 0, 128, 128 },
+				{ 2, 64, 128 },
+				{ 3, 32, 128 },
+				{ 5, 16, 128 },
+				{ 33, 0, 0 },
+				{ UINT_MAX, 0, 0 },
+			},
+			{
+				/* Two shader engines */
+				{ 0, 128, 128 },
+				{ 3, 64, 128 },
+				{ 5, 32, 128 },
+				{ 9, 16, 128 },
+				{ 33, 0, 0 },
+				{ UINT_MAX, 0, 0 },
+			},
+			{
+				/* Four shader engines */
+				{ 0, 256, 256 },
+				{ 2, 128, 256 },
+				{ 3, 128, 128 },
+				{ 5, 64, 128 },
+				{ 9, 16, 128 },
+				{ 33, 0, 0 },
+				{ UINT_MAX, 0, 0 },
+			},
+		},
+		{
+			/* Four RB / SE */
+			{
+				/* One shader engine */
+				{ 0, 128, 256 },
+				{ 2, 128, 128 },
+				{ 3, 64, 128 },
+				{ 5, 32, 128 },
+				{ 9, 16, 128 },
+				{ 33, 0, 0 },
+				{ UINT_MAX, 0, 0 },
+			},
+			{
+				/* Two shader engines */
+				{ 0, 256, 256 },
+				{ 2, 128, 256 },
+				{ 3, 128, 128 },
+				{ 5, 64, 128 },
+				{ 9, 32, 128 },
+				{ 17, 16, 128 },
+				{ 33, 0, 0 },
+				{ UINT_MAX, 0, 0 },
+			},
+			{
+				/* Four shader engines */
+				{ 0, 256, 512 },
+				{ 2, 256, 256 },
+				{ 3, 128, 256 },
+				{ 5, 128, 128 },
+				{ 9, 64, 128 },
+				{ 17, 16, 128 },
+				{ 33, 0, 0 },
+				{ UINT_MAX, 0, 0 },
+			},
+		},
+	};
+
+	return si_find_bin_size(sctx->screen, table, sum);
+}
+
+static struct uvec2 si_get_depth_bin_size(struct si_context *sctx) /* bin size for the bound depth/stencil buffer and DSA state */
+{
+ struct si_state_dsa *dsa = sctx->queued.named.dsa; /* not NULL-checked in this function */
+
+ if (!sctx->framebuffer.state.zsbuf ||
+ (!dsa->depth_enabled && !dsa->stencil_enabled)) {
+ /* No depth/stencil buffer, or depth and stencil both disabled: return the max size. */
+ struct uvec2 size = {512, 512};
+ return size;
+ }
+
+ struct r600_texture *rtex =
+ (struct r600_texture*)sctx->framebuffer.state.zsbuf->texture;
+ unsigned depth_coeff = dsa->depth_enabled ? 5 : 0; /* depth contributes 5 when enabled */
+ unsigned stencil_coeff = rtex->surface.has_stencil &&
+ dsa->stencil_enabled ? 1 : 0; /* stencil contributes 1 when present and enabled */
+ unsigned sum = 4 * (depth_coeff + stencil_coeff) *
+ sctx->framebuffer.nr_samples; /* key looked up in the table below */
+
+ static const si_bin_size_subtable table[] = {
+ {
+ /* One RB / SE */
+ {
+ /* One shader engine */
+ { 0, 128, 256 },
+ { 2, 128, 128 },
+ { 4, 64, 128 },
+ { 7, 32, 128 },
+ { 13, 16, 128 },
+ { 49, 0, 0 },
+ { UINT_MAX, 0, 0 },
+ },
+ {
+ /* Two shader engines */
+ { 0, 256, 256 },
+ { 2, 128, 256 },
+ { 4, 128, 128 },
+ { 7, 64, 128 },
+ { 13, 32, 128 },
+ { 25, 16, 128 },
+ { 49, 0, 0 },
+ { UINT_MAX, 0, 0 },
+ },
+ {
+ /* Four shader engines */
+ { 0, 256, 512 },
+ { 2, 256, 256 },
+ { 4, 128, 256 },
+ { 7, 128, 128 },
+ { 13, 64, 128 },
+ { 25, 16, 128 },
+ { 49, 0, 0 },
+ { UINT_MAX, 0, 0 },
+ },
+ },
+ {
+ /* Two RB / SE */
+ {
+ /* One shader engine */
+ { 0, 256, 256 },
+ { 2, 128, 256 },
+ { 4, 128, 128 },
+ { 7, 64, 128 },
+ { 13, 32, 128 },
+ { 25, 16, 128 },
+ { 97, 0, 0 },
+ { UINT_MAX, 0, 0 },
+ },
+ {
+ /* Two shader engines */
+ { 0, 256, 512 },
+ { 2, 256, 256 },
+ { 4, 128, 256 },
+ { 7, 128, 128 },
+ { 13, 64, 128 },
+ { 25, 32, 128 },
+ { 49, 16, 128 },
+ { 97, 0, 0 },
+ { UINT_MAX, 0, 0 },
+ },
+ {
+ /* Four shader engines */
+ { 0, 512, 512 },
+ { 2, 256, 512 },
+ { 4, 256, 256 },
+ { 7, 128, 256 },
+ { 13, 128, 128 },
+ { 25, 64, 128 },
+ { 49, 16, 128 },
+ { 97, 0, 0 },
+ { UINT_MAX, 0, 0 },
+ },
+ },
+ {
+ /* Four RB / SE */
+ {
+ /* One shader engine */
+ { 0, 256, 512 },
+ { 2, 256, 256 },
+ { 4, 128, 256 },
+ { 7, 128, 128 },
+ { 13, 64, 128 },
+ { 25, 32, 128 },
+ { 49, 16, 128 },
+ { UINT_MAX, 0, 0 },
+ },
+ {
+ /* Two shader engines */
+ { 0, 512, 512 },
+ { 2, 256, 512 },
+ { 4, 256, 256 },
+ { 7, 128, 256 },
+ { 13, 128, 128 },
+ { 25, 64, 128 },
+ { 49, 32, 128 },
+ { 97, 16, 128 },
+ { UINT_MAX, 0, 0 },
+ },
+ {
+ /* Four shader engines */
+ { 0, 512, 512 },
+ { 4, 256, 512 },
+ { 7, 256, 256 },
+ { 13, 128, 256 },
+ { 25, 128, 128 },
+ { 49, 64, 128 },
+ { 97, 16, 128 },
+ { UINT_MAX, 0, 0 },
+ },
+ },
+ };
+
+ return si_find_bin_size(sctx->screen, table, sum); /* row selection is done by the shared helper */
+}
+
+/* Program the binner for legacy (non-binning) mode and force DFSM punchout off. */
+static void si_emit_dpbb_disable(struct si_context *sctx)
+{
+	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
+	unsigned binner_cntl = S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_LEGACY_SC) |
+			       S_028C44_DISABLE_START_OF_PRIM(1);
+
+	radeon_set_context_reg(cs, R_028C44_PA_SC_BINNER_CNTL_0, binner_cntl);
+	radeon_set_context_reg(cs, R_028060_DB_DFSM_CONTROL, S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF));
+}
+
+/* Emit the primitive binning (DPBB) and DFSM registers for the current state.
+ * Every early-out below programs legacy mode through si_emit_dpbb_disable().
+ * The "state" atom argument is not used by this function. */
+void si_emit_dpbb_state(struct si_context *sctx, struct r600_atom *state)
+{
+	struct si_screen *sscreen = sctx->screen;
+	struct si_state_blend *blend = sctx->queued.named.blend;
+	struct si_state_dsa *dsa = sctx->queued.named.dsa;
+	unsigned db_shader_control = sctx->ps_db_shader_control;
+
+	assert(sctx->b.chip_class >= GFX9);
+
+	if (!sscreen->dpbb_allowed || !blend || !dsa) {
+		si_emit_dpbb_disable(sctx);
+		return;
+	}
+
+	bool ps_can_kill = G_02880C_KILL_ENABLE(db_shader_control) ||
+			   G_02880C_MASK_EXPORT_ENABLE(db_shader_control) ||
+			   G_02880C_COVERAGE_TO_MASK_ENABLE(db_shader_control) ||
+			   blend->alpha_to_coverage;
+
+	/* This is ported from Vulkan, but it doesn't make much sense to me.
+	 * Maybe it's for RE-Z? But Vulkan doesn't use RE-Z. TODO: Clarify this. */
+	bool ps_can_reject_z_trivially =
+		!G_02880C_Z_EXPORT_ENABLE(db_shader_control) ||
+		G_02880C_CONSERVATIVE_Z_EXPORT(db_shader_control);
+
+	/* Disable binning if PS can kill trivially with DB writes.
+	 * Ported from Vulkan. (heuristic?) */
+	if (ps_can_kill &&
+	    ps_can_reject_z_trivially &&
+	    sctx->framebuffer.state.zsbuf &&
+	    dsa->db_can_write) {
+		si_emit_dpbb_disable(sctx);
+		return;
+	}
+
+	/* Compute the bin size: the smaller (by area) of the color and depth sizes.
+	 * TODO: We could also look at enabled pixel shader outputs. */
+	unsigned cb_target_enabled_4bit = sctx->framebuffer.colorbuf_enabled_4bit &
+					  blend->cb_target_enabled_4bit;
+	struct uvec2 color_bin_size = si_get_color_bin_size(sctx, cb_target_enabled_4bit);
+	struct uvec2 depth_bin_size = si_get_depth_bin_size(sctx);
+
+	unsigned color_area = color_bin_size.x * color_bin_size.y;
+	unsigned depth_area = depth_bin_size.x * depth_bin_size.y;
+
+	struct uvec2 bin_size = color_area < depth_area ? color_bin_size : depth_bin_size;
+
+	if (!bin_size.x || !bin_size.y) {
+		si_emit_dpbb_disable(sctx);
+		return;
+	}
+
+	/* Enable DFSM if it's preferred. */
+	unsigned punchout_mode = V_028060_FORCE_OFF;
+	bool disable_start_of_prim = true;
+
+	if (sscreen->dfsm_allowed &&
+	    cb_target_enabled_4bit &&
+	    !G_02880C_KILL_ENABLE(db_shader_control) &&
+	    /* These two also imply that DFSM is disabled when PS writes to memory. */
+	    !G_02880C_EXEC_ON_HIER_FAIL(db_shader_control) &&
+	    !G_02880C_EXEC_ON_NOOP(db_shader_control) &&
+	    G_02880C_Z_ORDER(db_shader_control) == V_02880C_EARLY_Z_THEN_LATE_Z) {
+		punchout_mode = V_028060_AUTO;
+		disable_start_of_prim = (cb_target_enabled_4bit & blend->blend_enable_4bit) != 0;
+	}
+
+	/* Tunable parameters. Also test with DFSM enabled/disabled. */
+	unsigned context_states_per_bin; /* allowed range: [0, 5] */
+	unsigned persistent_states_per_bin; /* allowed range: [0, 31] */
+	unsigned fpovs_per_batch; /* allowed range: [0, 255], 0 = unlimited */
+
+	switch (sctx->b.family) {
+	case CHIP_VEGA10:
+	case CHIP_RAVEN:
+		/* Tuned for Raven. Vega might need different values. */
+		context_states_per_bin = 5;
+		persistent_states_per_bin = 31;
+		fpovs_per_batch = 63;
+		break;
+	default:
+		/* No tuning for this chip. With NDEBUG the assert vanishes, so fall
+		 * back to legacy mode instead of emitting uninitialized values. */
+		assert(0);
+		si_emit_dpbb_disable(sctx);
+		return;
+	}
+
+	/* Emit registers. */
+	struct uvec2 bin_size_extend = {
+		bin_size.x >= 32 ? util_logbase2(bin_size.x) - 5 : 0,
+		bin_size.y >= 32 ? util_logbase2(bin_size.y) - 5 : 0,
+	};
+
+	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
+	radeon_set_context_reg(cs, R_028C44_PA_SC_BINNER_CNTL_0,
+		S_028C44_BINNING_MODE(V_028C44_BINNING_ALLOWED) |
+		S_028C44_BIN_SIZE_X(bin_size.x == 16) |
+		S_028C44_BIN_SIZE_Y(bin_size.y == 16) |
+		S_028C44_BIN_SIZE_X_EXTEND(bin_size_extend.x) |
+		S_028C44_BIN_SIZE_Y_EXTEND(bin_size_extend.y) |
+		S_028C44_CONTEXT_STATES_PER_BIN(context_states_per_bin) |
+		S_028C44_PERSISTENT_STATES_PER_BIN(persistent_states_per_bin) |
+		S_028C44_DISABLE_START_OF_PRIM(disable_start_of_prim) |
+		S_028C44_FPOVS_PER_BATCH(fpovs_per_batch) |
+		S_028C44_OPTIMAL_BIN_SELECTION(1));
+	radeon_set_context_reg(cs, R_028060_DB_DFSM_CONTROL, S_028060_PUNCHOUT_MODE(punchout_mode));
+}
diff -Nru mesa-17.2.4/src/gallium/drivers/radeonsi/si_state.c mesa-17.3.3/src/gallium/drivers/radeonsi/si_state.c
--- mesa-17.2.4/src/gallium/drivers/radeonsi/si_state.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeonsi/si_state.c 2018-01-18 21:30:28.000000000 +0000
@@ -115,7 +115,7 @@
/* GFX9: Flush DFSM when CB_TARGET_MASK changes.
* I think we don't have to do anything between IBs.
*/
- if (sctx->b.chip_class >= GFX9 &&
+ if (sctx->screen->dfsm_allowed &&
sctx->last_cb_target_mask != cb_target_mask) {
sctx->last_cb_target_mask = cb_target_mask;
@@ -209,10 +209,10 @@
break;
case V_028C70_COLOR_32:
- if (swap == V_0280A0_SWAP_STD &&
+ if (swap == V_028C70_SWAP_STD &&
spi_format == V_028714_SPI_SHADER_32_R)
sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4);
- else if (swap == V_0280A0_SWAP_ALT_REV &&
+ else if (swap == V_028C70_SWAP_ALT_REV &&
spi_format == V_028714_SPI_SHADER_32_AR)
sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_A << (i * 4);
break;
@@ -224,8 +224,8 @@
spi_format == V_028714_SPI_SHADER_SNORM16_ABGR ||
spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
- if (swap == V_0280A0_SWAP_STD ||
- swap == V_0280A0_SWAP_STD_REV)
+ if (swap == V_028C70_SWAP_STD ||
+ swap == V_028C70_SWAP_STD_REV)
sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_GR << (i * 4);
else
sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_AR << (i * 4);
@@ -377,6 +377,48 @@
}
}
+/* Set chanmask in blend->commutative_4bit if this blend equation is treated as
+ * commutative: dst factor ONE, a Dst-independent src factor, and a suitable func. */
+static void si_blend_check_commutativity(struct si_screen *sscreen,
+					 struct si_state_blend *blend,
+					 enum pipe_blend_func func,
+					 enum pipe_blendfactor src,
+					 enum pipe_blendfactor dst,
+					 unsigned chanmask)
+{
+	/* Source factors that do not depend on Dst. */
+	static const uint32_t dst_independent_src =
+		(1u << PIPE_BLENDFACTOR_ZERO) |
+		(1u << PIPE_BLENDFACTOR_ONE) |
+		(1u << PIPE_BLENDFACTOR_SRC_COLOR) |
+		(1u << PIPE_BLENDFACTOR_INV_SRC_COLOR) |
+		(1u << PIPE_BLENDFACTOR_SRC_ALPHA) |
+		(1u << PIPE_BLENDFACTOR_INV_SRC_ALPHA) |
+		(1u << PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) |
+		(1u << PIPE_BLENDFACTOR_CONST_COLOR) |
+		(1u << PIPE_BLENDFACTOR_INV_CONST_COLOR) |
+		(1u << PIPE_BLENDFACTOR_CONST_ALPHA) |
+		(1u << PIPE_BLENDFACTOR_INV_CONST_ALPHA) |
+		(1u << PIPE_BLENDFACTOR_SRC1_COLOR) |
+		(1u << PIPE_BLENDFACTOR_INV_SRC1_COLOR) |
+		(1u << PIPE_BLENDFACTOR_SRC1_ALPHA) |
+		(1u << PIPE_BLENDFACTOR_INV_SRC1_ALPHA);
+
+	if (dst != PIPE_BLENDFACTOR_ONE || !(dst_independent_src & (1u << src)))
+		return;
+
+	/* MIN and MAX are accepted unconditionally. Addition is commutative,
+	 * but floating point addition isn't associative, so reordering can
+	 * change rounding. Out-of-order is also non-deterministic, which
+	 * breaks OpenGL invariance requirements, so out-of-order additive
+	 * blending is only enabled if explicitly allowed by a setting
+	 * (sscreen->commutative_blend_add). */
+	bool func_is_commutative = func == PIPE_BLEND_MAX || func == PIPE_BLEND_MIN ||
+		(func == PIPE_BLEND_ADD && sscreen->commutative_blend_add);
+	if (func_is_commutative)
+		blend->commutative_4bit |= chanmask;
+}
+
/**
* Get rid of DST in the blend factors by commuting the operands:
* func(src * DST, dst * 0) ---> func(src * 0, dst * SRC)
@@ -423,6 +465,7 @@
blend->alpha_to_coverage = state->alpha_to_coverage;
blend->alpha_to_one = state->alpha_to_one;
blend->dual_src_blend = util_blend_state_is_dual(state, 0);
+ blend->logicop_enable = state->logicop_enable;
if (state->logicop_enable) {
color_control |= S_028808_ROP3(state->logicop_func | (state->logicop_func << 4));
@@ -441,6 +484,8 @@
blend->need_src_alpha_4bit |= 0xf;
blend->cb_target_mask = 0;
+ blend->cb_target_enabled_4bit = 0;
+
for (int i = 0; i < 8; i++) {
/* state->rt entries > 0 only written if independent blending */
const int j = state->independent_blend_enable ? i : 0;
@@ -482,12 +527,19 @@
/* cb_render_state will disable unused ones */
blend->cb_target_mask |= (unsigned)state->rt[j].colormask << (4 * i);
+ if (state->rt[j].colormask)
+ blend->cb_target_enabled_4bit |= 0xf << (4 * i);
if (!state->rt[j].colormask || !state->rt[j].blend_enable) {
si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
continue;
}
+ si_blend_check_commutativity(sctx->screen, blend,
+ eqRGB, srcRGB, dstRGB, 0x7 << (4 * i));
+ si_blend_check_commutativity(sctx->screen, blend,
+ eqA, srcA, dstA, 0x8 << (4 * i));
+
/* Blending optimizations for RB+.
* These transformations don't change the behavior.
*
@@ -619,6 +671,21 @@
old_blend->blend_enable_4bit != blend->blend_enable_4bit ||
old_blend->need_src_alpha_4bit != blend->need_src_alpha_4bit)
sctx->do_update_shaders = true;
+
+ if (sctx->screen->dpbb_allowed &&
+ (!old_blend ||
+ old_blend->alpha_to_coverage != blend->alpha_to_coverage ||
+ old_blend->blend_enable_4bit != blend->blend_enable_4bit ||
+ old_blend->cb_target_enabled_4bit != blend->cb_target_enabled_4bit))
+ si_mark_atom_dirty(sctx, &sctx->dpbb_state);
+
+ if (sctx->screen->has_out_of_order_rast &&
+ (!old_blend ||
+ (old_blend->blend_enable_4bit != blend->blend_enable_4bit ||
+ old_blend->cb_target_enabled_4bit != blend->cb_target_enabled_4bit ||
+ old_blend->commutative_4bit != blend->commutative_4bit ||
+ old_blend->logicop_enable != blend->logicop_enable)))
+ si_mark_atom_dirty(sctx, &sctx->msaa_config);
}
static void si_delete_blend_state(struct pipe_context *ctx, void *state)
@@ -631,8 +698,10 @@
const struct pipe_blend_color *state)
{
struct si_context *sctx = (struct si_context *)ctx;
+ static const struct pipe_blend_color zeros;
sctx->blend_color.state = *state;
+ sctx->blend_color.any_nonzeros = memcmp(state, &zeros, sizeof(*state)) != 0;
si_mark_atom_dirty(sctx, &sctx->blend_color.atom);
}
@@ -653,11 +722,13 @@
{
struct si_context *sctx = (struct si_context *)ctx;
struct pipe_constant_buffer cb;
+ static const struct pipe_clip_state zeros;
if (memcmp(&sctx->clip_state.state, state, sizeof(*state)) == 0)
return;
sctx->clip_state.state = *state;
+ sctx->clip_state.any_nonzeros = memcmp(state, &zeros, sizeof(*state)) != 0;
si_mark_atom_dirty(sctx, &sctx->clip_state.atom);
cb.buffer = NULL;
@@ -787,6 +858,7 @@
rs->line_stipple_enable = state->line_stipple_enable;
rs->poly_stipple_enable = state->poly_stipple_enable;
rs->line_smooth = state->line_smooth;
+ rs->line_width = state->line_width;
rs->poly_smooth = state->poly_smooth;
rs->uses_poly_offset = state->offset_point || state->offset_line ||
state->offset_tri;
@@ -826,6 +898,8 @@
psize_min = state->point_size;
psize_max = state->point_size;
}
+ rs->max_point_size = psize_max;
+
/* Divide by two, because 0.5 = 1 pixel. */
si_pm4_set_reg(pm4, R_028A04_PA_SU_POINT_MINMAX,
S_028A04_MIN_SIZE(si_pack_float_12p4(psize_min/2)) |
@@ -932,12 +1006,24 @@
sctx->current_vs_state &= C_VS_STATE_CLAMP_VERTEX_COLOR;
sctx->current_vs_state |= S_VS_STATE_CLAMP_VERTEX_COLOR(rs->clamp_vertex_color);
- r600_viewport_set_rast_deps(&sctx->b, rs->scissor_enable, rs->clip_halfz);
-
si_pm4_bind_state(sctx, rasterizer, rs);
si_update_poly_offset_state(sctx);
if (!old_rs ||
+ (old_rs->scissor_enable != rs->scissor_enable ||
+ old_rs->line_width != rs->line_width ||
+ old_rs->max_point_size != rs->max_point_size)) {
+ sctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
+ si_mark_atom_dirty(sctx, &sctx->scissors.atom);
+ }
+
+ if (!old_rs ||
+ old_rs->clip_halfz != rs->clip_halfz) {
+ sctx->viewports.depth_range_dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
+ si_mark_atom_dirty(sctx, &sctx->viewports.atom);
+ }
+
+ if (!old_rs ||
old_rs->clip_plane_enable != rs->clip_plane_enable ||
old_rs->pa_cl_clip_cntl != rs->pa_cl_clip_cntl)
si_mark_atom_dirty(sctx, &sctx->clip_regs);
@@ -1036,9 +1122,42 @@
return 0;
}
+/* True if this stencil side is enabled, has a non-zero writemask and a non-KEEP op. */
+static bool si_dsa_writes_stencil(const struct pipe_stencil_state *s)
+{
+	bool all_keep = s->fail_op == PIPE_STENCIL_OP_KEEP && s->zfail_op == PIPE_STENCIL_OP_KEEP &&
+			s->zpass_op == PIPE_STENCIL_OP_KEEP;
+	return s->enabled && s->writemask && !all_keep;
+}
+
+/* False for INCR, DECR and REPLACE. REPLACE is normally order invariant, except
+ * when the fragment shader writes the stencil reference value; that interaction
+ * is not tracked, so it is conservatively treated as order dependent. */
+static bool si_order_invariant_stencil_op(enum pipe_stencil_op op)
+{
+	bool order_dependent = op == PIPE_STENCIL_OP_INCR || op == PIPE_STENCIL_OP_DECR ||
+			       op == PIPE_STENCIL_OP_REPLACE;
+	return !order_dependent;
+}
+
+/* Tell whether this stencil state is order invariant, assuming Z writes are
+ * disabled: neither the set of passing fragments nor the final stencil buffer
+ * contents may depend on the order in which fragments are processed. */
+static bool si_order_invariant_stencil_state(const struct pipe_stencil_state *state)
+{
+	if (!state->enabled || !state->writemask)
+		return true;
+	if (state->func == PIPE_FUNC_ALWAYS) /* assumes that Z writes are disabled */
+		return si_order_invariant_stencil_op(state->zpass_op) &&
+		       si_order_invariant_stencil_op(state->zfail_op);
+	return state->func == PIPE_FUNC_NEVER &&
+	       si_order_invariant_stencil_op(state->fail_op);
+}
+
static void *si_create_dsa_state(struct pipe_context *ctx,
const struct pipe_depth_stencil_alpha_state *state)
{
+ struct si_context *sctx = (struct si_context *)ctx;
struct si_state_dsa *dsa = CALLOC_STRUCT(si_state_dsa);
struct si_pm4_state *pm4 = &dsa->pm4;
unsigned db_depth_control;
@@ -1093,6 +1212,52 @@
si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, fui(state->depth.bounds_max));
}
+ dsa->depth_enabled = state->depth.enabled;
+ dsa->depth_write_enabled = state->depth.enabled &&
+ state->depth.writemask;
+ dsa->stencil_enabled = state->stencil[0].enabled;
+ dsa->stencil_write_enabled = state->stencil[0].enabled &&
+ (si_dsa_writes_stencil(&state->stencil[0]) ||
+ si_dsa_writes_stencil(&state->stencil[1]));
+ dsa->db_can_write = dsa->depth_write_enabled ||
+ dsa->stencil_write_enabled;
+
+ bool zfunc_is_ordered =
+ state->depth.func == PIPE_FUNC_NEVER ||
+ state->depth.func == PIPE_FUNC_LESS ||
+ state->depth.func == PIPE_FUNC_LEQUAL ||
+ state->depth.func == PIPE_FUNC_GREATER ||
+ state->depth.func == PIPE_FUNC_GEQUAL;
+
+ bool nozwrite_and_order_invariant_stencil =
+ !dsa->db_can_write ||
+ (!dsa->depth_write_enabled &&
+ si_order_invariant_stencil_state(&state->stencil[0]) &&
+ si_order_invariant_stencil_state(&state->stencil[1]));
+
+ dsa->order_invariance[1].zs =
+ nozwrite_and_order_invariant_stencil ||
+ (!dsa->stencil_write_enabled && zfunc_is_ordered);
+ dsa->order_invariance[0].zs = !dsa->depth_write_enabled || zfunc_is_ordered;
+
+ dsa->order_invariance[1].pass_set =
+ nozwrite_and_order_invariant_stencil ||
+ (!dsa->stencil_write_enabled &&
+ (state->depth.func == PIPE_FUNC_ALWAYS ||
+ state->depth.func == PIPE_FUNC_NEVER));
+ dsa->order_invariance[0].pass_set =
+ !dsa->depth_write_enabled ||
+ (state->depth.func == PIPE_FUNC_ALWAYS ||
+ state->depth.func == PIPE_FUNC_NEVER);
+
+ dsa->order_invariance[1].pass_last =
+ sctx->screen->assume_no_z_fights &&
+ !dsa->stencil_write_enabled &&
+ dsa->depth_write_enabled && zfunc_is_ordered;
+ dsa->order_invariance[0].pass_last =
+ sctx->screen->assume_no_z_fights &&
+ dsa->depth_write_enabled && zfunc_is_ordered;
+
return dsa;
}
@@ -1115,6 +1280,19 @@
if (!old_dsa || old_dsa->alpha_func != dsa->alpha_func)
sctx->do_update_shaders = true;
+
+ if (sctx->screen->dpbb_allowed &&
+ (!old_dsa ||
+ (old_dsa->depth_enabled != dsa->depth_enabled ||
+ old_dsa->stencil_enabled != dsa->stencil_enabled ||
+ old_dsa->db_can_write != dsa->db_can_write)))
+ si_mark_atom_dirty(sctx, &sctx->dpbb_state);
+
+ if (sctx->screen->has_out_of_order_rast &&
+ (!old_dsa ||
+ memcmp(old_dsa->order_invariance, dsa->order_invariance,
+ sizeof(old_dsa->order_invariance))))
+ si_mark_atom_dirty(sctx, &sctx->msaa_config);
}
static void si_delete_dsa_state(struct pipe_context *ctx, void *state)
@@ -1152,11 +1330,18 @@
}
}
-static void si_set_occlusion_query_state(struct pipe_context *ctx, bool enable)
+static void si_set_occlusion_query_state(struct pipe_context *ctx,
+ bool old_enable,
+ bool old_perfect_enable)
{
struct si_context *sctx = (struct si_context*)ctx;
si_mark_atom_dirty(sctx, &sctx->db_render_state);
+
+ bool perfect_enable = sctx->b.num_perfect_occlusion_queries != 0;
+
+ if (perfect_enable != old_perfect_enable)
+ si_mark_atom_dirty(sctx, &sctx->msaa_config);
}
static void si_save_qbo_state(struct pipe_context *ctx, struct r600_qbo_state *st)
@@ -1253,6 +1438,8 @@
static uint32_t si_translate_colorformat(enum pipe_format format)
{
const struct util_format_description *desc = util_format_description(format);
+ if (!desc)
+ return V_028C70_COLOR_INVALID;
#define HAS_SIZE(x,y,z,w) \
(desc->channel[0].size == (x) && desc->channel[1].size == (y) && \
@@ -1319,6 +1506,8 @@
}
} else if (HAS_SIZE(5,5,5,1)) {
return V_028C70_COLOR_1_5_5_5;
+ } else if (HAS_SIZE(1,5,5,5)) {
+ return V_028C70_COLOR_5_5_5_1;
} else if (HAS_SIZE(10,10,10,2)) {
return V_028C70_COLOR_2_10_10_10;
}
@@ -1525,10 +1714,6 @@
if (!enable_compressed_formats)
goto out_unknown;
- if (!util_format_s3tc_enabled) {
- goto out_unknown;
- }
-
switch (format) {
case PIPE_FORMAT_DXT1_RGB:
case PIPE_FORMAT_DXT1_RGBA:
@@ -1581,6 +1766,12 @@
desc->channel[3].size == 1) {
return V_008F14_IMG_DATA_FORMAT_1_5_5_5;
}
+ if (desc->channel[0].size == 1 &&
+ desc->channel[1].size == 5 &&
+ desc->channel[2].size == 5 &&
+ desc->channel[3].size == 5) {
+ return V_008F14_IMG_DATA_FORMAT_5_5_5_1;
+ }
if (desc->channel[0].size == 10 &&
desc->channel[1].size == 10 &&
desc->channel[2].size == 10 &&
@@ -1757,7 +1948,11 @@
static bool si_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format)
{
- return si_translate_texformat(screen, format, util_format_description(format),
+ const struct util_format_description *desc = util_format_description(format);
+ if (!desc)
+ return false;
+
+ return si_translate_texformat(screen, format, desc,
util_format_get_first_non_void_channel(format)) != ~0U;
}
@@ -1886,6 +2081,8 @@
PIPE_BIND_VERTEX_BUFFER)) == 0);
desc = util_format_description(format);
+ if (!desc)
+ return 0;
/* There are no native 8_8_8 or 16_16_16 data formats, and we currently
* select 8_8_8_8 and 16_16_16_16 instead. This works reasonably well
@@ -1915,7 +2112,7 @@
static bool si_is_colorbuffer_format_supported(enum pipe_format format)
{
return si_translate_colorformat(format) != V_028C70_COLOR_INVALID &&
- r600_translate_colorswap(format, false) != ~0U;
+ si_translate_colorswap(format, false) != ~0U;
}
static bool si_is_zs_format_supported(enum pipe_format format)
@@ -2136,36 +2333,36 @@
unsigned color_info, color_attrib, color_view;
unsigned format, swap, ntype, endian;
const struct util_format_description *desc;
- int i;
+ int firstchan;
unsigned blend_clamp = 0, blend_bypass = 0;
color_view = S_028C6C_SLICE_START(surf->base.u.tex.first_layer) |
S_028C6C_SLICE_MAX(surf->base.u.tex.last_layer);
desc = util_format_description(surf->base.format);
- for (i = 0; i < 4; i++) {
- if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
+ for (firstchan = 0; firstchan < 4; firstchan++) {
+ if (desc->channel[firstchan].type != UTIL_FORMAT_TYPE_VOID) {
break;
}
}
- if (i == 4 || desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) {
+ if (firstchan == 4 || desc->channel[firstchan].type == UTIL_FORMAT_TYPE_FLOAT) {
ntype = V_028C70_NUMBER_FLOAT;
} else {
ntype = V_028C70_NUMBER_UNORM;
if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
ntype = V_028C70_NUMBER_SRGB;
- else if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
- if (desc->channel[i].pure_integer) {
+ else if (desc->channel[firstchan].type == UTIL_FORMAT_TYPE_SIGNED) {
+ if (desc->channel[firstchan].pure_integer) {
ntype = V_028C70_NUMBER_SINT;
} else {
- assert(desc->channel[i].normalized);
+ assert(desc->channel[firstchan].normalized);
ntype = V_028C70_NUMBER_SNORM;
}
- } else if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
- if (desc->channel[i].pure_integer) {
+ } else if (desc->channel[firstchan].type == UTIL_FORMAT_TYPE_UNSIGNED) {
+ if (desc->channel[firstchan].pure_integer) {
ntype = V_028C70_NUMBER_UINT;
} else {
- assert(desc->channel[i].normalized);
+ assert(desc->channel[firstchan].normalized);
ntype = V_028C70_NUMBER_UNORM;
}
}
@@ -2176,7 +2373,7 @@
R600_ERR("Invalid CB format: %d, disabling CB.\n", surf->base.format);
}
assert(format != V_028C70_COLOR_INVALID);
- swap = r600_translate_colorswap(surf->base.format, false);
+ swap = si_translate_colorswap(surf->base.format, false);
endian = si_colorformat_endian_swap(format);
/* blend clamp should be set for all NORM/SRGB types */
@@ -2288,7 +2485,7 @@
uint32_t z_info, s_info;
format = si_translate_dbformat(rtex->db_render_format);
- stencil_format = rtex->surface.flags & RADEON_SURF_SBUFFER ?
+ stencil_format = rtex->surface.has_stencil ?
V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;
assert(format != V_028040_Z_INVALID);
@@ -2317,8 +2514,7 @@
surf->db_depth_size = S_02801C_X_MAX(rtex->resource.b.b.width0 - 1) |
S_02801C_Y_MAX(rtex->resource.b.b.height0 - 1);
- /* Only use HTILE for the first level. */
- if (rtex->htile_offset && !level) {
+ if (r600_htile_enabled(rtex, level)) {
z_info |= S_028038_TILE_SURFACE_ENABLE(1) |
S_028038_ALLOW_EXPCLEAR(1);
@@ -2334,7 +2530,7 @@
s_info |= S_02803C_ITERATE_FLUSH(1);
}
- if (rtex->surface.flags & RADEON_SURF_SBUFFER) {
+ if (rtex->surface.has_stencil) {
/* Stencil buffer workaround ported from the SI-CI-VI code.
* See that for explanation.
*/
@@ -2396,12 +2592,11 @@
surf->db_depth_slice = S_02805C_SLICE_TILE_MAX((levelinfo->nblk_x *
levelinfo->nblk_y) / 64 - 1);
- /* Only use HTILE for the first level. */
- if (rtex->htile_offset && !level) {
+ if (r600_htile_enabled(rtex, level)) {
z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
S_028040_ALLOW_EXPCLEAR(1);
- if (rtex->surface.flags & RADEON_SURF_SBUFFER) {
+ if (rtex->surface.has_stencil) {
/* Workaround: For a not yet understood reason, the
* combination of MSAA, fast stencil clear and stencil
* decompress messes with subsequent stencil buffer
@@ -2457,24 +2652,20 @@
rtex->dirty_level_mask |= 1 << surf->u.tex.level;
- if (rtex->surface.flags & RADEON_SURF_SBUFFER)
+ if (rtex->surface.has_stencil)
rtex->stencil_dirty_level_mask |= 1 << surf->u.tex.level;
}
- if (sctx->framebuffer.compressed_cb_mask) {
- struct pipe_surface *surf;
- struct r600_texture *rtex;
- unsigned mask = sctx->framebuffer.compressed_cb_mask;
- do {
- unsigned i = u_bit_scan(&mask);
- surf = sctx->framebuffer.state.cbufs[i];
- rtex = (struct r600_texture*)surf->texture;
-
- if (rtex->fmask.size)
- rtex->dirty_level_mask |= 1 << surf->u.tex.level;
- if (rtex->dcc_gather_statistics)
- rtex->separate_dcc_dirty = true;
- } while (mask);
+ unsigned compressed_cb_mask = sctx->framebuffer.compressed_cb_mask;
+ while (compressed_cb_mask) {
+ unsigned i = u_bit_scan(&compressed_cb_mask);
+ struct pipe_surface *surf = sctx->framebuffer.state.cbufs[i];
+ struct r600_texture *rtex = (struct r600_texture*)surf->texture;
+
+ if (rtex->fmask.size)
+ rtex->dirty_level_mask |= 1 << surf->u.tex.level;
+ if (rtex->dcc_gather_statistics)
+ rtex->separate_dcc_dirty = true;
}
}
@@ -2502,6 +2693,11 @@
struct r600_texture *rtex;
bool old_any_dst_linear = sctx->framebuffer.any_dst_linear;
unsigned old_nr_samples = sctx->framebuffer.nr_samples;
+ unsigned old_colorbuf_enabled_4bit = sctx->framebuffer.colorbuf_enabled_4bit;
+ bool old_has_zsbuf = !!sctx->framebuffer.state.zsbuf;
+ bool old_has_stencil =
+ old_has_zsbuf &&
+ ((struct r600_texture*)sctx->framebuffer.state.zsbuf->texture)->surface.has_stencil;
bool unbound = false;
int i;
@@ -2538,7 +2734,7 @@
}
if (vi_dcc_enabled(rtex, surf->base.u.tex.level))
- if (!r600_texture_disable_dcc(&sctx->b, rtex))
+ if (!si_texture_disable_dcc(&sctx->b, rtex))
sctx->b.decompress_dcc(ctx, rtex);
surf->dcc_incompatible = false;
@@ -2562,11 +2758,10 @@
* Only flush and wait for CB if there is actually a bound color buffer.
*/
if (sctx->framebuffer.nr_samples <= 1 &&
- sctx->framebuffer.state.nr_cbufs) {
- sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
- SI_CONTEXT_INV_GLOBAL_L2 |
- SI_CONTEXT_FLUSH_AND_INV_CB;
- }
+ sctx->framebuffer.state.nr_cbufs)
+ si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples,
+ sctx->framebuffer.CB_has_shader_readable_metadata);
+
sctx->b.flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;
/* u_blitter doesn't invoke depth decompression when it does multiple
@@ -2576,9 +2771,8 @@
* Note that lower mipmap levels aren't compressed.
*/
if (sctx->generate_mipmap_for_depth) {
- sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
- SI_CONTEXT_INV_GLOBAL_L2 |
- SI_CONTEXT_FLUSH_AND_INV_DB;
+ si_make_DB_shader_coherent(sctx, 1, false,
+ sctx->framebuffer.DB_has_shader_readable_metadata);
} else if (sctx->b.chip_class == GFX9) {
/* It appears that DB metadata "leaks" in a sequence of:
* - depth clear
@@ -2611,6 +2805,8 @@
sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state);
sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples);
sctx->framebuffer.any_dst_linear = false;
+ sctx->framebuffer.CB_has_shader_readable_metadata = false;
+ sctx->framebuffer.DB_has_shader_readable_metadata = false;
for (i = 0; i < state->nr_cbufs; i++) {
if (!state->cbufs[i])
@@ -2645,6 +2841,9 @@
if (rtex->surface.is_linear)
sctx->framebuffer.any_dst_linear = true;
+ if (vi_dcc_enabled(rtex, surf->base.u.tex.level))
+ sctx->framebuffer.CB_has_shader_readable_metadata = true;
+
r600_context_add_resource_size(ctx, surf->base.texture);
p_atomic_inc(&rtex->framebuffers_bound);
@@ -2656,13 +2855,19 @@
}
}
+ struct r600_texture *zstex = NULL;
+
if (state->zsbuf) {
surf = (struct r600_surface*)state->zsbuf;
- rtex = (struct r600_texture*)surf->base.texture;
+ zstex = (struct r600_texture*)surf->base.texture;
if (!surf->depth_initialized) {
si_init_depth_surface(sctx, surf);
}
+
+ if (vi_tc_compat_htile_enabled(zstex, surf->base.u.tex.level))
+ sctx->framebuffer.DB_has_shader_readable_metadata = true;
+
r600_context_add_resource_size(ctx, surf->base.texture);
}
@@ -2670,9 +2875,18 @@
si_mark_atom_dirty(sctx, &sctx->cb_render_state);
si_mark_atom_dirty(sctx, &sctx->framebuffer.atom);
+ if (sctx->screen->dpbb_allowed)
+ si_mark_atom_dirty(sctx, &sctx->dpbb_state);
+
if (sctx->framebuffer.any_dst_linear != old_any_dst_linear)
si_mark_atom_dirty(sctx, &sctx->msaa_config);
+ if (sctx->screen->has_out_of_order_rast &&
+ (sctx->framebuffer.colorbuf_enabled_4bit != old_colorbuf_enabled_4bit ||
+ !!sctx->framebuffer.state.zsbuf != old_has_zsbuf ||
+ (zstex && zstex->surface.has_stencil != old_has_stencil)))
+ si_mark_atom_dirty(sctx, &sctx->msaa_config);
+
if (sctx->framebuffer.nr_samples != old_nr_samples) {
si_mark_atom_dirty(sctx, &sctx->msaa_config);
si_mark_atom_dirty(sctx, &sctx->db_render_state);
@@ -2680,19 +2894,19 @@
/* Set sample locations as fragment shader constants. */
switch (sctx->framebuffer.nr_samples) {
case 1:
- constbuf.user_buffer = sctx->b.sample_locations_1x;
+ constbuf.user_buffer = sctx->sample_locations_1x;
break;
case 2:
- constbuf.user_buffer = sctx->b.sample_locations_2x;
+ constbuf.user_buffer = sctx->sample_locations_2x;
break;
case 4:
- constbuf.user_buffer = sctx->b.sample_locations_4x;
+ constbuf.user_buffer = sctx->sample_locations_4x;
break;
case 8:
- constbuf.user_buffer = sctx->b.sample_locations_8x;
+ constbuf.user_buffer = sctx->sample_locations_8x;
break;
case 16:
- constbuf.user_buffer = sctx->b.sample_locations_16x;
+ constbuf.user_buffer = sctx->sample_locations_16x;
break;
default:
R600_ERR("Requested an invalid number of samples %i.\n",
@@ -2760,13 +2974,15 @@
/* Compute mutable surface parameters. */
cb_color_base = tex->resource.gpu_address >> 8;
- cb_color_fmask = cb_color_base;
+ cb_color_fmask = 0;
cb_dcc_base = 0;
cb_color_info = cb->cb_color_info | tex->cb_color_info;
cb_color_attrib = cb->cb_color_attrib;
- if (tex->fmask.size)
+ if (tex->fmask.size) {
cb_color_fmask = (tex->resource.gpu_address + tex->fmask.offset) >> 8;
+ cb_color_fmask |= tex->fmask.tile_swizzle;
+ }
/* Set up DCC. */
if (vi_dcc_enabled(tex, cb->base.u.tex.level)) {
@@ -2780,6 +2996,7 @@
cb_dcc_base = ((!tex->dcc_separate_buffer ? tex->resource.gpu_address : 0) +
tex->dcc_offset) >> 8;
+ cb_dcc_base |= tex->surface.tile_swizzle;
}
if (sctx->b.chip_class >= GFX9) {
@@ -2792,6 +3009,9 @@
/* Set mutable surface parameters. */
cb_color_base += tex->surface.u.gfx9.surf_offset >> 8;
+ cb_color_base |= tex->surface.tile_swizzle;
+ if (!tex->fmask.size)
+ cb_color_fmask = cb_color_base;
cb_color_attrib |= S_028C74_COLOR_SW_MODE(tex->surface.u.gfx9.surf.swizzle_mode) |
S_028C74_FMASK_SW_MODE(tex->surface.u.gfx9.fmask.swizzle_mode) |
S_028C74_RB_ALIGNED(meta.rb_aligned) |
@@ -2824,6 +3044,12 @@
unsigned cb_color_pitch, cb_color_slice, cb_color_fmask_slice;
cb_color_base += level_info->offset >> 8;
+ /* Only macrotiled modes can set tile swizzle. */
+ if (level_info->mode == RADEON_SURF_MODE_2D)
+ cb_color_base |= tex->surface.tile_swizzle;
+
+ if (!tex->fmask.size)
+ cb_color_fmask = cb_color_base;
if (cb_dcc_base)
cb_dcc_base += level_info->dcc_offset >> 8;
@@ -2943,7 +3169,7 @@
radeon_set_context_reg(cs, R_028208_PA_SC_WINDOW_SCISSOR_BR,
S_028208_BR_X(state->width) | S_028208_BR_Y(state->height));
- if (sctx->b.chip_class >= GFX9) {
+ if (sctx->screen->dfsm_allowed) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));
}
@@ -2971,10 +3197,9 @@
if (has_msaa_sample_loc_bug)
nr_samples = MAX2(nr_samples, 1);
- if (nr_samples >= 1 &&
- (nr_samples != sctx->msaa_sample_locs.nr_samples)) {
+ if (nr_samples != sctx->msaa_sample_locs.nr_samples) {
sctx->msaa_sample_locs.nr_samples = nr_samples;
- cayman_emit_msaa_sample_locs(cs, nr_samples);
+ si_emit_sample_locations(cs, nr_samples);
}
if (sctx->b.family >= CHIP_POLARIS10) {
@@ -2998,16 +3223,86 @@
}
}
+static bool si_out_of_order_rasterization(struct si_context *sctx)
+{
+ struct si_state_blend *blend = sctx->queued.named.blend;
+ struct si_state_dsa *dsa = sctx->queued.named.dsa;
+
+ if (!sctx->screen->has_out_of_order_rast)
+ return false;
+
+ unsigned colormask = sctx->framebuffer.colorbuf_enabled_4bit;
+
+ if (blend) {
+ colormask &= blend->cb_target_enabled_4bit;
+ } else {
+ colormask = 0;
+ }
+
+ /* Conservative: No logic op. */
+ if (colormask && blend->logicop_enable)
+ return false;
+
+ struct si_dsa_order_invariance dsa_order_invariant = {
+ .zs = true, .pass_set = true, .pass_last = false
+ };
+
+ if (sctx->framebuffer.state.zsbuf) {
+ struct r600_texture *zstex =
+ (struct r600_texture*)sctx->framebuffer.state.zsbuf->texture;
+ bool has_stencil = zstex->surface.has_stencil;
+ dsa_order_invariant = dsa->order_invariance[has_stencil];
+ if (!dsa_order_invariant.zs)
+ return false;
+
+ /* The set of PS invocations is always order invariant,
+ * except when early Z/S tests are requested. */
+ if (sctx->ps_shader.cso &&
+ sctx->ps_shader.cso->info.writes_memory &&
+ sctx->ps_shader.cso->info.properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL] &&
+ !dsa_order_invariant.pass_set)
+ return false;
+
+ if (sctx->b.num_perfect_occlusion_queries != 0 &&
+ !dsa_order_invariant.pass_set)
+ return false;
+ }
+
+ if (!colormask)
+ return true;
+
+ unsigned blendmask = colormask & blend->blend_enable_4bit;
+
+ if (blendmask) {
+ /* Only commutative blending. */
+ if (blendmask & ~blend->commutative_4bit)
+ return false;
+
+ if (!dsa_order_invariant.pass_set)
+ return false;
+ }
+
+ if (colormask & ~blendmask) {
+ if (!dsa_order_invariant.pass_last)
+ return false;
+ }
+
+ return true;
+}
+
static void si_emit_msaa_config(struct si_context *sctx, struct r600_atom *atom)
{
struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
unsigned num_tile_pipes = sctx->screen->b.info.num_tile_pipes;
/* 33% faster rendering to linear color buffers */
bool dst_is_linear = sctx->framebuffer.any_dst_linear;
+ bool out_of_order_rast = si_out_of_order_rasterization(sctx);
unsigned sc_mode_cntl_1 =
S_028A4C_WALK_SIZE(dst_is_linear) |
S_028A4C_WALK_FENCE_ENABLE(!dst_is_linear) |
S_028A4C_WALK_FENCE_SIZE(num_tile_pipes == 2 ? 2 : 3) |
+ S_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE(out_of_order_rast) |
+ S_028A4C_OUT_OF_ORDER_WATER_MARK(0x7) |
/* always 1: */
S_028A4C_WALK_ALIGN8_PRIM_FITS_ST(1) |
S_028A4C_SUPERTILE_WALK_ORDER_ENABLE(1) |
@@ -3016,13 +3311,71 @@
S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
S_028A4C_FORCE_EOV_REZ_ENABLE(1);
- cayman_emit_msaa_config(cs, sctx->framebuffer.nr_samples,
- sctx->ps_iter_samples,
- sctx->smoothing_enabled ? SI_NUM_SMOOTH_AA_SAMPLES : 0,
- sc_mode_cntl_1);
+ int setup_samples = sctx->framebuffer.nr_samples > 1 ? sctx->framebuffer.nr_samples :
+ sctx->smoothing_enabled ? SI_NUM_SMOOTH_AA_SAMPLES : 0;
+
+ /* Required by OpenGL line rasterization.
+ *
+ * TODO: We should also enable perpendicular endcaps for AA lines,
+ * but that requires implementing line stippling in the pixel
+ * shader. SC can only do line stippling with axis-aligned
+ * endcaps.
+ */
+ unsigned sc_line_cntl = S_028BDC_DX10_DIAMOND_TEST_ENA(1);
+
+ if (setup_samples > 1) {
+ /* distance from the pixel center, indexed by log2(nr_samples) */
+ static unsigned max_dist[] = {
+ 0, /* unused */
+ 4, /* 2x MSAA */
+ 6, /* 4x MSAA */
+ 7, /* 8x MSAA */
+ 8, /* 16x MSAA */
+ };
+ unsigned log_samples = util_logbase2(setup_samples);
+ unsigned log_ps_iter_samples =
+ util_logbase2(util_next_power_of_two(sctx->ps_iter_samples));
+
+ radeon_set_context_reg_seq(cs, R_028BDC_PA_SC_LINE_CNTL, 2);
+ radeon_emit(cs, sc_line_cntl |
+ S_028BDC_EXPAND_LINE_WIDTH(1)); /* CM_R_028BDC_PA_SC_LINE_CNTL */
+ radeon_emit(cs, S_028BE0_MSAA_NUM_SAMPLES(log_samples) |
+ S_028BE0_MAX_SAMPLE_DIST(max_dist[log_samples]) |
+ S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples)); /* CM_R_028BE0_PA_SC_AA_CONFIG */
+
+ if (sctx->framebuffer.nr_samples > 1) {
+ radeon_set_context_reg(cs, R_028804_DB_EQAA,
+ S_028804_MAX_ANCHOR_SAMPLES(log_samples) |
+ S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) |
+ S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) |
+ S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples) |
+ S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
+ S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
+ radeon_set_context_reg(cs, R_028A4C_PA_SC_MODE_CNTL_1,
+ S_028A4C_PS_ITER_SAMPLE(sctx->ps_iter_samples > 1) |
+ sc_mode_cntl_1);
+ } else if (sctx->smoothing_enabled) {
+ radeon_set_context_reg(cs, R_028804_DB_EQAA,
+ S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
+ S_028804_STATIC_ANCHOR_ASSOCIATIONS(1) |
+ S_028804_OVERRASTERIZATION_AMOUNT(log_samples));
+ radeon_set_context_reg(cs, R_028A4C_PA_SC_MODE_CNTL_1,
+ sc_mode_cntl_1);
+ }
+ } else {
+ radeon_set_context_reg_seq(cs, R_028BDC_PA_SC_LINE_CNTL, 2);
+ radeon_emit(cs, sc_line_cntl); /* CM_R_028BDC_PA_SC_LINE_CNTL */
+ radeon_emit(cs, 0); /* CM_R_028BE0_PA_SC_AA_CONFIG */
+
+ radeon_set_context_reg(cs, R_028804_DB_EQAA,
+ S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
+ S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
+ radeon_set_context_reg(cs, R_028A4C_PA_SC_MODE_CNTL_1,
+ sc_mode_cntl_1);
+ }
/* GFX9: Flush DFSM when the AA mode changes. */
- if (sctx->b.chip_class >= GFX9) {
+ if (sctx->screen->dfsm_allowed) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
}
@@ -3040,6 +3393,8 @@
if (sctx->framebuffer.nr_samples > 1)
si_mark_atom_dirty(sctx, &sctx->msaa_config);
+ if (sctx->screen->dpbb_allowed)
+ si_mark_atom_dirty(sctx, &sctx->dpbb_state);
}
/*
@@ -3347,7 +3702,7 @@
}
if (tex->dcc_offset) {
- unsigned swap = r600_translate_colorswap(pipe_format, false);
+ unsigned swap = si_translate_colorswap(pipe_format, false);
state[6] = S_008F28_ALPHA_IS_ON_MSB(swap <= 1);
} else {
@@ -3400,7 +3755,7 @@
num_format = V_008F14_IMG_NUM_FORMAT_UINT;
}
- fmask_state[0] = va >> 8;
+ fmask_state[0] = (va >> 8) | tex->fmask.tile_swizzle;
fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
S_008F14_DATA_FORMAT_GFX6(data_format) |
S_008F14_NUM_FORMAT_GFX6(num_format);
@@ -3523,7 +3878,7 @@
/* Depth/stencil texturing sometimes needs separate texture. */
if (tmp->is_depth && !r600_can_sample_zs(tmp, view->is_stencil_sampler)) {
if (!tmp->flushed_depth_texture &&
- !r600_init_flushed_depth_texture(ctx, texture, NULL)) {
+ !si_init_flushed_depth_texture(ctx, texture, NULL)) {
pipe_resource_reference(&view->base.texture, NULL);
FREE(view);
return NULL;
@@ -3579,6 +3934,12 @@
width, height, depth,
view->state, view->fmask_state);
+ unsigned num_format = G_008F14_NUM_FORMAT_GFX6(view->state[1]);
+ view->is_integer =
+ num_format == V_008F14_IMG_NUM_FORMAT_USCALED ||
+ num_format == V_008F14_IMG_NUM_FORMAT_SSCALED ||
+ num_format == V_008F14_IMG_NUM_FORMAT_UINT ||
+ num_format == V_008F14_IMG_NUM_FORMAT_SINT;
view->base_level_info = &surflevel[base_level];
view->base_level = base_level;
view->block_width = util_format_get_blockwidth(pipe_format);
@@ -3615,24 +3976,36 @@
static uint32_t si_translate_border_color(struct si_context *sctx,
const struct pipe_sampler_state *state,
- const union pipe_color_union *color)
+ const union pipe_color_union *color,
+ bool is_integer)
{
bool linear_filter = state->min_img_filter != PIPE_TEX_FILTER_NEAREST ||
state->mag_img_filter != PIPE_TEX_FILTER_NEAREST;
- if ((color->f[0] == 0 && color->f[1] == 0 &&
- color->f[2] == 0 && color->f[3] == 0) ||
- (!wrap_mode_uses_border_color(state->wrap_s, linear_filter) &&
- !wrap_mode_uses_border_color(state->wrap_t, linear_filter) &&
- !wrap_mode_uses_border_color(state->wrap_r, linear_filter)))
+ if (!wrap_mode_uses_border_color(state->wrap_s, linear_filter) &&
+ !wrap_mode_uses_border_color(state->wrap_t, linear_filter) &&
+ !wrap_mode_uses_border_color(state->wrap_r, linear_filter))
return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK);
- if (color->f[0] == 0 && color->f[1] == 0 &&
- color->f[2] == 0 && color->f[3] == 1)
- return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK);
- if (color->f[0] == 1 && color->f[1] == 1 &&
- color->f[2] == 1 && color->f[3] == 1)
- return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE);
+#define simple_border_types(elt) \
+do { \
+ if (color->elt[0] == 0 && color->elt[1] == 0 && \
+ color->elt[2] == 0 && color->elt[3] == 0) \
+ return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK); \
+ if (color->elt[0] == 0 && color->elt[1] == 0 && \
+ color->elt[2] == 0 && color->elt[3] == 1) \
+ return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK); \
+ if (color->elt[0] == 1 && color->elt[1] == 1 && \
+ color->elt[2] == 1 && color->elt[3] == 1) \
+ return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE); \
+} while (false)
+
+ if (is_integer)
+ simple_border_types(ui);
+ else
+ simple_border_types(f);
+
+#undef simple_border_types
int i;
@@ -3702,7 +4075,11 @@
S_008F38_DISABLE_LSB_CEIL(sctx->b.chip_class <= VI) |
S_008F38_FILTER_PREC_FIX(1) |
S_008F38_ANISO_OVERRIDE(sctx->b.chip_class >= VI));
- rstate->val[3] = si_translate_border_color(sctx, state, &state->border_color);
+ rstate->val[3] = si_translate_border_color(sctx, state, &state->border_color, false);
+
+ /* Create sampler resource for integer textures. */
+ memcpy(rstate->integer_val, rstate->val, sizeof(rstate->val));
+ rstate->integer_val[3] = si_translate_border_color(sctx, state, &state->border_color, true);
/* Create sampler resource for upgraded depth textures. */
memcpy(rstate->upgraded_depth_val, rstate->val, sizeof(rstate->val));
@@ -3717,7 +4094,7 @@
rstate->upgraded_depth_val[3] |= S_008F3C_UPGRADED_DEPTH(1);
else
rstate->upgraded_depth_val[3] =
- si_translate_border_color(sctx, state, &clamped_border_color) |
+ si_translate_border_color(sctx, state, &clamped_border_color, false) |
S_008F3C_UPGRADED_DEPTH(1);
return rstate;
@@ -4018,11 +4395,9 @@
/* Multisample surfaces are flushed in si_decompress_textures. */
if (sctx->framebuffer.nr_samples <= 1 &&
- sctx->framebuffer.state.nr_cbufs) {
- sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
- SI_CONTEXT_INV_GLOBAL_L2 |
- SI_CONTEXT_FLUSH_AND_INV_CB;
- }
+ sctx->framebuffer.state.nr_cbufs)
+ si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples,
+ sctx->framebuffer.CB_has_shader_readable_metadata);
}
/* This only ensures coherency for shader image/buffer stores. */
@@ -4065,8 +4440,10 @@
if (flags & PIPE_BARRIER_FRAMEBUFFER &&
sctx->framebuffer.nr_samples <= 1 &&
sctx->framebuffer.state.nr_cbufs) {
- sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
- SI_CONTEXT_WRITEBACK_GLOBAL_L2;
+ sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
+
+ if (sctx->b.chip_class <= VI)
+ sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
}
/* Indirect buffers use TC L2 on GFX9, but not older hw. */
@@ -4096,14 +4473,15 @@
void si_init_state_functions(struct si_context *sctx)
{
si_init_external_atom(sctx, &sctx->b.render_cond_atom, &sctx->atoms.s.render_cond);
- si_init_external_atom(sctx, &sctx->b.streamout.begin_atom, &sctx->atoms.s.streamout_begin);
- si_init_external_atom(sctx, &sctx->b.streamout.enable_atom, &sctx->atoms.s.streamout_enable);
- si_init_external_atom(sctx, &sctx->b.scissors.atom, &sctx->atoms.s.scissors);
- si_init_external_atom(sctx, &sctx->b.viewports.atom, &sctx->atoms.s.viewports);
+ si_init_external_atom(sctx, &sctx->streamout.begin_atom, &sctx->atoms.s.streamout_begin);
+ si_init_external_atom(sctx, &sctx->streamout.enable_atom, &sctx->atoms.s.streamout_enable);
+ si_init_external_atom(sctx, &sctx->scissors.atom, &sctx->atoms.s.scissors);
+ si_init_external_atom(sctx, &sctx->viewports.atom, &sctx->atoms.s.viewports);
si_init_atom(sctx, &sctx->framebuffer.atom, &sctx->atoms.s.framebuffer, si_emit_framebuffer_state);
si_init_atom(sctx, &sctx->msaa_sample_locs.atom, &sctx->atoms.s.msaa_sample_locs, si_emit_msaa_sample_locs);
si_init_atom(sctx, &sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state);
+ si_init_atom(sctx, &sctx->dpbb_state, &sctx->atoms.s.dpbb_state, si_emit_dpbb_state);
si_init_atom(sctx, &sctx->msaa_config, &sctx->atoms.s.msaa_config, si_emit_msaa_config);
si_init_atom(sctx, &sctx->sample_mask.atom, &sctx->atoms.s.sample_mask, si_emit_sample_mask);
si_init_atom(sctx, &sctx->cb_render_state, &sctx->atoms.s.cb_render_state, si_emit_cb_render_state);
@@ -4135,7 +4513,6 @@
sctx->b.b.set_stencil_ref = si_set_stencil_ref;
sctx->b.b.set_framebuffer_state = si_set_framebuffer_state;
- sctx->b.b.get_sample_position = cayman_get_sample_position;
sctx->b.b.create_sampler_state = si_create_sampler_state;
sctx->b.b.delete_sampler_state = si_delete_sampler_state;
@@ -4247,7 +4624,7 @@
/* Return if DCC is enabled. The texture should be set up with it
* already.
*/
- if (md->size_metadata >= 11 * 4 &&
+ if (md->size_metadata >= 10 * 4 && /* at least 2(header) + 8(desc) dwords */
md->metadata[0] != 0 &&
md->metadata[1] == si_get_bo_metadata_word1(rscreen) &&
G_008F28_COMPRESSION_EN(desc[6])) {
@@ -4268,6 +4645,25 @@
sscreen->b.apply_opaque_metadata = si_apply_opaque_metadata;
}
+static void si_set_grbm_gfx_index(struct si_context *sctx,
+ struct si_pm4_state *pm4, unsigned value)
+{
+ unsigned reg = sctx->b.chip_class >= CIK ? R_030800_GRBM_GFX_INDEX :
+ GRBM_GFX_INDEX;
+ si_pm4_set_reg(pm4, reg, value);
+}
+
+static void si_set_grbm_gfx_index_se(struct si_context *sctx,
+ struct si_pm4_state *pm4, unsigned se)
+{
+ assert(se == ~0 || se < sctx->screen->b.info.max_se);
+ si_set_grbm_gfx_index(sctx, pm4,
+ (se == ~0 ? S_030800_SE_BROADCAST_WRITES(1) :
+ S_030800_SE_INDEX(se)) |
+ S_030800_SH_BROADCAST_WRITES(1) |
+ S_030800_INSTANCE_BROADCAST_WRITES(1));
+}
+
static void
si_write_harvested_raster_configs(struct si_context *sctx,
struct si_pm4_state *pm4,
@@ -4370,28 +4766,12 @@
}
}
- /* GRBM_GFX_INDEX has a different offset on SI and CI+ */
- if (sctx->b.chip_class < CIK)
- si_pm4_set_reg(pm4, GRBM_GFX_INDEX,
- SE_INDEX(se) | SH_BROADCAST_WRITES |
- INSTANCE_BROADCAST_WRITES);
- else
- si_pm4_set_reg(pm4, R_030800_GRBM_GFX_INDEX,
- S_030800_SE_INDEX(se) | S_030800_SH_BROADCAST_WRITES(1) |
- S_030800_INSTANCE_BROADCAST_WRITES(1));
+ si_set_grbm_gfx_index_se(sctx, pm4, se);
si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config_se);
}
+ si_set_grbm_gfx_index(sctx, pm4, ~0);
- /* GRBM_GFX_INDEX has a different offset on SI and CI+ */
- if (sctx->b.chip_class < CIK)
- si_pm4_set_reg(pm4, GRBM_GFX_INDEX,
- SE_BROADCAST_WRITES | SH_BROADCAST_WRITES |
- INSTANCE_BROADCAST_WRITES);
- else {
- si_pm4_set_reg(pm4, R_030800_GRBM_GFX_INDEX,
- S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) |
- S_030800_INSTANCE_BROADCAST_WRITES(1));
-
+ if (sctx->b.chip_class >= CIK) {
if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
(!se_mask[2] && !se_mask[3]))) {
raster_config_1 &= C_028354_SE_PAIR_MAP;
@@ -4409,51 +4789,14 @@
}
}
-static void si_init_config(struct si_context *sctx)
+static void si_set_raster_config(struct si_context *sctx, struct si_pm4_state *pm4)
{
struct si_screen *sscreen = sctx->screen;
unsigned num_rb = MIN2(sctx->screen->b.info.num_render_backends, 16);
unsigned rb_mask = sctx->screen->b.info.enabled_rb_mask;
unsigned raster_config, raster_config_1;
- uint64_t border_color_va = sctx->border_color_buffer->gpu_address;
- struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
-
- if (!pm4)
- return;
- si_pm4_cmd_begin(pm4, PKT3_CONTEXT_CONTROL);
- si_pm4_cmd_add(pm4, CONTEXT_CONTROL_LOAD_ENABLE(1));
- si_pm4_cmd_add(pm4, CONTEXT_CONTROL_SHADOW_ENABLE(1));
- si_pm4_cmd_end(pm4, false);
-
- si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64));
- si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0));
-
- /* FIXME calculate these values somehow ??? */
- if (sctx->b.chip_class <= VI) {
- si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES);
- si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40);
- }
- si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2);
-
- si_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0);
- si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
-
- si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
- si_pm4_set_reg(pm4, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 1);
- if (sctx->b.chip_class >= GFX9)
- si_pm4_set_reg(pm4, R_028AB4_VGT_REUSE_OFF, 0);
- si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0);
- if (sctx->b.chip_class < CIK)
- si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) |
- S_008A14_CLIP_VTX_REORDER_ENA(1));
-
- si_pm4_set_reg(pm4, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210);
- si_pm4_set_reg(pm4, R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98);
-
- si_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0);
-
- switch (sctx->screen->b.family) {
+ switch (sctx->b.family) {
case CHIP_TAHITI:
case CHIP_PITCAIRN:
raster_config = 0x2a00126a;
@@ -4525,61 +4868,125 @@
raster_config_1 = 0x00000000;
break;
default:
- if (sctx->b.chip_class <= VI) {
- fprintf(stderr,
- "radeonsi: Unknown GPU, using 0 for raster_config\n");
- raster_config = 0x00000000;
- raster_config_1 = 0x00000000;
- }
- break;
+ fprintf(stderr,
+ "radeonsi: Unknown GPU, using 0 for raster_config\n");
+ raster_config = 0x00000000;
+ raster_config_1 = 0x00000000;
}
+ if (!rb_mask || util_bitcount(rb_mask) >= num_rb) {
+ /* Always use the default config when all backends are enabled
+ * (or when we failed to determine the enabled backends).
+ */
+ si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG,
+ raster_config);
+ if (sctx->b.chip_class >= CIK)
+ si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1,
+ raster_config_1);
+ } else {
+ si_write_harvested_raster_configs(sctx, pm4, raster_config, raster_config_1);
+ }
+}
+
+static void si_init_config(struct si_context *sctx)
+{
+ struct si_screen *sscreen = sctx->screen;
+ uint64_t border_color_va = sctx->border_color_buffer->gpu_address;
+ bool has_clear_state = sscreen->has_clear_state;
+ struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
+
+ /* Only SI can disable CLEAR_STATE for now. */
+ assert(has_clear_state || sscreen->b.chip_class == SI);
+
+ if (!pm4)
+ return;
+
+ si_pm4_cmd_begin(pm4, PKT3_CONTEXT_CONTROL);
+ si_pm4_cmd_add(pm4, CONTEXT_CONTROL_LOAD_ENABLE(1));
+ si_pm4_cmd_add(pm4, CONTEXT_CONTROL_SHADOW_ENABLE(1));
+ si_pm4_cmd_end(pm4, false);
+
+ if (has_clear_state) {
+ si_pm4_cmd_begin(pm4, PKT3_CLEAR_STATE);
+ si_pm4_cmd_add(pm4, 0);
+ si_pm4_cmd_end(pm4, false);
+ }
+
+ if (sctx->b.chip_class <= VI)
+ si_set_raster_config(sctx, pm4);
+
+ si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64));
+ if (!has_clear_state)
+ si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0));
+
+ /* FIXME calculate these values somehow ??? */
if (sctx->b.chip_class <= VI) {
- if (!rb_mask || util_bitcount(rb_mask) >= num_rb) {
- /* Always use the default config when all backends are enabled
- * (or when we failed to determine the enabled backends).
- */
- si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG,
- raster_config);
- if (sctx->b.chip_class >= CIK)
- si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1,
- raster_config_1);
- } else {
- si_write_harvested_raster_configs(sctx, pm4, raster_config, raster_config_1);
- }
+ si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES);
+ si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40);
+ }
+
+ if (!has_clear_state) {
+ si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2);
+ si_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0);
+ si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
}
- si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1));
- si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1));
- si_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR,
- S_028244_BR_X(16384) | S_028244_BR_Y(16384));
- si_pm4_set_reg(pm4, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0);
- si_pm4_set_reg(pm4, R_028034_PA_SC_SCREEN_SCISSOR_BR,
- S_028034_BR_X(16384) | S_028034_BR_Y(16384));
-
- si_pm4_set_reg(pm4, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF);
- si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE,
- S_028230_ER_TRI(0xA) |
- S_028230_ER_POINT(0xA) |
- S_028230_ER_RECT(0xA) |
- /* Required by DX10_DIAMOND_TEST_ENA: */
- S_028230_ER_LINE_LR(0x1A) |
- S_028230_ER_LINE_RL(0x26) |
- S_028230_ER_LINE_TB(0xA) |
- S_028230_ER_LINE_BT(0xA));
- /* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on SI */
- si_pm4_set_reg(pm4, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0);
- si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0);
- si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
- si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
- si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0);
- si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE, 0);
+ si_pm4_set_reg(pm4, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 1);
+ if (!has_clear_state)
+ si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0);
+ if (sctx->b.chip_class < CIK)
+ si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) |
+ S_008A14_CLIP_VTX_REORDER_ENA(1));
+
+ si_pm4_set_reg(pm4, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210);
+ si_pm4_set_reg(pm4, R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98);
+
+ if (!has_clear_state)
+ si_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0);
+
+ /* CLEAR_STATE doesn't clear these correctly on certain generations.
+ * I don't know why. Deduced by trial and error.
+ */
+ if (sctx->b.chip_class <= CIK) {
+ si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
+ si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1));
+ si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1));
+ si_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR,
+ S_028244_BR_X(16384) | S_028244_BR_Y(16384));
+ si_pm4_set_reg(pm4, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0);
+ si_pm4_set_reg(pm4, R_028034_PA_SC_SCREEN_SCISSOR_BR,
+ S_028034_BR_X(16384) | S_028034_BR_Y(16384));
+ }
+
+ if (!has_clear_state) {
+ si_pm4_set_reg(pm4, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF);
+ si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE,
+ S_028230_ER_TRI(0xA) |
+ S_028230_ER_POINT(0xA) |
+ S_028230_ER_RECT(0xA) |
+ /* Required by DX10_DIAMOND_TEST_ENA: */
+ S_028230_ER_LINE_LR(0x1A) |
+ S_028230_ER_LINE_RL(0x26) |
+ S_028230_ER_LINE_TB(0xA) |
+ S_028230_ER_LINE_BT(0xA));
+ /* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on SI */
+ si_pm4_set_reg(pm4, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0);
+ si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0);
+ si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
+ si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
+ si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0);
+ si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE, 0);
+ }
if (sctx->b.chip_class >= GFX9) {
si_pm4_set_reg(pm4, R_030920_VGT_MAX_VTX_INDX, ~0);
si_pm4_set_reg(pm4, R_030924_VGT_MIN_VTX_INDX, 0);
si_pm4_set_reg(pm4, R_030928_VGT_INDX_OFFSET, 0);
} else {
+ /* These registers, when written, also overwrite the CLEAR_STATE
+ * context, so we can't rely on CLEAR_STATE setting them.
+ * It would be an issue if there was another UMD changing them.
+ */
si_pm4_set_reg(pm4, R_028400_VGT_MAX_VTX_INDX, ~0);
si_pm4_set_reg(pm4, R_028404_VGT_MIN_VTX_INDX, 0);
si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0);
@@ -4587,11 +4994,15 @@
if (sctx->b.chip_class >= CIK) {
if (sctx->b.chip_class >= GFX9) {
- si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, S_00B41C_CU_EN(0xffff));
+ si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
+ S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F));
} else {
- si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xffff));
- si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 0);
- si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, S_00B31C_CU_EN(0xffff));
+ si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS,
+ S_00B51C_CU_EN(0xffff) | S_00B51C_WAVE_LIMIT(0x3F));
+ si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
+ S_00B41C_WAVE_LIMIT(0x3F));
+ si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES,
+ S_00B31C_CU_EN(0xffff) | S_00B31C_WAVE_LIMIT(0x3F));
/* If this is 0, Bonaire can hang even if GS isn't being used.
* Other chips are unaffected. These are suboptimal values,
@@ -4601,29 +5012,45 @@
S_028A44_ES_VERTS_PER_SUBGRP(64) |
S_028A44_GS_PRIMS_PER_SUBGRP(4));
}
- si_pm4_set_reg(pm4, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, S_00B21C_CU_EN(0xffff));
+ si_pm4_set_reg(pm4, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
+ S_00B21C_CU_EN(0xffff) | S_00B21C_WAVE_LIMIT(0x3F));
- if (sscreen->b.info.num_good_compute_units /
- (sscreen->b.info.max_se * sscreen->b.info.max_sh_per_se) <= 4) {
+ /* Compute LATE_ALLOC_VS.LIMIT. */
+ unsigned num_cu_per_sh = sscreen->b.info.num_good_compute_units /
+ (sscreen->b.info.max_se *
+ sscreen->b.info.max_sh_per_se);
+ unsigned late_alloc_limit; /* The limit is per SH. */
+
+ if (sctx->b.family == CHIP_KABINI) {
+ late_alloc_limit = 0; /* Potential hang on Kabini. */
+ } else if (num_cu_per_sh <= 4) {
/* Too few available compute units per SH. Disallowing
- * VS to run on CU0 could hurt us more than late VS
+ * VS to run on one CU could hurt us more than late VS
* allocation would help.
*
- * LATE_ALLOC_VS = 2 is the highest safe number.
+ * 2 is the highest safe number that allows us to keep
+ * all CUs enabled.
*/
- si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff));
- si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(2));
+ late_alloc_limit = 2;
} else {
- /* Set LATE_ALLOC_VS == 31. It should be less than
- * the number of scratch waves. Limitations:
- * - VS can't execute on CU0.
- * - If HS writes outputs to LDS, LS can't execute on CU0.
+ /* This is a good initial value, allowing 1 late_alloc
+ * wave per SIMD on num_cu - 2.
*/
- si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xfffe));
- si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(31));
+ late_alloc_limit = (num_cu_per_sh - 2) * 4;
+
+ /* The limit is 0-based, so 0 means 1. */
+ assert(late_alloc_limit > 0 && late_alloc_limit <= 64);
+ late_alloc_limit -= 1;
}
- si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, S_00B01C_CU_EN(0xffff));
+ /* VS can't execute on one CU if the limit is > 2. */
+ si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS,
+ S_00B118_CU_EN(late_alloc_limit > 2 ? 0xfffe : 0xffff) |
+ S_00B118_WAVE_LIMIT(0x3F));
+ si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS,
+ S_00B11C_LIMIT(late_alloc_limit));
+ si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS,
+ S_00B01C_CU_EN(0xffff) | S_00B01C_WAVE_LIMIT(0x3F));
}
if (sctx->b.chip_class >= VI) {
@@ -4632,9 +5059,6 @@
si_pm4_set_reg(pm4, R_028424_CB_DCC_CONTROL,
S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1) |
S_028424_OVERWRITE_COMBINER_WATERMARK(4));
- if (sctx->b.family < CHIP_POLARIS10)
- si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 30);
- si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32);
vgt_tess_distribution =
S_028B50_ACCUM_ISOLINE(32) |
@@ -4650,14 +5074,11 @@
vgt_tess_distribution |= S_028B50_TRAP_SPLIT(3);
si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION, vgt_tess_distribution);
- } else {
+ } else if (!has_clear_state) {
si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14);
si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 16);
}
- if (sctx->screen->b.has_rbplus)
- si_pm4_set_reg(pm4, R_028C40_PA_SC_SHADER_CONTROL, 0);
-
si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8);
if (sctx->b.chip_class >= CIK)
si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, border_color_va >> 40);
@@ -4679,16 +5100,6 @@
assert(0);
}
- si_pm4_set_reg(pm4, R_028060_DB_DFSM_CONTROL,
- S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF));
- si_pm4_set_reg(pm4, R_028064_DB_RENDER_FILTER, 0);
- /* TODO: We can use this to disable RBs for rendering to GART: */
- si_pm4_set_reg(pm4, R_02835C_PA_SC_TILE_STEERING_OVERRIDE, 0);
- si_pm4_set_reg(pm4, R_02883C_PA_SU_OVER_RASTERIZATION_CNTL, 0);
- /* TODO: Enable the binner: */
- si_pm4_set_reg(pm4, R_028C44_PA_SC_BINNER_CNTL_0,
- S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_LEGACY_SC) |
- S_028C44_DISABLE_START_OF_PRIM(1));
si_pm4_set_reg(pm4, R_028C48_PA_SC_BINNER_CNTL_1,
S_028C48_MAX_ALLOC_COUNT(MIN2(128, pc_lines / (4 * num_se))) |
S_028C48_MAX_PRIM_PER_BATCH(1023));
diff -Nru mesa-17.2.4/src/gallium/drivers/radeonsi/si_state_draw.c mesa-17.3.3/src/gallium/drivers/radeonsi/si_state_draw.c
--- mesa-17.2.4/src/gallium/drivers/radeonsi/si_state_draw.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeonsi/si_state_draw.c 2018-01-18 21:30:28.000000000 +0000
@@ -30,6 +30,7 @@
#include "gfx9d.h"
#include "util/u_index_modify.h"
+#include "util/u_log.h"
#include "util/u_upload_mgr.h"
#include "util/u_prim.h"
@@ -105,7 +106,7 @@
unsigned tess_uses_primid = sctx->ia_multi_vgt_param_key.u.tess_uses_prim_id;
bool has_primid_instancing_bug = sctx->b.chip_class == SI &&
sctx->b.screen->info.max_se == 1;
- unsigned tes_sh_base = sctx->shader_userdata.sh_base[PIPE_SHADER_TESS_EVAL];
+ unsigned tes_sh_base = sctx->shader_pointers.sh_base[PIPE_SHADER_TESS_EVAL];
unsigned num_tcs_input_cp = info->vertices_per_patch;
unsigned num_tcs_output_cp, num_tcs_inputs, num_tcs_outputs;
unsigned num_tcs_patch_outputs;
@@ -194,11 +195,7 @@
*/
*num_patches = MIN2(*num_patches, 40);
- if (sctx->b.chip_class == SI ||
- /* TODO: fix GFX9 where a threadgroup contains more than 1 wave and
- * LS vertices per patch > HS vertices per patch. Piglit: 16in-1out */
- (sctx->b.chip_class == GFX9 &&
- num_tcs_input_cp > num_tcs_output_cp)) {
+ if (sctx->b.chip_class == SI) {
/* SI bug workaround, related to power management. Limit LS-HS
* threadgroups to only one wave.
*/
@@ -216,7 +213,7 @@
* doesn't work correctly on SI when there is no other
* SE to switch to.
*/
- if (has_primid_instancing_bug)
+ if (has_primid_instancing_bug && tess_uses_primid)
*num_patches = 1;
sctx->last_num_patches = *num_patches;
@@ -236,8 +233,7 @@
tcs_in_layout = S_VS_STATE_LS_OUT_PATCH_SIZE(input_patch_size / 4) |
S_VS_STATE_LS_OUT_VERTEX_SIZE(input_vertex_size / 4);
- tcs_out_layout = (output_patch_size / 4) |
- ((output_vertex_size / 4) << 13);
+ tcs_out_layout = output_patch_size / 4;
tcs_out_offsets = (output_patch0_offset / 16) |
((perpatch_output_offset / 16) << 16);
offchip_layout = *num_patches |
@@ -372,7 +368,7 @@
/* This is a hardware requirement. */
if (key->u.line_stipple_enabled ||
- (sscreen->b.debug_flags & DBG_SWITCH_ON_EOP)) {
+ (sscreen->b.debug_flags & DBG(SWITCH_ON_EOP))) {
ia_switch_on_eop = true;
wd_switch_on_eop = true;
}
@@ -535,7 +531,7 @@
static void si_emit_rasterizer_prim_state(struct si_context *sctx)
{
struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
- enum pipe_prim_type rast_prim = sctx->b.current_rast_prim;
+ enum pipe_prim_type rast_prim = sctx->current_rast_prim;
struct si_state_rasterizer *rs = sctx->emitted.named.rasterizer;
/* Skip this if not rendering lines. */
@@ -567,11 +563,17 @@
sctx->current_vs_state &= C_VS_STATE_INDEXED;
sctx->current_vs_state |= S_VS_STATE_INDEXED(!!info->index_size);
+ if (sctx->num_vs_blit_sgprs) {
+ /* Re-emit the state after we leave u_blitter. */
+ sctx->last_vs_state = ~0;
+ return;
+ }
+
if (sctx->current_vs_state != sctx->last_vs_state) {
struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
radeon_set_sh_reg(cs,
- sctx->shader_userdata.sh_base[PIPE_SHADER_VERTEX] +
+ sctx->shader_pointers.sh_base[PIPE_SHADER_VERTEX] +
SI_SGPR_VS_STATE_BITS * 4,
sctx->current_vs_state);
@@ -585,7 +587,7 @@
{
struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
unsigned prim = si_conv_pipe_prim(info->mode);
- unsigned gs_out_prim = si_conv_prim_to_gs_out(sctx->b.current_rast_prim);
+ unsigned gs_out_prim = si_conv_prim_to_gs_out(sctx->current_rast_prim);
unsigned ia_multi_vgt_param;
ia_multi_vgt_param = si_get_ia_multi_vgt_param(sctx, info, num_patches);
@@ -644,14 +646,14 @@
{
struct pipe_draw_indirect_info *indirect = info->indirect;
struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
- unsigned sh_base_reg = sctx->shader_userdata.sh_base[PIPE_SHADER_VERTEX];
+ unsigned sh_base_reg = sctx->shader_pointers.sh_base[PIPE_SHADER_VERTEX];
bool render_cond_bit = sctx->b.render_cond && !sctx->b.render_cond_force_off;
uint32_t index_max_size = 0;
uint64_t index_va = 0;
if (info->count_from_stream_output) {
- struct r600_so_target *t =
- (struct r600_so_target*)info->count_from_stream_output;
+ struct si_streamout_target *t =
+ (struct si_streamout_target*)info->count_from_stream_output;
uint64_t va = t->buf_filled_size->gpu_address +
t->buf_filled_size_offset;
@@ -799,11 +801,20 @@
/* Base vertex and start instance. */
base_vertex = index_size ? info->index_bias : info->start;
- if (base_vertex != sctx->last_base_vertex ||
- sctx->last_base_vertex == SI_BASE_VERTEX_UNKNOWN ||
- info->start_instance != sctx->last_start_instance ||
- info->drawid != sctx->last_drawid ||
- sh_base_reg != sctx->last_sh_base_reg) {
+ if (sctx->num_vs_blit_sgprs) {
+ /* Re-emit draw constants after we leave u_blitter. */
+ si_invalidate_draw_sh_constants(sctx);
+
+ /* Blit VS doesn't use BASE_VERTEX, START_INSTANCE, and DRAWID. */
+ radeon_set_sh_reg_seq(cs, sh_base_reg + SI_SGPR_VS_BLIT_DATA * 4,
+ sctx->num_vs_blit_sgprs);
+ radeon_emit_array(cs, sctx->vs_blit_sh_data,
+ sctx->num_vs_blit_sgprs);
+ } else if (base_vertex != sctx->last_base_vertex ||
+ sctx->last_base_vertex == SI_BASE_VERTEX_UNKNOWN ||
+ info->start_instance != sctx->last_start_instance ||
+ info->drawid != sctx->last_drawid ||
+ sh_base_reg != sctx->last_sh_base_reg) {
radeon_set_sh_reg_seq(cs, sh_base_reg + SI_SGPR_BASE_VERTEX * 4, 3);
radeon_emit(cs, base_vertex);
radeon_emit(cs, info->start_instance);
@@ -897,8 +908,9 @@
/* Necessary for DCC */
if (rctx->chip_class == VI)
- r600_gfx_write_event_eop(rctx, V_028A90_FLUSH_AND_INV_CB_DATA_TS,
- 0, 0, NULL, 0, 0, 0);
+ si_gfx_write_event_eop(rctx, V_028A90_FLUSH_AND_INV_CB_DATA_TS,
+ 0, EOP_DATA_SEL_DISCARD, NULL,
+ 0, 0, R600_NOT_QUERY);
}
if (rctx->flags & SI_CONTEXT_FLUSH_AND_INV_DB)
cp_coher_cntl |= S_0085F0_DB_ACTION_ENA(1) |
@@ -975,17 +987,30 @@
cb_db_event = V_028A90_CACHE_FLUSH_AND_INV_TS_EVENT;
}
- /* TC | TC_WB = invalidate L2 data
- * TC_MD | TC_WB = invalidate L2 metadata (DCC, etc.)
- * TC | TC_WB | TC_MD = invalidate L2 data & metadata
+ /* These are the only allowed combinations. If you need to
+ * do multiple operations at once, do them separately.
+ * All operations that invalidate L2 also seem to invalidate
+ * metadata. Volatile (VOL) and WC flushes are not listed here.
+ *
+ * TC | TC_WB = writeback & invalidate L2 & L1
+ * TC | TC_WB | TC_NC = writeback & invalidate L2 for MTYPE == NC
+ * TC_WB | TC_NC = writeback L2 for MTYPE == NC
+ * TC | TC_NC = invalidate L2 for MTYPE == NC
+ * TC | TC_MD = writeback & invalidate L2 metadata (DCC, etc.)
+ * TCL1 = invalidate L1
*/
tc_flags = 0;
+ if (rctx->flags & SI_CONTEXT_INV_L2_METADATA) {
+ tc_flags = EVENT_TC_ACTION_ENA |
+ EVENT_TC_MD_ACTION_ENA;
+ }
+
/* Ideally flush TC together with CB/DB. */
if (rctx->flags & SI_CONTEXT_INV_GLOBAL_L2) {
- tc_flags |= EVENT_TC_ACTION_ENA |
- EVENT_TC_WB_ACTION_ENA |
- EVENT_TCL1_ACTION_ENA;
+ /* Writeback and invalidate everything in L2 & L1. */
+ tc_flags = EVENT_TC_ACTION_ENA |
+ EVENT_TC_WB_ACTION_ENA;
/* Clear the flags. */
rctx->flags &= ~(SI_CONTEXT_INV_GLOBAL_L2 |
@@ -998,11 +1023,11 @@
va = sctx->wait_mem_scratch->gpu_address;
sctx->wait_mem_number++;
- r600_gfx_write_event_eop(rctx, cb_db_event, tc_flags, 1,
+ si_gfx_write_event_eop(rctx, cb_db_event, tc_flags,
+ EOP_DATA_SEL_VALUE_32BIT,
sctx->wait_mem_scratch, va,
- sctx->wait_mem_number - 1,
- sctx->wait_mem_number);
- r600_gfx_wait_fence(rctx, va, sctx->wait_mem_number, 0xffffffff);
+ sctx->wait_mem_number, R600_NOT_QUERY);
+ si_gfx_wait_fence(rctx, va, sctx->wait_mem_number, 0xffffffff);
}
/* Make sure ME is idle (it executes most packets) before continuing.
@@ -1146,25 +1171,40 @@
}
}
-void si_ce_pre_draw_synchronization(struct si_context *sctx)
+static void si_emit_all_states(struct si_context *sctx, const struct pipe_draw_info *info,
+ unsigned skip_atom_mask)
{
- if (sctx->ce_need_synchronization) {
- radeon_emit(sctx->ce_ib, PKT3(PKT3_INCREMENT_CE_COUNTER, 0, 0));
- radeon_emit(sctx->ce_ib, 1);
+ /* Emit state atoms. */
+ unsigned mask = sctx->dirty_atoms & ~skip_atom_mask;
+ while (mask) {
+ struct r600_atom *atom = sctx->atoms.array[u_bit_scan(&mask)];
- radeon_emit(sctx->b.gfx.cs, PKT3(PKT3_WAIT_ON_CE_COUNTER, 0, 0));
- radeon_emit(sctx->b.gfx.cs, 1);
+ atom->emit(&sctx->b, atom);
}
-}
+ sctx->dirty_atoms &= skip_atom_mask;
-void si_ce_post_draw_synchronization(struct si_context *sctx)
-{
- if (sctx->ce_need_synchronization) {
- radeon_emit(sctx->b.gfx.cs, PKT3(PKT3_INCREMENT_DE_COUNTER, 0, 0));
- radeon_emit(sctx->b.gfx.cs, 0);
+ /* Emit states. */
+ mask = sctx->dirty_states;
+ while (mask) {
+ unsigned i = u_bit_scan(&mask);
+ struct si_pm4_state *state = sctx->queued.array[i];
- sctx->ce_need_synchronization = false;
+ if (!state || sctx->emitted.array[i] == state)
+ continue;
+
+ si_pm4_emit(sctx, state);
+ sctx->emitted.array[i] = state;
}
+ sctx->dirty_states = 0;
+
+ /* Emit draw states. */
+ unsigned num_patches = 0;
+
+ si_emit_rasterizer_prim_state(sctx);
+ if (sctx->tes_shader.cso)
+ si_emit_derived_tess_state(sctx, info, &num_patches);
+ si_emit_vs_state(sctx, info);
+ si_emit_draw_registers(sctx, info, num_patches);
}
void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
@@ -1172,9 +1212,8 @@
struct si_context *sctx = (struct si_context *)ctx;
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
struct pipe_resource *indexbuf = info->index.resource;
- unsigned mask, dirty_tex_counter;
+ unsigned dirty_tex_counter;
enum pipe_prim_type rast_prim;
- unsigned num_patches = 0;
unsigned index_size = info->index_size;
unsigned index_offset = info->indirect ? info->start * index_size : 0;
@@ -1216,7 +1255,7 @@
si_update_all_texture_descriptors(sctx);
}
- si_decompress_graphics_textures(sctx);
+ si_decompress_textures(sctx, u_bit_consecutive(0, SI_NUM_GRAPHICS_SHADERS));
/* Set the rasterization primitive type.
*
@@ -1233,18 +1272,39 @@
} else
rast_prim = info->mode;
- if (rast_prim != sctx->b.current_rast_prim) {
- bool old_is_poly = sctx->b.current_rast_prim >= PIPE_PRIM_TRIANGLES;
+ if (rast_prim != sctx->current_rast_prim) {
+ bool old_is_poly = sctx->current_rast_prim >= PIPE_PRIM_TRIANGLES;
bool new_is_poly = rast_prim >= PIPE_PRIM_TRIANGLES;
if (old_is_poly != new_is_poly) {
- sctx->b.scissors.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
- si_set_atom_dirty(sctx, &sctx->b.scissors.atom, true);
+ sctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
+ si_mark_atom_dirty(sctx, &sctx->scissors.atom);
}
- sctx->b.current_rast_prim = rast_prim;
+ sctx->current_rast_prim = rast_prim;
sctx->do_update_shaders = true;
}
+ if (sctx->tes_shader.cso &&
+ (sctx->b.family == CHIP_VEGA10 || sctx->b.family == CHIP_RAVEN)) {
+ /* Determine whether the LS VGPR fix should be applied.
+ *
+ * It is only required when num input CPs > num output CPs,
+ * which cannot happen with the fixed function TCS. We should
+ * also update this bit when switching from TCS to fixed
+ * function TCS.
+ */
+ struct si_shader_selector *tcs = sctx->tcs_shader.cso;
+ bool ls_vgpr_fix =
+ tcs &&
+ info->vertices_per_patch >
+ tcs->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT];
+
+ if (ls_vgpr_fix != sctx->ls_vgpr_fix) {
+ sctx->ls_vgpr_fix = ls_vgpr_fix;
+ sctx->do_update_shaders = true;
+ }
+ }
+
if (sctx->gs_shader.cso) {
/* Determine whether the GS triangle strip adjacency fix should
* be applied. Rotate every other triangle if
@@ -1266,9 +1326,6 @@
if (sctx->do_update_shaders && !si_update_shaders(sctx))
return;
- if (!si_upload_graphics_shader_descriptors(sctx))
- return;
-
if (index_size) {
/* Translate or upload, if needed. */
/* 8-bit indices are supported on VI. */
@@ -1351,84 +1408,164 @@
if (!si_upload_vertex_buffer_descriptors(sctx))
return;
- /* GFX9 scissor bug workaround. There is also a more efficient but
- * more involved alternative workaround. */
+ /* GFX9 scissor bug workaround. This must be done before VPORT scissor
+ * registers are changed. There is also a more efficient but more
+ * involved alternative workaround.
+ */
if (sctx->b.chip_class == GFX9 &&
- si_is_atom_dirty(sctx, &sctx->b.scissors.atom))
+ si_is_atom_dirty(sctx, &sctx->scissors.atom)) {
sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH;
+ si_emit_cache_flush(sctx);
+ }
+
+ /* Use optimal packet order based on whether we need to sync the pipeline. */
+ if (unlikely(sctx->b.flags & (SI_CONTEXT_FLUSH_AND_INV_CB |
+ SI_CONTEXT_FLUSH_AND_INV_DB |
+ SI_CONTEXT_PS_PARTIAL_FLUSH |
+ SI_CONTEXT_CS_PARTIAL_FLUSH))) {
+ /* If we have to wait for idle, set all states first, so that all
+ * SET packets are processed in parallel with previous draw calls.
+ * Then upload descriptors, set shader pointers, and draw, and
+ * prefetch at the end. This ensures that the time the CUs
+ * are idle is very short. (there are only SET_SH packets between
+ * the wait and the draw)
+ */
+ struct r600_atom *shader_pointers = &sctx->shader_pointers.atom;
+ unsigned masked_atoms = 1u << shader_pointers->id;
+
+ if (unlikely(sctx->b.flags & R600_CONTEXT_FLUSH_FOR_RENDER_COND))
+ masked_atoms |= 1u << sctx->b.render_cond_atom.id;
- /* Flush caches before the first state atom, which does L2 prefetches. */
- if (sctx->b.flags)
+ /* Emit all states except shader pointers and render condition. */
+ si_emit_all_states(sctx, info, masked_atoms);
si_emit_cache_flush(sctx);
- /* Emit state atoms. */
- mask = sctx->dirty_atoms;
- while (mask) {
- struct r600_atom *atom = sctx->atoms.array[u_bit_scan(&mask)];
+ /* <-- CUs are idle here. */
+ if (!si_upload_graphics_shader_descriptors(sctx))
+ return;
- atom->emit(&sctx->b, atom);
- }
- sctx->dirty_atoms = 0;
+ /* Set shader pointers after descriptors are uploaded. */
+ if (si_is_atom_dirty(sctx, shader_pointers))
+ shader_pointers->emit(&sctx->b, NULL);
+ if (si_is_atom_dirty(sctx, &sctx->b.render_cond_atom))
+ sctx->b.render_cond_atom.emit(&sctx->b, NULL);
+ sctx->dirty_atoms = 0;
- /* Emit states. */
- mask = sctx->dirty_states;
- while (mask) {
- unsigned i = u_bit_scan(&mask);
- struct si_pm4_state *state = sctx->queued.array[i];
+ si_emit_draw_packets(sctx, info, indexbuf, index_size, index_offset);
+ /* <-- CUs are busy here. */
- if (!state || sctx->emitted.array[i] == state)
- continue;
+ /* Start prefetches after the draw has been started. Both will run
+ * in parallel, but starting the draw first is more important.
+ */
+ if (sctx->b.chip_class >= CIK && sctx->prefetch_L2_mask)
+ cik_emit_prefetch_L2(sctx);
+ } else {
+ /* If we don't wait for idle, start prefetches first, then set
+ * states, and draw at the end.
+ */
+ if (sctx->b.flags)
+ si_emit_cache_flush(sctx);
- si_pm4_emit(sctx, state);
- sctx->emitted.array[i] = state;
- }
- sctx->dirty_states = 0;
+ if (sctx->b.chip_class >= CIK && sctx->prefetch_L2_mask)
+ cik_emit_prefetch_L2(sctx);
- si_emit_rasterizer_prim_state(sctx);
- if (sctx->tes_shader.cso)
- si_emit_derived_tess_state(sctx, info, &num_patches);
- si_emit_vs_state(sctx, info);
- si_emit_draw_registers(sctx, info, num_patches);
+ if (!si_upload_graphics_shader_descriptors(sctx))
+ return;
- si_ce_pre_draw_synchronization(sctx);
- si_emit_draw_packets(sctx, info, indexbuf, index_size, index_offset);
- si_ce_post_draw_synchronization(sctx);
+ si_emit_all_states(sctx, info, 0);
+ si_emit_draw_packets(sctx, info, indexbuf, index_size, index_offset);
+ }
- if (sctx->trace_buf)
+ if (unlikely(sctx->current_saved_cs)) {
si_trace_emit(sctx);
+ si_log_draw_state(sctx, sctx->b.log);
+ }
/* Workaround for a VGT hang when streamout is enabled.
* It must be done after drawing. */
if ((sctx->b.family == CHIP_HAWAII ||
sctx->b.family == CHIP_TONGA ||
sctx->b.family == CHIP_FIJI) &&
- r600_get_strmout_en(&sctx->b)) {
+ si_get_strmout_en(sctx)) {
sctx->b.flags |= SI_CONTEXT_VGT_STREAMOUT_SYNC;
}
- sctx->b.num_draw_calls++;
- if (info->primitive_restart)
- sctx->b.num_prim_restart_calls++;
- if (G_0286E8_WAVESIZE(sctx->spi_tmpring_size))
- sctx->b.num_spill_draw_calls++;
+ if (unlikely(sctx->decompression_enabled)) {
+ sctx->b.num_decompress_calls++;
+ } else {
+ sctx->b.num_draw_calls++;
+ if (sctx->framebuffer.state.nr_cbufs > 1)
+ sctx->b.num_mrt_draw_calls++;
+ if (info->primitive_restart)
+ sctx->b.num_prim_restart_calls++;
+ if (G_0286E8_WAVESIZE(sctx->spi_tmpring_size))
+ sctx->b.num_spill_draw_calls++;
+ }
if (index_size && indexbuf != info->index.resource)
pipe_resource_reference(&indexbuf, NULL);
}
+void si_draw_rectangle(struct blitter_context *blitter,
+ void *vertex_elements_cso,
+ blitter_get_vs_func get_vs,
+ int x1, int y1, int x2, int y2,
+ float depth, unsigned num_instances,
+ enum blitter_attrib_type type,
+ const union blitter_attrib *attrib)
+{
+ struct pipe_context *pipe = util_blitter_get_pipe(blitter);
+ struct si_context *sctx = (struct si_context*)pipe;
+
+ /* Pack position coordinates as signed int16. */
+ sctx->vs_blit_sh_data[0] = (uint32_t)(x1 & 0xffff) |
+ ((uint32_t)(y1 & 0xffff) << 16);
+ sctx->vs_blit_sh_data[1] = (uint32_t)(x2 & 0xffff) |
+ ((uint32_t)(y2 & 0xffff) << 16);
+ sctx->vs_blit_sh_data[2] = fui(depth);
+
+ switch (type) {
+ case UTIL_BLITTER_ATTRIB_COLOR:
+ memcpy(&sctx->vs_blit_sh_data[3], attrib->color,
+ sizeof(float)*4);
+ break;
+ case UTIL_BLITTER_ATTRIB_TEXCOORD_XY:
+ case UTIL_BLITTER_ATTRIB_TEXCOORD_XYZW:
+ memcpy(&sctx->vs_blit_sh_data[3], &attrib->texcoord,
+ sizeof(attrib->texcoord));
+ break;
+ case UTIL_BLITTER_ATTRIB_NONE:;
+ }
+
+ pipe->bind_vs_state(pipe, si_get_blit_vs(sctx, type, num_instances));
+
+ struct pipe_draw_info info = {};
+ info.mode = R600_PRIM_RECTANGLE_LIST;
+ info.count = 3;
+ info.instance_count = num_instances;
+
+ /* Don't set per-stage shader pointers for VS. */
+ sctx->shader_pointers_dirty &= ~SI_DESCS_SHADER_MASK(VERTEX);
+ sctx->vertex_buffer_pointer_dirty = false;
+
+ si_draw_vbo(pipe, &info);
+}
+
void si_trace_emit(struct si_context *sctx)
{
struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
+ uint64_t va = sctx->current_saved_cs->trace_buf->gpu_address;
+ uint32_t trace_id = ++sctx->current_saved_cs->trace_id;
- sctx->trace_id++;
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, sctx->trace_buf,
- RADEON_USAGE_READWRITE, RADEON_PRIO_TRACE);
radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
radeon_emit(cs, S_370_DST_SEL(V_370_MEMORY_SYNC) |
S_370_WR_CONFIRM(1) |
S_370_ENGINE_SEL(V_370_ME));
- radeon_emit(cs, sctx->trace_buf->gpu_address);
- radeon_emit(cs, sctx->trace_buf->gpu_address >> 32);
- radeon_emit(cs, sctx->trace_id);
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ radeon_emit(cs, trace_id);
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
- radeon_emit(cs, AC_ENCODE_TRACE_POINT(sctx->trace_id));
+ radeon_emit(cs, AC_ENCODE_TRACE_POINT(trace_id));
+
+ if (sctx->b.log)
+ u_log_flush(sctx->b.log);
}
diff -Nru mesa-17.2.4/src/gallium/drivers/radeonsi/si_state.h mesa-17.3.3/src/gallium/drivers/radeonsi/si_state.h
--- mesa-17.2.4/src/gallium/drivers/radeonsi/si_state.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeonsi/si_state.h 2018-01-18 21:30:28.000000000 +0000
@@ -49,14 +49,17 @@
struct si_state_blend {
struct si_pm4_state pm4;
uint32_t cb_target_mask;
- bool alpha_to_coverage;
- bool alpha_to_one;
- bool dual_src_blend;
/* Set 0xf or 0x0 (4 bits) per render target if the following is
* true. ANDed with spi_shader_col_format.
*/
+ unsigned cb_target_enabled_4bit;
unsigned blend_enable_4bit;
unsigned need_src_alpha_4bit;
+ unsigned commutative_4bit;
+ bool alpha_to_coverage:1;
+ bool alpha_to_one:1;
+ bool dual_src_blend:1;
+ bool logicop_enable:1;
};
struct si_state_rasterizer {
@@ -65,6 +68,8 @@
struct si_pm4_state *pm4_poly_offset;
unsigned pa_sc_line_stipple;
unsigned pa_cl_clip_cntl;
+ float line_width;
+ float max_point_size;
unsigned sprite_coord_enable:8;
unsigned clip_plane_enable:8;
unsigned flatshade:1;
@@ -88,10 +93,36 @@
uint8_t writemask[2];
};
+struct si_dsa_order_invariance {
+ /** Whether the final result in Z/S buffers is guaranteed to be
+ * invariant under changes to the order in which fragments arrive. */
+ bool zs:1;
+
+ /** Whether the set of fragments that pass the combined Z/S test is
+ * guaranteed to be invariant under changes to the order in which
+ * fragments arrive. */
+ bool pass_set:1;
+
+ /** Whether the last fragment that passes the combined Z/S test at each
+ * sample is guaranteed to be invariant under changes to the order in
+ * which fragments arrive. */
+ bool pass_last:1;
+};
+
struct si_state_dsa {
struct si_pm4_state pm4;
- unsigned alpha_func;
struct si_dsa_stencil_ref_part stencil_ref;
+
+ /* 0 = without stencil buffer, 1 = when both Z and S buffers are present */
+ struct si_dsa_order_invariance order_invariance[2];
+
+ ubyte alpha_func:3;
+ bool depth_enabled:1;
+ bool depth_write_enabled:1;
+ bool stencil_enabled:1;
+ bool stencil_write_enabled:1;
+ bool db_can_write:1;
+
};
struct si_stencil_ref {
@@ -141,20 +172,20 @@
union si_state_atoms {
struct {
/* The order matters. */
- struct r600_atom *prefetch_L2;
struct r600_atom *render_cond;
struct r600_atom *streamout_begin;
struct r600_atom *streamout_enable; /* must be after streamout_begin */
struct r600_atom *framebuffer;
struct r600_atom *msaa_sample_locs;
struct r600_atom *db_render_state;
+ struct r600_atom *dpbb_state;
struct r600_atom *msaa_config;
struct r600_atom *sample_mask;
struct r600_atom *cb_render_state;
struct r600_atom *blend_color;
struct r600_atom *clip_regs;
struct r600_atom *clip_state;
- struct r600_atom *shader_userdata;
+ struct r600_atom *shader_pointers;
struct r600_atom *scissors;
struct r600_atom *viewports;
struct r600_atom *stencil_ref;
@@ -196,13 +227,12 @@
* are contiguous:
*
* 0 - rw buffers
- * 1 - vertex const buffers
- * 2 - vertex shader buffers
- * ...
- * 5 - fragment const buffers
- * ...
- * 21 - compute const buffers
+ * 1 - vertex const and shader buffers
+ * 2 - vertex samplers and images
+ * 3 - fragment const and shader buffer
* ...
+ * 11 - compute const and shader buffers
+ * 12 - compute samplers and images
*/
enum {
SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS,
@@ -217,6 +247,11 @@
#define SI_NUM_DESCS (SI_DESCS_FIRST_SHADER + \
SI_NUM_SHADERS * SI_NUM_SHADER_DESCS)
+#define SI_DESCS_SHADER_MASK(name) \
+ u_bit_consecutive(SI_DESCS_FIRST_SHADER + \
+ PIPE_SHADER_##name * SI_NUM_SHADER_DESCS, \
+ SI_NUM_SHADER_DESCS)
+
/* This represents descriptors in memory, such as buffer resources,
* image resources, and sampler states.
*/
@@ -225,50 +260,28 @@
uint32_t *list;
/* The list in mapped GPU memory. */
uint32_t *gpu_list;
- /* Slots that have been changed and need to be uploaded. */
- uint64_t dirty_mask;
/* The buffer where the descriptors have been uploaded. */
struct r600_resource *buffer;
- int buffer_offset; /* can be negative if not using lower slots */
+ uint64_t gpu_address;
- /* The size of one descriptor. */
- ubyte element_dw_size;
/* The maximum number of descriptors. */
- ubyte num_elements;
-
- /* Offset in CE RAM */
- uint16_t ce_offset;
-
- /* Slots allocated in CE RAM. If we get active slots outside of this
- * range, direct uploads to memory will be used instead. This basically
- * governs switching between onchip (CE) and offchip (upload) modes.
- */
- ubyte first_ce_slot;
- ubyte num_ce_slots;
+ uint32_t num_elements;
/* Slots that are used by currently-bound shaders.
- * With CE: It determines which slots are dumped to L2.
- * It doesn't skip uploads to CE RAM.
- * Without CE: It determines which slots are uploaded.
+ * It determines which slots are uploaded.
*/
- ubyte first_active_slot;
- ubyte num_active_slots;
-
- /* Whether CE is used to upload this descriptor array. */
- bool uses_ce;
+ uint32_t first_active_slot;
+ uint32_t num_active_slots;
/* The SGPR index where the 64-bit pointer to the descriptor array will
* be stored. */
ubyte shader_userdata_offset;
-};
-
-struct si_sampler_views {
- struct pipe_sampler_view *views[SI_NUM_SAMPLERS];
- struct si_sampler_state *sampler_states[SI_NUM_SAMPLERS];
-
- /* The i-th bit is set if that element is enabled (non-NULL resource). */
- unsigned enabled_mask;
+ /* The size of one descriptor. */
+ ubyte element_dw_size;
+ /* If there is only one slot enabled, bind it directly instead of
+ * uploading descriptors. -1 if disabled. */
+ signed char slot_index_to_bind_directly;
};
struct si_buffer_resources {
@@ -289,6 +302,9 @@
#define si_pm4_state_changed(sctx, member) \
((sctx)->queued.named.member != (sctx)->emitted.named.member)
+#define si_pm4_state_enabled_and_changed(sctx, member) \
+ ((sctx)->queued.named.member && si_pm4_state_changed(sctx, member))
+
#define si_pm4_bind_state(sctx, member, value) \
do { \
(sctx)->queued.named.member = (value); \
@@ -305,9 +321,6 @@
} while(0)
/* si_descriptors.c */
-void si_ce_save_all_descriptors_at_ib_end(struct si_context* sctx);
-void si_ce_restore_all_descriptors_at_ib_start(struct si_context *sctx);
-void si_ce_enable_loads(struct radeon_winsys_cs *ib);
void si_set_mutable_tex_desc_fields(struct si_screen *sscreen,
struct r600_texture *tex,
const struct legacy_surf_level *base_level_info,
@@ -337,9 +350,9 @@
void si_update_all_texture_descriptors(struct si_context *sctx);
void si_shader_change_notify(struct si_context *sctx);
void si_update_needs_color_decompress_masks(struct si_context *sctx);
-void si_emit_graphics_shader_userdata(struct si_context *sctx,
+void si_emit_graphics_shader_pointers(struct si_context *sctx,
struct r600_atom *atom);
-void si_emit_compute_shader_userdata(struct si_context *sctx);
+void si_emit_compute_shader_pointers(struct si_context *sctx);
void si_set_rw_buffer(struct si_context *sctx,
uint slot, const struct pipe_constant_buffer *input);
void si_set_active_descriptors(struct si_context *sctx, unsigned desc_idx,
@@ -386,24 +399,44 @@
unsigned force_level);
void si_update_fb_dirtiness_after_rendering(struct si_context *sctx);
-/* si_state_shader.c */
+/* si_state_binning.c */
+void si_emit_dpbb_state(struct si_context *sctx, struct r600_atom *state);
+
+/* si_state_shaders.c */
bool si_update_shaders(struct si_context *sctx);
void si_init_shader_functions(struct si_context *sctx);
bool si_init_shader_cache(struct si_screen *sscreen);
void si_destroy_shader_cache(struct si_screen *sscreen);
-void si_init_shader_selector_async(void *job, int thread_index);
void si_get_active_slot_masks(const struct tgsi_shader_info *info,
uint32_t *const_and_shader_buffers,
uint64_t *samplers_and_images);
+void *si_get_blit_vs(struct si_context *sctx, enum blitter_attrib_type type,
+ unsigned num_layers);
/* si_state_draw.c */
void si_init_ia_multi_vgt_param_table(struct si_context *sctx);
void si_emit_cache_flush(struct si_context *sctx);
-void si_ce_pre_draw_synchronization(struct si_context *sctx);
-void si_ce_post_draw_synchronization(struct si_context *sctx);
void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo);
+void si_draw_rectangle(struct blitter_context *blitter,
+ void *vertex_elements_cso,
+ blitter_get_vs_func get_vs,
+ int x1, int y1, int x2, int y2,
+ float depth, unsigned num_instances,
+ enum blitter_attrib_type type,
+ const union blitter_attrib *attrib);
void si_trace_emit(struct si_context *sctx);
+/* si_state_msaa.c */
+void si_init_msaa_functions(struct si_context *sctx);
+void si_emit_sample_locations(struct radeon_winsys_cs *cs, int nr_samples);
+
+/* si_state_streamout.c */
+void si_streamout_buffers_dirty(struct si_context *sctx);
+void si_emit_streamout_end(struct si_context *sctx);
+void si_update_prims_generated_query_state(struct si_context *sctx,
+ unsigned type, int diff);
+void si_init_streamout_functions(struct si_context *sctx);
+
static inline unsigned
si_tile_mode_index(struct r600_texture *rtex, unsigned level, bool stencil)
diff -Nru mesa-17.2.4/src/gallium/drivers/radeonsi/si_state_msaa.c mesa-17.3.3/src/gallium/drivers/radeonsi/si_state_msaa.c
--- mesa-17.2.4/src/gallium/drivers/radeonsi/si_state_msaa.c 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeonsi/si_state_msaa.c 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,209 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Marek Olšák
+ *
+ */
+
+#include "si_pipe.h"
+#include "sid.h"
+#include "radeon/r600_cs.h"
+
+/* For MSAA sample positions.
+ *
+ * Pack four (x, y) sample positions into one 32-bit register value.
+ * Each coordinate is reduced to its low 4 bits and stored in consecutive
+ * nibbles: s0x in bits 0-3, s0y in bits 4-7, ... s3y in bits 28-31.
+ * Every operand is converted to unsigned before masking, so negative
+ * coordinates are reduced modulo 2^32 and the whole expression is
+ * evaluated in unsigned arithmetic.
+ */
+#define FILL_SREG(s0x, s0y, s1x, s1y, s2x, s2y, s3x, s3y) \
+	((((unsigned)(s0x) & 0xf) <<  0) | (((unsigned)(s0y) & 0xf) <<  4) | \
+	 (((unsigned)(s1x) & 0xf) <<  8) | (((unsigned)(s1y) & 0xf) << 12) | \
+	 (((unsigned)(s2x) & 0xf) << 16) | (((unsigned)(s2y) & 0xf) << 20) | \
+	 (((unsigned)(s3x) & 0xf) << 24) | (((unsigned)(s3y) & 0xf) << 28))
+
+/* 2xMSAA
+ * There are two locations (4, 4), (-4, -4).
+ * Each dword holds the pair twice so that all four packed slots are filled.
+ * The four array entries are identical; si_emit_sample_locations() writes
+ * entries [0..3] to the X0Y0, X1Y0, X0Y1 and X1Y1 registers respectively. */
+static const uint32_t sample_locs_2x[4] = {
+ FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
+ FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
+ FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
+ FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
+};
+/* 4xMSAA
+ * There are 4 locations: (-2, -6), (6, -2), (-6, 2), (2, 6).
+ * As for 2x, the four array entries are identical and are written to the
+ * X0Y0, X1Y0, X0Y1 and X1Y1 registers by si_emit_sample_locations(). */
+static const uint32_t sample_locs_4x[4] = {
+ FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
+ FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
+ FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
+ FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
+};
+
+/* Cayman 8xMSAA
+ * Entries [0..3] pack samples 0-3 and entries [4..7] pack samples 4-7;
+ * within each group the four entries are identical.
+ * si_get_sample_position() reads entry (sample_index / 4) * 4. */
+static const uint32_t sample_locs_8x[] = {
+ FILL_SREG( 1, -3, -1, 3, 5, 1, -3, -5),
+ FILL_SREG( 1, -3, -1, 3, 5, 1, -3, -5),
+ FILL_SREG( 1, -3, -1, 3, 5, 1, -3, -5),
+ FILL_SREG( 1, -3, -1, 3, 5, 1, -3, -5),
+ FILL_SREG(-5, 5, -7, -1, 3, 7, 7, -7),
+ FILL_SREG(-5, 5, -7, -1, 3, 7, 7, -7),
+ FILL_SREG(-5, 5, -7, -1, 3, 7, 7, -7),
+ FILL_SREG(-5, 5, -7, -1, 3, 7, 7, -7),
+};
+/* Cayman 16xMSAA
+ * Four groups of four identical entries: [0..3] pack samples 0-3,
+ * [4..7] samples 4-7, [8..11] samples 8-11 and [12..15] samples 12-15.
+ * si_get_sample_position() reads entry (sample_index / 4) * 4. */
+static const uint32_t sample_locs_16x[] = {
+ FILL_SREG( 1, 1, -1, -3, -3, 2, 4, -1),
+ FILL_SREG( 1, 1, -1, -3, -3, 2, 4, -1),
+ FILL_SREG( 1, 1, -1, -3, -3, 2, 4, -1),
+ FILL_SREG( 1, 1, -1, -3, -3, 2, 4, -1),
+ FILL_SREG(-5, -2, 2, 5, 5, 3, 3, -5),
+ FILL_SREG(-5, -2, 2, 5, 5, 3, 3, -5),
+ FILL_SREG(-5, -2, 2, 5, 5, 3, 3, -5),
+ FILL_SREG(-5, -2, 2, 5, 5, 3, 3, -5),
+ FILL_SREG(-2, 6, 0, -7, -4, -6, -6, 4),
+ FILL_SREG(-2, 6, 0, -7, -4, -6, -6, 4),
+ FILL_SREG(-2, 6, 0, -7, -4, -6, -6, 4),
+ FILL_SREG(-2, 6, 0, -7, -4, -6, -6, 4),
+ FILL_SREG(-8, 0, 7, -4, 6, 7, -7, -8),
+ FILL_SREG(-8, 0, 7, -4, 6, 7, -7, -8),
+ FILL_SREG(-8, 0, 7, -4, 6, 7, -7, -8),
+ FILL_SREG(-8, 0, 7, -4, 6, 7, -7, -8),
+};
+
+/* Unpack one (x, y) sample position from a FILL_SREG-packed dword.
+ *
+ * packed:    register value holding four positions, 8 bits per position
+ * slot:      which of the four positions to read (0-3)
+ * out_value: receives x in [0] and y in [1]
+ *
+ * Each coordinate is a 4-bit two's-complement value. It is sign-extended
+ * explicitly instead of through a plain "int" bit-field, because the
+ * signedness of such a bit-field is implementation-defined in C.
+ */
+static void si_unpack_sample_loc(uint32_t packed, unsigned slot,
+				 float *out_value)
+{
+	unsigned shift = 8 * slot;
+	int x = (int)((packed >> shift) & 0xf);
+	int y = (int)((packed >> (shift + 4)) & 0xf);
+
+	/* Sign-extend the nibbles to the range [-8, 7]. */
+	x = (x ^ 0x8) - 0x8;
+	y = (y ^ 0x8) - 0x8;
+
+	/* Map [-8, 7] to [0, 15/16]; a stored 0 becomes 0.5. */
+	out_value[0] = (float)(x + 8) / 16.0f;
+	out_value[1] = (float)(y + 8) / 16.0f;
+}
+
+/* Return the position of one sample inside a pixel.
+ *
+ * ctx:          unused
+ * sample_count: 2, 4, 8 or 16 selects the matching sample_locs_* table;
+ *               any other value (including 1) yields (0.5, 0.5)
+ * sample_index: index of the sample; the caller must keep it below
+ *               sample_count
+ * out_value:    receives x in [0] and y in [1]
+ *
+ * 2x and 4x read the first table entry. 8x and 16x keep four samples per
+ * entry, so the entry is (sample_index / 4) * 4 and the slot within it is
+ * sample_index % 4.
+ */
+static void si_get_sample_position(struct pipe_context *ctx, unsigned sample_count,
+				   unsigned sample_index, float *out_value)
+{
+	switch (sample_count) {
+	case 1:
+	default:
+		out_value[0] = out_value[1] = 0.5;
+		break;
+	case 2:
+		si_unpack_sample_loc(sample_locs_2x[0], sample_index, out_value);
+		break;
+	case 4:
+		si_unpack_sample_loc(sample_locs_4x[0], sample_index, out_value);
+		break;
+	case 8:
+		si_unpack_sample_loc(sample_locs_8x[(sample_index / 4) * 4],
+				     sample_index % 4, out_value);
+		break;
+	case 16:
+		si_unpack_sample_loc(sample_locs_16x[(sample_index / 4) * 4],
+				     sample_index % 4, out_value);
+		break;
+	}
+}
+
+/* Program the PA_SC_AA_SAMPLE_LOCS_PIXEL_* context registers for the
+ * given sample count.
+ *
+ * 8x and 16x emit one contiguous register range starting at
+ * PIXEL_X0Y0_0, interleaving the table groups pixel by pixel. 2x and 4x
+ * write one table entry to the first register of each pixel. Any other
+ * count (including 1) writes zero to those four registers.
+ */
+void si_emit_sample_locations(struct radeon_winsys_cs *cs, int nr_samples)
+{
+	/* First sample-location register of each pixel, in emission order. */
+	static const unsigned first_reg[4] = {
+		R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0,
+		R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0,
+		R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0,
+		R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0,
+	};
+	const uint32_t *per_pixel = NULL;
+	unsigned pixel, group;
+
+	if (nr_samples == 16) {
+		/* 4 pixels x 4 groups of samples = 16 dwords. */
+		radeon_set_context_reg_seq(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 16);
+		for (pixel = 0; pixel < 4; pixel++) {
+			for (group = 0; group < 4; group++)
+				radeon_emit(cs, sample_locs_16x[group * 4 + pixel]);
+		}
+		return;
+	}
+
+	if (nr_samples == 8) {
+		/* Two used dwords per pixel, with two zero dwords between
+		 * consecutive pixels: 4 * 2 + 3 * 2 = 14 dwords.
+		 */
+		radeon_set_context_reg_seq(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 14);
+		for (pixel = 0; pixel < 4; pixel++) {
+			radeon_emit(cs, sample_locs_8x[pixel]);
+			radeon_emit(cs, sample_locs_8x[4 + pixel]);
+			if (pixel < 3) {
+				radeon_emit(cs, 0);
+				radeon_emit(cs, 0);
+			}
+		}
+		return;
+	}
+
+	if (nr_samples == 2)
+		per_pixel = sample_locs_2x;
+	else if (nr_samples == 4)
+		per_pixel = sample_locs_4x;
+
+	/* With no table selected, every pixel gets zero. */
+	for (pixel = 0; pixel < 4; pixel++)
+		radeon_set_context_reg(cs, first_reg[pixel],
+				       per_pixel ? per_pixel[pixel] : 0);
+}
+
+/* Install the get_sample_position callback on the context and fill the
+ * sctx->sample_locations_{1,2,4,8,16}x arrays with the position of every
+ * sample for each supported sample count.
+ */
+void si_init_msaa_functions(struct si_context *sctx)
+{
+	struct pipe_context *pipe = &sctx->b.b;
+	unsigned sample;
+
+	pipe->get_sample_position = si_get_sample_position;
+
+	si_get_sample_position(pipe, 1, 0, sctx->sample_locations_1x[0]);
+
+	for (sample = 0; sample < 2; ++sample) {
+		si_get_sample_position(pipe, 2, sample,
+				       sctx->sample_locations_2x[sample]);
+	}
+	for (sample = 0; sample < 4; ++sample) {
+		si_get_sample_position(pipe, 4, sample,
+				       sctx->sample_locations_4x[sample]);
+	}
+	for (sample = 0; sample < 8; ++sample) {
+		si_get_sample_position(pipe, 8, sample,
+				       sctx->sample_locations_8x[sample]);
+	}
+	for (sample = 0; sample < 16; ++sample) {
+		si_get_sample_position(pipe, 16, sample,
+				       sctx->sample_locations_16x[sample]);
+	}
+}
diff -Nru mesa-17.2.4/src/gallium/drivers/radeonsi/si_state_shaders.c mesa-17.3.3/src/gallium/drivers/radeonsi/si_state_shaders.c
--- mesa-17.2.4/src/gallium/drivers/radeonsi/si_state_shaders.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeonsi/si_state_shaders.c 2018-01-18 21:30:28.000000000 +0000
@@ -213,7 +213,7 @@
disk_cache_compute_key(sscreen->b.disk_shader_cache, tgsi_binary,
*((uint32_t *)tgsi_binary), key);
disk_cache_put(sscreen->b.disk_shader_cache, key, hw_binary,
- *((uint32_t *) hw_binary));
+ *((uint32_t *) hw_binary), NULL);
}
return true;
@@ -860,14 +860,10 @@
* not sent again.
*/
if (!gs) {
- unsigned mode = 0;
+ unsigned mode = V_028A40_GS_OFF;
- /* PrimID needs GS scenario A.
- * GFX9 also needs it when ViewportIndex is enabled.
- */
- if (enable_prim_id ||
- (sscreen->b.chip_class >= GFX9 &&
- shader->selector->info.writes_viewport_index))
+ /* PrimID needs GS scenario A. */
+ if (enable_prim_id)
mode = V_028A40_GS_SCENARIO_A;
si_pm4_set_reg(pm4, R_028A40_VGT_GS_MODE, S_028A40_MODE(mode));
@@ -895,7 +891,13 @@
* StepRate0 is set to 1. so that VGPR3 doesn't have to be loaded.
*/
vgpr_comp_cnt = enable_prim_id ? 2 : (shader->info.uses_instanceid ? 1 : 0);
- num_user_sgprs = SI_VS_NUM_USER_SGPR;
+
+ if (info->properties[TGSI_PROPERTY_VS_BLIT_SGPRS]) {
+ num_user_sgprs = SI_SGPR_VS_BLIT_DATA +
+ info->properties[TGSI_PROPERTY_VS_BLIT_SGPRS];
+ } else {
+ num_user_sgprs = SI_VS_NUM_USER_SGPR;
+ }
} else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) {
vgpr_comp_cnt = enable_prim_id ? 3 : 2;
num_user_sgprs = SI_TES_NUM_USER_SGPR;
@@ -1226,10 +1228,13 @@
/* Find out if PS is disabled. */
bool ps_disabled = true;
if (ps) {
+ const struct si_state_blend *blend = sctx->queued.named.blend;
+ bool alpha_to_coverage = blend && blend->alpha_to_coverage;
bool ps_modifies_zs = ps->info.uses_kill ||
ps->info.writes_z ||
ps->info.writes_stencil ||
ps->info.writes_samplemask ||
+ alpha_to_coverage ||
si_get_alpha_test_func(sctx) != PIPE_FUNC_ALWAYS;
unsigned ps_colormask = sctx->framebuffer.colorbuf_enabled_4bit &
@@ -1289,10 +1294,26 @@
si_shader_selector_key_vs(sctx, sctx->vs_shader.cso,
key, &key->part.tcs.ls_prolog);
key->part.tcs.ls = sctx->vs_shader.cso;
+
+ /* When the LS VGPR fix is needed, monolithic shaders
+ * can:
+ * - avoid initializing EXEC in both the LS prolog
+ * and the LS main part when !vs_needs_prolog
+ * - remove the fixup for unused input VGPRs
+ */
+ key->part.tcs.ls_prolog.ls_vgpr_fix = sctx->ls_vgpr_fix;
+
+ /* The LS output / HS input layout can be communicated
+ * directly instead of via user SGPRs for merged LS-HS.
+ * The LS VGPR fix prefers this too.
+ */
+ key->opt.prefer_mono = 1;
}
key->part.tcs.epilog.prim_mode =
sctx->tes_shader.cso->info.properties[TGSI_PROPERTY_TES_PRIM_MODE];
+ key->part.tcs.epilog.invoc0_tess_factors_are_def =
+ sel->tcs_info.tessfactors_are_def_in_all_invocs;
key->part.tcs.epilog.tes_reads_tess_factors =
sctx->tes_shader.cso->info.reads_tess_factors;
@@ -1361,6 +1382,7 @@
sctx->framebuffer.spi_shader_col_format_alpha) |
(~blend->blend_enable_4bit & ~blend->need_src_alpha_4bit &
sctx->framebuffer.spi_shader_col_format);
+ key->part.ps.epilog.spi_shader_col_format &= blend->cb_target_enabled_4bit;
/* The output for dual source blending should have
* the same format as the first output.
@@ -1395,10 +1417,10 @@
}
if (rs) {
- bool is_poly = (sctx->b.current_rast_prim >= PIPE_PRIM_TRIANGLES &&
- sctx->b.current_rast_prim <= PIPE_PRIM_POLYGON) ||
- sctx->b.current_rast_prim >= PIPE_PRIM_TRIANGLES_ADJACENCY;
- bool is_line = !is_poly && sctx->b.current_rast_prim != PIPE_PRIM_POINTS;
+ bool is_poly = (sctx->current_rast_prim >= PIPE_PRIM_TRIANGLES &&
+ sctx->current_rast_prim <= PIPE_PRIM_POLYGON) ||
+ sctx->current_rast_prim >= PIPE_PRIM_TRIANGLES_ADJACENCY;
+ bool is_line = !is_poly && sctx->current_rast_prim != PIPE_PRIM_POINTS;
key->part.ps.prolog.color_two_side = rs->two_side && sel->info.colors_read;
key->part.ps.prolog.flatshade_colors = rs->flatshade && sel->info.colors_read;
@@ -1451,6 +1473,9 @@
sel->info.uses_linear_center +
sel->info.uses_linear_centroid +
sel->info.uses_linear_sample > 1;
+
+ if (sel->info.opcode_count[TGSI_OPCODE_INTERP_SAMPLE])
+ key->mono.u.ps.interpolate_at_sample_force_center = 1;
}
}
@@ -1461,7 +1486,7 @@
assert(0);
}
- if (unlikely(sctx->screen->b.debug_flags & DBG_NO_OPT_VARIANT))
+ if (unlikely(sctx->screen->b.debug_flags & DBG(NO_OPT_VARIANT)))
memset(&key->opt, 0, sizeof(key->opt));
}
@@ -1549,19 +1574,6 @@
return true;
}
-static void si_destroy_shader_selector(struct si_context *sctx,
- struct si_shader_selector *sel);
-
-static void si_shader_selector_reference(struct si_context *sctx,
- struct si_shader_selector **dst,
- struct si_shader_selector *src)
-{
- if (pipe_reference(&(*dst)->reference, &src->reference))
- si_destroy_shader_selector(sctx, *dst);
-
- *dst = src;
-}
-
/* Select the hw shader variant depending on the current state. */
static int si_shader_select_with_key(struct si_screen *sscreen,
struct si_shader_ctx_state *state,
@@ -1755,6 +1767,7 @@
}
static void si_parse_next_shader_property(const struct tgsi_shader_info *info,
+ bool streamout,
struct si_shader_key *key)
{
unsigned next_shader = info->properties[TGSI_PROPERTY_NEXT_SHADER];
@@ -1770,11 +1783,12 @@
key->as_ls = 1;
break;
default:
- /* If POSITION isn't written, it can't be a HW VS.
- * Assume that it's a HW LS. (the next shader is TCS)
+ /* If POSITION isn't written, it can only be a HW VS
+ * if streamout is used. If streamout isn't used,
+ * assume that it's a HW LS. (the next shader is TCS)
* This heuristic is needed for separate shader objects.
*/
- if (!info->writes_position)
+ if (!info->writes_position && !streamout)
key->as_ls = 1;
}
break;
@@ -1792,7 +1806,7 @@
* si_shader_selector initialization. Since it can be done asynchronously,
* there is no way to report compile failures to applications.
*/
-void si_init_shader_selector_async(void *job, int thread_index)
+static void si_init_shader_selector_async(void *job, int thread_index)
{
struct si_shader_selector *sel = (struct si_shader_selector *)job;
struct si_screen *sscreen = sel->screen;
@@ -1815,7 +1829,7 @@
*/
if (!sscreen->use_monolithic_shaders) {
struct si_shader *shader = CALLOC_STRUCT(si_shader);
- void *tgsi_binary;
+ void *tgsi_binary = NULL;
if (!shader) {
fprintf(stderr, "radeonsi: can't allocate a main shader part\n");
@@ -1823,9 +1837,12 @@
}
shader->selector = sel;
- si_parse_next_shader_property(&sel->info, &shader->key);
+ si_parse_next_shader_property(&sel->info,
+ sel->so.num_outputs != 0,
+ &shader->key);
- tgsi_binary = si_get_tgsi_binary(sel);
+ if (sel->tokens)
+ tgsi_binary = si_get_tgsi_binary(sel);
/* Try to load the shader from the shader cache. */
mtx_lock(&sscreen->shader_cache_mutex);
@@ -1899,12 +1916,24 @@
}
/* Pre-compilation. */
- if (sscreen->b.debug_flags & DBG_PRECOMPILE) {
+ if (sscreen->b.debug_flags & DBG(PRECOMPILE) &&
+ /* GFX9 needs LS or ES for compilation, which we don't have here. */
+ (sscreen->b.chip_class <= VI ||
+ (sel->type != PIPE_SHADER_TESS_CTRL &&
+ sel->type != PIPE_SHADER_GEOMETRY))) {
struct si_shader_ctx_state state = {sel};
struct si_shader_key key;
memset(&key, 0, sizeof(key));
- si_parse_next_shader_property(&sel->info, &key);
+ si_parse_next_shader_property(&sel->info,
+ sel->so.num_outputs != 0,
+ &key);
+
+ /* GFX9 doesn't have LS and ES. */
+ if (sscreen->b.chip_class >= GFX9) {
+ key.as_ls = 0;
+ key.as_es = 0;
+ }
/* Set reasonable defaults, so that the shader key doesn't
* cause any code to be eliminated.
@@ -1986,14 +2015,28 @@
sel->compiler_ctx_state.tm = sctx->tm;
sel->compiler_ctx_state.debug = sctx->b.debug;
sel->compiler_ctx_state.is_debug_context = sctx->is_debug;
- sel->tokens = tgsi_dup_tokens(state->tokens);
- if (!sel->tokens) {
- FREE(sel);
- return NULL;
- }
sel->so = state->stream_output;
- tgsi_scan_shader(state->tokens, &sel->info);
+
+ if (state->type == PIPE_SHADER_IR_TGSI) {
+ sel->tokens = tgsi_dup_tokens(state->tokens);
+ if (!sel->tokens) {
+ FREE(sel);
+ return NULL;
+ }
+
+ tgsi_scan_shader(state->tokens, &sel->info);
+ tgsi_scan_tess_ctrl(state->tokens, &sel->info, &sel->tcs_info);
+ } else {
+ assert(state->type == PIPE_SHADER_IR_NIR);
+
+ sel->nir = state->ir.nir;
+
+ si_nir_scan_shader(sel->nir, &sel->info);
+
+ si_lower_nir(sel);
+ }
+
sel->type = sel->info.processor;
p_atomic_inc(&sscreen->b.num_shaders_created);
si_get_active_slot_masks(&sel->info,
@@ -2009,7 +2052,8 @@
/* The prolog is a no-op if there are no inputs. */
sel->vs_needs_prolog = sel->type == PIPE_SHADER_VERTEX &&
- sel->info.num_inputs;
+ sel->info.num_inputs &&
+ !sel->info.properties[TGSI_PROPERTY_VS_BLIT_SGPRS];
/* Set which opcode uses which (i,j) pair. */
if (sel->info.uses_persp_opcode_interp_centroid)
@@ -2194,7 +2238,7 @@
if ((sctx->b.debug.debug_message && !sctx->b.debug.async) ||
sctx->is_debug ||
- r600_can_dump_shader(&sscreen->b, sel->info.processor))
+ si_can_dump_shader(&sscreen->b, sel->info.processor))
si_init_shader_selector_async(sel, -1);
else
util_queue_add_job(&sscreen->shader_compiler_queue, sel,
@@ -2211,9 +2255,9 @@
if (!shader_with_so)
return;
- sctx->b.streamout.enabled_stream_buffers_mask =
+ sctx->streamout.enabled_stream_buffers_mask =
shader_with_so->enabled_streamout_buffer_mask;
- sctx->b.streamout.stride_in_dw = shader_with_so->so.stride;
+ sctx->streamout.stride_in_dw = shader_with_so->so.stride;
}
static void si_update_clip_regs(struct si_context *sctx,
@@ -2265,9 +2309,10 @@
sctx->vs_shader.cso = sel;
sctx->vs_shader.current = sel ? sel->first_variant : NULL;
+ sctx->num_vs_blit_sgprs = sel ? sel->info.properties[TGSI_PROPERTY_VS_BLIT_SGPRS] : 0;
si_update_common_shader_state(sctx);
- r600_update_vs_writes_viewport_index(&sctx->b, si_get_vs_info(sctx));
+ si_update_vs_viewport_state(sctx);
si_set_active_descriptors_for_shader(sctx, sel);
si_update_streamout_state(sctx);
si_update_clip_regs(sctx, old_hw_vs, old_hw_vs_variant,
@@ -2310,7 +2355,7 @@
if (sctx->ia_multi_vgt_param_key.u.uses_tess)
si_update_tess_uses_prim_id(sctx);
}
- r600_update_vs_writes_viewport_index(&sctx->b, si_get_vs_info(sctx));
+ si_update_vs_viewport_state(sctx);
si_set_active_descriptors_for_shader(sctx, sel);
si_update_streamout_state(sctx);
si_update_clip_regs(sctx, old_hw_vs, old_hw_vs_variant,
@@ -2361,7 +2406,7 @@
si_shader_change_notify(sctx);
sctx->last_tes_sh_base = -1; /* invalidate derived tess state */
}
- r600_update_vs_writes_viewport_index(&sctx->b, si_get_vs_info(sctx));
+ si_update_vs_viewport_state(sctx);
si_set_active_descriptors_for_shader(sctx, sel);
si_update_streamout_state(sctx);
si_update_clip_regs(sctx, old_hw_vs, old_hw_vs_variant,
@@ -2389,6 +2434,13 @@
if (!old_sel ||
old_sel->info.colors_written != sel->info.colors_written)
si_mark_atom_dirty(sctx, &sctx->cb_render_state);
+
+ if (sctx->screen->has_out_of_order_rast &&
+ (!old_sel ||
+ old_sel->info.writes_memory != sel->info.writes_memory ||
+ old_sel->info.properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL] !=
+ sel->info.properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL]))
+ si_mark_atom_dirty(sctx, &sctx->msaa_config);
}
si_set_active_descriptors_for_shader(sctx, sel);
}
@@ -2442,8 +2494,8 @@
free(shader);
}
-static void si_destroy_shader_selector(struct si_context *sctx,
- struct si_shader_selector *sel)
+void si_destroy_shader_selector(struct si_context *sctx,
+ struct si_shader_selector *sel)
{
struct si_shader *p = sel->first_variant, *c;
struct si_shader_ctx_state *current_shader[SI_NUM_SHADERS] = {
@@ -2479,6 +2531,7 @@
util_queue_fence_destroy(&sel->ready);
mtx_destroy(&sel->mutex);
free(sel->tokens);
+ ralloc_free(sel->nir);
free(sel);
}
@@ -2670,7 +2723,7 @@
if (update_esgs) {
pipe_resource_reference(&sctx->esgs_ring, NULL);
sctx->esgs_ring =
- r600_aligned_buffer_create(sctx->b.b.screen,
+ si_aligned_buffer_create(sctx->b.b.screen,
R600_RESOURCE_FLAG_UNMAPPABLE,
PIPE_USAGE_DEFAULT,
esgs_ring_size, alignment);
@@ -2681,7 +2734,7 @@
if (update_gsvs) {
pipe_resource_reference(&sctx->gsvs_ring, NULL);
sctx->gsvs_ring =
- r600_aligned_buffer_create(sctx->b.b.screen,
+ si_aligned_buffer_create(sctx->b.b.screen,
R600_RESOURCE_FLAG_UNMAPPABLE,
PIPE_USAGE_DEFAULT,
gsvs_ring_size, alignment);
@@ -2923,7 +2976,7 @@
r600_resource_reference(&sctx->scratch_buffer, NULL);
sctx->scratch_buffer = (struct r600_resource*)
- r600_aligned_buffer_create(&sctx->screen->b.b,
+ si_aligned_buffer_create(&sctx->screen->b.b,
R600_RESOURCE_FLAG_UNMAPPABLE,
PIPE_USAGE_DEFAULT,
scratch_needed_size, 256);
@@ -2981,7 +3034,7 @@
/* Use 64K alignment for both rings, so that we can pass the address
* to shaders as one SGPR containing bits [16:47].
*/
- sctx->tf_ring = r600_aligned_buffer_create(sctx->b.b.screen,
+ sctx->tf_ring = si_aligned_buffer_create(sctx->b.b.screen,
R600_RESOURCE_FLAG_UNMAPPABLE,
PIPE_USAGE_DEFAULT,
32768 * sctx->screen->b.info.max_se,
@@ -2992,7 +3045,7 @@
assert(((sctx->tf_ring->width0 / 4) & C_030938_SIZE) == 0);
sctx->tess_offchip_ring =
- r600_aligned_buffer_create(sctx->b.b.screen,
+ si_aligned_buffer_create(sctx->b.b.screen,
R600_RESOURCE_FLAG_UNMAPPABLE,
PIPE_USAGE_DEFAULT,
max_offchip_buffers *
@@ -3278,6 +3331,8 @@
if (sctx->ps_db_shader_control != db_shader_control) {
sctx->ps_db_shader_control = db_shader_control;
si_mark_atom_dirty(sctx, &sctx->db_render_state);
+ if (sctx->screen->dpbb_allowed)
+ si_mark_atom_dirty(sctx, &sctx->dpbb_state);
}
if (sctx->smoothing_enabled != sctx->ps_shader.current->key.part.ps.epilog.poly_line_smoothing) {
@@ -3292,18 +3347,47 @@
}
}
- if (si_pm4_state_changed(sctx, ls) ||
- si_pm4_state_changed(sctx, hs) ||
- si_pm4_state_changed(sctx, es) ||
- si_pm4_state_changed(sctx, gs) ||
- si_pm4_state_changed(sctx, vs) ||
- si_pm4_state_changed(sctx, ps)) {
+ if (si_pm4_state_enabled_and_changed(sctx, ls) ||
+ si_pm4_state_enabled_and_changed(sctx, hs) ||
+ si_pm4_state_enabled_and_changed(sctx, es) ||
+ si_pm4_state_enabled_and_changed(sctx, gs) ||
+ si_pm4_state_enabled_and_changed(sctx, vs) ||
+ si_pm4_state_enabled_and_changed(sctx, ps)) {
if (!si_update_spi_tmpring_size(sctx))
return false;
}
- if (sctx->b.chip_class >= CIK)
- si_mark_atom_dirty(sctx, &sctx->prefetch_L2);
+ if (sctx->b.chip_class >= CIK) {
+ if (si_pm4_state_enabled_and_changed(sctx, ls))
+ sctx->prefetch_L2_mask |= SI_PREFETCH_LS;
+ else if (!sctx->queued.named.ls)
+ sctx->prefetch_L2_mask &= ~SI_PREFETCH_LS;
+
+ if (si_pm4_state_enabled_and_changed(sctx, hs))
+ sctx->prefetch_L2_mask |= SI_PREFETCH_HS;
+ else if (!sctx->queued.named.hs)
+ sctx->prefetch_L2_mask &= ~SI_PREFETCH_HS;
+
+ if (si_pm4_state_enabled_and_changed(sctx, es))
+ sctx->prefetch_L2_mask |= SI_PREFETCH_ES;
+ else if (!sctx->queued.named.es)
+ sctx->prefetch_L2_mask &= ~SI_PREFETCH_ES;
+
+ if (si_pm4_state_enabled_and_changed(sctx, gs))
+ sctx->prefetch_L2_mask |= SI_PREFETCH_GS;
+ else if (!sctx->queued.named.gs)
+ sctx->prefetch_L2_mask &= ~SI_PREFETCH_GS;
+
+ if (si_pm4_state_enabled_and_changed(sctx, vs))
+ sctx->prefetch_L2_mask |= SI_PREFETCH_VS;
+ else if (!sctx->queued.named.vs)
+ sctx->prefetch_L2_mask &= ~SI_PREFETCH_VS;
+
+ if (si_pm4_state_enabled_and_changed(sctx, ps))
+ sctx->prefetch_L2_mask |= SI_PREFETCH_PS;
+ else if (!sctx->queued.named.ps)
+ sctx->prefetch_L2_mask &= ~SI_PREFETCH_PS;
+ }
sctx->do_update_shaders = false;
return true;
@@ -3324,6 +3408,71 @@
}
}
+/* Return the pass-through vertex shader used for blits with the given
+ * attribute type, building and caching it in sctx on first use.
+ *
+ * type:       selects the cached slot and the TGSI_PROPERTY_VS_BLIT_SGPRS
+ *             value; the two TEXCOORD types share one shader
+ * num_layers: more than 1 selects the layered variant, which also writes
+ *             the instance ID to the LAYER output
+ *
+ * Returns NULL for an unknown type or if the ureg program cannot be
+ * created.
+ */
+void *si_get_blit_vs(struct si_context *sctx, enum blitter_attrib_type type,
+		     unsigned num_layers)
+{
+	const bool layered = num_layers > 1;
+	unsigned num_blit_sgprs;
+	void **cached;
+
+	switch (type) {
+	case UTIL_BLITTER_ATTRIB_NONE:
+		num_blit_sgprs = SI_VS_BLIT_SGPRS_POS;
+		if (layered)
+			cached = &sctx->vs_blit_pos_layered;
+		else
+			cached = &sctx->vs_blit_pos;
+		break;
+	case UTIL_BLITTER_ATTRIB_COLOR:
+		num_blit_sgprs = SI_VS_BLIT_SGPRS_POS_COLOR;
+		if (layered)
+			cached = &sctx->vs_blit_color_layered;
+		else
+			cached = &sctx->vs_blit_color;
+		break;
+	case UTIL_BLITTER_ATTRIB_TEXCOORD_XY:
+	case UTIL_BLITTER_ATTRIB_TEXCOORD_XYZW:
+		assert(num_layers == 1);
+		num_blit_sgprs = SI_VS_BLIT_SGPRS_POS_TEXCOORD;
+		cached = &sctx->vs_blit_texcoord;
+		break;
+	default:
+		assert(0);
+		return NULL;
+	}
+
+	if (*cached == NULL) {
+		struct ureg_program *ureg = ureg_create(PIPE_SHADER_VERTEX);
+		struct ureg_dst pos_out;
+		struct ureg_src pos_in;
+
+		if (!ureg)
+			return NULL;
+
+		/* Tell the shader to load VS inputs from SGPRs: */
+		ureg_property(ureg, TGSI_PROPERTY_VS_BLIT_SGPRS, num_blit_sgprs);
+		ureg_property(ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, true);
+
+		/* This is just a pass-through shader with 1-3 MOV instructions. */
+		pos_out = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0);
+		pos_in = ureg_DECL_vs_input(ureg, 0);
+		ureg_MOV(ureg, pos_out, pos_in);
+
+		if (type != UTIL_BLITTER_ATTRIB_NONE) {
+			struct ureg_dst attr_out =
+				ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 0);
+			struct ureg_src attr_in = ureg_DECL_vs_input(ureg, 1);
+
+			ureg_MOV(ureg, attr_out, attr_in);
+		}
+
+		if (layered) {
+			struct ureg_src instance_id =
+				ureg_DECL_system_value(ureg, TGSI_SEMANTIC_INSTANCEID, 0);
+			struct ureg_dst layer =
+				ureg_DECL_output(ureg, TGSI_SEMANTIC_LAYER, 0);
+
+			ureg_MOV(ureg, ureg_writemask(layer, TGSI_WRITEMASK_X),
+				 ureg_scalar(instance_id, TGSI_SWIZZLE_X));
+		}
+		ureg_END(ureg);
+
+		*cached = ureg_create_shader_and_destroy(ureg, &sctx->b.b);
+	}
+
+	return *cached;
+}
+
void si_init_shader_functions(struct si_context *sctx)
{
si_init_atom(sctx, &sctx->spi_map, &sctx->atoms.s.spi_map, si_emit_spi_map);
diff -Nru mesa-17.2.4/src/gallium/drivers/radeonsi/si_state_streamout.c mesa-17.3.3/src/gallium/drivers/radeonsi/si_state_streamout.c
--- mesa-17.2.4/src/gallium/drivers/radeonsi/si_state_streamout.c 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeonsi/si_state_streamout.c 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,423 @@
+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Marek Olšák
+ *
+ */
+
+#include "si_pipe.h"
+#include "si_state.h"
+#include "sid.h"
+#include "radeon/r600_cs.h"
+
+#include "util/u_memory.h"
+
+static void si_set_streamout_enable(struct si_context *sctx, bool enable);
+
+/* Typed wrapper around pipe_so_target_reference() for pointers to
+ * si_streamout_target.
+ */
+static inline void si_so_target_reference(struct si_streamout_target **dst,
+					  struct pipe_stream_output_target *src)
+{
+	struct pipe_stream_output_target **base_dst =
+		(struct pipe_stream_output_target **)dst;
+
+	pipe_so_target_reference(base_dst, src);
+}
+
+/* Create a stream-output target covering
+ * [buffer_offset, buffer_offset + buffer_size) of the given buffer.
+ *
+ * A 4-byte, 4-byte-aligned slot is taken from
+ * sctx->b.allocator_zeroed_memory for buf_filled_size, and the covered
+ * range is added to the buffer's valid_buffer_range.
+ *
+ * Returns the new target with a reference count of 1, or NULL if either
+ * allocation fails.
+ */
+static struct pipe_stream_output_target *
+si_create_so_target(struct pipe_context *ctx,
+		    struct pipe_resource *buffer,
+		    unsigned buffer_offset,
+		    unsigned buffer_size)
+{
+	struct si_context *sctx = (struct si_context *)ctx;
+	struct r600_resource *rbuffer = (struct r600_resource *)buffer;
+	struct si_streamout_target *target = CALLOC_STRUCT(si_streamout_target);
+
+	if (target == NULL)
+		return NULL;
+
+	u_suballocator_alloc(sctx->b.allocator_zeroed_memory, 4, 4,
+			     &target->buf_filled_size_offset,
+			     (struct pipe_resource **)&target->buf_filled_size);
+	if (target->buf_filled_size == NULL) {
+		FREE(target);
+		return NULL;
+	}
+
+	/* Fill in the generic part of the target. */
+	target->b.reference.count = 1;
+	target->b.context = ctx;
+	pipe_resource_reference(&target->b.buffer, buffer);
+	target->b.buffer_offset = buffer_offset;
+	target->b.buffer_size = buffer_size;
+
+	util_range_add(&rbuffer->valid_buffer_range, buffer_offset,
+		       buffer_offset + buffer_size);
+
+	return &target->b;
+}
+
+/* Destroy a stream-output target: drop its references to the output
+ * buffer and to the buf_filled_size resource, then free the structure.
+ * ctx is unused.
+ */
+static void si_so_target_destroy(struct pipe_context *ctx,
+				 struct pipe_stream_output_target *target)
+{
+	struct si_streamout_target *so_target =
+		(struct si_streamout_target *)target;
+
+	pipe_resource_reference(&so_target->b.buffer, NULL);
+	r600_resource_reference(&so_target->buf_filled_size, NULL);
+
+	FREE(so_target);
+}
+
+/* If any streamout target is enabled, mark the streamout begin atom dirty
+ * and turn streamout on; otherwise do nothing.
+ */
+void si_streamout_buffers_dirty(struct si_context *sctx)
+{
+	if (sctx->streamout.enabled_mask) {
+		si_mark_atom_dirty(sctx, &sctx->streamout.begin_atom);
+		si_set_streamout_enable(sctx, true);
+	}
+}
+
+static void si_set_streamout_targets(struct pipe_context *ctx,
+ unsigned num_targets,
+ struct pipe_stream_output_target **targets,
+ const unsigned *offsets)
+{ /* Replace the bound stream-output targets with targets[0..num_targets-1].
+ * Entries of targets[] may be NULL. An offsets[i] of (unsigned)-1 sets bit i
+ * of streamout.append_bitmask. In order, this flags cache flushes if
+ * streamout had begun on the old targets, ends that streamout, takes
+ * references on the new targets, updates the dirty/enable state, and
+ * rewrites the SI_VS_STREAMOUT_BUF0 + i slots of the RW-buffer descriptors. */
+ struct si_context *sctx = (struct si_context *)ctx;
+ struct si_buffer_resources *buffers = &sctx->rw_buffers;
+ struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_RW_BUFFERS];
+ unsigned old_num_targets = sctx->streamout.num_targets; /* saved: num_targets is overwritten below */
+ unsigned i, bufidx;
+
+ /* We are going to unbind the buffers. Mark which caches need to be flushed. */
+ if (sctx->streamout.num_targets && sctx->streamout.begin_emitted) {
+ /* Since streamout uses vector writes which go through TC L2
+ * and most other clients can use TC L2 as well, we don't need
+ * to flush it.
+ *
+ * The only cases which requires flushing it is VGT DMA index
+ * fetching (on <= CIK) and indirect draw data, which are rare
+ * cases. Thus, flag the TC L2 dirtiness in the resource and
+ * handle it at draw call time.
+ */
+ for (i = 0; i < sctx->streamout.num_targets; i++)
+ if (sctx->streamout.targets[i])
+ r600_resource(sctx->streamout.targets[i]->b.buffer)->TC_L2_dirty = true;
+
+ /* Invalidate the scalar cache in case a streamout buffer is
+ * going to be used as a constant buffer.
+ *
+ * Invalidate TC L1, because streamout bypasses it (done by
+ * setting GLC=1 in the store instruction), but it can contain
+ * outdated data of streamout buffers.
+ *
+ * VS_PARTIAL_FLUSH is required if the buffers are going to be
+ * used as an input immediately.
+ */
+ sctx->b.flags |= SI_CONTEXT_INV_SMEM_L1 |
+ SI_CONTEXT_INV_VMEM_L1 |
+ SI_CONTEXT_VS_PARTIAL_FLUSH;
+ }
+
+ /* All readers of the streamout targets need to be finished before we can
+ * start writing to the targets.
+ */
+ if (num_targets)
+ sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
+ SI_CONTEXT_CS_PARTIAL_FLUSH;
+
+ /* Streamout buffers must be bound in 2 places:
+ * 1) in VGT by setting the VGT_STRMOUT registers
+ * 2) as shader resources
+ */
+
+ /* Stop streamout. */
+ if (sctx->streamout.num_targets && sctx->streamout.begin_emitted)
+ si_emit_streamout_end(sctx);
+
+ /* Set the new targets. */
+ unsigned enabled_mask = 0, append_bitmask = 0;
+ for (i = 0; i < num_targets; i++) {
+ si_so_target_reference(&sctx->streamout.targets[i], targets[i]);
+ if (!targets[i])
+ continue;
+
+ r600_context_add_resource_size(ctx, targets[i]->buffer);
+ enabled_mask |= 1 << i;
+
+ if (offsets[i] == ((unsigned)-1)) /* presumably "append to existing contents" - confirm against the gallium interface */
+ append_bitmask |= 1 << i;
+ }
+
+ for (; i < sctx->streamout.num_targets; i++) /* release old targets beyond the new count */
+ si_so_target_reference(&sctx->streamout.targets[i], NULL);
+
+ sctx->streamout.enabled_mask = enabled_mask;
+ sctx->streamout.num_targets = num_targets;
+ sctx->streamout.append_bitmask = append_bitmask;
+
+ /* Update dirty state bits. */
+ if (num_targets) {
+ si_streamout_buffers_dirty(sctx);
+ } else {
+ si_set_atom_dirty(sctx, &sctx->streamout.begin_atom, false);
+ si_set_streamout_enable(sctx, false);
+ }
+
+ /* Set the shader resources.*/
+ for (i = 0; i < num_targets; i++) {
+ bufidx = SI_VS_STREAMOUT_BUF0 + i;
+
+ if (targets[i]) {
+ struct pipe_resource *buffer = targets[i]->buffer;
+ uint64_t va = r600_resource(buffer)->gpu_address;
+
+ /* Set the descriptor.
+ *
+ * On VI, the format must be non-INVALID, otherwise
+ * the buffer will be considered not bound and store
+ * instructions will be no-ops.
+ */
+ uint32_t *desc = descs->list + bufidx*4; /* four dwords per descriptor */
+ desc[0] = va; /* low 32 bits of the address */
+ desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32);
+ desc[2] = 0xffffffff;
+ desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
+ S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
+ S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+
+ /* Set the resource. */
+ pipe_resource_reference(&buffers->buffers[bufidx],
+ buffer);
+ radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
+ (struct r600_resource*)buffer,
+ buffers->shader_usage,
+ RADEON_PRIO_SHADER_RW_BUFFER,
+ true);
+ r600_resource(buffer)->bind_history |= PIPE_BIND_STREAM_OUTPUT;
+
+ buffers->enabled_mask |= 1u << bufidx;
+ } else {
+ /* Clear the descriptor and unset the resource. */
+ memset(descs->list + bufidx*4, 0,
+ sizeof(uint32_t) * 4);
+ pipe_resource_reference(&buffers->buffers[bufidx],
+ NULL);
+ buffers->enabled_mask &= ~(1u << bufidx);
+ }
+ }
+ for (; i < old_num_targets; i++) { /* slots used by the previous binding only */
+ bufidx = SI_VS_STREAMOUT_BUF0 + i;
+ /* Clear the descriptor and unset the resource. */
+ memset(descs->list + bufidx*4, 0, sizeof(uint32_t) * 4);
+ pipe_resource_reference(&buffers->buffers[bufidx], NULL);
+ buffers->enabled_mask &= ~(1u << bufidx);
+ }
+
+ sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS; /* the descriptor list was rewritten above */
+}
+
+/**
+ * Emit a VGT streamout flush and make the CP wait for its completion.
+ *
+ * CP_STRMOUT_CNTL is written with 0, an SO_VGTSTREAMOUT_FLUSH event is
+ * emitted, and a WAIT_REG_MEM packet then polls the register until the
+ * OFFSET_UPDATE_DONE bit compares equal to the reference value.
+ */
+static void si_flush_vgt_streamout(struct si_context *sctx)
+{
+	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
+	const bool is_uconfig_reg = sctx->b.chip_class >= CIK;
+	/* The register is at different places on different ASICs. */
+	const unsigned cntl_reg = is_uconfig_reg ? R_0300FC_CP_STRMOUT_CNTL
+						 : R_0084FC_CP_STRMOUT_CNTL;
+	const unsigned update_done = S_0084FC_OFFSET_UPDATE_DONE(1);
+
+	if (is_uconfig_reg)
+		radeon_set_uconfig_reg(cs, cntl_reg, 0);
+	else
+		radeon_set_config_reg(cs, cntl_reg, 0);
+
+	radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+	radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH) | EVENT_INDEX(0));
+
+	radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
+	radeon_emit(cs, WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */
+	radeon_emit(cs, cntl_reg >> 2);  /* register */
+	radeon_emit(cs, 0);
+	radeon_emit(cs, update_done); /* reference value */
+	radeon_emit(cs, update_done); /* mask */
+	radeon_emit(cs, 4); /* poll interval */
+}
+
+/**
+ * Atom emit callback that starts streamout on every bound target.
+ *
+ * For each bound target this records the current vertex stride in the
+ * target, programs the VGT buffer size and stride registers, and emits a
+ * STRMOUT_BUFFER_UPDATE packet that either loads the write offset from the
+ * target's filled-size buffer (append) or starts at the target's buffer
+ * offset.
+ *
+ * \param rctx  common context; cast to the enclosing si_context
+ * \param atom  unused
+ */
+static void si_emit_streamout_begin(struct r600_common_context *rctx, struct r600_atom *atom)
+{
+	struct si_context *sctx = (struct si_context*)rctx;
+	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
+	struct si_streamout_target **targets = sctx->streamout.targets;
+	uint16_t *strides = sctx->streamout.stride_in_dw;
+	unsigned slot;
+
+	si_flush_vgt_streamout(sctx);
+
+	for (slot = 0; slot < sctx->streamout.num_targets; slot++) {
+		struct si_streamout_target *target = targets[slot];
+		unsigned offset_source;
+		bool append;
+
+		if (!target)
+			continue;
+
+		target->stride_in_dw = strides[slot];
+
+		/* SI binds streamout buffers as shader resources.
+		 * VGT only counts primitives and tells the shader
+		 * through SGPRs what to do. */
+		radeon_set_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*slot, 2);
+		radeon_emit(cs, (target->b.buffer_offset +
+				 target->b.buffer_size) >> 2); /* BUFFER_SIZE (in DW) */
+		radeon_emit(cs, strides[slot]);               /* VTX_STRIDE (in DW) */
+
+		/* Appending requires both the request bit and a stored filled size. */
+		append = (sctx->streamout.append_bitmask & (1 << slot)) &&
+			 target->buf_filled_size_valid;
+		offset_source = append ? STRMOUT_OFFSET_FROM_MEM
+				       : STRMOUT_OFFSET_FROM_PACKET;
+
+		/* Header, control and the two unused dwords are common to both cases. */
+		radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
+		radeon_emit(cs, STRMOUT_SELECT_BUFFER(slot) |
+				STRMOUT_OFFSET_SOURCE(offset_source)); /* control */
+		radeon_emit(cs, 0); /* unused */
+		radeon_emit(cs, 0); /* unused */
+
+		if (append) {
+			/* Append. */
+			uint64_t src_va = target->buf_filled_size->gpu_address +
+					  target->buf_filled_size_offset;
+
+			radeon_emit(cs, src_va);       /* src address lo */
+			radeon_emit(cs, src_va >> 32); /* src address hi */
+
+			radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
+						  target->buf_filled_size,
+						  RADEON_USAGE_READ,
+						  RADEON_PRIO_SO_FILLED_SIZE);
+		} else {
+			/* Start from the beginning. */
+			radeon_emit(cs, target->b.buffer_offset >> 2); /* buffer offset in DW */
+			radeon_emit(cs, 0); /* unused */
+		}
+	}
+
+	sctx->streamout.begin_emitted = true;
+}
+
+/**
+ * Stop streamout on every bound target and store each filled size.
+ *
+ * After flushing VGT, one STRMOUT_BUFFER_UPDATE packet per bound target
+ * stores the buffer's filled size at the target's buf_filled_size address.
+ * The VGT buffer size register of the target is then zeroed and the stored
+ * size is flagged as valid. Finally begin_emitted is cleared and a
+ * streamout flush is requested through the context flags.
+ */
+void si_emit_streamout_end(struct si_context *sctx)
+{
+	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
+	struct si_streamout_target **targets = sctx->streamout.targets;
+	unsigned slot;
+
+	si_flush_vgt_streamout(sctx);
+
+	for (slot = 0; slot < sctx->streamout.num_targets; slot++) {
+		struct si_streamout_target *target = targets[slot];
+		uint64_t dst_va;
+
+		if (!target)
+			continue;
+
+		dst_va = target->buf_filled_size->gpu_address +
+			 target->buf_filled_size_offset;
+
+		radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
+		radeon_emit(cs, STRMOUT_SELECT_BUFFER(slot) |
+				STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE) |
+				STRMOUT_STORE_BUFFER_FILLED_SIZE); /* control */
+		radeon_emit(cs, dst_va);       /* dst address lo */
+		radeon_emit(cs, dst_va >> 32); /* dst address hi */
+		radeon_emit(cs, 0); /* unused */
+		radeon_emit(cs, 0); /* unused */
+
+		radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
+					  target->buf_filled_size,
+					  RADEON_USAGE_WRITE,
+					  RADEON_PRIO_SO_FILLED_SIZE);
+
+		/* Zero the buffer size. The counters (primitives generated,
+		 * primitives emitted) may be enabled even if there is no
+		 * buffer bound. This ensures that the primitives-emitted query
+		 * won't increment. */
+		radeon_set_context_reg(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*slot, 0);
+
+		target->buf_filled_size_valid = true;
+	}
+
+	sctx->streamout.begin_emitted = false;
+	sctx->b.flags |= R600_CONTEXT_STREAMOUT_FLUSH;
+}
+
+/* STREAMOUT CONFIG DERIVED STATE
+ *
+ * Streamout must be enabled for the PRIMITIVES_GENERATED query to work.
+ * The buffer mask is an independent state, so no writes occur if there
+ * are no buffers bound.
+ */
+
+/**
+ * Atom emit callback writing VGT_STRMOUT_CONFIG and the register after it.
+ *
+ * All four STREAMOUT_n_EN bits carry the value of si_get_strmout_en(); the
+ * second dword is hw_enabled_mask restricted to enabled_stream_buffers_mask.
+ *
+ * \param rctx  common context; cast to the enclosing si_context
+ * \param atom  unused
+ */
+static void si_emit_streamout_enable(struct r600_common_context *rctx,
+				     struct r600_atom *atom)
+{
+	struct si_context *sctx = (struct si_context*)rctx;
+	bool strmout_en;
+
+	radeon_set_context_reg_seq(sctx->b.gfx.cs, R_028B94_VGT_STRMOUT_CONFIG, 2);
+
+	/* NOTE(review): assumes si_get_strmout_en() is a side-effect-free
+	 * getter, so querying it once is the same as querying it per field. */
+	strmout_en = si_get_strmout_en(sctx);
+
+	radeon_emit(sctx->b.gfx.cs,
+		    S_028B94_STREAMOUT_0_EN(strmout_en) |
+		    S_028B94_RAST_STREAM(0) |
+		    S_028B94_STREAMOUT_1_EN(strmout_en) |
+		    S_028B94_STREAMOUT_2_EN(strmout_en) |
+		    S_028B94_STREAMOUT_3_EN(strmout_en));
+	radeon_emit(sctx->b.gfx.cs,
+		    sctx->streamout.hw_enabled_mask &
+		    sctx->streamout.enabled_stream_buffers_mask);
+}
+
+/**
+ * Record the streamout enable flag and rebuild the hardware buffer mask.
+ *
+ * hw_enabled_mask is enabled_mask replicated at bit offsets 0, 4, 8 and 12.
+ * The enable atom is marked dirty only when the value reported by
+ * si_get_strmout_en() or the hardware mask actually changed.
+ *
+ * \param enable  new value for streamout.streamout_enabled
+ */
+static void si_set_streamout_enable(struct si_context *sctx, bool enable)
+{
+	const bool was_enabled = si_get_strmout_en(sctx);
+	const unsigned prev_hw_mask = sctx->streamout.hw_enabled_mask;
+	const unsigned target_mask = sctx->streamout.enabled_mask;
+
+	sctx->streamout.streamout_enabled = enable;
+	sctx->streamout.hw_enabled_mask = target_mask |
+					  (target_mask << 4) |
+					  (target_mask << 8) |
+					  (target_mask << 12);
+
+	if (was_enabled != si_get_strmout_en(sctx) ||
+	    prev_hw_mask != sctx->streamout.hw_enabled_mask)
+		si_mark_atom_dirty(sctx, &sctx->streamout.enable_atom);
+}
+
+/**
+ * Adjust the count of active PRIMITIVES_GENERATED queries.
+ *
+ * The enable atom is marked dirty when the adjustment changes the value
+ * reported by si_get_strmout_en().
+ *
+ * \param type  query type; any type other than
+ *              PIPE_QUERY_PRIMITIVES_GENERATED is ignored
+ * \param diff  signed amount added to the active-query counter
+ */
+void si_update_prims_generated_query_state(struct si_context *sctx,
+					   unsigned type, int diff)
+{
+	bool was_enabled;
+
+	if (type != PIPE_QUERY_PRIMITIVES_GENERATED)
+		return;
+
+	was_enabled = si_get_strmout_en(sctx);
+
+	sctx->streamout.num_prims_gen_queries += diff;
+	assert(sctx->streamout.num_prims_gen_queries >= 0);
+
+	sctx->streamout.prims_gen_query_enabled =
+		sctx->streamout.num_prims_gen_queries != 0;
+
+	if (si_get_strmout_en(sctx) != was_enabled)
+		si_mark_atom_dirty(sctx, &sctx->streamout.enable_atom);
+}
+
+/**
+ * Install the streamout atom emit callbacks and the stream-output-target
+ * context hooks on \p sctx.
+ */
+void si_init_streamout_functions(struct si_context *sctx)
+{
+	/* State atoms. */
+	sctx->streamout.enable_atom.emit = si_emit_streamout_enable;
+	sctx->streamout.begin_atom.emit = si_emit_streamout_begin;
+
+	/* Context entry points. */
+	sctx->b.b.set_stream_output_targets = si_set_streamout_targets;
+	sctx->b.b.stream_output_target_destroy = si_so_target_destroy;
+	sctx->b.b.create_stream_output_target = si_create_so_target;
+}
diff -Nru mesa-17.2.4/src/gallium/drivers/radeonsi/si_state_viewport.c mesa-17.3.3/src/gallium/drivers/radeonsi/si_state_viewport.c
--- mesa-17.2.4/src/gallium/drivers/radeonsi/si_state_viewport.c 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeonsi/si_state_viewport.c 2018-01-18 21:30:28.000000000 +0000
@@ -0,0 +1,445 @@
+/*
+ * Copyright 2012 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "si_pipe.h"
+#include "sid.h"
+#include "radeon/r600_cs.h"
+#include "util/u_viewport.h"
+#include "tgsi/tgsi_scan.h"
+
+#define SI_MAX_SCISSOR 16384
+
+/**
+ * Store a range of scissor rectangles (installed as the context's
+ * set_scissor_states hook).
+ *
+ * The rectangles are always copied into ctx->scissors.states. The scissor
+ * atom is only marked dirty when a rasterizer state with scissor_enable
+ * set is currently queued.
+ *
+ * \param pctx          context; cast to si_context
+ * \param start_slot    first slot to overwrite
+ * \param num_scissors  number of consecutive slots to overwrite
+ * \param state         array of num_scissors rectangles
+ */
+static void si_set_scissor_states(struct pipe_context *pctx,
+				  unsigned start_slot,
+				  unsigned num_scissors,
+				  const struct pipe_scissor_state *state)
+{
+	struct si_context *ctx = (struct si_context *)pctx;
+	unsigned i; /* unsigned to match num_scissors */
+
+	for (i = 0; i < num_scissors; i++)
+		ctx->scissors.states[start_slot + i] = state[i];
+
+	if (!ctx->queued.named.rasterizer ||
+	    !ctx->queued.named.rasterizer->scissor_enable)
+		return;
+
+	/* Build the slot mask with unsigned arithmetic. */
+	ctx->scissors.dirty_mask |= ((1u << num_scissors) - 1) << start_slot;
+	si_mark_atom_dirty(ctx, &ctx->scissors.atom);
+}
+
+/**
+ * Compute the window-space bounding rectangle of a viewport.
+ *
+ * Since the guard band disables clipping, we have to clip per-pixel
+ * using a scissor.
+ *
+ * \param ctx      unused
+ * \param vp       viewport transformation (scale and translate)
+ * \param scissor  receives the rectangle; the min edges are converted to
+ *                 integer directly, the max edges are rounded up
+ */
+static void si_get_scissor_from_viewport(struct si_context *ctx,
+					 const struct pipe_viewport_state *vp,
+					 struct si_signed_scissor *scissor)
+{
+	/* Convert (-1, -1) and (1, 1) from clip space into window space. */
+	const float x_at_neg1 = -vp->scale[0] + vp->translate[0];
+	const float y_at_neg1 = -vp->scale[1] + vp->translate[1];
+	const float x_at_pos1 = vp->scale[0] + vp->translate[0];
+	const float y_at_pos1 = vp->scale[1] + vp->translate[1];
+
+	/* Handle inverted viewports: the edges trade places when the
+	 * (-1) edge lies beyond the (+1) edge. */
+	const bool inverted_x = x_at_neg1 > x_at_pos1;
+	const bool inverted_y = y_at_neg1 > y_at_pos1;
+
+	/* Convert to integer and round up the max bounds. */
+	scissor->minx = inverted_x ? x_at_pos1 : x_at_neg1;
+	scissor->miny = inverted_y ? y_at_pos1 : y_at_neg1;
+	scissor->maxx = ceilf(inverted_x ? x_at_neg1 : x_at_pos1);
+	scissor->maxy = ceilf(inverted_y ? y_at_neg1 : y_at_pos1);
+}
+
+/**
+ * Convert a signed scissor rectangle into a pipe_scissor_state by clamping
+ * every edge to the range [0, SI_MAX_SCISSOR].
+ *
+ * \param ctx      unused
+ * \param out      receives the clamped rectangle
+ * \param scissor  signed input rectangle
+ */
+static void si_clamp_scissor(struct si_context *ctx,
+			     struct pipe_scissor_state *out,
+			     struct si_signed_scissor *scissor)
+{
+	const int lowest = 0;
+	const int highest = SI_MAX_SCISSOR;
+
+	out->minx = CLAMP(scissor->minx, lowest, highest);
+	out->miny = CLAMP(scissor->miny, lowest, highest);
+	out->maxx = CLAMP(scissor->maxx, lowest, highest);
+	out->maxy = CLAMP(scissor->maxy, lowest, highest);
+}
+
+/**
+ * Restrict \p out in place so that it does not extend beyond \p clip:
+ * the min corner is raised and the max corner is lowered where needed.
+ */
+static void si_clip_scissor(struct pipe_scissor_state *out,
+			    struct pipe_scissor_state *clip)
+{
+	if (clip->minx > out->minx)
+		out->minx = clip->minx;
+	if (clip->miny > out->miny)
+		out->miny = clip->miny;
+	if (clip->maxx < out->maxx)
+		out->maxx = clip->maxx;
+	if (clip->maxy < out->maxy)
+		out->maxy = clip->maxy;
+}
+
+/**
+ * Grow \p out in place to the bounding rectangle of \p out and \p in.
+ */
+static void si_scissor_make_union(struct si_signed_scissor *out,
+				  struct si_signed_scissor *in)
+{
+	if (in->minx < out->minx)
+		out->minx = in->minx;
+	if (in->miny < out->miny)
+		out->miny = in->miny;
+	if (in->maxx > out->maxx)
+		out->maxx = in->maxx;
+	if (in->maxy > out->maxy)
+		out->maxy = in->maxy;
+}
+
+/**
+ * Emit the TL/BR dword pair of one viewport scissor.
+ *
+ * The rectangle starts as the clamped viewport rectangle, or as the full
+ * [0, SI_MAX_SCISSOR] square when the vertex shader disables clipping and
+ * the viewport, and is then restricted by the user scissor if one is given.
+ * The caller must have emitted the register header beforehand.
+ *
+ * \param vp_scissor  viewport expressed as a signed rectangle
+ * \param scissor     user scissor rectangle, or NULL for none
+ */
+static void si_emit_one_scissor(struct si_context *ctx,
+				struct radeon_winsys_cs *cs,
+				struct si_signed_scissor *vp_scissor,
+				struct pipe_scissor_state *scissor)
+{
+	struct pipe_scissor_state rect;
+
+	if (!ctx->vs_disables_clipping_viewport) {
+		si_clamp_scissor(ctx, &rect, vp_scissor);
+	} else {
+		rect.minx = 0;
+		rect.miny = 0;
+		rect.maxx = SI_MAX_SCISSOR;
+		rect.maxy = SI_MAX_SCISSOR;
+	}
+
+	if (scissor)
+		si_clip_scissor(&rect, scissor);
+
+	radeon_emit(cs, S_028250_TL_X(rect.minx) |
+			S_028250_TL_Y(rect.miny) |
+			S_028250_WINDOW_OFFSET_DISABLE(1));
+	radeon_emit(cs, S_028254_BR_X(rect.maxx) |
+			S_028254_BR_Y(rect.maxy));
+}
+
+/* the range is [-MAX, MAX] */
+#define GET_MAX_VIEWPORT_RANGE(rctx) (32768)
+
+/**
+ * Compute and emit the four PA_CL_GB_* guard-band registers.
+ *
+ * \param ctx            context whose gfx command stream receives the
+ *                       registers; also supplies the current primitive
+ *                       type and the queued rasterizer state
+ * \param vp_as_scissor  viewport (or union of viewports) expressed as a
+ *                       signed window-space rectangle
+ */
+static void si_emit_guardband(struct si_context *ctx,
+ struct si_signed_scissor *vp_as_scissor)
+{
+ struct radeon_winsys_cs *cs = ctx->b.gfx.cs;
+ struct pipe_viewport_state vp;
+ float left, top, right, bottom, max_range, guardband_x, guardband_y;
+ float discard_x, discard_y;
+
+ /* Reconstruct the viewport transformation from the scissor. */
+ /* translate is the rectangle's center, scale is half of its extent. */
+ vp.translate[0] = (vp_as_scissor->minx + vp_as_scissor->maxx) / 2.0;
+ vp.translate[1] = (vp_as_scissor->miny + vp_as_scissor->maxy) / 2.0;
+ vp.scale[0] = vp_as_scissor->maxx - vp.translate[0];
+ vp.scale[1] = vp_as_scissor->maxy - vp.translate[1];
+
+ /* Treat a 0x0 viewport as 1x1 to prevent division by zero. */
+ if (vp_as_scissor->minx == vp_as_scissor->maxx)
+ vp.scale[0] = 0.5;
+ if (vp_as_scissor->miny == vp_as_scissor->maxy)
+ vp.scale[1] = 0.5;
+
+ /* Find the biggest guard band that is inside the supported viewport
+ * range. The guard band is specified as a horizontal and vertical
+ * distance from (0,0) in clip space.
+ *
+ * This is done by applying the inverse viewport transformation
+ * on the viewport limits to get those limits in clip space.
+ *
+ * Use a limit one pixel smaller to allow for some precision error.
+ */
+ max_range = GET_MAX_VIEWPORT_RANGE(ctx) - 1;
+ left = (-max_range - vp.translate[0]) / vp.scale[0];
+ right = ( max_range - vp.translate[0]) / vp.scale[0];
+ top = (-max_range - vp.translate[1]) / vp.scale[1];
+ bottom = ( max_range - vp.translate[1]) / vp.scale[1];
+
+ /* The clip-space square [-1, 1] itself has to lie within the limits. */
+ assert(left <= -1 && top <= -1 && right >= 1 && bottom >= 1);
+
+ /* The guard band is symmetric, so the nearer limit on each axis wins. */
+ guardband_x = MIN2(-left, right);
+ guardband_y = MIN2(-top, bottom);
+
+ /* Default discard distance; only widened below for points and lines. */
+ discard_x = 1.0;
+ discard_y = 1.0;
+
+ if (unlikely(ctx->current_rast_prim < PIPE_PRIM_TRIANGLES) &&
+ ctx->queued.named.rasterizer) {
+ /* When rendering wide points or lines, we need to be more
+ * conservative about when to discard them entirely. */
+ const struct si_state_rasterizer *rs = ctx->queued.named.rasterizer;
+ float pixels;
+
+ if (ctx->current_rast_prim == PIPE_PRIM_POINTS)
+ pixels = rs->max_point_size;
+ else
+ pixels = rs->line_width;
+
+ /* Add half the point size / line width */
+ /* Dividing by the scale converts the pixel count to clip-space units. */
+ discard_x += pixels / (2.0 * vp.scale[0]);
+ discard_y += pixels / (2.0 * vp.scale[1]);
+
+ /* Discard primitives that would lie entirely outside the clip
+ * region. */
+ /* The discard distance never exceeds the guard band. */
+ discard_x = MIN2(discard_x, guardband_x);
+ discard_y = MIN2(discard_y, guardband_y);
+ }
+
+ /* If any of the GB registers is updated, all of them must be updated. */
+ radeon_set_context_reg_seq(cs, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 4);
+
+ radeon_emit(cs, fui(guardband_y)); /* R_028BE8_PA_CL_GB_VERT_CLIP_ADJ */
+ radeon_emit(cs, fui(discard_y)); /* R_028BEC_PA_CL_GB_VERT_DISC_ADJ */
+ radeon_emit(cs, fui(guardband_x)); /* R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ */
+ radeon_emit(cs, fui(discard_x)); /* R_028BF4_PA_CL_GB_HORZ_DISC_ADJ */
+}
+
+/**
+ * Atom emit callback for the viewport scissors and the guard band.
+ *
+ * When no shader writes the viewport index, only slot 0 is emitted (and
+ * only if its dirty bit is set); the other dirty bits are kept for later.
+ * Otherwise every dirty slot is emitted in consecutive runs and the guard
+ * band is computed from the union of all viewport rectangles.
+ *
+ * \param rctx  common context; cast to the enclosing si_context
+ * \param atom  unused
+ */
+static void si_emit_scissors(struct r600_common_context *rctx, struct r600_atom *atom)
+{
+	struct si_context *ctx = (struct si_context *)rctx;
+	struct radeon_winsys_cs *cs = ctx->b.gfx.cs;
+	struct pipe_scissor_state *states = ctx->scissors.states;
+	unsigned mask = ctx->scissors.dirty_mask;
+	bool scissor_enabled = false;
+	struct si_signed_scissor max_vp_scissor;
+	int i;
+
+	if (ctx->queued.named.rasterizer)
+		scissor_enabled = ctx->queued.named.rasterizer->scissor_enable;
+
+	/* The simple case: Only 1 viewport is active. */
+	if (!ctx->vs_writes_viewport_index) {
+		struct si_signed_scissor *vp = &ctx->viewports.as_scissor[0];
+
+		if (!(mask & 1))
+			return;
+
+		radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 2);
+		si_emit_one_scissor(ctx, cs, vp, scissor_enabled ? &states[0] : NULL);
+		si_emit_guardband(ctx, vp);
+		ctx->scissors.dirty_mask &= ~1; /* clear one bit */
+		return;
+	}
+
+	/* Shaders can draw to any viewport. Make a union of all viewports. */
+	max_vp_scissor = ctx->viewports.as_scissor[0];
+	for (i = 1; i < SI_MAX_VIEWPORTS; i++)
+		si_scissor_make_union(&max_vp_scissor,
+				      &ctx->viewports.as_scissor[i]);
+
+	while (mask) {
+		/* The function-scope index is reused here instead of
+		 * declaring a second, shadowing one. */
+		int start, count;
+
+		u_bit_scan_consecutive_range(&mask, &start, &count);
+
+		radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL +
+					       start * 4 * 2, count * 2);
+		for (i = start; i < start+count; i++) {
+			si_emit_one_scissor(ctx, cs, &ctx->viewports.as_scissor[i],
+					    scissor_enabled ? &states[i] : NULL);
+		}
+	}
+	si_emit_guardband(ctx, &max_vp_scissor);
+	ctx->scissors.dirty_mask = 0;
+}
+
+/**
+ * Store a range of viewports (installed as the context's
+ * set_viewport_states hook).
+ *
+ * Each viewport is copied into ctx->viewports.states and also converted to
+ * a signed scissor rectangle. The affected slots are flagged in the
+ * viewport, depth-range and scissor dirty masks, and both atoms are marked
+ * dirty.
+ *
+ * \param pctx           context; cast to si_context
+ * \param start_slot     first slot to overwrite
+ * \param num_viewports  number of consecutive slots to overwrite
+ * \param state          array of num_viewports viewports
+ */
+static void si_set_viewport_states(struct pipe_context *pctx,
+				   unsigned start_slot,
+				   unsigned num_viewports,
+				   const struct pipe_viewport_state *state)
+{
+	struct si_context *ctx = (struct si_context *)pctx;
+	unsigned mask;
+	unsigned i; /* unsigned to match num_viewports */
+
+	for (i = 0; i < num_viewports; i++) {
+		unsigned index = start_slot + i;
+
+		ctx->viewports.states[index] = state[i];
+		si_get_scissor_from_viewport(ctx, &state[i],
+					     &ctx->viewports.as_scissor[index]);
+	}
+
+	/* Build the slot mask with unsigned arithmetic. */
+	mask = ((1u << num_viewports) - 1) << start_slot;
+	ctx->viewports.dirty_mask |= mask;
+	ctx->viewports.depth_range_dirty_mask |= mask;
+	ctx->scissors.dirty_mask |= mask;
+	si_mark_atom_dirty(ctx, &ctx->viewports.atom);
+	si_mark_atom_dirty(ctx, &ctx->scissors.atom);
+}
+
+/**
+ * Emit the six transform dwords of one viewport: the scale followed by the
+ * translate of component 0, then of component 1, then of component 2.
+ * The caller must have emitted the register header beforehand.
+ */
+static void si_emit_one_viewport(struct si_context *ctx,
+				 struct pipe_viewport_state *state)
+{
+	struct radeon_winsys_cs *cs = ctx->b.gfx.cs;
+	unsigned axis;
+
+	for (axis = 0; axis < 3; axis++) {
+		radeon_emit(cs, fui(state->scale[axis]));
+		radeon_emit(cs, fui(state->translate[axis]));
+	}
+}
+
+/**
+ * Emit the transform registers of all dirty viewports.
+ *
+ * When no shader writes the viewport index, only slot 0 is emitted (if
+ * dirty) and the remaining dirty bits are kept; otherwise every dirty slot
+ * is emitted in consecutive runs and the dirty mask is cleared.
+ */
+static void si_emit_viewports(struct si_context *ctx)
+{
+	struct radeon_winsys_cs *cs = ctx->b.gfx.cs;
+	struct pipe_viewport_state *vps = ctx->viewports.states;
+	unsigned pending = ctx->viewports.dirty_mask;
+
+	if (!ctx->vs_writes_viewport_index) {
+		/* The simple case: Only 1 viewport is active. */
+		if (pending & 1) {
+			radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE, 6);
+			si_emit_one_viewport(ctx, &vps[0]);
+			ctx->viewports.dirty_mask &= ~1; /* clear one bit */
+		}
+		return;
+	}
+
+	while (pending) {
+		int first, num, slot;
+
+		u_bit_scan_consecutive_range(&pending, &first, &num);
+
+		/* 6 dwords per viewport, 4 bytes per dword. */
+		radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE +
+					       first * 4 * 6, num * 6);
+		for (slot = first; slot < first+num; slot++)
+			si_emit_one_viewport(ctx, &vps[slot]);
+	}
+	ctx->viewports.dirty_mask = 0;
+}
+
+/**
+ * Compute the depth range of a viewport.
+ *
+ * With \p window_space_position set the range is fixed to [0, 1];
+ * otherwise it is whatever util_viewport_zmin_zmax() derives from \p vp
+ * and \p halfz.
+ */
+static inline void
+si_viewport_zmin_zmax(const struct pipe_viewport_state *vp, bool halfz,
+		      bool window_space_position, float *zmin, float *zmax)
+{
+	if (!window_space_position) {
+		util_viewport_zmin_zmax(vp, halfz, zmin, zmax);
+	} else {
+		*zmin = 0;
+		*zmax = 1;
+	}
+}
+
+/**
+ * Emit the ZMIN/ZMAX registers of all viewports with a dirty depth range.
+ *
+ * When no shader writes the viewport index, only slot 0 is emitted (if
+ * dirty) and the remaining dirty bits are kept; otherwise every dirty slot
+ * is emitted in consecutive runs and the dirty mask is cleared.
+ */
+static void si_emit_depth_ranges(struct si_context *ctx)
+{
+	struct radeon_winsys_cs *cs = ctx->b.gfx.cs;
+	struct pipe_viewport_state *vps = ctx->viewports.states;
+	unsigned pending = ctx->viewports.depth_range_dirty_mask;
+	const bool window_space = ctx->vs_disables_clipping_viewport;
+	/* Without a queued rasterizer state, clip_halfz counts as off. */
+	const bool halfz = ctx->queued.named.rasterizer &&
+			   ctx->queued.named.rasterizer->clip_halfz;
+	float zmin, zmax;
+
+	if (!ctx->vs_writes_viewport_index) {
+		/* The simple case: Only 1 viewport is active. */
+		if (pending & 1) {
+			si_viewport_zmin_zmax(&vps[0], halfz, window_space,
+					      &zmin, &zmax);
+
+			radeon_set_context_reg_seq(cs, R_0282D0_PA_SC_VPORT_ZMIN_0, 2);
+			radeon_emit(cs, fui(zmin));
+			radeon_emit(cs, fui(zmax));
+			ctx->viewports.depth_range_dirty_mask &= ~1; /* clear one bit */
+		}
+		return;
+	}
+
+	while (pending) {
+		int first, num, slot;
+
+		u_bit_scan_consecutive_range(&pending, &first, &num);
+
+		/* 2 dwords per viewport, 4 bytes per dword. */
+		radeon_set_context_reg_seq(cs, R_0282D0_PA_SC_VPORT_ZMIN_0 +
+					       first * 4 * 2, num * 2);
+		for (slot = first; slot < first+num; slot++) {
+			si_viewport_zmin_zmax(&vps[slot], halfz, window_space,
+					      &zmin, &zmax);
+			radeon_emit(cs, fui(zmin));
+			radeon_emit(cs, fui(zmax));
+		}
+	}
+	ctx->viewports.depth_range_dirty_mask = 0;
+}
+
+/**
+ * Atom emit callback for the viewport state.
+ *
+ * \param rctx  common context; cast to the enclosing si_context
+ * \param atom  unused
+ */
+static void si_emit_viewport_states(struct r600_common_context *rctx,
+				    struct r600_atom *atom)
+{
+	struct si_context *sctx = (struct si_context *)rctx;
+
+	/* Transforms first, then the depth ranges. */
+	si_emit_viewports(sctx);
+	si_emit_depth_ranges(sctx);
+}
+
+/**
+ * This reacts to 2 state changes:
+ * - VS.writes_viewport_index
+ * - VS output position in window space (enable/disable)
+ *
+ * Normally, we only emit 1 viewport and 1 scissor if no shader is using
+ * the VIEWPORT_INDEX output, and emitting the other viewports and scissors
+ * is delayed. When a shader with VIEWPORT_INDEX appears, this should be
+ * called to emit the rest.
+ *
+ * Nothing happens when si_get_vs_info() returns NULL.
+ */
+void si_update_vs_viewport_state(struct si_context *ctx)
+{
+	struct tgsi_shader_info *vs_info = si_get_vs_info(ctx);
+	const unsigned all_slots = (1 << SI_MAX_VIEWPORTS) - 1;
+	bool window_space;
+
+	if (!vs_info)
+		return;
+
+	/* When the VS disables clipping and viewport transformation. */
+	window_space =
+		vs_info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
+
+	if (window_space != ctx->vs_disables_clipping_viewport) {
+		/* Scissors and depth ranges both depend on this flag, so
+		 * every slot has to be emitted again. */
+		ctx->vs_disables_clipping_viewport = window_space;
+		ctx->scissors.dirty_mask = all_slots;
+		ctx->viewports.depth_range_dirty_mask = all_slots;
+		si_mark_atom_dirty(ctx, &ctx->scissors.atom);
+		si_mark_atom_dirty(ctx, &ctx->viewports.atom);
+	}
+
+	/* Viewport index handling. */
+	ctx->vs_writes_viewport_index = vs_info->writes_viewport_index;
+	if (ctx->vs_writes_viewport_index) {
+		/* Emit whatever was delayed while only slot 0 was in use. */
+		if (ctx->scissors.dirty_mask)
+			si_mark_atom_dirty(ctx, &ctx->scissors.atom);
+
+		if (ctx->viewports.dirty_mask ||
+		    ctx->viewports.depth_range_dirty_mask)
+			si_mark_atom_dirty(ctx, &ctx->viewports.atom);
+	}
+}
+
+/**
+ * Install the viewport/scissor atom emit callbacks and the corresponding
+ * context hooks on \p ctx.
+ */
+void si_init_viewport_functions(struct si_context *ctx)
+{
+	/* Context entry points. */
+	ctx->b.b.set_viewport_states = si_set_viewport_states;
+	ctx->b.b.set_scissor_states = si_set_scissor_states;
+
+	/* State atoms. */
+	ctx->viewports.atom.emit = si_emit_viewport_states;
+	ctx->scissors.atom.emit = si_emit_scissors;
+}
diff -Nru mesa-17.2.4/src/gallium/drivers/radeonsi/si_uvd.c mesa-17.3.3/src/gallium/drivers/radeonsi/si_uvd.c
--- mesa-17.2.4/src/gallium/drivers/radeonsi/si_uvd.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/radeonsi/si_uvd.c 2018-01-18 21:30:28.000000000 +0000
@@ -48,7 +48,7 @@
struct radeon_surf *surfaces[VL_NUM_COMPONENTS] = {};
struct pb_buffer **pbs[VL_NUM_COMPONENTS] = {};
const enum pipe_format *resource_formats;
- struct pipe_video_buffer template;
+ struct pipe_video_buffer vidtemplate;
struct pipe_resource templ;
unsigned i, array_size;
@@ -60,34 +60,26 @@
return NULL;
array_size = tmpl->interlaced ? 2 : 1;
- template = *tmpl;
- template.width = align(tmpl->width, VL_MACROBLOCK_WIDTH);
- template.height = align(tmpl->height / array_size, VL_MACROBLOCK_HEIGHT);
-
- vl_video_buffer_template(&templ, &template, resource_formats[0], 1, array_size, PIPE_USAGE_DEFAULT, 0);
- /* TODO: get tiling working */
- templ.bind = PIPE_BIND_LINEAR;
- resources[0] = (struct r600_texture *)
- pipe->screen->resource_create(pipe->screen, &templ);
- if (!resources[0])
- goto error;
-
- if (resource_formats[1] != PIPE_FORMAT_NONE) {
- vl_video_buffer_template(&templ, &template, resource_formats[1], 1, array_size, PIPE_USAGE_DEFAULT, 1);
- templ.bind = PIPE_BIND_LINEAR;
- resources[1] = (struct r600_texture *)
- pipe->screen->resource_create(pipe->screen, &templ);
- if (!resources[1])
- goto error;
- }
+ vidtemplate = *tmpl;
+ vidtemplate.width = align(tmpl->width, VL_MACROBLOCK_WIDTH);
+ vidtemplate.height = align(tmpl->height / array_size, VL_MACROBLOCK_HEIGHT);
+
+ assert(resource_formats[0] != PIPE_FORMAT_NONE);
- if (resource_formats[2] != PIPE_FORMAT_NONE) {
- vl_video_buffer_template(&templ, &template, resource_formats[2], 1, array_size, PIPE_USAGE_DEFAULT, 2);
- templ.bind = PIPE_BIND_LINEAR;
- resources[2] = (struct r600_texture *)
- pipe->screen->resource_create(pipe->screen, &templ);
- if (!resources[2])
- goto error;
+ for (i = 0; i < VL_NUM_COMPONENTS; ++i) {
+ if (resource_formats[i] != PIPE_FORMAT_NONE) {
+ vl_video_buffer_template(&templ, &vidtemplate,
+ resource_formats[i], 1,
+ array_size, PIPE_USAGE_DEFAULT, i);
+ /* Set PIPE_BIND_SHARED to avoid reallocation in r600_texture_get_handle,
+ * which can't handle joined surfaces. */
+ /* TODO: get tiling working */
+ templ.bind = PIPE_BIND_LINEAR | PIPE_BIND_SHARED;
+ resources[i] = (struct r600_texture *)
+ pipe->screen->resource_create(pipe->screen, &templ);
+ if (!resources[i])
+ goto error;
+ }
}
for (i = 0; i < VL_NUM_COMPONENTS; ++i) {
@@ -98,7 +90,7 @@
pbs[i] = &resources[i]->resource.buf;
}
- rvid_join_surfaces(&ctx->b, pbs, surfaces);
+ si_vid_join_surfaces(&ctx->b, pbs, surfaces);
for (i = 0; i < VL_NUM_COMPONENTS; ++i) {
if (!resources[i])
@@ -109,8 +101,8 @@
resources[i]->resource.buf);
}
- template.height *= array_size;
- return vl_video_buffer_create_ex2(pipe, &template, (struct pipe_resource **)resources);
+ vidtemplate.height *= array_size;
+ return vl_video_buffer_create_ex2(pipe, &vidtemplate, (struct pipe_resource **)resources);
error:
for (i = 0; i < VL_NUM_COMPONENTS; ++i)
@@ -131,7 +123,7 @@
msg->body.decode.dt_field_mode = buf->base.interlaced;
- ruvd_set_dt_surfaces(msg, &luma->surface, &chroma->surface, type);
+ si_uvd_set_dt_surfaces(msg, &luma->surface, (chroma) ? &chroma->surface : NULL, type);
return luma->resource.buf;
}
@@ -159,9 +151,9 @@
struct si_context *ctx = (struct si_context *)context;
bool vcn = (ctx->b.family == CHIP_RAVEN) ? true : false;
- if (templ->entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE)
- return rvce_create_encoder(context, templ, ctx->b.ws, si_vce_get_buffer);
+ if (templ->entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE)
+ return si_vce_create_encoder(context, templ, ctx->b.ws, si_vce_get_buffer);
return (vcn) ? radeon_create_decoder(context, templ) :
- ruvd_create_decoder(context, templ, si_uvd_set_dtb);
+ si_common_uvd_create_decoder(context, templ, si_uvd_set_dtb);
}
diff -Nru mesa-17.2.4/src/gallium/drivers/rbug/Makefile.in mesa-17.3.3/src/gallium/drivers/rbug/Makefile.in
--- mesa-17.2.4/src/gallium/drivers/rbug/Makefile.in 2017-10-30 14:50:00.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/rbug/Makefile.in 2018-01-18 21:30:39.000000000 +0000
@@ -123,7 +123,8 @@
subdir = src/gallium/drivers/rbug
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_gnu_make.m4 \
+am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_compile_flag.m4 \
+ $(top_srcdir)/m4/ax_check_gnu_make.m4 \
$(top_srcdir)/m4/ax_check_python_mako_module.m4 \
$(top_srcdir)/m4/ax_gcc_builtin.m4 \
$(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
@@ -351,9 +352,9 @@
NVVIEUX_LIBS = @NVVIEUX_LIBS@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
-OMX_CFLAGS = @OMX_CFLAGS@
-OMX_LIBS = @OMX_LIBS@
-OMX_LIB_INSTALL_DIR = @OMX_LIB_INSTALL_DIR@
+OMX_BELLAGIO_CFLAGS = @OMX_BELLAGIO_CFLAGS@
+OMX_BELLAGIO_LIBS = @OMX_BELLAGIO_LIBS@
+OMX_BELLAGIO_LIB_INSTALL_DIR = @OMX_BELLAGIO_LIB_INSTALL_DIR@
OPENCL_LIBNAME = @OPENCL_LIBNAME@
OPENCL_VERSION = @OPENCL_VERSION@
OSMESA_LIB = @OSMESA_LIB@
@@ -407,6 +408,8 @@
VA_LIB_INSTALL_DIR = @VA_LIB_INSTALL_DIR@
VA_MAJOR = @VA_MAJOR@
VA_MINOR = @VA_MINOR@
+VC5_SIMULATOR_CFLAGS = @VC5_SIMULATOR_CFLAGS@
+VC5_SIMULATOR_LIBS = @VC5_SIMULATOR_LIBS@
VDPAU_CFLAGS = @VDPAU_CFLAGS@
VDPAU_LIBS = @VDPAU_LIBS@
VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@
@@ -418,12 +421,15 @@
VL_CFLAGS = @VL_CFLAGS@
VL_LIBS = @VL_LIBS@
VULKAN_ICD_INSTALL_DIR = @VULKAN_ICD_INSTALL_DIR@
-WAYLAND_CFLAGS = @WAYLAND_CFLAGS@
-WAYLAND_LIBS = @WAYLAND_LIBS@
+WAYLAND_CLIENT_CFLAGS = @WAYLAND_CLIENT_CFLAGS@
+WAYLAND_CLIENT_LIBS = @WAYLAND_CLIENT_LIBS@
WAYLAND_PROTOCOLS_DATADIR = @WAYLAND_PROTOCOLS_DATADIR@
WAYLAND_SCANNER = @WAYLAND_SCANNER@
WAYLAND_SCANNER_CFLAGS = @WAYLAND_SCANNER_CFLAGS@
WAYLAND_SCANNER_LIBS = @WAYLAND_SCANNER_LIBS@
+WAYLAND_SERVER_CFLAGS = @WAYLAND_SERVER_CFLAGS@
+WAYLAND_SERVER_LIBS = @WAYLAND_SERVER_LIBS@
+WNO_OVERRIDE_INIT = @WNO_OVERRIDE_INIT@
X11_INCLUDES = @X11_INCLUDES@
XA_MAJOR = @XA_MAJOR@
XA_MINOR = @XA_MINOR@
@@ -559,6 +565,8 @@
-I$(top_srcdir)/src/gallium/auxiliary \
-I$(top_srcdir)/src/gallium/drivers \
-I$(top_srcdir)/src/gallium/winsys \
+ -I$(top_builddir)/src/util/ \
+ -I$(top_builddir)/src/gallium/drivers/ \
$(DEFINES) \
$(PTHREAD_CFLAGS) \
$(LIBDRM_CFLAGS) \
diff -Nru mesa-17.2.4/src/gallium/drivers/rbug/meson.build mesa-17.3.3/src/gallium/drivers/rbug/meson.build
--- mesa-17.2.4/src/gallium/drivers/rbug/meson.build 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/rbug/meson.build 2018-02-01 16:17:32.000000000 +0000
@@ -0,0 +1,28 @@
+# Copyright © 2017 Dylan Baker
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+# Static library built from the rbug driver sources listed below.
+# build_by_default is off, so presumably it is only built when another
+# target links against it -- TODO confirm against the Meson documentation.
+librbug = static_library(
+ 'rbug',
+ files('rbug_context.c', 'rbug_context.h', 'rbug_core.c', 'rbug_objects.c',
+ 'rbug_objects.h', 'rbug_public.h', 'rbug_screen.c', 'rbug_screen.h'),
+ include_directories : [inc_gallium, inc_gallium_aux, inc_include, inc_src],
+ c_args : [c_vis_args],
+ build_by_default : false,
+)
diff -Nru mesa-17.2.4/src/gallium/drivers/rbug/rbug_screen.c mesa-17.3.3/src/gallium/drivers/rbug/rbug_screen.c
--- mesa-17.2.4/src/gallium/drivers/rbug/rbug_screen.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/rbug/rbug_screen.c 2018-01-18 21:30:28.000000000 +0000
@@ -183,6 +183,19 @@
return result;
}
+/**
+ * Forward check_resource_capability to the wrapped screen, passing the
+ * wrapped resource in place of the rbug wrapper.
+ */
+static bool
+rbug_screen_check_resource_capability(struct pipe_screen *_screen,
+                                      struct pipe_resource *_resource,
+                                      unsigned bind)
+{
+   struct pipe_screen *screen = rbug_screen(_screen)->screen;
+   struct pipe_resource *resource = rbug_resource(_resource)->resource;
+
+   return screen->check_resource_capability(screen, resource, bind);
+}
+
static boolean
rbug_screen_resource_get_handle(struct pipe_screen *_screen,
struct pipe_context *_pipe,
@@ -301,6 +314,7 @@
rb_screen->base.context_create = rbug_screen_context_create;
rb_screen->base.resource_create = rbug_screen_resource_create;
rb_screen->base.resource_from_handle = rbug_screen_resource_from_handle;
+ SCR_INIT(check_resource_capability);
rb_screen->base.resource_get_handle = rbug_screen_resource_get_handle;
SCR_INIT(resource_changed);
rb_screen->base.resource_destroy = rbug_screen_resource_destroy;
diff -Nru mesa-17.2.4/src/gallium/drivers/softpipe/Makefile.in mesa-17.3.3/src/gallium/drivers/softpipe/Makefile.in
--- mesa-17.2.4/src/gallium/drivers/softpipe/Makefile.in 2017-10-30 14:50:00.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/softpipe/Makefile.in 2018-01-18 21:30:39.000000000 +0000
@@ -123,7 +123,8 @@
subdir = src/gallium/drivers/softpipe
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_gnu_make.m4 \
+am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_compile_flag.m4 \
+ $(top_srcdir)/m4/ax_check_gnu_make.m4 \
$(top_srcdir)/m4/ax_check_python_mako_module.m4 \
$(top_srcdir)/m4/ax_gcc_builtin.m4 \
$(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
@@ -359,9 +360,9 @@
NVVIEUX_LIBS = @NVVIEUX_LIBS@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
-OMX_CFLAGS = @OMX_CFLAGS@
-OMX_LIBS = @OMX_LIBS@
-OMX_LIB_INSTALL_DIR = @OMX_LIB_INSTALL_DIR@
+OMX_BELLAGIO_CFLAGS = @OMX_BELLAGIO_CFLAGS@
+OMX_BELLAGIO_LIBS = @OMX_BELLAGIO_LIBS@
+OMX_BELLAGIO_LIB_INSTALL_DIR = @OMX_BELLAGIO_LIB_INSTALL_DIR@
OPENCL_LIBNAME = @OPENCL_LIBNAME@
OPENCL_VERSION = @OPENCL_VERSION@
OSMESA_LIB = @OSMESA_LIB@
@@ -415,6 +416,8 @@
VA_LIB_INSTALL_DIR = @VA_LIB_INSTALL_DIR@
VA_MAJOR = @VA_MAJOR@
VA_MINOR = @VA_MINOR@
+VC5_SIMULATOR_CFLAGS = @VC5_SIMULATOR_CFLAGS@
+VC5_SIMULATOR_LIBS = @VC5_SIMULATOR_LIBS@
VDPAU_CFLAGS = @VDPAU_CFLAGS@
VDPAU_LIBS = @VDPAU_LIBS@
VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@
@@ -426,12 +429,15 @@
VL_CFLAGS = @VL_CFLAGS@
VL_LIBS = @VL_LIBS@
VULKAN_ICD_INSTALL_DIR = @VULKAN_ICD_INSTALL_DIR@
-WAYLAND_CFLAGS = @WAYLAND_CFLAGS@
-WAYLAND_LIBS = @WAYLAND_LIBS@
+WAYLAND_CLIENT_CFLAGS = @WAYLAND_CLIENT_CFLAGS@
+WAYLAND_CLIENT_LIBS = @WAYLAND_CLIENT_LIBS@
WAYLAND_PROTOCOLS_DATADIR = @WAYLAND_PROTOCOLS_DATADIR@
WAYLAND_SCANNER = @WAYLAND_SCANNER@
WAYLAND_SCANNER_CFLAGS = @WAYLAND_SCANNER_CFLAGS@
WAYLAND_SCANNER_LIBS = @WAYLAND_SCANNER_LIBS@
+WAYLAND_SERVER_CFLAGS = @WAYLAND_SERVER_CFLAGS@
+WAYLAND_SERVER_LIBS = @WAYLAND_SERVER_LIBS@
+WNO_OVERRIDE_INIT = @WNO_OVERRIDE_INIT@
X11_INCLUDES = @X11_INCLUDES@
XA_MAJOR = @XA_MAJOR@
XA_MINOR = @XA_MINOR@
@@ -614,6 +620,8 @@
-I$(top_srcdir)/src/gallium/auxiliary \
-I$(top_srcdir)/src/gallium/drivers \
-I$(top_srcdir)/src/gallium/winsys \
+ -I$(top_builddir)/src/util/ \
+ -I$(top_builddir)/src/gallium/drivers/ \
$(DEFINES) \
$(PTHREAD_CFLAGS) \
$(LIBDRM_CFLAGS) \
diff -Nru mesa-17.2.4/src/gallium/drivers/softpipe/meson.build mesa-17.3.3/src/gallium/drivers/softpipe/meson.build
--- mesa-17.2.4/src/gallium/drivers/softpipe/meson.build 1970-01-01 00:00:00.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/softpipe/meson.build 2018-02-01 16:17:32.000000000 +0000
@@ -0,0 +1,85 @@
+# Copyright © 2017 Intel Corporation
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+files_softpipe = files(  # sources and headers compiled into the 'softpipe' static library below
+ 'sp_buffer.c',
+ 'sp_buffer.h',
+ 'sp_clear.c',
+ 'sp_clear.h',
+ 'sp_context.c',
+ 'sp_context.h',
+ 'sp_compute.c',
+ 'sp_draw_arrays.c',
+ 'sp_fence.c',
+ 'sp_fence.h',
+ 'sp_flush.c',
+ 'sp_flush.h',
+ 'sp_fs_exec.c',
+ 'sp_fs.h',
+ 'sp_image.c',
+ 'sp_image.h',
+ 'sp_limits.h',
+ 'sp_prim_vbuf.c',
+ 'sp_prim_vbuf.h',
+ 'sp_public.h',
+ 'sp_quad_blend.c',
+ 'sp_quad_depth_test.c',
+ 'sp_quad_depth_test_tmp.h',
+ 'sp_quad_fs.c',
+ 'sp_quad.h',
+ 'sp_quad_pipe.c',
+ 'sp_quad_pipe.h',
+ 'sp_quad_stipple.c',
+ 'sp_query.c',
+ 'sp_query.h',
+ 'sp_screen.c',
+ 'sp_screen.h',
+ 'sp_setup.c',
+ 'sp_setup.h',
+ 'sp_state_blend.c',
+ 'sp_state_clip.c',
+ 'sp_state_derived.c',
+ 'sp_state_image.c',
+ 'sp_state.h',
+ 'sp_state_rasterizer.c',
+ 'sp_state_sampler.c',
+ 'sp_state_shader.c',
+ 'sp_state_so.c',
+ 'sp_state_surface.c',
+ 'sp_state_vertex.c',
+ 'sp_surface.c',
+ 'sp_surface.h',
+ 'sp_tex_sample.c',
+ 'sp_tex_sample.h',
+ 'sp_tex_tile_cache.c',
+ 'sp_tex_tile_cache.h',
+ 'sp_texture.c',
+ 'sp_texture.h',
+ 'sp_tile_cache.c',
+ 'sp_tile_cache.h',
+)
+
+libsoftpipe = static_library(  # static archive 'softpipe' built from files_softpipe
+ 'softpipe',
+ files_softpipe,
+ include_directories : [inc_gallium_aux, inc_gallium, inc_include, inc_src],
+ c_args : [c_vis_args, c_msvc_compat_args],
+ build_by_default : false,  # not part of the default build; built only when a dependent target needs it
+)
diff -Nru mesa-17.2.4/src/gallium/drivers/softpipe/sp_query.c mesa-17.3.3/src/gallium/drivers/softpipe/sp_query.c
--- mesa-17.2.4/src/gallium/drivers/softpipe/sp_query.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/softpipe/sp_query.c 2018-01-18 21:30:28.000000000 +0000
@@ -60,9 +60,11 @@
assert(type == PIPE_QUERY_OCCLUSION_COUNTER ||
type == PIPE_QUERY_OCCLUSION_PREDICATE ||
+ type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE ||
type == PIPE_QUERY_TIME_ELAPSED ||
type == PIPE_QUERY_SO_STATISTICS ||
type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
+ type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE ||
type == PIPE_QUERY_PRIMITIVES_EMITTED ||
type == PIPE_QUERY_PRIMITIVES_GENERATED ||
type == PIPE_QUERY_PIPELINE_STATISTICS ||
@@ -92,6 +94,7 @@
switch (sq->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
+ case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
sq->start = softpipe->occlusion_count;
break;
case PIPE_QUERY_TIME_ELAPSED:
@@ -102,7 +105,9 @@
sq->so.primitives_storage_needed = softpipe->so_stats.primitives_storage_needed;
break;
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
- sq->end = FALSE;
+ case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
+ sq->so.num_primitives_written = softpipe->so_stats.num_primitives_written;
+ sq->so.primitives_storage_needed = softpipe->so_stats.primitives_storage_needed;
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
sq->so.num_primitives_written = softpipe->so_stats.num_primitives_written;
@@ -144,6 +149,7 @@
switch (sq->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
+ case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
sq->end = softpipe->occlusion_count;
break;
case PIPE_QUERY_TIMESTAMP:
@@ -153,6 +159,7 @@
sq->end = os_time_get_nano();
break;
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
sq->so.num_primitives_written =
softpipe->so_stats.num_primitives_written - sq->so.num_primitives_written;
sq->so.primitives_storage_needed =
@@ -230,6 +237,7 @@
vresult->b = TRUE;
break;
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
vresult->b = sq->end != 0;
break;
case PIPE_QUERY_TIMESTAMP_DISJOINT: {
@@ -247,6 +255,7 @@
*result = sq->so.primitives_storage_needed;
break;
case PIPE_QUERY_OCCLUSION_PREDICATE:
+ case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
vresult->b = sq->end - sq->start != 0;
break;
default:
diff -Nru mesa-17.2.4/src/gallium/drivers/softpipe/sp_screen.c mesa-17.3.3/src/gallium/drivers/softpipe/sp_screen.c
--- mesa-17.2.4/src/gallium/drivers/softpipe/sp_screen.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/softpipe/sp_screen.c 2018-01-18 21:30:28.000000000 +0000
@@ -220,6 +220,8 @@
return 31;
case PIPE_CAP_DRAW_INDIRECT:
return 1;
+ case PIPE_CAP_QUERY_SO_OVERFLOW:
+ return 1;
case PIPE_CAP_VENDOR_ID:
return 0xFFFFFFFF;
@@ -306,6 +308,11 @@
case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
case PIPE_CAP_POST_DEPTH_COVERAGE:
case PIPE_CAP_BINDLESS_TEXTURE:
+ case PIPE_CAP_NIR_SAMPLERS_AS_DEREF:
+ case PIPE_CAP_MEMOBJ:
+ case PIPE_CAP_LOAD_CONSTBUF:
+ case PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS:
+ case PIPE_CAP_TILE_RASTER_ORDER:
return 0;
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
return 4;
@@ -455,10 +462,6 @@
* All other operations (sampling, transfer, etc).
*/
- if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
- return util_format_s3tc_enabled;
- }
-
/*
* Everything else should be supported by u_format.
*/
@@ -585,8 +588,6 @@
screen->base.get_compute_param = softpipe_get_compute_param;
screen->use_llvm = debug_get_option_use_llvm();
- util_format_s3tc_init();
-
softpipe_init_screen_texture_funcs(&screen->base);
softpipe_init_screen_fence_funcs(&screen->base);
diff -Nru mesa-17.2.4/src/gallium/drivers/svga/Makefile.in mesa-17.3.3/src/gallium/drivers/svga/Makefile.in
--- mesa-17.2.4/src/gallium/drivers/svga/Makefile.in 2017-10-30 14:50:00.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/svga/Makefile.in 2018-01-18 21:30:39.000000000 +0000
@@ -124,7 +124,8 @@
subdir = src/gallium/drivers/svga
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_gnu_make.m4 \
+am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_compile_flag.m4 \
+ $(top_srcdir)/m4/ax_check_gnu_make.m4 \
$(top_srcdir)/m4/ax_check_python_mako_module.m4 \
$(top_srcdir)/m4/ax_gcc_builtin.m4 \
$(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
@@ -376,9 +377,9 @@
NVVIEUX_LIBS = @NVVIEUX_LIBS@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
-OMX_CFLAGS = @OMX_CFLAGS@
-OMX_LIBS = @OMX_LIBS@
-OMX_LIB_INSTALL_DIR = @OMX_LIB_INSTALL_DIR@
+OMX_BELLAGIO_CFLAGS = @OMX_BELLAGIO_CFLAGS@
+OMX_BELLAGIO_LIBS = @OMX_BELLAGIO_LIBS@
+OMX_BELLAGIO_LIB_INSTALL_DIR = @OMX_BELLAGIO_LIB_INSTALL_DIR@
OPENCL_LIBNAME = @OPENCL_LIBNAME@
OPENCL_VERSION = @OPENCL_VERSION@
OSMESA_LIB = @OSMESA_LIB@
@@ -432,6 +433,8 @@
VA_LIB_INSTALL_DIR = @VA_LIB_INSTALL_DIR@
VA_MAJOR = @VA_MAJOR@
VA_MINOR = @VA_MINOR@
+VC5_SIMULATOR_CFLAGS = @VC5_SIMULATOR_CFLAGS@
+VC5_SIMULATOR_LIBS = @VC5_SIMULATOR_LIBS@
VDPAU_CFLAGS = @VDPAU_CFLAGS@
VDPAU_LIBS = @VDPAU_LIBS@
VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@
@@ -443,12 +446,15 @@
VL_CFLAGS = @VL_CFLAGS@
VL_LIBS = @VL_LIBS@
VULKAN_ICD_INSTALL_DIR = @VULKAN_ICD_INSTALL_DIR@
-WAYLAND_CFLAGS = @WAYLAND_CFLAGS@
-WAYLAND_LIBS = @WAYLAND_LIBS@
+WAYLAND_CLIENT_CFLAGS = @WAYLAND_CLIENT_CFLAGS@
+WAYLAND_CLIENT_LIBS = @WAYLAND_CLIENT_LIBS@
WAYLAND_PROTOCOLS_DATADIR = @WAYLAND_PROTOCOLS_DATADIR@
WAYLAND_SCANNER = @WAYLAND_SCANNER@
WAYLAND_SCANNER_CFLAGS = @WAYLAND_SCANNER_CFLAGS@
WAYLAND_SCANNER_LIBS = @WAYLAND_SCANNER_LIBS@
+WAYLAND_SERVER_CFLAGS = @WAYLAND_SERVER_CFLAGS@
+WAYLAND_SERVER_LIBS = @WAYLAND_SERVER_LIBS@
+WNO_OVERRIDE_INIT = @WNO_OVERRIDE_INIT@
X11_INCLUDES = @X11_INCLUDES@
XA_MAJOR = @XA_MAJOR@
XA_MINOR = @XA_MINOR@
@@ -683,6 +689,8 @@
-I$(top_srcdir)/src/gallium/auxiliary \
-I$(top_srcdir)/src/gallium/drivers \
-I$(top_srcdir)/src/gallium/winsys \
+ -I$(top_builddir)/src/util/ \
+ -I$(top_builddir)/src/gallium/drivers/ \
$(DEFINES) \
$(PTHREAD_CFLAGS) \
$(LIBDRM_CFLAGS) \
diff -Nru mesa-17.2.4/src/gallium/drivers/svga/svga_context.h mesa-17.3.3/src/gallium/drivers/svga/svga_context.h
--- mesa-17.2.4/src/gallium/drivers/svga/svga_context.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/svga/svga_context.h 2018-01-18 21:30:28.000000000 +0000
@@ -106,6 +106,7 @@
unsigned need_white_fragments:1;
unsigned independent_blend_enable:1;
unsigned alpha_to_coverage:1;
+ unsigned alpha_to_one:1;
unsigned blend_color_alpha:1; /**< set blend color to alpha value */
/** Per-render target state */
diff -Nru mesa-17.2.4/src/gallium/drivers/svga/svga_draw_arrays.c mesa-17.3.3/src/gallium/drivers/svga/svga_draw_arrays.c
--- mesa-17.2.4/src/gallium/drivers/svga/svga_draw_arrays.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/svga/svga_draw_arrays.c 2018-01-18 21:30:28.000000000 +0000
@@ -286,25 +286,25 @@
gen_type,
gen_nr,
gen_size, gen_func, &gen_buf);
- if (ret != PIPE_OK)
- goto done;
+ if (ret == PIPE_OK) {
+ pipe_debug_message(&svga->debug.callback, PERF_INFO,
+ "generating temporary index buffer for drawing %s",
+ u_prim_name(prim));
- pipe_debug_message(&svga->debug.callback, PERF_INFO,
- "generating temporary index buffer for drawing %s",
- u_prim_name(prim));
+ ret = svga_hwtnl_simple_draw_range_elements(hwtnl,
+ gen_buf,
+ gen_size,
+ start,
+ 0,
+ count - 1,
+ gen_prim, 0, gen_nr,
+ start_instance,
+ instance_count);
+ }
- ret = svga_hwtnl_simple_draw_range_elements(hwtnl,
- gen_buf,
- gen_size,
- start,
- 0,
- count - 1,
- gen_prim, 0, gen_nr,
- start_instance,
- instance_count);
-done:
- if (gen_buf)
+ if (gen_buf) {
pipe_resource_reference(&gen_buf, NULL);
+ }
}
SVGA_STATS_TIME_POP(svga_sws(svga));
diff -Nru mesa-17.2.4/src/gallium/drivers/svga/svga_draw.c mesa-17.3.3/src/gallium/drivers/svga/svga_draw.c
--- mesa-17.2.4/src/gallium/drivers/svga/svga_draw.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/svga/svga_draw.c 2018-01-18 21:30:28.000000000 +0000
@@ -489,6 +489,24 @@
}
+/**
+ * Compare two lists of 'count' vertex buffers.
+ * The lists match only when both the attribute arrays and the resource
+ * pointer arrays are byte-for-byte identical (checked with memcmp).
+ */
+static boolean
+vertex_buffers_equal(unsigned count,
+                     SVGA3dVertexBuffer *pVBufAttr1,
+                     struct pipe_resource **pVBuf1,
+                     SVGA3dVertexBuffer *pVBufAttr2,
+                     struct pipe_resource **pVBuf2)
+{
+   if (memcmp(pVBufAttr1, pVBufAttr2, count * sizeof(*pVBufAttr1)) != 0)
+      return FALSE;
+   return memcmp(pVBuf1, pVBuf2, count * sizeof(*pVBuf1)) == 0;
+}
+
+
static enum pipe_error
draw_vgpu10(struct svga_hwtnl *hwtnl,
const SVGA3dPrimitiveRange *range,
@@ -571,12 +589,12 @@
if (ib) {
struct svga_buffer *sbuf = svga_buffer(ib);
- assert(sbuf->key.flags & SVGA3D_SURFACE_BIND_INDEX_BUFFER);
- (void) sbuf; /* silence unused var warning */
-
ib_handle = svga_buffer_handle(svga, ib, PIPE_BIND_INDEX_BUFFER);
if (!ib_handle)
return PIPE_ERROR_OUT_OF_MEMORY;
+
+ assert(sbuf->key.flags & SVGA3D_SURFACE_BIND_INDEX_BUFFER);
+ (void) sbuf; /* silence unused var warning */
}
else {
ib_handle = NULL;
@@ -607,10 +625,11 @@
*/
if (((hwtnl->cmd.swc->hints & SVGA_HINT_FLAG_CAN_PRE_FLUSH) == 0) ||
vbuf_count != svga->state.hw_draw.num_vbuffers ||
- memcmp(vbuffer_attrs, svga->state.hw_draw.vbuffer_attrs,
- vbuf_count * sizeof(vbuffer_attrs[0])) ||
- memcmp(vbuffers, svga->state.hw_draw.vbuffers,
- vbuf_count * sizeof(vbuffers[0]))) {
+ !vertex_buffers_equal(vbuf_count,
+ vbuffer_attrs,
+ vbuffers,
+ svga->state.hw_draw.vbuffer_attrs,
+ svga->state.hw_draw.vbuffers)) {
unsigned num_vbuffers;
@@ -631,13 +650,52 @@
}
if (num_vbuffers > 0) {
-
- ret = SVGA3D_vgpu10_SetVertexBuffers(svga->swc, num_vbuffers,
- 0, /* startBuffer */
- vbuffer_attrs,
- vbuffer_handles);
- if (ret != PIPE_OK)
- return ret;
+ SVGA3dVertexBuffer *pbufAttrs = vbuffer_attrs;
+ struct svga_winsys_surface **pbufHandles = vbuffer_handles;
+ unsigned numVBuf = 0;
+
+ /* Loop through the vertex buffer lists to only emit
+ * those vertex buffers that are not already in the
+ * corresponding entries in the device's vertex buffer list.
+ */
+ for (i = 0; i < num_vbuffers; i++) {
+ boolean emit;
+
+ emit = vertex_buffers_equal(1,
+ &vbuffer_attrs[i],
+ &vbuffers[i],
+ &svga->state.hw_draw.vbuffer_attrs[i],
+ &svga->state.hw_draw.vbuffers[i]);
+
+ if (!emit && i == num_vbuffers-1) {
+ /* Include the last vertex buffer in the next emit
+ * if it is different.
+ */
+ emit = TRUE;
+ numVBuf++;
+ i++;
+ }
+
+ if (emit) {
+ /* numVBuf can only be 0 if the first vertex buffer
+ * is the same as the one in the device's list.
+ * In this case, there is nothing to send yet.
+ */
+ if (numVBuf) {
+ ret = SVGA3D_vgpu10_SetVertexBuffers(svga->swc,
+ numVBuf,
+ i - numVBuf,
+ pbufAttrs, pbufHandles);
+ if (ret != PIPE_OK)
+ return ret;
+ }
+ pbufAttrs += (numVBuf + 1);
+ pbufHandles += (numVBuf + 1);
+ numVBuf = 0;
+ }
+ else
+ numVBuf++;
+ }
/* save the number of vertex buffers sent to the device, not
* including trailing unbound vertex buffers.
diff -Nru mesa-17.2.4/src/gallium/drivers/svga/svga_draw_elements.c mesa-17.3.3/src/gallium/drivers/svga/svga_draw_elements.c
--- mesa-17.2.4/src/gallium/drivers/svga/svga_draw_elements.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/svga/svga_draw_elements.c 2018-01-18 21:30:28.000000000 +0000
@@ -242,21 +242,21 @@
prim, gen_prim,
count, gen_nr, gen_size,
gen_func, &gen_buf);
- if (ret != PIPE_OK)
- goto done;
+ if (ret == PIPE_OK) {
+ ret = svga_hwtnl_simple_draw_range_elements(hwtnl,
+ gen_buf,
+ gen_size,
+ index_bias,
+ min_index,
+ max_index,
+ gen_prim, 0, gen_nr,
+ start_instance,
+ instance_count);
+ }
- ret = svga_hwtnl_simple_draw_range_elements(hwtnl,
- gen_buf,
- gen_size,
- index_bias,
- min_index,
- max_index,
- gen_prim, 0, gen_nr,
- start_instance,
- instance_count);
-done:
- if (gen_buf)
+ if (gen_buf) {
pipe_resource_reference(&gen_buf, NULL);
+ }
}
SVGA_STATS_TIME_POP(svga_sws(hwtnl->svga));
diff -Nru mesa-17.2.4/src/gallium/drivers/svga/svga_draw_private.h mesa-17.3.3/src/gallium/drivers/svga/svga_draw_private.h
--- mesa-17.2.4/src/gallium/drivers/svga/svga_draw_private.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/svga/svga_draw_private.h 2018-01-18 21:30:28.000000000 +0000
@@ -42,7 +42,7 @@
* handled by the svga device. Other types will be converted to
* these types by the index/translation code.
*/
-static const unsigned svga_hw_prims =
+static const unsigned svga_hw_prims =
((1 << PIPE_PRIM_POINTS) |
(1 << PIPE_PRIM_LINES) |
(1 << PIPE_PRIM_LINE_STRIP) |
@@ -73,23 +73,23 @@
case PIPE_PRIM_LINES:
*prim_count = vcount / 2;
- return SVGA3D_PRIMITIVE_LINELIST;
+ return SVGA3D_PRIMITIVE_LINELIST;
case PIPE_PRIM_LINE_STRIP:
*prim_count = vcount - 1;
- return SVGA3D_PRIMITIVE_LINESTRIP;
+ return SVGA3D_PRIMITIVE_LINESTRIP;
case PIPE_PRIM_TRIANGLES:
*prim_count = vcount / 3;
- return SVGA3D_PRIMITIVE_TRIANGLELIST;
+ return SVGA3D_PRIMITIVE_TRIANGLELIST;
case PIPE_PRIM_TRIANGLE_STRIP:
*prim_count = vcount - 2;
- return SVGA3D_PRIMITIVE_TRIANGLESTRIP;
+ return SVGA3D_PRIMITIVE_TRIANGLESTRIP;
case PIPE_PRIM_TRIANGLE_FAN:
*prim_count = vcount - 2;
- return SVGA3D_PRIMITIVE_TRIANGLEFAN;
+ return SVGA3D_PRIMITIVE_TRIANGLEFAN;
case PIPE_PRIM_LINES_ADJACENCY:
*prim_count = vcount / 4;
@@ -119,8 +119,7 @@
u_generate_func generate;
unsigned gen_nr;
- /* If non-null, this buffer is filled by calling
- * generate(nr, map(buffer))
+ /* If non-null, this buffer is filled by calling generate(nr, map(buffer))
*/
struct pipe_resource *buffer;
};
@@ -160,7 +159,7 @@
* vertex buffers.
*/
int index_bias;
-
+
/* Provoking vertex information (for flat shading). */
unsigned api_pv; /**< app-requested PV mode (PV_FIRST or PV_LAST) */
unsigned hw_pv; /**< device-supported PV mode (PV_FIRST or PV_LAST) */
@@ -220,27 +219,26 @@
}
-enum pipe_error
-svga_hwtnl_prim( struct svga_hwtnl *hwtnl,
- const SVGA3dPrimitiveRange *range,
- unsigned vcount,
- unsigned min_index,
- unsigned max_index,
- struct pipe_resource *ib,
- unsigned start_instance, unsigned instance_count);
-
enum pipe_error
-svga_hwtnl_simple_draw_range_elements( struct svga_hwtnl *hwtnl,
- struct pipe_resource *indexBuffer,
- unsigned index_size,
- int index_bias,
- unsigned min_index,
- unsigned max_index,
- enum pipe_prim_type prim,
- unsigned start,
- unsigned count,
- unsigned start_instance,
- unsigned instance_count);
+svga_hwtnl_prim(struct svga_hwtnl *hwtnl,
+ const SVGA3dPrimitiveRange *range,
+ unsigned vcount,
+ unsigned min_index,
+ unsigned max_index,
+ struct pipe_resource *ib,
+ unsigned start_instance, unsigned instance_count);
+enum pipe_error
+svga_hwtnl_simple_draw_range_elements(struct svga_hwtnl *hwtnl,
+ struct pipe_resource *indexBuffer,
+ unsigned index_size,
+ int index_bias,
+ unsigned min_index,
+ unsigned max_index,
+ enum pipe_prim_type prim,
+ unsigned start,
+ unsigned count,
+ unsigned start_instance,
+ unsigned instance_count);
#endif
diff -Nru mesa-17.2.4/src/gallium/drivers/svga/svga_format.c mesa-17.3.3/src/gallium/drivers/svga/svga_format.c
--- mesa-17.2.4/src/gallium/drivers/svga/svga_format.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/svga/svga_format.c 2018-01-18 21:30:28.000000000 +0000
@@ -49,6 +49,11 @@
const SVGA3dSurfaceFormat *compat_format;
};
+
+/**
+ * Table mapping Gallium formats to SVGA3d vertex/pixel formats.
+ * Note: the table is ordered according to PIPE_FORMAT_x order.
+ */
static const struct vgpu10_format_entry format_conversion_table[] =
{
/* Gallium format SVGA3D vertex format SVGA3D pixel format Flags */
@@ -360,6 +365,9 @@
{ PIPE_FORMAT_ASTC_12x10_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
{ PIPE_FORMAT_ASTC_12x12_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
{ PIPE_FORMAT_P016, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_R10G10B10X2_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_A1B5G5R5_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ { PIPE_FORMAT_X1B5G5R5_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
};
@@ -1826,9 +1834,9 @@
case SVGA3D_R32G32_FLOAT:
return SVGA3D_R32G32_TYPELESS;
case SVGA3D_D32_FLOAT_S8X24_UINT:
- return SVGA3D_R32G8X24_TYPELESS;
case SVGA3D_X32_G8X24_UINT:
- return SVGA3D_R32_FLOAT_X8X24;
+ case SVGA3D_R32G8X24_TYPELESS:
+ return SVGA3D_R32G8X24_TYPELESS;
case SVGA3D_R10G10B10A2_UINT:
case SVGA3D_R10G10B10A2_UNORM:
return SVGA3D_R10G10B10A2_TYPELESS;
@@ -1837,6 +1845,7 @@
case SVGA3D_R8G8B8A8_UNORM_SRGB:
case SVGA3D_R8G8B8A8_UINT:
case SVGA3D_R8G8B8A8_SINT:
+ case SVGA3D_R8G8B8A8_TYPELESS:
return SVGA3D_R8G8B8A8_TYPELESS;
case SVGA3D_R16G16_UINT:
case SVGA3D_R16G16_SINT:
@@ -1848,8 +1857,10 @@
case SVGA3D_R32_FLOAT:
case SVGA3D_R32_UINT:
case SVGA3D_R32_SINT:
+ case SVGA3D_R32_TYPELESS:
return SVGA3D_R32_TYPELESS;
case SVGA3D_D24_UNORM_S8_UINT:
+ case SVGA3D_R24G8_TYPELESS:
return SVGA3D_R24G8_TYPELESS;
case SVGA3D_X24_G8_UINT:
return SVGA3D_R24_UNORM_X8;
@@ -1864,6 +1875,7 @@
case SVGA3D_R16_SNORM:
case SVGA3D_R16_SINT:
case SVGA3D_R16_FLOAT:
+ case SVGA3D_R16_TYPELESS:
return SVGA3D_R16_TYPELESS;
case SVGA3D_R8_UNORM:
case SVGA3D_R8_UINT:
@@ -1872,18 +1884,23 @@
return SVGA3D_R8_TYPELESS;
case SVGA3D_B8G8R8A8_UNORM_SRGB:
case SVGA3D_B8G8R8A8_UNORM:
+ case SVGA3D_B8G8R8A8_TYPELESS:
return SVGA3D_B8G8R8A8_TYPELESS;
case SVGA3D_B8G8R8X8_UNORM_SRGB:
case SVGA3D_B8G8R8X8_UNORM:
+ case SVGA3D_B8G8R8X8_TYPELESS:
return SVGA3D_B8G8R8X8_TYPELESS;
case SVGA3D_BC1_UNORM:
case SVGA3D_BC1_UNORM_SRGB:
+ case SVGA3D_BC1_TYPELESS:
return SVGA3D_BC1_TYPELESS;
case SVGA3D_BC2_UNORM:
case SVGA3D_BC2_UNORM_SRGB:
+ case SVGA3D_BC2_TYPELESS:
return SVGA3D_BC2_TYPELESS;
case SVGA3D_BC3_UNORM:
case SVGA3D_BC3_UNORM_SRGB:
+ case SVGA3D_BC3_TYPELESS:
return SVGA3D_BC3_TYPELESS;
case SVGA3D_BC4_UNORM:
case SVGA3D_BC4_SNORM:
diff -Nru mesa-17.2.4/src/gallium/drivers/svga/svga_pipe_blend.c mesa-17.3.3/src/gallium/drivers/svga/svga_pipe_blend.c
--- mesa-17.2.4/src/gallium/drivers/svga/svga_pipe_blend.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/svga/svga_pipe_blend.c 2018-01-18 21:30:28.000000000 +0000
@@ -331,6 +331,7 @@
blend->independent_blend_enable = templ->independent_blend_enable;
blend->alpha_to_coverage = templ->alpha_to_coverage;
+ blend->alpha_to_one = templ->alpha_to_one;
if (svga_have_vgpu10(svga)) {
define_blend_state_object(svga, blend);
diff -Nru mesa-17.2.4/src/gallium/drivers/svga/svga_pipe_query.c mesa-17.3.3/src/gallium/drivers/svga/svga_pipe_query.c
--- mesa-17.2.4/src/gallium/drivers/svga/svga_pipe_query.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/svga/svga_pipe_query.c 2018-01-18 21:30:28.000000000 +0000
@@ -707,6 +707,7 @@
}
break;
case PIPE_QUERY_OCCLUSION_PREDICATE:
+ case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
if (svga_have_vgpu10(svga)) {
sq->svga_type = SVGA3D_QUERYTYPE_OCCLUSIONPREDICATE;
define_query_vgpu10(svga, sq, sizeof(SVGADXOcclusionPredicateQueryResult));
@@ -789,6 +790,7 @@
switch (sq->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
+ case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
if (svga_have_vgpu10(svga)) {
/* make sure to also destroy any associated predicate query */
if (sq->predicate)
@@ -864,6 +866,7 @@
switch (sq->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
+ case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
if (svga_have_vgpu10(svga)) {
ret = begin_query_vgpu10(svga, sq);
/* also need to start the associated occlusion predicate query */
@@ -977,6 +980,7 @@
switch (sq->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
+ case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
if (svga_have_vgpu10(svga)) {
ret = end_query_vgpu10(svga, sq);
/* also need to end the associated occlusion predicate query */
@@ -1093,7 +1097,8 @@
ret = get_query_result_vgpu9(svga, sq, wait, result);
}
break;
- case PIPE_QUERY_OCCLUSION_PREDICATE: {
+ case PIPE_QUERY_OCCLUSION_PREDICATE:
+ case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: {
if (svga_have_vgpu10(svga)) {
SVGADXOcclusionPredicateQueryResult occResult;
ret = get_query_result_vgpu10(svga, sq, wait,
diff -Nru mesa-17.2.4/src/gallium/drivers/svga/svga_resource_texture.c mesa-17.3.3/src/gallium/drivers/svga/svga_resource_texture.c
--- mesa-17.2.4/src/gallium/drivers/svga/svga_resource_texture.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/svga/svga_resource_texture.c 2018-01-18 21:30:28.000000000 +0000
@@ -985,13 +985,16 @@
tex->key.arraySize = 1;
tex->key.numFaces = 1;
- /* single sample texture can be treated as non-multisamples texture */
- tex->key.sampleCount = template->nr_samples > 1 ? template->nr_samples : 0;
-
- if (template->nr_samples > 1) {
+ /* nr_samples=1 must be treated as a non-multisample texture */
+ if (tex->b.b.nr_samples == 1) {
+ tex->b.b.nr_samples = 0;
+ }
+ else if (tex->b.b.nr_samples > 1) {
tex->key.flags |= SVGA3D_SURFACE_MASKABLE_ANTIALIAS;
}
+ tex->key.sampleCount = tex->b.b.nr_samples;
+
if (svgascreen->sws->have_vgpu10) {
switch (template->target) {
case PIPE_TEXTURE_1D:
diff -Nru mesa-17.2.4/src/gallium/drivers/svga/svga_screen.c mesa-17.3.3/src/gallium/drivers/svga/svga_screen.c
--- mesa-17.2.4/src/gallium/drivers/svga/svga_screen.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/svga/svga_screen.c 2018-01-18 21:30:28.000000000 +0000
@@ -49,10 +49,6 @@
/* NOTE: this constant may get moved into a svga3d*.h header file */
#define SVGA3D_DX_MAX_RESOURCE_SIZE (128 * 1024 * 1024)
-#ifndef MESA_GIT_SHA1
-#define MESA_GIT_SHA1 "(unknown git revision)"
-#endif
-
#ifdef DEBUG
int SVGA_DEBUG = 0;
@@ -340,8 +336,10 @@
case PIPE_CAP_NATIVE_FENCE_FD:
return sws->have_fence_fd;
- /* Unsupported features */
case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
+ return 1;
+
+ /* Unsupported features */
case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
case PIPE_CAP_SHADER_STENCIL_EXPORT:
case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
@@ -450,6 +448,12 @@
case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
case PIPE_CAP_POST_DEPTH_COVERAGE:
case PIPE_CAP_BINDLESS_TEXTURE:
+ case PIPE_CAP_NIR_SAMPLERS_AS_DEREF:
+ case PIPE_CAP_QUERY_SO_OVERFLOW:
+ case PIPE_CAP_MEMOBJ:
+ case PIPE_CAP_LOAD_CONSTBUF:
+ case PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS:
+ case PIPE_CAP_TILE_RASTER_ORDER:
return 0;
}
@@ -496,12 +500,12 @@
val = get_uint_cap(sws, SVGA3D_DEVCAP_MAX_FRAGMENT_SHADER_TEMPS, 32);
return MIN2(val, SVGA3D_TEMPREG_MAX);
case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
- /*
- * Although PS 3.0 has some addressing abilities it can only represent
- * loops that can be statically determined and unrolled. Given we can
- * only handle a subset of the cases that the state tracker already
- * does it is better to defer loop unrolling to the state tracker.
- */
+ /*
+ * Although PS 3.0 has some addressing abilities it can only represent
+ * loops that can be statically determined and unrolled. Given we can
+ * only handle a subset of the cases that the state tracker already
+ * does it is better to defer loop unrolling to the state tracker.
+ */
return 0;
case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
return 0;
@@ -513,8 +517,11 @@
return 0;
case PIPE_SHADER_CAP_SUBROUTINES:
return 0;
+ case PIPE_SHADER_CAP_INT64_ATOMICS:
case PIPE_SHADER_CAP_INTEGERS:
return 0;
+ case PIPE_SHADER_CAP_FP16:
+ return 0;
case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
return 16;
@@ -524,6 +531,7 @@
return 0;
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
@@ -574,8 +582,11 @@
return 1;
case PIPE_SHADER_CAP_SUBROUTINES:
return 0;
+ case PIPE_SHADER_CAP_INT64_ATOMICS:
case PIPE_SHADER_CAP_INTEGERS:
return 0;
+ case PIPE_SHADER_CAP_FP16:
+ return 0;
case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
return 0;
@@ -585,6 +596,7 @@
return 0;
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
@@ -671,21 +683,25 @@
case PIPE_SHADER_CAP_SUBROUTINES:
case PIPE_SHADER_CAP_INTEGERS:
return TRUE;
+ case PIPE_SHADER_CAP_FP16:
+ return FALSE;
case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
return SVGA3D_DX_MAX_SAMPLERS;
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_TGSI;
case PIPE_SHADER_CAP_SUPPORTED_IRS:
- return 0;
+ return 0;
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+ case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD:
case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS:
+ case PIPE_SHADER_CAP_INT64_ATOMICS:
return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
@@ -744,8 +760,10 @@
return FALSE;
}
- /* we don't support sRGB rendering into display targets */
- if (util_format_is_srgb(format) && (bindings & PIPE_BIND_DISPLAY_TARGET)) {
+ if (!ss->sws->have_vgpu10 &&
+ util_format_is_srgb(format) &&
+ (bindings & PIPE_BIND_DISPLAY_TARGET)) {
+ /* We only support sRGB rendering with vgpu10 */
return FALSE;
}
@@ -776,6 +794,9 @@
case SVGA3D_B8G8R8A8_UNORM:
case SVGA3D_B8G8R8X8_UNORM:
case SVGA3D_B5G6R5_UNORM:
+ case SVGA3D_B8G8R8X8_UNORM_SRGB:
+ case SVGA3D_B8G8R8A8_UNORM_SRGB:
+ case SVGA3D_R8G8B8A8_UNORM_SRGB:
break;
/* Often unsupported/problematic. This means we end up with the same
@@ -784,12 +805,12 @@
case SVGA3D_A4R4G4B4:
case SVGA3D_A1R5G5B5:
return FALSE;
-
+
default:
return FALSE;
}
}
-
+
/*
* Query the host capabilities.
*/
@@ -959,7 +980,11 @@
svga_host_log(host_log);
util_snprintf(host_log, sizeof(host_log) - strlen(log_prefix),
- "%s%s (%s)", log_prefix, PACKAGE_VERSION, MESA_GIT_SHA1);
+ "%s%s"
+#ifdef MESA_GIT_SHA1
+ " (" MESA_GIT_SHA1 ")"
+#endif
+ , log_prefix, PACKAGE_VERSION);
svga_host_log(host_log);
/* If the SVGA_EXTRA_LOGGING env var is set, log the process's command
@@ -980,14 +1005,14 @@
svga_destroy_screen( struct pipe_screen *screen )
{
struct svga_screen *svgascreen = svga_screen(screen);
-
+
svga_screen_cache_cleanup(svgascreen);
mtx_destroy(&svgascreen->swc_mutex);
mtx_destroy(&svgascreen->tex_mutex);
svgascreen->sws->destroy(svgascreen->sws);
-
+
FREE(svgascreen);
}
@@ -1196,12 +1221,14 @@
return NULL;
}
+
struct svga_winsys_screen *
svga_winsys_screen(struct pipe_screen *screen)
{
return svga_screen(screen)->sws;
}
+
#ifdef DEBUG
struct svga_screen *
svga_screen(struct pipe_screen *screen)
diff -Nru mesa-17.2.4/src/gallium/drivers/svga/svga_screen_cache.c mesa-17.3.3/src/gallium/drivers/svga/svga_screen_cache.c
--- mesa-17.2.4/src/gallium/drivers/svga/svga_screen_cache.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/svga/svga_screen_cache.c 2018-01-18 21:30:28.000000000 +0000
@@ -69,7 +69,7 @@
total_size += img_size;
}
- total_size *= key->numFaces * key->arraySize;
+ total_size *= key->numFaces * key->arraySize * MAX2(1, key->sampleCount);
return total_size;
}
@@ -364,7 +364,7 @@
* It will be done using the current context.
*/
if (svga->swc->surface_invalidate(svga->swc, entry->handle) != PIPE_OK) {
- enum pipe_error ret;
+ MAYBE_UNUSED enum pipe_error ret;
/* Even though surface invalidation here is done after the command
* buffer is flushed, it is still possible that it will
diff -Nru mesa-17.2.4/src/gallium/drivers/svga/svga_shader.h mesa-17.3.3/src/gallium/drivers/svga/svga_shader.h
--- mesa-17.2.4/src/gallium/drivers/svga/svga_shader.h 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/svga/svga_shader.h 2018-01-18 21:30:28.000000000 +0000
@@ -77,6 +77,7 @@
unsigned light_twoside:1;
unsigned front_ccw:1;
unsigned white_fragments:1;
+ unsigned alpha_to_one:1;
unsigned flatshade:1;
unsigned pstipple:1;
unsigned alpha_func:4; /**< SVGA3D_CMP_x */
diff -Nru mesa-17.2.4/src/gallium/drivers/svga/svga_state_framebuffer.c mesa-17.3.3/src/gallium/drivers/svga/svga_state_framebuffer.c
--- mesa-17.2.4/src/gallium/drivers/svga/svga_state_framebuffer.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/svga/svga_state_framebuffer.c 2018-01-18 21:30:28.000000000 +0000
@@ -554,23 +554,13 @@
}
}
else {
- switch (svga->curr.reduced_prim) {
- case PIPE_PRIM_POINTS:
- adjust_x = -0.375;
- adjust_y = -0.75;
- break;
- case PIPE_PRIM_LINES:
- adjust_x = -0.5;
- adjust_y = -0.125;
- break;
- case PIPE_PRIM_TRIANGLES:
- adjust_x = -0.5;
- adjust_y = -0.5;
- break;
- default:
- /* nothing */
- break;
- }
+ /* Use (-0.5, -0.5) bias for all prim types.
+ * Regarding line rasterization, this does not seem to satisfy
+ * the Piglit gl-1.0-ortho-pos test but it generally produces
+ * results identical or very similar to VGPU10.
+ */
+ adjust_x = -0.5;
+ adjust_y = -0.5;
}
if (invertY)
diff -Nru mesa-17.2.4/src/gallium/drivers/svga/svga_state_fs.c mesa-17.3.3/src/gallium/drivers/svga/svga_state_fs.c
--- mesa-17.2.4/src/gallium/drivers/svga/svga_state_fs.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/svga/svga_state_fs.c 2018-01-18 21:30:28.000000000 +0000
@@ -25,6 +25,7 @@
#include "util/u_inlines.h"
#include "pipe/p_defines.h"
+#include "util/u_format.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_bitmask.h"
@@ -232,9 +233,9 @@
*
* SVGA_NEW_BLEND
*/
- if (svga->curr.blend->need_white_fragments) {
- key->fs.white_fragments = 1;
- }
+ key->fs.white_fragments = svga->curr.blend->need_white_fragments;
+
+ key->fs.alpha_to_one = svga->curr.blend->alpha_to_one;
#ifdef DEBUG
/*
@@ -349,9 +350,10 @@
}
}
- /* SVGA_NEW_FRAME_BUFFER */
- if (fs->base.info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS]) {
- /* Replicate color0 output to N colorbuffers */
+ /* SVGA_NEW_FRAME_BUFFER | SVGA_NEW_BLEND */
+ if (fs->base.info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] ||
+ svga->curr.blend->need_white_fragments) {
+ /* Replicate color0 output (or white) to N colorbuffers */
key->fs.write_color0_to_n_cbufs = svga->curr.framebuffer.nr_cbufs;
}
diff -Nru mesa-17.2.4/src/gallium/drivers/svga/svga_state_sampler.c mesa-17.3.3/src/gallium/drivers/svga/svga_state_sampler.c
--- mesa-17.2.4/src/gallium/drivers/svga/svga_state_sampler.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/svga/svga_state_sampler.c 2018-01-18 21:30:28.000000000 +0000
@@ -278,14 +278,54 @@
if (count != svga->state.hw_draw.num_sampler_views[shader] ||
memcmp(sampler_views, svga->state.hw_draw.sampler_views[shader],
count * sizeof(sampler_views[0])) != 0) {
- ret = SVGA3D_vgpu10_SetShaderResources(svga->swc,
- svga_shader_type(shader),
- 0, /* startView */
- nviews,
- ids,
- surfaces);
- if (ret != PIPE_OK)
- return ret;
+ SVGA3dShaderResourceViewId *pIds = ids;
+ struct svga_winsys_surface **pSurf = surfaces;
+ unsigned numSR = 0;
+
+ /* Loop through the sampler view list to only emit
+ * the sampler views that are not already in the
+ * corresponding entries in the device's
+ * shader resource list.
+ */
+ for (i = 0; i < nviews; i++) {
+ boolean emit;
+
+ emit = sampler_views[i] ==
+ svga->state.hw_draw.sampler_views[shader][i];
+
+ if (!emit && i == nviews-1) {
+ /* Include the last sampler view in the next emit
+ * if it is different.
+ */
+ emit = TRUE;
+ numSR++;
+ i++;
+ }
+
+ if (emit) {
+ /* numSR is 0 only when no changed views are pending, e.g.
+ * the first entry of the list is the same as the one in
+ * the device list. In this case, there is nothing to send yet.
+ */
+ if (numSR) {
+ ret = SVGA3D_vgpu10_SetShaderResources(
+ svga->swc,
+ svga_shader_type(shader),
+ i - numSR, /* startView */
+ numSR,
+ pIds,
+ pSurf);
+
+ if (ret != PIPE_OK)
+ return ret;
+ }
+ pIds += (numSR + 1);
+ pSurf += (numSR + 1);
+ numSR = 0;
+ }
+ else
+ numSR++;
+ }
/* Save referenced sampler views in the hw draw state. */
svga->state.hw_draw.num_sampler_views[shader] = count;
diff -Nru mesa-17.2.4/src/gallium/drivers/svga/svga_tgsi.c mesa-17.3.3/src/gallium/drivers/svga/svga_tgsi.c
--- mesa-17.2.4/src/gallium/drivers/svga/svga_tgsi.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/svga/svga_tgsi.c 2018-01-18 21:30:28.000000000 +0000
@@ -209,6 +209,12 @@
goto fail;
}
+ if (emit.info.indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
+ debug_printf(
+ "svga: indirect indexing of temporary registers is not supported.\n");
+ goto fail;
+ }
+
emit.in_main_func = TRUE;
if (!svga_shader_emit_header(&emit)) {
diff -Nru mesa-17.2.4/src/gallium/drivers/svga/svga_tgsi_insn.c mesa-17.3.3/src/gallium/drivers/svga/svga_tgsi_insn.c
--- mesa-17.2.4/src/gallium/drivers/svga/svga_tgsi_insn.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/svga/svga_tgsi_insn.c 2018-01-18 21:30:28.000000000 +0000
@@ -44,7 +44,6 @@
{
switch (opcode) {
case TGSI_OPCODE_ADD: return SVGA3DOP_ADD;
- case TGSI_OPCODE_DP2A: return SVGA3DOP_DP2ADD;
case TGSI_OPCODE_DP3: return SVGA3DOP_DP3;
case TGSI_OPCODE_DP4: return SVGA3DOP_DP4;
case TGSI_OPCODE_FRC: return SVGA3DOP_FRC;
@@ -1238,39 +1237,6 @@
/**
- * Translate the following TGSI DPH instruction.
- * DPH DST, SRC1, SRC2
- * To the following SVGA3D instruction sequence.
- * DP3 TMP, SRC1, SRC2
- * ADD DST, TMP, SRC2.wwww
- */
-static boolean
-emit_dph(struct svga_shader_emitter *emit,
- const struct tgsi_full_instruction *insn )
-{
- SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
- const struct src_register src0 = translate_src_register(
- emit, &insn->Src[0] );
- struct src_register src1 =
- translate_src_register(emit, &insn->Src[1]);
- SVGA3dShaderDestToken temp = get_temp( emit );
-
- /* DP3 TMP, SRC1, SRC2 */
- if (!submit_op2( emit, inst_token( SVGA3DOP_DP3 ), temp, src0, src1 ))
- return FALSE;
-
- src1 = scalar(src1, TGSI_SWIZZLE_W);
-
- /* ADD DST, TMP, SRC2.wwww */
- if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst,
- src( temp ), src1 ))
- return FALSE;
-
- return TRUE;
-}
-
-
-/**
* Sine / Cosine helper function.
*/
static boolean
@@ -1284,29 +1250,6 @@
/**
- * Translate/emit a TGSI SIN, COS or CSC instruction.
- */
-static boolean
-emit_sincos(struct svga_shader_emitter *emit,
- const struct tgsi_full_instruction *insn)
-{
- SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
- struct src_register src0 = translate_src_register(emit, &insn->Src[0]);
- SVGA3dShaderDestToken temp = get_temp( emit );
-
- /* SCS TMP SRC */
- if (!do_emit_sincos(emit, writemask(temp, TGSI_WRITEMASK_XY), src0 ))
- return FALSE;
-
- /* MOV DST TMP */
- if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src( temp ) ))
- return FALSE;
-
- return TRUE;
-}
-
-
-/**
* Translate TGSI SIN instruction into:
* SCS TMP SRC
* MOV DST TMP.yyyy
@@ -2230,63 +2173,6 @@
/**
- * Translate/emit TGSI XPD (vector cross product) instruction.
- */
-static boolean
-emit_xpd(struct svga_shader_emitter *emit,
- const struct tgsi_full_instruction *insn)
-{
- SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
- const struct src_register src0 = translate_src_register(
- emit, &insn->Src[0] );
- const struct src_register src1 = translate_src_register(
- emit, &insn->Src[1] );
- boolean need_dst_tmp = FALSE;
-
- /* XPD can only output to a temporary */
- if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP)
- need_dst_tmp = TRUE;
-
- /* The dst reg must not be the same as src0 or src1*/
- if (alias_src_dst(src0, dst) ||
- alias_src_dst(src1, dst))
- need_dst_tmp = TRUE;
-
- if (need_dst_tmp) {
- SVGA3dShaderDestToken tmp = get_temp( emit );
-
- /* Obey DX9 restrictions on mask:
- */
- tmp.mask = dst.mask & TGSI_WRITEMASK_XYZ;
-
- if (!submit_op2(emit, inst_token( SVGA3DOP_CRS ), tmp, src0, src1))
- return FALSE;
-
- if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp )))
- return FALSE;
- }
- else {
- if (!submit_op2(emit, inst_token( SVGA3DOP_CRS ), dst, src0, src1))
- return FALSE;
- }
-
- /* Need to emit 1.0 to dst.w?
- */
- if (dst.mask & TGSI_WRITEMASK_W) {
- struct src_register one = get_one_immediate( emit );
-
- if (!submit_op1(emit,
- inst_token( SVGA3DOP_MOV ),
- writemask(dst, TGSI_WRITEMASK_W),
- one))
- return FALSE;
- }
-
- return TRUE;
-}
-
-
-/**
* Emit a LRP (linear interpolation) instruction.
*/
static boolean
@@ -2925,18 +2811,12 @@
case TGSI_OPCODE_DP2:
return emit_dp2( emit, insn );
- case TGSI_OPCODE_DPH:
- return emit_dph( emit, insn );
-
case TGSI_OPCODE_COS:
return emit_cos( emit, insn );
case TGSI_OPCODE_SIN:
return emit_sin( emit, insn );
- case TGSI_OPCODE_SCS:
- return emit_sincos( emit, insn );
-
case TGSI_OPCODE_END:
/* TGSI always finishes the main func with an END */
return emit_end( emit );
@@ -3023,9 +2903,6 @@
case TGSI_OPCODE_BRK:
return emit_brk( emit, insn );
- case TGSI_OPCODE_XPD:
- return emit_xpd( emit, insn );
-
case TGSI_OPCODE_KILL:
return emit_kill( emit, insn );
@@ -3641,7 +3518,6 @@
emit->info.opcode_count[TGSI_OPCODE_SEQ] >= 1 ||
emit->info.opcode_count[TGSI_OPCODE_EXP] >= 1 ||
emit->info.opcode_count[TGSI_OPCODE_LOG] >= 1 ||
- emit->info.opcode_count[TGSI_OPCODE_XPD] >= 1 ||
emit->info.opcode_count[TGSI_OPCODE_KILL] >= 1)
return TRUE;
diff -Nru mesa-17.2.4/src/gallium/drivers/svga/svga_tgsi_vgpu10.c mesa-17.3.3/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
--- mesa-17.2.4/src/gallium/drivers/svga/svga_tgsi_vgpu10.c 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/svga/svga_tgsi_vgpu10.c 2018-01-18 21:30:28.000000000 +0000
@@ -134,6 +134,7 @@
/* Samplers */
unsigned num_samplers;
+ boolean sampler_view[PIPE_MAX_SAMPLERS]; /**< True if sampler view exists*/
ubyte sampler_target[PIPE_MAX_SAMPLERS]; /**< TGSI_TEXTURE_x */
ubyte sampler_return_type[PIPE_MAX_SAMPLERS]; /**< TGSI_RETURN_TYPE_x */
@@ -167,8 +168,8 @@
/* For fragment shaders only */
struct {
- /* apha test */
unsigned color_out_index[PIPE_MAX_COLOR_BUFS]; /**< the real color output regs */
+ unsigned num_color_outputs;
unsigned color_tmp_index; /**< fake/temp color output reg */
unsigned alpha_ref_index; /**< immediate constant for alpha ref */
@@ -578,8 +579,6 @@
return VGPU10_OPCODE_RET;
case TGSI_OPCODE_NOP:
return VGPU10_OPCODE_NOP;
- case TGSI_OPCODE_BREAKC:
- return VGPU10_OPCODE_BREAKC;
case TGSI_OPCODE_END:
return VGPU10_OPCODE_RET;
case TGSI_OPCODE_F2I:
@@ -1871,9 +1870,9 @@
/**
* Translate a TGSI property to VGPU10.
- * Don't emit any instructions yet, only need to gather the primitive property information.
- * The output primitive topology might be changed later. The final property instructions
- * will be emitted as part of the pre-helper code.
+ * Don't emit any instructions yet, only need to gather the primitive property
+ * information. The output primitive topology might be changed later. The
+ * final property instructions will be emitted as part of the pre-helper code.
*/
static boolean
emit_vgpu10_property(struct svga_shader_emitter_v10 *emit,
@@ -2324,6 +2323,7 @@
emit->sampler_target[unit] = decl->SamplerView.Resource;
/* Note: we can ignore YZW return types for now */
emit->sampler_return_type[unit] = decl->SamplerView.ReturnTypeX;
+ emit->sampler_view[unit] = TRUE;
}
return TRUE;
@@ -2422,7 +2422,9 @@
selMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
name = VGPU10_NAME_UNDEFINED;
- /* all geometry shader inputs are two dimensional except gl_PrimitiveID */
+ /* all geometry shader inputs are two dimensional except
+ * gl_PrimitiveID
+ */
dim = VGPU10_OPERAND_INDEX_2D;
if (semantic_name == TGSI_SEMANTIC_PRIMID) {
@@ -2499,6 +2501,9 @@
emit->fs.color_out_index[semantic_index] = index;
+ emit->fs.num_color_outputs = MAX2(emit->fs.num_color_outputs,
+ index + 1);
+
/* The semantic index is the shader's color output/buffer index */
emit_output_declaration(emit,
VGPU10_OPCODE_DCL_OUTPUT, semantic_index,
@@ -2521,6 +2526,9 @@
VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
emit->info.output_semantic_index[idx] = j;
}
+
+ emit->fs.num_color_outputs =
+ emit->key.fs.write_color0_to_n_cbufs;
}
}
else {
@@ -2707,7 +2715,6 @@
}
else if (emit->unit == PIPE_SHADER_FRAGMENT) {
if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS ||
- emit->key.fs.white_fragments ||
emit->key.fs.write_color0_to_n_cbufs > 1) {
/* Allocate a temp to hold the output color */
emit->fs.color_tmp_index = total_temps;
@@ -2883,14 +2890,17 @@
for (i = 0; i < emit->num_samplers; i++) {
- /* Texcoord scale factors for RECT textures */
- if (emit->key.tex[i].unnormalized) {
- emit->texcoord_scale_index[i] = total_consts++;
- }
+ if (emit->sampler_view[i]) {
+
+ /* Texcoord scale factors for RECT textures */
+ if (emit->key.tex[i].unnormalized) {
+ emit->texcoord_scale_index[i] = total_consts++;
+ }
- /* Texture buffer sizes */
- if (emit->sampler_target[i] == TGSI_TEXTURE_BUFFER) {
- emit->texture_buffer_size_index[i] = total_consts++;
+ /* Texture buffer sizes */
+ if (emit->sampler_target[i] == TGSI_TEXTURE_BUFFER) {
+ emit->texture_buffer_size_index[i] = total_consts++;
+ }
}
}
@@ -3573,89 +3583,6 @@
/**
- * Emit code for TGSI_OPCODE_DP2A instruction.
- */
-static boolean
-emit_dp2a(struct svga_shader_emitter_v10 *emit,
- const struct tgsi_full_instruction *inst)
-{
- /* dst.x = src0.x * src1.x + src0.y * src1.y + src2.x
- * dst.y = src0.x * src1.x + src0.y * src1.y + src2.x
- * dst.z = src0.x * src1.x + src0.y * src1.y + src2.x
- * dst.w = src0.x * src1.x + src0.y * src1.y + src2.x
- * Translate into
- * MAD tmp.x, s0.y, s1.y, s2.x
- * MAD tmp.x, s0.x, s1.x, tmp.x
- * MOV dst.xyzw, tmp.xxxx
- */
- unsigned tmp = get_temp_index(emit);
- struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
- struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
-
- struct tgsi_full_src_register tmp_src_xxxx =
- scalar_src(&tmp_src, TGSI_SWIZZLE_X);
- struct tgsi_full_dst_register tmp_dst_x =
- writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
-
- struct tgsi_full_src_register src0_xxxx =
- scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
- struct tgsi_full_src_register src0_yyyy =
- scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
- struct tgsi_full_src_register src1_xxxx =
- scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
- struct tgsi_full_src_register src1_yyyy =
- scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y);
- struct tgsi_full_src_register src2_xxxx =
- scalar_src(&inst->Src[2], TGSI_SWIZZLE_X);
-
- emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &tmp_dst_x, &src0_yyyy,
- &src1_yyyy, &src2_xxxx, FALSE);
- emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &tmp_dst_x, &src0_xxxx,
- &src1_xxxx, &tmp_src_xxxx, FALSE);
- emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
- &tmp_src_xxxx, inst->Instruction.Saturate);
-
- free_temp_indexes(emit);
-
- return TRUE;
-}
-
-
-/**
- * Emit code for TGSI_OPCODE_DPH instruction.
- */
-static boolean
-emit_dph(struct svga_shader_emitter_v10 *emit,
- const struct tgsi_full_instruction *inst)
-{
- /*
- * DP3 tmp, s0, s1
- * ADD dst, tmp, s1.wwww
- */
-
- struct tgsi_full_src_register s1_wwww =
- swizzle_src(&inst->Src[1], TGSI_SWIZZLE_W, TGSI_SWIZZLE_W,
- TGSI_SWIZZLE_W, TGSI_SWIZZLE_W);
-
- unsigned tmp = get_temp_index(emit);
- struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
- struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
-
- /* DP3 tmp, s0, s1 */
- emit_instruction_op2(emit, VGPU10_OPCODE_DP3, &tmp_dst, &inst->Src[0],
- &inst->Src[1], FALSE);
-
- /* ADD dst, tmp, s1.wwww */
- emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &inst->Dst[0], &tmp_src,
- &s1_wwww, inst->Instruction.Saturate);
-
- free_temp_indexes(emit);
-
- return TRUE;
-}
-
-
-/**
* Emit code for TGSI_OPCODE_DST instruction.
*/
static boolean
@@ -4359,42 +4286,6 @@
/**
- * Emit code for TGSI_OPCODE_SCS instruction.
- */
-static boolean
-emit_scs(struct svga_shader_emitter_v10 *emit,
- const struct tgsi_full_instruction *inst)
-{
- /* dst.x = cos(src.x)
- * dst.y = sin(src.x)
- * dst.z = 0.0
- * dst.w = 1.0
- */
- struct tgsi_full_dst_register dst_x =
- writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_X);
- struct tgsi_full_dst_register dst_y =
- writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Y);
- struct tgsi_full_dst_register dst_zw =
- writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_ZW);
-
- struct tgsi_full_src_register zero_one =
- make_immediate_reg_float4(emit, 0.0f, 0.0f, 0.0f, 1.0f);
-
- begin_emit_instruction(emit);
- emit_opcode(emit, VGPU10_OPCODE_SINCOS, inst->Instruction.Saturate);
- emit_dst_register(emit, &dst_y);
- emit_dst_register(emit, &dst_x);
- emit_src_register(emit, &inst->Src[0]);
- end_emit_instruction(emit);
-
- emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
- &dst_zw, &zero_one, inst->Instruction.Saturate);
-
- return TRUE;
-}
-
-
-/**
* Emit code for TGSI_OPCODE_SEQ (Set Equal) instruction.
*/
static boolean
@@ -5288,117 +5179,6 @@
}
-/*
- * Emit code for TGSI_OPCODE_XPD instruction.
- */
-static boolean
-emit_xpd(struct svga_shader_emitter_v10 *emit,
- const struct tgsi_full_instruction *inst)
-{
- /* dst.x = src0.y * src1.z - src1.y * src0.z
- * dst.y = src0.z * src1.x - src1.z * src0.x
- * dst.z = src0.x * src1.y - src1.x * src0.y
- * dst.w = 1
- */
- struct tgsi_full_src_register s0_xxxx =
- scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
- struct tgsi_full_src_register s0_yyyy =
- scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
- struct tgsi_full_src_register s0_zzzz =
- scalar_src(&inst->Src[0], TGSI_SWIZZLE_Z);
-
- struct tgsi_full_src_register s1_xxxx =
- scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
- struct tgsi_full_src_register s1_yyyy =
- scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y);
- struct tgsi_full_src_register s1_zzzz =
- scalar_src(&inst->Src[1], TGSI_SWIZZLE_Z);
-
- unsigned tmp1 = get_temp_index(emit);
- struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
- struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
-
- unsigned tmp2 = get_temp_index(emit);
- struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
- struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
- struct tgsi_full_src_register neg_tmp2_src = negate_src(&tmp2_src);
-
- unsigned tmp3 = get_temp_index(emit);
- struct tgsi_full_src_register tmp3_src = make_src_temp_reg(tmp3);
- struct tgsi_full_dst_register tmp3_dst = make_dst_temp_reg(tmp3);
- struct tgsi_full_dst_register tmp3_dst_x =
- writemask_dst(&tmp3_dst, TGSI_WRITEMASK_X);
- struct tgsi_full_dst_register tmp3_dst_y =
- writemask_dst(&tmp3_dst, TGSI_WRITEMASK_Y);
- struct tgsi_full_dst_register tmp3_dst_z =
- writemask_dst(&tmp3_dst, TGSI_WRITEMASK_Z);
- struct tgsi_full_dst_register tmp3_dst_w =
- writemask_dst(&tmp3_dst, TGSI_WRITEMASK_W);
-
- /* Note: we put all the intermediate computations into tmp3 in case
- * the XPD dest register is that same as one of the src regs (in which
- * case we could clobber a src reg before we're done with it) .
- *
- * Note: we could get by with just one temp register instead of three
- * since we're doing scalar operations and there's enough room in one
- * temp for everything.
- */
-
- /* MUL tmp1, src0.y, src1.z */
- /* MUL tmp2, src1.y, src0.z */
- /* ADD tmp3.x, tmp1, -tmp2 */
- if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
- emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst,
- &s0_yyyy, &s1_zzzz, FALSE);
- emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst,
- &s1_yyyy, &s0_zzzz, FALSE);
- emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_x,
- &tmp1_src, &neg_tmp2_src, FALSE);
- }
-
- /* MUL tmp1, src0.z, src1.x */
- /* MUL tmp2, src1.z, src0.x */
- /* ADD tmp3.y, tmp1, -tmp2 */
- if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
- emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &s0_zzzz,
- &s1_xxxx, FALSE);
- emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst, &s1_zzzz,
- &s0_xxxx, FALSE);
- emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_y,
- &tmp1_src, &neg_tmp2_src, FALSE);
- }
-
- /* MUL tmp1, src0.x, src1.y */
- /* MUL tmp2, src1.x, src0.y */
- /* ADD tmp3.z, tmp1, -tmp2 */
- if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
- emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &s0_xxxx,
- &s1_yyyy, FALSE);
- emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst, &s1_xxxx,
- &s0_yyyy, FALSE);
- emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_z,
- &tmp1_src, &neg_tmp2_src, FALSE);
- }
-
- /* MOV tmp3.w, 1.0 */
- if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
- struct tgsi_full_src_register one =
- make_immediate_reg_float(emit, 1.0f);
-
- emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &tmp3_dst_w, &one, FALSE);
- }
-
- /* MOV dst, tmp3 */
- emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &tmp3_src,
- inst->Instruction.Saturate);
-
-
- free_temp_indexes(emit);
-
- return TRUE;
-}
-
-
/**
* Emit code for TGSI_OPCODE_TXD (explicit derivatives)
*/
@@ -5756,10 +5536,6 @@
return emit_cmp(emit, inst);
case TGSI_OPCODE_COS:
return emit_sincos(emit, inst);
- case TGSI_OPCODE_DP2A:
- return emit_dp2a(emit, inst);
- case TGSI_OPCODE_DPH:
- return emit_dph(emit, inst);
case TGSI_OPCODE_DST:
return emit_dst(emit, inst);
case TGSI_OPCODE_EX2:
@@ -5788,8 +5564,6 @@
return emit_rsq(emit, inst);
case TGSI_OPCODE_SAMPLE:
return emit_sample(emit, inst);
- case TGSI_OPCODE_SCS:
- return emit_scs(emit, inst);
case TGSI_OPCODE_SEQ:
return emit_seq(emit, inst);
case TGSI_OPCODE_SGE:
@@ -5824,8 +5598,6 @@
return emit_txq(emit, inst);
case TGSI_OPCODE_UIF:
return emit_if(emit, inst);
- case TGSI_OPCODE_XPD:
- return emit_xpd(emit, inst);
case TGSI_OPCODE_UMUL_HI:
case TGSI_OPCODE_IMUL_HI:
case TGSI_OPCODE_UDIV:
@@ -6379,6 +6151,47 @@
/**
+ * The device has no direct support for the pipe_blend_state::alpha_to_one
+ * option so we implement it here with shader code.
+ *
+ * Note that this is kind of pointless, actually. Here we're clobbering
+ * the alpha value with 1.0. So if alpha-to-coverage is enabled, we'll wind
+ * up with 100% coverage. That's almost certainly not what the user wants.
+ * The work-around is to add extra shader code to compute coverage from alpha
+ * and write it to the coverage output register (if the user's shader doesn't
+ * do so already). We'll probably do that in the future.
+ */
+static void
+emit_alpha_to_one_instructions(struct svga_shader_emitter_v10 *emit,
+ unsigned fs_color_tmp_index)
+{
+ struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
+ unsigned i;
+
+ /* Note: it's not 100% clear from the spec if we're supposed to clobber
+ * the alpha for all render targets. But that's what NVIDIA does and
+ * that's what Piglit tests.
+ */
+ for (i = 0; i < emit->fs.num_color_outputs; i++) {
+ struct tgsi_full_dst_register color_dst;
+
+ if (fs_color_tmp_index != INVALID_INDEX && i == 0) {
+ /* write to the temp color register */
+ color_dst = make_dst_temp_reg(fs_color_tmp_index);
+ }
+ else {
+ /* write directly to the color[i] output */
+ color_dst = make_dst_output_reg(emit->fs.color_out_index[i]);
+ }
+
+ color_dst = writemask_dst(&color_dst, TGSI_WRITEMASK_W);
+
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &one, FALSE);
+ }
+}
+
+
+/**
* Emit alpha test code. This compares TEMP[fs_color_tmp_index].w
* against the alpha reference value and discards the fragment if the
* comparison fails.
@@ -6414,11 +6227,9 @@
emit_src_register(emit, &tmp_src_x);
end_emit_instruction(emit);
- /* If we don't need to broadcast the color below or set fragments to
- * white, emit final color here.
+ /* If we don't need to broadcast the color below, emit the final color here.
*/
- if (emit->key.fs.write_color0_to_n_cbufs <= 1 &&
- !emit->key.fs.white_fragments) {
+ if (emit->key.fs.write_color0_to_n_cbufs <= 1) {
/* MOV output.color, tempcolor */
emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst,
&color_src, FALSE); /* XXX saturate? */
@@ -6429,23 +6240,6 @@
/**
- * When we need to emit white for all fragments (for emulating XOR logicop
- * mode), this function copies white into the temporary color output register.
- */
-static void
-emit_set_color_white(struct svga_shader_emitter_v10 *emit,
- unsigned fs_color_tmp_index)
-{
- struct tgsi_full_dst_register color_dst =
- make_dst_temp_reg(fs_color_tmp_index);
- struct tgsi_full_src_register white =
- make_immediate_reg_float(emit, 1.0f);
-
- emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &white, FALSE);
-}
-
-
-/**
* Emit instructions for writing a single color output to multiple
* color buffers.
* This is used when the TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS (or
@@ -6460,8 +6254,17 @@
{
const unsigned n = emit->key.fs.write_color0_to_n_cbufs;
unsigned i;
- struct tgsi_full_src_register color_src =
- make_src_temp_reg(fs_color_tmp_index);
+ struct tgsi_full_src_register color_src;
+
+ if (emit->key.fs.white_fragments) {
+ /* set all color outputs to white */
+ color_src = make_immediate_reg_float(emit, 1.0f);
+ }
+ else {
+ /* set all color outputs to TEMP[fs_color_tmp_index] */
+ assert(fs_color_tmp_index != INVALID_INDEX);
+ color_src = make_src_temp_reg(fs_color_tmp_index);
+ }
assert(emit->unit == PIPE_SHADER_FRAGMENT);
@@ -6497,17 +6300,20 @@
else if (emit->unit == PIPE_SHADER_FRAGMENT) {
const unsigned fs_color_tmp_index = emit->fs.color_tmp_index;
+ assert(!(emit->key.fs.white_fragments &&
+ emit->key.fs.write_color0_to_n_cbufs == 0));
+
/* We no longer want emit_dst_register() to substitute the
* temporary fragment color register for the real color output.
*/
emit->fs.color_tmp_index = INVALID_INDEX;
+ if (emit->key.fs.alpha_to_one) {
+ emit_alpha_to_one_instructions(emit, fs_color_tmp_index);
+ }
if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
emit_alpha_test_instructions(emit, fs_color_tmp_index);
}
- if (emit->key.fs.white_fragments) {
- emit_set_color_white(emit, fs_color_tmp_index);
- }
if (emit->key.fs.write_color0_to_n_cbufs > 1 ||
emit->key.fs.white_fragments) {
emit_broadcast_color_instructions(emit, fs_color_tmp_index);
diff -Nru mesa-17.2.4/src/gallium/drivers/swr/Makefile.am mesa-17.3.3/src/gallium/drivers/swr/Makefile.am
--- mesa-17.2.4/src/gallium/drivers/swr/Makefile.am 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/swr/Makefile.am 2018-01-18 21:30:28.000000000 +0000
@@ -115,7 +115,7 @@
--output rasterizer/codegen/gen_knobs.cpp \
--gen_cpp
-rasterizer/codegen/gen_knobs.h: rasterizer/codegen/gen_knobs.py rasterizer/codegen/knob_defs.py rasterizer/codegen/templates/gen_knobs.cpp rasterizer/codegen/gen_common.py
+rasterizer/codegen/gen_knobs.h: rasterizer/codegen/gen_knobs.py rasterizer/codegen/knob_defs.py rasterizer/codegen/templates/gen_knobs.h rasterizer/codegen/gen_common.py
$(MKDIR_GEN)
$(PYTHON_GEN) \
$(srcdir)/rasterizer/codegen/gen_knobs.py \
@@ -285,7 +285,7 @@
libswrKNL_la_CXXFLAGS = \
$(PTHREAD_CFLAGS) \
$(SWR_KNL_CXXFLAGS) \
- -DKNOB_ARCH=KNOB_ARCH_AVX512 -DAVX512F_STRICT \
+ -DKNOB_ARCH=KNOB_ARCH_AVX512 -DSIMD_ARCH_KNIGHTS \
$(COMMON_CXXFLAGS)
libswrKNL_la_SOURCES = \
@@ -347,5 +347,6 @@
rasterizer/codegen/templates/gen_builder.hpp \
rasterizer/codegen/templates/gen_header_init.hpp \
rasterizer/codegen/templates/gen_knobs.cpp \
+ rasterizer/codegen/templates/gen_knobs.h \
rasterizer/codegen/templates/gen_llvm.hpp \
rasterizer/codegen/templates/gen_rasterizer.cpp
diff -Nru mesa-17.2.4/src/gallium/drivers/swr/Makefile.in mesa-17.3.3/src/gallium/drivers/swr/Makefile.in
--- mesa-17.2.4/src/gallium/drivers/swr/Makefile.in 2017-10-30 14:50:00.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/swr/Makefile.in 2018-01-18 21:30:40.000000000 +0000
@@ -154,7 +154,8 @@
@HAVE_SWR_SKX_TRUE@am__append_11 = libswrSKX.la
subdir = src/gallium/drivers/swr
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_gnu_make.m4 \
+am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_compile_flag.m4 \
+ $(top_srcdir)/m4/ax_check_gnu_make.m4 \
$(top_srcdir)/m4/ax_check_python_mako_module.m4 \
$(top_srcdir)/m4/ax_gcc_builtin.m4 \
$(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
@@ -246,11 +247,19 @@
rasterizer/common/simdlib_128_avx.inl \
rasterizer/common/simdlib_128_avx2.inl \
rasterizer/common/simdlib_128_avx512.inl \
+ rasterizer/common/simdlib_128_avx512_core.inl \
+ rasterizer/common/simdlib_128_avx512_knights.inl \
rasterizer/common/simdlib_256_avx.inl \
rasterizer/common/simdlib_256_avx2.inl \
rasterizer/common/simdlib_256_avx512.inl \
+ rasterizer/common/simdlib_256_avx512_core.inl \
+ rasterizer/common/simdlib_256_avx512_knights.inl \
rasterizer/common/simdlib_512_avx512.inl \
+ rasterizer/common/simdlib_512_avx512_core.inl \
+ rasterizer/common/simdlib_512_avx512_knights.inl \
rasterizer/common/simdlib_512_avx512_masks.inl \
+ rasterizer/common/simdlib_512_avx512_masks_core.inl \
+ rasterizer/common/simdlib_512_avx512_masks_knights.inl \
rasterizer/common/simdlib_512_emu.inl \
rasterizer/common/simdlib_512_emu_masks.inl \
rasterizer/common/simdlib_interface.hpp \
@@ -379,11 +388,19 @@
rasterizer/common/simdlib_128_avx.inl \
rasterizer/common/simdlib_128_avx2.inl \
rasterizer/common/simdlib_128_avx512.inl \
+ rasterizer/common/simdlib_128_avx512_core.inl \
+ rasterizer/common/simdlib_128_avx512_knights.inl \
rasterizer/common/simdlib_256_avx.inl \
rasterizer/common/simdlib_256_avx2.inl \
rasterizer/common/simdlib_256_avx512.inl \
+ rasterizer/common/simdlib_256_avx512_core.inl \
+ rasterizer/common/simdlib_256_avx512_knights.inl \
rasterizer/common/simdlib_512_avx512.inl \
+ rasterizer/common/simdlib_512_avx512_core.inl \
+ rasterizer/common/simdlib_512_avx512_knights.inl \
rasterizer/common/simdlib_512_avx512_masks.inl \
+ rasterizer/common/simdlib_512_avx512_masks_core.inl \
+ rasterizer/common/simdlib_512_avx512_masks_knights.inl \
rasterizer/common/simdlib_512_emu.inl \
rasterizer/common/simdlib_512_emu_masks.inl \
rasterizer/common/simdlib_interface.hpp \
@@ -512,11 +529,19 @@
rasterizer/common/simdlib_128_avx.inl \
rasterizer/common/simdlib_128_avx2.inl \
rasterizer/common/simdlib_128_avx512.inl \
+ rasterizer/common/simdlib_128_avx512_core.inl \
+ rasterizer/common/simdlib_128_avx512_knights.inl \
rasterizer/common/simdlib_256_avx.inl \
rasterizer/common/simdlib_256_avx2.inl \
rasterizer/common/simdlib_256_avx512.inl \
+ rasterizer/common/simdlib_256_avx512_core.inl \
+ rasterizer/common/simdlib_256_avx512_knights.inl \
rasterizer/common/simdlib_512_avx512.inl \
+ rasterizer/common/simdlib_512_avx512_core.inl \
+ rasterizer/common/simdlib_512_avx512_knights.inl \
rasterizer/common/simdlib_512_avx512_masks.inl \
+ rasterizer/common/simdlib_512_avx512_masks_core.inl \
+ rasterizer/common/simdlib_512_avx512_masks_knights.inl \
rasterizer/common/simdlib_512_emu.inl \
rasterizer/common/simdlib_512_emu_masks.inl \
rasterizer/common/simdlib_interface.hpp \
@@ -644,11 +669,19 @@
rasterizer/common/simdlib_128_avx.inl \
rasterizer/common/simdlib_128_avx2.inl \
rasterizer/common/simdlib_128_avx512.inl \
+ rasterizer/common/simdlib_128_avx512_core.inl \
+ rasterizer/common/simdlib_128_avx512_knights.inl \
rasterizer/common/simdlib_256_avx.inl \
rasterizer/common/simdlib_256_avx2.inl \
rasterizer/common/simdlib_256_avx512.inl \
+ rasterizer/common/simdlib_256_avx512_core.inl \
+ rasterizer/common/simdlib_256_avx512_knights.inl \
rasterizer/common/simdlib_512_avx512.inl \
+ rasterizer/common/simdlib_512_avx512_core.inl \
+ rasterizer/common/simdlib_512_avx512_knights.inl \
rasterizer/common/simdlib_512_avx512_masks.inl \
+ rasterizer/common/simdlib_512_avx512_masks_core.inl \
+ rasterizer/common/simdlib_512_avx512_masks_knights.inl \
rasterizer/common/simdlib_512_emu.inl \
rasterizer/common/simdlib_512_emu_masks.inl \
rasterizer/common/simdlib_interface.hpp \
@@ -988,9 +1021,9 @@
NVVIEUX_LIBS = @NVVIEUX_LIBS@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
-OMX_CFLAGS = @OMX_CFLAGS@
-OMX_LIBS = @OMX_LIBS@
-OMX_LIB_INSTALL_DIR = @OMX_LIB_INSTALL_DIR@
+OMX_BELLAGIO_CFLAGS = @OMX_BELLAGIO_CFLAGS@
+OMX_BELLAGIO_LIBS = @OMX_BELLAGIO_LIBS@
+OMX_BELLAGIO_LIB_INSTALL_DIR = @OMX_BELLAGIO_LIB_INSTALL_DIR@
OPENCL_LIBNAME = @OPENCL_LIBNAME@
OPENCL_VERSION = @OPENCL_VERSION@
OSMESA_LIB = @OSMESA_LIB@
@@ -1044,6 +1077,8 @@
VA_LIB_INSTALL_DIR = @VA_LIB_INSTALL_DIR@
VA_MAJOR = @VA_MAJOR@
VA_MINOR = @VA_MINOR@
+VC5_SIMULATOR_CFLAGS = @VC5_SIMULATOR_CFLAGS@
+VC5_SIMULATOR_LIBS = @VC5_SIMULATOR_LIBS@
VDPAU_CFLAGS = @VDPAU_CFLAGS@
VDPAU_LIBS = @VDPAU_LIBS@
VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@
@@ -1055,12 +1090,15 @@
VL_CFLAGS = @VL_CFLAGS@
VL_LIBS = @VL_LIBS@
VULKAN_ICD_INSTALL_DIR = @VULKAN_ICD_INSTALL_DIR@
-WAYLAND_CFLAGS = @WAYLAND_CFLAGS@
-WAYLAND_LIBS = @WAYLAND_LIBS@
+WAYLAND_CLIENT_CFLAGS = @WAYLAND_CLIENT_CFLAGS@
+WAYLAND_CLIENT_LIBS = @WAYLAND_CLIENT_LIBS@
WAYLAND_PROTOCOLS_DATADIR = @WAYLAND_PROTOCOLS_DATADIR@
WAYLAND_SCANNER = @WAYLAND_SCANNER@
WAYLAND_SCANNER_CFLAGS = @WAYLAND_SCANNER_CFLAGS@
WAYLAND_SCANNER_LIBS = @WAYLAND_SCANNER_LIBS@
+WAYLAND_SERVER_CFLAGS = @WAYLAND_SERVER_CFLAGS@
+WAYLAND_SERVER_LIBS = @WAYLAND_SERVER_LIBS@
+WNO_OVERRIDE_INIT = @WNO_OVERRIDE_INIT@
X11_INCLUDES = @X11_INCLUDES@
XA_MAJOR = @XA_MAJOR@
XA_MINOR = @XA_MINOR@
@@ -1194,11 +1232,19 @@
rasterizer/common/simdlib_128_avx.inl \
rasterizer/common/simdlib_128_avx2.inl \
rasterizer/common/simdlib_128_avx512.inl \
+ rasterizer/common/simdlib_128_avx512_core.inl \
+ rasterizer/common/simdlib_128_avx512_knights.inl \
rasterizer/common/simdlib_256_avx.inl \
rasterizer/common/simdlib_256_avx2.inl \
rasterizer/common/simdlib_256_avx512.inl \
+ rasterizer/common/simdlib_256_avx512_core.inl \
+ rasterizer/common/simdlib_256_avx512_knights.inl \
rasterizer/common/simdlib_512_avx512.inl \
+ rasterizer/common/simdlib_512_avx512_core.inl \
+ rasterizer/common/simdlib_512_avx512_knights.inl \
rasterizer/common/simdlib_512_avx512_masks.inl \
+ rasterizer/common/simdlib_512_avx512_masks_core.inl \
+ rasterizer/common/simdlib_512_avx512_masks_knights.inl \
rasterizer/common/simdlib_512_emu.inl \
rasterizer/common/simdlib_512_emu_masks.inl \
rasterizer/common/simdlib_interface.hpp \
@@ -1329,6 +1375,8 @@
-I$(top_srcdir)/src/gallium/auxiliary \
-I$(top_srcdir)/src/gallium/drivers \
-I$(top_srcdir)/src/gallium/winsys \
+ -I$(top_builddir)/src/util/ \
+ -I$(top_builddir)/src/gallium/drivers/ \
$(DEFINES) \
$(PTHREAD_CFLAGS) \
$(LIBDRM_CFLAGS) \
@@ -1453,7 +1501,7 @@
@HAVE_SWR_KNL_TRUE@libswrKNL_la_CXXFLAGS = \
@HAVE_SWR_KNL_TRUE@ $(PTHREAD_CFLAGS) \
@HAVE_SWR_KNL_TRUE@ $(SWR_KNL_CXXFLAGS) \
-@HAVE_SWR_KNL_TRUE@ -DKNOB_ARCH=KNOB_ARCH_AVX512 -DAVX512F_STRICT \
+@HAVE_SWR_KNL_TRUE@ -DKNOB_ARCH=KNOB_ARCH_AVX512 -DSIMD_ARCH_KNIGHTS \
@HAVE_SWR_KNL_TRUE@ $(COMMON_CXXFLAGS)
@HAVE_SWR_KNL_TRUE@libswrKNL_la_SOURCES = \
@@ -1498,6 +1546,7 @@
rasterizer/codegen/templates/gen_builder.hpp \
rasterizer/codegen/templates/gen_header_init.hpp \
rasterizer/codegen/templates/gen_knobs.cpp \
+ rasterizer/codegen/templates/gen_knobs.h \
rasterizer/codegen/templates/gen_llvm.hpp \
rasterizer/codegen/templates/gen_rasterizer.cpp
@@ -3957,7 +4006,7 @@
--output rasterizer/codegen/gen_knobs.cpp \
--gen_cpp
-rasterizer/codegen/gen_knobs.h: rasterizer/codegen/gen_knobs.py rasterizer/codegen/knob_defs.py rasterizer/codegen/templates/gen_knobs.cpp rasterizer/codegen/gen_common.py
+rasterizer/codegen/gen_knobs.h: rasterizer/codegen/gen_knobs.py rasterizer/codegen/knob_defs.py rasterizer/codegen/templates/gen_knobs.h rasterizer/codegen/gen_common.py
$(MKDIR_GEN)
$(PYTHON_GEN) \
$(srcdir)/rasterizer/codegen/gen_knobs.py \
diff -Nru mesa-17.2.4/src/gallium/drivers/swr/Makefile.sources mesa-17.3.3/src/gallium/drivers/swr/Makefile.sources
--- mesa-17.2.4/src/gallium/drivers/swr/Makefile.sources 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/swr/Makefile.sources 2018-01-18 21:30:28.000000000 +0000
@@ -69,11 +69,19 @@
rasterizer/common/simdlib_128_avx.inl \
rasterizer/common/simdlib_128_avx2.inl \
rasterizer/common/simdlib_128_avx512.inl \
+ rasterizer/common/simdlib_128_avx512_core.inl \
+ rasterizer/common/simdlib_128_avx512_knights.inl \
rasterizer/common/simdlib_256_avx.inl \
rasterizer/common/simdlib_256_avx2.inl \
rasterizer/common/simdlib_256_avx512.inl \
+ rasterizer/common/simdlib_256_avx512_core.inl \
+ rasterizer/common/simdlib_256_avx512_knights.inl \
rasterizer/common/simdlib_512_avx512.inl \
+ rasterizer/common/simdlib_512_avx512_core.inl \
+ rasterizer/common/simdlib_512_avx512_knights.inl \
rasterizer/common/simdlib_512_avx512_masks.inl \
+ rasterizer/common/simdlib_512_avx512_masks_core.inl \
+ rasterizer/common/simdlib_512_avx512_masks_knights.inl \
rasterizer/common/simdlib_512_emu.inl \
rasterizer/common/simdlib_512_emu_masks.inl \
rasterizer/common/simdlib_interface.hpp \
diff -Nru mesa-17.2.4/src/gallium/drivers/swr/rasterizer/archrast/gen_ar_eventhandlerfile.hpp mesa-17.3.3/src/gallium/drivers/swr/rasterizer/archrast/gen_ar_eventhandlerfile.hpp
--- mesa-17.2.4/src/gallium/drivers/swr/rasterizer/archrast/gen_ar_eventhandlerfile.hpp 2017-10-30 14:51:04.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/swr/rasterizer/archrast/gen_ar_eventhandlerfile.hpp 2018-01-18 21:31:15.000000000 +0000
@@ -23,7 +23,7 @@
* @file gen_ar_eventhandlerfile.hpp
*
* @brief Event handler interface. auto-generated file
-*
+*
* DO NOT EDIT
*
* Generation Command Line:
@@ -41,6 +41,7 @@
#include "gen_ar_eventhandler.hpp"
#include
#include
+#include
namespace ArchRast
{
@@ -62,20 +63,21 @@
outDir << KNOB_DEBUG_OUTPUT_DIR << pBaseName << "_" << pid << std::ends;
CreateDirectory(outDir.str().c_str(), NULL);
- char buf[255];
// There could be multiple threads creating thread pools. We
// want to make sure they are uniquly identified by adding in
// the creator's thread id into the filename.
- sprintf(buf, "%s\\ar_event%d_%d.bin", outDir.str().c_str(), GetCurrentThreadId(), id);
- mFilename = std::string(buf);
+ std::stringstream fstr;
+ fstr << outDir.str().c_str() << "\\ar_event" << std::this_thread::get_id();
+ fstr << "_" << id << ".bin" << std::ends;
+ mFilename = fstr.str();
#else
- char buf[255];
// There could be multiple threads creating thread pools. We
// want to make sure they are uniquly identified by adding in
- // the creator's thread (process) id into the filename.
- // Assumes a 1:1 thread:LWP mapping as in linux.
- sprintf(buf, "%s/ar_event%d_%d.bin", "/tmp", GetCurrentProcessId(), id);
- mFilename = std::string(buf);
+ // the creator's thread id into the filename.
+ std::stringstream fstr;
+ fstr << "/tmp/ar_event" << std::this_thread::get_id();
+ fstr << "_" << id << ".bin" << std::ends;
+ mFilename = fstr.str();
#endif
}
diff -Nru mesa-17.2.4/src/gallium/drivers/swr/rasterizer/codegen/gen_knobs.cpp mesa-17.3.3/src/gallium/drivers/swr/rasterizer/codegen/gen_knobs.cpp
--- mesa-17.2.4/src/gallium/drivers/swr/rasterizer/codegen/gen_knobs.cpp 2017-10-30 14:51:04.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/swr/rasterizer/codegen/gen_knobs.cpp 2018-01-18 21:31:15.000000000 +0000
@@ -1,19 +1,24 @@
/******************************************************************************
+* Copyright (C) 2015-2017 Intel Corporation. All Rights Reserved.
*
-* Copyright 2015-2017
-* Intel Corporation
-*
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
-*
-* http ://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the "Software"),
+* to deal in the Software without restriction, including without limitation
+* the rights to use, copy, modify, merge, publish, distribute, sublicense,
+* and/or sell copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice (including the next
+* paragraph) shall be included in all copies or substantial portions of the
+* Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+* IN THE SOFTWARE.
*
* @file gen_knobs.cpp
*
@@ -204,5 +209,3 @@
return str.str();
}
-
-
diff -Nru mesa-17.2.4/src/gallium/drivers/swr/rasterizer/codegen/gen_knobs.h mesa-17.3.3/src/gallium/drivers/swr/rasterizer/codegen/gen_knobs.h
--- mesa-17.2.4/src/gallium/drivers/swr/rasterizer/codegen/gen_knobs.h 2017-10-30 14:51:04.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/swr/rasterizer/codegen/gen_knobs.h 2018-01-18 21:31:15.000000000 +0000
@@ -1,19 +1,24 @@
/******************************************************************************
+* Copyright (C) 2015-2017 Intel Corporation. All Rights Reserved.
*
-* Copyright 2015-2017
-* Intel Corporation
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the "Software"),
+* to deal in the Software without restriction, including without limitation
+* the rights to use, copy, modify, merge, publish, distribute, sublicense,
+* and/or sell copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following conditions:
*
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
+* The above copyright notice and this permission notice (including the next
+* paragraph) shall be included in all copies or substantial portions of the
+* Software.
*
-* http ://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+* IN THE SOFTWARE.
*
* @file gen_knobs.h
*
@@ -65,12 +70,6 @@
return Value();
}
-protected:
- Knob(T const &defaultValue) :
- m_Value(expandEnvironmentVariables(defaultValue))
- {
- }
-
private:
T m_Value;
};
@@ -78,8 +77,8 @@
#define DEFINE_KNOB(_name, _type, _default) \
struct Knob_##_name : Knob<_type> \
{ \
- Knob_##_name() : Knob<_type>(_default) { } \
static const char* Name() { return "KNOB_" #_name; } \
+ static _type DefaultValue() { return (_default); } \
} _name;
#define GET_KNOB(_name) g_GlobalKnobs._name.Value()
@@ -309,8 +308,9 @@
//
DEFINE_KNOB(TOSS_RS, bool, false);
- GlobalKnobs();
+
std::string ToString(const char* optPerLinePrefix="");
+ GlobalKnobs();
};
extern GlobalKnobs g_GlobalKnobs;
@@ -344,4 +344,3 @@
#define KNOB_TOSS_RS GET_KNOB(TOSS_RS)
-
diff -Nru mesa-17.2.4/src/gallium/drivers/swr/rasterizer/codegen/gen_knobs.py mesa-17.3.3/src/gallium/drivers/swr/rasterizer/codegen/gen_knobs.py
--- mesa-17.2.4/src/gallium/drivers/swr/rasterizer/codegen/gen_knobs.py 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/swr/rasterizer/codegen/gen_knobs.py 2018-01-18 21:30:28.000000000 +0000
@@ -37,27 +37,25 @@
args = parser.parse_args()
cur_dir = os.path.dirname(os.path.abspath(__file__))
- template_file = os.path.join(cur_dir, 'templates', 'gen_knobs.cpp')
+ template_cpp = os.path.join(cur_dir, 'templates', 'gen_knobs.cpp')
+ template_h = os.path.join(cur_dir, 'templates', 'gen_knobs.h')
if args.gen_h:
MakoTemplateWriter.to_file(
- template_file,
+ template_h,
args.output,
cmdline=sys.argv,
filename='gen_knobs',
- knobs=knob_defs.KNOBS,
- includes=['core/knobs_init.h', 'common/os.h', 'sstream', 'iomanip'],
- gen_header=True)
+ knobs=knob_defs.KNOBS)
if args.gen_cpp:
MakoTemplateWriter.to_file(
- template_file,
+ template_cpp,
args.output,
cmdline=sys.argv,
filename='gen_knobs',
knobs=knob_defs.KNOBS,
- includes=['core/knobs_init.h', 'common/os.h', 'sstream', 'iomanip'],
- gen_header=False)
+ includes=['core/knobs_init.h', 'common/os.h', 'sstream', 'iomanip'])
return 0
diff -Nru mesa-17.2.4/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py mesa-17.3.3/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
--- mesa-17.2.4/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py 2018-01-18 21:30:28.000000000 +0000
@@ -45,6 +45,7 @@
['VGATHERPD', 'x86_avx2_gather_d_pd_256', ['src', 'pBase', 'indices', 'mask', 'scale']],
['VGATHERPS', 'x86_avx2_gather_d_ps_256', ['src', 'pBase', 'indices', 'mask', 'scale']],
['VGATHERDD', 'x86_avx2_gather_d_d_256', ['src', 'pBase', 'indices', 'mask', 'scale']],
+ ['VPSRLI', 'x86_avx2_psrli_d', ['src', 'imm']],
['VSQRTPS', 'x86_avx_sqrt_ps_256', ['a']],
['VRSQRTPS', 'x86_avx_rsqrt_ps_256', ['a']],
['VRCPPS', 'x86_avx_rcp_ps_256', ['a']],
diff -Nru mesa-17.2.4/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_types.py mesa-17.3.3/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_types.py
--- mesa-17.2.4/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_types.py 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_types.py 2018-01-18 21:30:28.000000000 +0000
@@ -42,7 +42,7 @@
else:
if type == 'BYTE' or type == 'char' or type == 'uint8_t' or type == 'int8_t' or type == 'bool':
llvm_type = 'Type::getInt8Ty(ctx)'
- elif type == 'UINT64' or type == 'INT64' or type == 'uint64_t' or type == 'int64_t':
+ elif type == 'UINT64' or type == 'INT64' or type == 'uint64_t' or type == 'int64_t' or type == 'gfxptr_t':
llvm_type = 'Type::getInt64Ty(ctx)'
elif type == 'UINT16' or type == 'int16_t' or type == 'uint16_t':
llvm_type = 'Type::getInt16Ty(ctx)'
diff -Nru mesa-17.2.4/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandlerfile.hpp mesa-17.3.3/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandlerfile.hpp
--- mesa-17.2.4/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandlerfile.hpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandlerfile.hpp 2018-01-18 21:30:28.000000000 +0000
@@ -23,7 +23,7 @@
* @file ${filename}
*
* @brief Event handler interface. auto-generated file
-*
+*
* DO NOT EDIT
*
* Generation Command Line:
@@ -36,6 +36,7 @@
#include "${event_header}"
#include
#include
+#include
namespace ArchRast
{
@@ -57,20 +58,21 @@
outDir << KNOB_DEBUG_OUTPUT_DIR << pBaseName << "_" << pid << std::ends;
CreateDirectory(outDir.str().c_str(), NULL);
- char buf[255];
// There could be multiple threads creating thread pools. We
// want to make sure they are uniquly identified by adding in
// the creator's thread id into the filename.
- sprintf(buf, "%s\\ar_event%d_%d.bin", outDir.str().c_str(), GetCurrentThreadId(), id);
- mFilename = std::string(buf);
+ std::stringstream fstr;
+ fstr << outDir.str().c_str() << "\\ar_event" << std::this_thread::get_id();
+ fstr << "_" << id << ".bin" << std::ends;
+ mFilename = fstr.str();
#else
- char buf[255];
// There could be multiple threads creating thread pools. We
// want to make sure they are uniquly identified by adding in
- // the creator's thread (process) id into the filename.
- // Assumes a 1:1 thread:LWP mapping as in linux.
- sprintf(buf, "%s/ar_event%d_%d.bin", "/tmp", GetCurrentProcessId(), id);
- mFilename = std::string(buf);
+ // the creator's thread id into the filename.
+ std::stringstream fstr;
+ fstr << "/tmp/ar_event" << std::this_thread::get_id();
+ fstr << "_" << id << ".bin" << std::ends;
+ mFilename = fstr.str();
#endif
}
diff -Nru mesa-17.2.4/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.cpp mesa-17.3.3/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.cpp
--- mesa-17.2.4/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.cpp 2017-10-30 14:49:43.000000000 +0000
+++ mesa-17.3.3/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.cpp 2018-01-18 21:30:28.000000000 +0000
@@ -1,25 +1,26 @@
/******************************************************************************
+* Copyright (C) 2015-2017 Intel Corporation. All Rights Reserved.
*
-* Copyright 2015-2017
-* Intel Corporation
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the "Software"),
+* to deal in the Software without restriction, including without limitation
+* the rights to use, copy, modify, merge, publish, distribute, sublicense,
+* and/or sell copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice (including the next
+* paragraph) shall be included in all copies or substantial portions of the
+* Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+* IN THE SOFTWARE.
*
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
-*
-* http ://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*
-% if gen_header:
-* @file ${filename}.h
-% else:
* @file ${filename}.cpp
-% endif
*
* @brief Dynamic Knobs for Core.
*
@@ -30,105 +31,6 @@
*
******************************************************************************/
<% calc_max_knob_len(knobs) %>
-%if gen_header:
-#pragma once
-#include
-
-struct KnobBase
-{
-private:
- // Update the input string.
- static void autoExpandEnvironmentVariables(std::string &text);
-
-protected:
- // Leave input alone and return new string.
- static std::string expandEnvironmentVariables(std::string const &input)
- {
- std::string text = input;
- autoExpandEnvironmentVariables(text);
- return text;
- }
-
- template
- static T expandEnvironmentVariables(T const &input)
- {
- return input;
- }
-};
-
-template